<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Integration &mdash; Airflow Documentation</title>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Metrics" href="metrics.html" />
<link rel="prev" title="Experimental Rest API" href="api.html" />
<script src="_static/js/modernizr.min.js"></script>
<link rel="canonical" href="https://airflow.apache.org/docs/apache-airflow/stable/integration.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search">
<a href="index.html" class="icon icon-home"> Airflow
</a>
<div class="version">
1.10.2
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="project.html">Project</a></li>
<li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li>
<li class="toctree-l1"><a class="reference internal" href="start.html">Quick Start</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="tutorial.html">Tutorial</a></li>
<li class="toctree-l1"><a class="reference internal" href="howto/index.html">How-to Guides</a></li>
<li class="toctree-l1"><a class="reference internal" href="ui.html">UI / Screenshots</a></li>
<li class="toctree-l1"><a class="reference internal" href="concepts.html">Concepts</a></li>
<li class="toctree-l1"><a class="reference internal" href="profiling.html">Data Profiling</a></li>
<li class="toctree-l1"><a class="reference internal" href="cli.html">Command Line Interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="scheduler.html">Scheduling &amp; Triggers</a></li>
<li class="toctree-l1"><a class="reference internal" href="plugins.html">Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="security.html">Security</a></li>
<li class="toctree-l1"><a class="reference internal" href="timezone.html">Time zones</a></li>
<li class="toctree-l1"><a class="reference internal" href="api.html">Experimental Rest API</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Integration</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#reverse-proxy">Reverse Proxy</a></li>
<li class="toctree-l2"><a class="reference internal" href="#azure-microsoft-azure">Azure: Microsoft Azure</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#azure-blob-storage">Azure Blob Storage</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#wasbblobsensor">WasbBlobSensor</a></li>
<li class="toctree-l4"><a class="reference internal" href="#wasbprefixsensor">WasbPrefixSensor</a></li>
<li class="toctree-l4"><a class="reference internal" href="#filetowasboperator">FileToWasbOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#wasbhook">WasbHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#azure-file-share">Azure File Share</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#azurefilesharehook">AzureFileShareHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#logging">Logging</a></li>
<li class="toctree-l3"><a class="reference internal" href="#azure-cosmosdb">Azure CosmosDB</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#azurecosmosdbhook">AzureCosmosDBHook</a></li>
<li class="toctree-l4"><a class="reference internal" href="#azurecosmosinsertdocumentoperator">AzureCosmosInsertDocumentOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#azurecosmosdocumentsensor">AzureCosmosDocumentSensor</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#azure-data-lake">Azure Data Lake</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#azuredatalakehook">AzureDataLakeHook</a></li>
<li class="toctree-l4"><a class="reference internal" href="#azuredatalakestoragelistoperator">AzureDataLakeStorageListOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#adlstogooglecloudstorageoperator">AdlsToGoogleCloudStorageOperator</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#aws-amazon-web-services">AWS: Amazon Web Services</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#aws-emr">AWS EMR</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#emraddstepsoperator">EmrAddStepsOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#emrcreatejobflowoperator">EmrCreateJobFlowOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#emrterminatejobflowoperator">EmrTerminateJobFlowOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#emrhook">EmrHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#aws-s3">AWS S3</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#s3hook">S3Hook</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3filetransformoperator">S3FileTransformOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3listoperator">S3ListOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3togooglecloudstorageoperator">S3ToGoogleCloudStorageOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3togooglecloudstoragetransferoperator">S3ToGoogleCloudStorageTransferOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3tohivetransfer">S3ToHiveTransfer</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#aws-ec2-container-service">AWS EC2 Container Service</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#ecsoperator">ECSOperator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#aws-batch-service">AWS Batch Service</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#awsbatchoperator">AWSBatchOperator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#aws-redshift">AWS RedShift</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#awsredshiftclustersensor">AwsRedshiftClusterSensor</a></li>
<li class="toctree-l4"><a class="reference internal" href="#redshifthook">RedshiftHook</a></li>
<li class="toctree-l4"><a class="reference internal" href="#redshifttos3transfer">RedshiftToS3Transfer</a></li>
<li class="toctree-l4"><a class="reference internal" href="#s3toredshifttransfer">S3ToRedshiftTransfer</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#amazon-sagemaker">Amazon SageMaker</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#sagemakerhook">SageMakerHook</a></li>
<li class="toctree-l4"><a class="reference internal" href="#sagemakertrainingoperator">SageMakerTrainingOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#sagemakertuningoperator">SageMakerTuningOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#sagemakermodeloperator">SageMakerModelOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#sagemakertransformoperator">SageMakerTransformOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#sagemakerendpointconfigoperator">SageMakerEndpointConfigOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#sagemakerendpointoperator">SageMakerEndpointOperator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#id32">Amazon SageMaker</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#id34">SageMakerHook</a></li>
<li class="toctree-l4"><a class="reference internal" href="#id36">SageMakerTrainingOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#id38">SageMakerTuningOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#id40">SageMakerModelOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#id42">SageMakerTransformOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#id44">SageMakerEndpointConfigOperator</a></li>
<li class="toctree-l4"><a class="reference internal" href="#id46">SageMakerEndpointOperator</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#databricks">Databricks</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#databrickssubmitrunoperator">DatabricksSubmitRunOperator</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#gcp-google-cloud-platform">GCP: Google Cloud Platform</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#id49">Logging</a></li>
<li class="toctree-l3"><a class="reference internal" href="#googlecloudbasehook">GoogleCloudBaseHook</a></li>
<li class="toctree-l3"><a class="reference internal" href="#bigquery">BigQuery</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#bigquery-operators">BigQuery Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#bigqueryhook">BigQueryHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-spanner">Cloud Spanner</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cloud-spanner-operators">Cloud Spanner Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#cloudspannerhook">CloudSpannerHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-sql">Cloud SQL</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cloud-sql-operators">Cloud SQL Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#cloud-sql-hooks">Cloud SQL Hooks</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-bigtable">Cloud Bigtable</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cloud-bigtable-operators">Cloud Bigtable Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#cloud-bigtable-hook">Cloud Bigtable Hook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#compute-engine">Compute Engine</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#compute-engine-operators">Compute Engine Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#compute-engine-hook">Compute Engine Hook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-functions">Cloud Functions</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cloud-functions-operators">Cloud Functions Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#cloud-functions-hook">Cloud Functions Hook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-dataflow">Cloud DataFlow</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#dataflow-operators">DataFlow Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#dataflowhook">DataFlowHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-dataproc">Cloud DataProc</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#dataproc-operators">DataProc Operators</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-datastore">Cloud Datastore</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#datastore-operators">Datastore Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#datastorehook">DatastoreHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-ml-engine">Cloud ML Engine</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cloud-ml-engine-operators">Cloud ML Engine Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#cloud-ml-engine-hook">Cloud ML Engine Hook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#cloud-storage">Cloud Storage</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#storage-operators">Storage Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#googlecloudstoragehook">GoogleCloudStorageHook</a></li>
<li class="toctree-l4"><a class="reference internal" href="#gcptransferservicehook">GCPTransferServiceHook</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#google-kubernetes-engine">Google Kubernetes Engine</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#google-kubernetes-engine-cluster-operators">Google Kubernetes Engine Cluster Operators</a></li>
<li class="toctree-l4"><a class="reference internal" href="#google-kubernetes-engine-hook">Google Kubernetes Engine Hook</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#qubole">Qubole</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#quboleoperator">QuboleOperator</a></li>
<li class="toctree-l3"><a class="reference internal" href="#qubolepartitionsensor">QubolePartitionSensor</a></li>
<li class="toctree-l3"><a class="reference internal" href="#qubolefilesensor">QuboleFileSensor</a></li>
<li class="toctree-l3"><a class="reference internal" href="#qubolecheckoperator">QuboleCheckOperator</a></li>
<li class="toctree-l3"><a class="reference internal" href="#qubolevaluecheckoperator">QuboleValueCheckOperator</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="metrics.html">Metrics</a></li>
<li class="toctree-l1"><a class="reference internal" href="kubernetes.html">Kubernetes</a></li>
<li class="toctree-l1"><a class="reference internal" href="lineage.html">Lineage</a></li>
<li class="toctree-l1"><a class="reference internal" href="changelog.html">Changelog</a></li>
<li class="toctree-l1"><a class="reference internal" href="faq.html">FAQ</a></li>
<li class="toctree-l1"><a class="reference internal" href="code.html">API Reference</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Airflow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> &raquo;</li>
<li>Integration</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/integration.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="integration">
<h1>Integration<a class="headerlink" href="#integration" title="Permalink to this headline"></a></h1>
<ul class="simple">
<li><a class="reference internal" href="#reverseproxy"><span class="std std-ref">Reverse Proxy</span></a></li>
<li><a class="reference internal" href="#azure"><span class="std std-ref">Azure: Microsoft Azure</span></a></li>
<li><a class="reference internal" href="#aws"><span class="std std-ref">AWS: Amazon Web Services</span></a></li>
<li><a class="reference internal" href="#databricks"><span class="std std-ref">Databricks</span></a></li>
<li><a class="reference internal" href="#gcp"><span class="std std-ref">GCP: Google Cloud Platform</span></a></li>
<li><a class="reference internal" href="#qubole"><span class="std std-ref">Qubole</span></a></li>
</ul>
<div class="section" id="reverse-proxy">
<span id="reverseproxy"></span><h2>Reverse Proxy<a class="headerlink" href="#reverse-proxy" title="Permalink to this headline"></a></h2>
<p>Airflow can be set up behind a reverse proxy, and its endpoint can be configured with great
flexibility.</p>
<p>For example, you can configure your reverse proxy to serve Airflow at:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">lab</span><span class="o">.</span><span class="n">mycompany</span><span class="o">.</span><span class="n">com</span><span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">airflow</span><span class="o">/</span>
</pre></div>
</div>
<p>To do so, set the following option in your <cite>airflow.cfg</cite>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">base_url</span> <span class="o">=</span> <span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="n">my_host</span><span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">airflow</span>
</pre></div>
</div>
<p>Additionally, if you use the Celery executor, you can serve Flower at <cite>/myorg/flower</cite> with:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">flower_url_prefix</span> <span class="o">=</span> <span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">flower</span>
</pre></div>
</div>
<p>Your reverse proxy (e.g. nginx) should be configured as follows:</p>
<ul>
<li><p class="first">pass the url and http header as it for the Airflow webserver, without any rewrite, for example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>server {
listen 80;
server_name lab.mycompany.com;
location /myorg/airflow/ {
proxy_pass http://localhost:8080;
proxy_set_header Host $host;
proxy_redirect off;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection &quot;upgrade&quot;;
}
}
</pre></div>
</div>
</li>
<li><p class="first">rewrite the url for the flower endpoint:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>server {
listen 80;
server_name lab.mycompany.com;
location /myorg/flower/ {
rewrite ^/myorg/flower/(.*)$ /$1 break; # remove prefix from http header
proxy_pass http://localhost:5555;
proxy_set_header Host $host;
proxy_redirect off;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection &quot;upgrade&quot;;
}
}
</pre></div>
</div>
</li>
</ul>
<p>To ensure that Airflow generates URLs with the correct scheme when
running behind a TLS-terminating proxy, you should configure the proxy
to set the <cite>X-Forwarded-Proto</cite> header, and enable the <cite>ProxyFix</cite>
middleware in your <cite>airflow.cfg</cite>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">enable_proxy_fix</span> <span class="o">=</span> <span class="kc">True</span>
</pre></div>
</div>
<p>Note: you should only enable the <cite>ProxyFix</cite> middleware when running
Airflow behind a trusted proxy (AWS ELB, nginx, etc.).</p>
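<p>Putting these settings together, a minimal sketch of the relevant <cite>airflow.cfg</cite> entries might look like the following. The section names assume the default layout of the config file; adjust the host and prefixes to your environment:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span># assumed layout: base_url and enable_proxy_fix live in [webserver],
# flower_url_prefix in [celery]
[webserver]
base_url = https://lab.mycompany.com/myorg/airflow
enable_proxy_fix = True

[celery]
flower_url_prefix = /myorg/flower
</pre></div>
</div>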
</div>
<div class="section" id="azure-microsoft-azure">
<span id="azure"></span><h2>Azure: Microsoft Azure<a class="headerlink" href="#azure-microsoft-azure" title="Permalink to this headline"></a></h2>
<p>Airflow has limited support for Microsoft Azure: interfaces exist only for Azure Blob
Storage and Azure Data Lake. The hook, sensor and operator for Blob Storage, as well as the
Azure Data Lake hook, are in the contrib section.</p>
<div class="section" id="azure-blob-storage">
<h3>Azure Blob Storage<a class="headerlink" href="#azure-blob-storage" title="Permalink to this headline"></a></h3>
<p>All classes communicate via the Windows Azure Storage Blob protocol. Make sure that an
Airflow connection of type <cite>wasb</cite> exists. Authorization can be done by supplying a
login (=Storage account name) and password (=KEY), or a login and SAS token in the extra
field (see connection <cite>wasb_default</cite> for an example). A minimal DAG sketch using these
classes follows the list below.</p>
<ul class="simple">
<li><a class="reference internal" href="#wasbblobsensor"><span class="std std-ref">WasbBlobSensor</span></a>: Checks if a blob is present on Azure Blob storage.</li>
<li><a class="reference internal" href="#wasbprefixsensor"><span class="std std-ref">WasbPrefixSensor</span></a>: Checks if blobs matching a prefix are present on Azure Blob storage.</li>
<li><a class="reference internal" href="#filetowasboperator"><span class="std std-ref">FileToWasbOperator</span></a>: Uploads a local file to a container as a blob.</li>
<li><a class="reference internal" href="#wasbhook"><span class="std std-ref">WasbHook</span></a>: Interface with Azure Blob Storage.</li>
</ul>
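<p>As a rough, hypothetical sketch (not taken from the Airflow sources), a minimal DAG that uploads a local file and then waits for the blob to appear might look like this; the file path, container and blob names are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.file_to_wasb import FileToWasbOperator
from airflow.contrib.sensors.wasb_sensor import WasbBlobSensor

# Hypothetical DAG: upload a local file to a container, then confirm the blob exists.
dag = DAG(
    dag_id='example_wasb_upload',
    start_date=datetime(2019, 1, 1),
    schedule_interval=None,
)

upload = FileToWasbOperator(
    task_id='upload_report',
    file_path='/tmp/report.csv',   # local file to upload (placeholder path)
    container_name='reports',      # target container (placeholder)
    blob_name='report.csv',
    wasb_conn_id='wasb_default',
    dag=dag,
)

wait_for_blob = WasbBlobSensor(
    task_id='wait_for_report',
    container_name='reports',
    blob_name='report.csv',
    wasb_conn_id='wasb_default',
    dag=dag,
)

# run the sensor after the upload
upload.set_downstream(wait_for_blob)
</pre></div>
</div>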
<div class="section" id="wasbblobsensor">
<span id="id1"></span><h4>WasbBlobSensor<a class="headerlink" href="#wasbblobsensor" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.sensors.wasb_sensor.WasbBlobSensor">
<em class="property">class </em><code class="descclassname">airflow.contrib.sensors.wasb_sensor.</code><code class="descname">WasbBlobSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbBlobSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbBlobSensor" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p>
<p>Waits for a blob to arrive on Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li>
<li><strong>check_options</strong> (<em>dict</em>) – Optional keyword arguments that
<cite>WasbHook.check_for_blob()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.sensors.wasb_sensor.WasbBlobSensor.poke">
<code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbBlobSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbBlobSensor.poke" title="Permalink to this definition"></a></dt>
<dd><p>Function that sensors deriving from this class should
override.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="wasbprefixsensor">
<span id="id2"></span><h4>WasbPrefixSensor<a class="headerlink" href="#wasbprefixsensor" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor">
<em class="property">class </em><code class="descclassname">airflow.contrib.sensors.wasb_sensor.</code><code class="descname">WasbPrefixSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbPrefixSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p>
<p>Waits for blobs matching a prefix to arrive on Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>prefix</strong> (<em>str</em>) – Prefix of the blob.</li>
<li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li>
<li><strong>check_options</strong> (<em>dict</em>) – Optional keyword arguments that
<cite>WasbHook.check_for_prefix()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor.poke">
<code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbPrefixSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor.poke" title="Permalink to this definition"></a></dt>
<dd><p>Function that sensors deriving from this class should
override.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="filetowasboperator">
<span id="id3"></span><h4>FileToWasbOperator<a class="headerlink" href="#filetowasboperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.file_to_wasb.FileToWasbOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.file_to_wasb.</code><code class="descname">FileToWasbOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_wasb.html#FileToWasbOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_wasb.FileToWasbOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Uploads a file to Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_path</strong> (<em>str</em>) – Path to the file to load. (templated)</li>
<li><strong>container_name</strong> (<em>str</em>) – Name of the container. (templated)</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob. (templated)</li>
<li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li>
<li><strong>load_options</strong> (<em>dict</em>) – Optional keyword arguments that
<cite>WasbHook.load_file()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.operators.file_to_wasb.FileToWasbOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_wasb.html#FileToWasbOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_wasb.FileToWasbOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>Upload a file to Azure Blob Storage.</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="wasbhook">
<span id="id4"></span><h4>WasbHook<a class="headerlink" href="#wasbhook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.wasb_hook.</code><code class="descname">WasbHook</code><span class="sig-paren">(</span><em>wasb_conn_id='wasb_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p>
<p>Interacts with Azure Blob Storage through the wasb:// protocol.</p>
<p>Additional options passed in the ‘extra’ field of the connection will be
passed to the <cite>BlockBlobService()</cite> constructor. For example, authenticate
using a SAS token by adding {“sas_token”: “YOUR_TOKEN”}.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.check_for_blob">
<code class="descname">check_for_blob</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.check_for_blob"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.check_for_blob" title="Permalink to this definition"></a></dt>
<dd><p>Check if a blob exists on Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.exists()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the blob exists, False otherwise.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.check_for_prefix">
<code class="descname">check_for_prefix</code><span class="sig-paren">(</span><em>container_name</em>, <em>prefix</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.check_for_prefix"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.check_for_prefix" title="Permalink to this definition"></a></dt>
<dd><p>Check if a prefix exists on Azure Blob storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>prefix</strong> (<em>str</em>) – Prefix of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.list_blobs()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if blobs matching the prefix exist, False otherwise.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.delete_file">
<code class="descname">delete_file</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>is_prefix=False</em>, <em>ignore_if_missing=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.delete_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.delete_file" title="Permalink to this definition"></a></dt>
<dd><p>Delete a file from Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>is_prefix</strong> (<em>bool</em>) – If blob_name is a prefix, delete all matching files</li>
<li><strong>ignore_if_missing</strong> (<em>bool</em>) – if True, then return success even if the
blob does not exist.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.create_blob_from_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Return the BlockBlobService object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.get_file">
<code class="descname">get_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.get_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.get_file" title="Permalink to this definition"></a></dt>
<dd><p>Download a file from Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_path</strong> (<em>str</em>) – Path to the file to download.</li>
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.create_blob_from_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.load_file">
<code class="descname">load_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.load_file" title="Permalink to this definition"></a></dt>
<dd><p>Upload a file to Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_path</strong> (<em>str</em>) – Path to the file to load.</li>
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.create_blob_from_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.load_string">
<code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.load_string" title="Permalink to this definition"></a></dt>
<dd><p>Upload a string to Azure Blob Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>string_data</strong> (<em>str</em>) – String to load.</li>
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.create_blob_from_text()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.wasb_hook.WasbHook.read_file">
<code class="descname">read_file</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.read_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.read_file" title="Permalink to this definition"></a></dt>
<dd><p>Read a file from Azure Blob Storage and return as a string.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.create_blob_from_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
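<p>For use inside a <cite>PythonOperator</cite> callable or other custom code, a brief, hypothetical usage sketch of the hook might look like this (container and blob names are placeholders):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.wasb_hook import WasbHook

# Hypothetical example: upload a file, then read it back as a string.
hook = WasbHook(wasb_conn_id='wasb_default')
hook.load_file('/tmp/report.csv', container_name='reports', blob_name='report.csv')
if hook.check_for_blob('reports', 'report.csv'):
    content = hook.read_file('reports', 'report.csv')
    print(content[:100])
</pre></div>
</div>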
</div>
</div>
<div class="section" id="azure-file-share">
<h3>Azure File Share<a class="headerlink" href="#azure-file-share" title="Permalink to this headline"></a></h3>
<p>Cloud variant of an SMB file share. Make sure that an Airflow connection of
type <cite>wasb</cite> exists. Authorization can be done by supplying a login (=Storage account name)
and password (=Storage account key), or a login and SAS token in the extra field
(see connection <cite>wasb_default</cite> for an example).</p>
<div class="section" id="azurefilesharehook">
<h4>AzureFileShareHook<a class="headerlink" href="#azurefilesharehook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_fileshare_hook.</code><code class="descname">AzureFileShareHook</code><span class="sig-paren">(</span><em>wasb_conn_id='wasb_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p>
<p>Interacts with Azure FileShare Storage.</p>
<p>Additional options passed in the ‘extra’ field of the connection will be
passed to the <cite>FileService()</cite> constructor.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_directory">
<code class="descname">check_for_directory</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.check_for_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_directory" title="Permalink to this definition"></a></dt>
<dd><p>Check if a directory exists on Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.exists()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the file exists, False otherwise.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_file">
<code class="descname">check_for_file</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.check_for_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_file" title="Permalink to this definition"></a></dt>
<dd><p>Check if a file exists on Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.exists()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the file exists, False otherwise.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.create_directory">
<code class="descname">create_directory</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.create_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.create_directory" title="Permalink to this definition"></a></dt>
<dd><p>Create a new directory on an Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.create_directory()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A list of files and directories</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">list</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Return the FileService object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file">
<code class="descname">get_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file" title="Permalink to this definition"></a></dt>
<dd><p>Download a file from Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_path</strong> (<em>str</em>) – Where to store the file.</li>
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.get_file_to_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file_to_stream">
<code class="descname">get_file_to_stream</code><span class="sig-paren">(</span><em>stream</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_file_to_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file_to_stream" title="Permalink to this definition"></a></dt>
<dd><p>Download a file from Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>stream</strong> (<em>file-like object</em>) – A filehandle to store the file to.</li>
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.get_file_to_stream()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.list_directories_and_files">
<code class="descname">list_directories_and_files</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.list_directories_and_files"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.list_directories_and_files" title="Permalink to this definition"></a></dt>
<dd><p>Return the list of directories and files stored on an Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.list_directories_and_files()</cite> takes.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A list of files and directories</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">list</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_file">
<code class="descname">load_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_file" title="Permalink to this definition"></a></dt>
<dd><p>Upload a file to Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_path</strong> (<em>str</em>) – Path to the file to load.</li>
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.create_file_from_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_stream">
<code class="descname">load_stream</code><span class="sig-paren">(</span><em>stream</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>count</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_stream" title="Permalink to this definition"></a></dt>
<dd><p>Upload a stream to Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>stream</strong> (<em>file-like</em>) – Opened file/stream to upload as the file content.</li>
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>count</strong> (<em>int</em>) – Size of the stream in bytes</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.create_file_from_stream()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_string">
<code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_string" title="Permalink to this definition"></a></dt>
<dd><p>Upload a string to Azure File Share.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>string_data</strong> (<em>str</em>) – String to load.</li>
<li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li>
<li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li>
<li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>FileService.create_file_from_text()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
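<p>For illustration, a minimal sketch of uploading a local file and a string with this hook;
the share, directory and file names below are placeholders, and the hook is instantiated
without arguments so that it uses its default Airflow connection:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from airflow.contrib.hooks.azure_fileshare_hook import AzureFileShareHook

# placeholder names; the hook falls back to its default connection id
hook = AzureFileShareHook()
hook.load_file(file_path='/tmp/report.csv',
               share_name='myshare',
               directory_name='reports',
               file_name='report.csv')
hook.load_string(string_data='hello from Airflow',
                 share_name='myshare',
                 directory_name='reports',
                 file_name='hello.txt')
</pre></div>
</div>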
</div>
</div>
<div class="section" id="logging">
<h3>Logging<a class="headerlink" href="#logging" title="Permalink to this headline"></a></h3>
<p>Airflow can be configured to read and write task logs in Azure Blob Storage.
See <a class="reference internal" href="howto/write-logs.html#write-logs-azure"><span class="std std-ref">Writing Logs to Azure Blob Storage</span></a>.</p>
</div>
<div class="section" id="azure-cosmosdb">
<h3>Azure CosmosDB<a class="headerlink" href="#azure-cosmosdb" title="Permalink to this headline"></a></h3>
<p>AzureCosmosDBHook communicates via the Azure Cosmos library. Make sure that an
Airflow connection of type <cite>azure_cosmos</cite> exists. Authorization can be done by supplying a
login (=Endpoint URI), a password (=secret key) and the extra fields database_name and collection_name to specify the
default database and collection to use (see connection <cite>azure_cosmos_default</cite> for an example).</p>
<ul class="simple">
<li><a class="reference internal" href="#azurecosmosdbhook"><span class="std std-ref">AzureCosmosDBHook</span></a>: Interface with Azure CosmosDB.</li>
<li><span class="xref std std-ref">AzureCosmosInsertDocumentOperator</span>: Simple operator to insert document into CosmosDB.</li>
<li><span class="xref std std-ref">AzureCosmosDocumentSensor</span>: Simple sensor to detect document existence in CosmosDB.</li>
</ul>
<div class="section" id="azurecosmosdbhook">
<span id="id5"></span><h4>AzureCosmosDBHook<a class="headerlink" href="#azurecosmosdbhook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_cosmos_hook.</code><code class="descname">AzureCosmosDBHook</code><span class="sig-paren">(</span><em>azure_cosmos_conn_id='azure_cosmos_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p>
<p>Interacts with Azure CosmosDB.</p>
<p>login should be the endpoint URI and password should be the master key.
Optionally, you can use the following extras to default these values:
{“database_name”: “&lt;DATABASE_NAME&gt;”, “collection_name”: “&lt;COLLECTION_NAME&gt;”}.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>azure_cosmos_conn_id</strong> (<em>str</em>) – Reference to the Azure CosmosDB connection.</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_collection">
<code class="descname">create_collection</code><span class="sig-paren">(</span><em>collection_name</em>, <em>database_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.create_collection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_collection" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new collection in the CosmosDB database.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_database">
<code class="descname">create_database</code><span class="sig-paren">(</span><em>database_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.create_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_database" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new database in CosmosDB.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_collection">
<code class="descname">delete_collection</code><span class="sig-paren">(</span><em>collection_name</em>, <em>database_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.delete_collection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_collection" title="Permalink to this definition"></a></dt>
<dd><p>Deletes an existing collection in the CosmosDB database.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_database">
<code class="descname">delete_database</code><span class="sig-paren">(</span><em>database_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.delete_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_database" title="Permalink to this definition"></a></dt>
<dd><p>Deletes an existing database in CosmosDB.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_document">
<code class="descname">delete_document</code><span class="sig-paren">(</span><em>document_id</em>, <em>database_name=None</em>, <em>collection_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.delete_document"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_document" title="Permalink to this definition"></a></dt>
<dd><p>Delete an existing document out of a collection in the CosmosDB database.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_collection_exist">
<code class="descname">does_collection_exist</code><span class="sig-paren">(</span><em>collection_name</em>, <em>database_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.does_collection_exist"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_collection_exist" title="Permalink to this definition"></a></dt>
<dd><p>Checks if a collection exists in CosmosDB.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_database_exist">
<code class="descname">does_database_exist</code><span class="sig-paren">(</span><em>database_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.does_database_exist"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_database_exist" title="Permalink to this definition"></a></dt>
<dd><p>Checks if a database exists in CosmosDB.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Return a cosmos db client.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_document">
<code class="descname">get_document</code><span class="sig-paren">(</span><em>document_id</em>, <em>database_name=None</em>, <em>collection_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.get_document"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_document" title="Permalink to this definition"></a></dt>
<dd><p>Get a document from an existing collection in the CosmosDB database.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_documents">
<code class="descname">get_documents</code><span class="sig-paren">(</span><em>sql_string</em>, <em>database_name=None</em>, <em>collection_name=None</em>, <em>partition_key=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.get_documents"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_documents" title="Permalink to this definition"></a></dt>
<dd><p>Get a list of documents from an existing collection in the CosmosDB database via SQL query.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.insert_documents">
<code class="descname">insert_documents</code><span class="sig-paren">(</span><em>documents</em>, <em>database_name=None</em>, <em>collection_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.insert_documents"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.insert_documents" title="Permalink to this definition"></a></dt>
<dd><p>Insert a list of new documents into an existing collection in the CosmosDB database.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.upsert_document">
<code class="descname">upsert_document</code><span class="sig-paren">(</span><em>document</em>, <em>database_name=None</em>, <em>collection_name=None</em>, <em>document_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.upsert_document"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.upsert_document" title="Permalink to this definition"></a></dt>
<dd><p>Inserts a new document (or updates an existing one) into an existing
collection in the CosmosDB database.</p>
</dd></dl>
</dd></dl>
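<p>For illustration, a minimal sketch of working with the hook; the database and collection
names are placeholders, and when they are omitted the hook falls back to the defaults stored
in the connection extras:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from airflow.contrib.hooks.azure_cosmos_hook import AzureCosmosDBHook

hook = AzureCosmosDBHook(azure_cosmos_conn_id='azure_cosmos_default')

# create the collection if it does not exist yet (placeholder names)
if not hook.does_collection_exist('mycollection', database_name='mydatabase'):
    hook.create_collection('mycollection', database_name='mydatabase')

# insert or update a document, then query it back via SQL
hook.upsert_document({'id': 'unique-doc-id', 'value': 42},
                     database_name='mydatabase',
                     collection_name='mycollection')
docs = hook.get_documents('SELECT * FROM c',
                          database_name='mydatabase',
                          collection_name='mycollection')
</pre></div>
</div>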
</div>
<div class="section" id="azurecosmosinsertdocumentoperator">
<h4>AzureCosmosInsertDocumentOperator<a class="headerlink" href="#azurecosmosinsertdocumentoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.azure_cosmos_operator.AzureCosmosInsertDocumentOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.azure_cosmos_operator.</code><code class="descname">AzureCosmosInsertDocumentOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/azure_cosmos_operator.html#AzureCosmosInsertDocumentOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.azure_cosmos_operator.AzureCosmosInsertDocumentOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Inserts a new document into the specified Cosmos database and collection
It will create both the database and collection if they do not already exist</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>database_name</strong> (<em>str</em>) – The name of the database. (templated)</li>
<li><strong>collection_name</strong> (<em>str</em>) – The name of the collection. (templated)</li>
<li><strong>document</strong> (<em>dict</em>) – The document to insert</li>
<li><strong>azure_cosmos_conn_id</strong> (<em>str</em>) – reference to a CosmosDB connection.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
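<p>For illustration, a minimal sketch of the operator in a DAG; the database, collection
and document values are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>insert_cosmos_doc = AzureCosmosInsertDocumentOperator(
    task_id='insert_cosmos_doc',
    database_name='mydatabase',
    collection_name='mycollection',
    document={'id': 'unique-doc-id', 'event': 'example'},
    azure_cosmos_conn_id='azure_cosmos_default')
</pre></div>
</div>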
</div>
<div class="section" id="azurecosmosdocumentsensor">
<h4>AzureCosmosDocumentSensor<a class="headerlink" href="#azurecosmosdocumentsensor" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor">
<em class="property">class </em><code class="descclassname">airflow.contrib.sensors.azure_cosmos_sensor.</code><code class="descname">AzureCosmosDocumentSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/azure_cosmos_sensor.html#AzureCosmosDocumentSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p>
<p>Checks for the existence of a document which
matches the given query in CosmosDB. Example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">azure_cosmos_sensor</span> <span class="o">=</span> <span class="n">AzureCosmosDocumentSensor</span><span class="p">(</span><span class="n">database_name</span><span class="o">=</span><span class="s2">&quot;somedatabase_name&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">collection_name</span><span class="o">=</span><span class="s2">&quot;somecollection_name&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">document_id</span><span class="o">=</span><span class="s2">&quot;unique-doc-id&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">azure_cosmos_conn_id</span><span class="o">=</span><span class="s2">&quot;azure_cosmos_default&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">task_id</span><span class="o">=</span><span class="s2">&quot;azure_cosmos_sensor&quot;</span><span class="p">)</span>
</pre></div>
</div>
<dl class="method">
<dt id="airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor.poke">
<code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/azure_cosmos_sensor.html#AzureCosmosDocumentSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor.poke" title="Permalink to this definition"></a></dt>
<dd><p>Function that sensors deriving from this class should override.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="azure-data-lake">
<h3>Azure Data Lake<a class="headerlink" href="#azure-data-lake" title="Permalink to this headline"></a></h3>
<p>AzureDataLakeHook communicates via a REST API compatible with WebHDFS. Make sure that an
Airflow connection of type <cite>azure_data_lake</cite> exists. Authorization can be done by supplying a
login (=Client ID), a password (=Client Secret) and the extra fields tenant (Tenant) and account_name (Account Name)
(see connection <cite>azure_data_lake_default</cite> for an example).</p>
<ul class="simple">
<li><a class="reference internal" href="#azuredatalakehook"><span class="std std-ref">AzureDataLakeHook</span></a>: Interface with Azure Data Lake.</li>
<li><a class="reference internal" href="#azuredatalakestoragelistoperator"><span class="std std-ref">AzureDataLakeStorageListOperator</span></a>: Lists the files located in a specified Azure Data Lake path.</li>
<li><a class="reference internal" href="#adlstogooglecloudstorageoperator"><span class="std std-ref">AdlsToGoogleCloudStorageOperator</span></a>: Copies files from an Azure Data Lake path to a Google Cloud Storage bucket.</li>
</ul>
<div class="section" id="azuredatalakehook">
<span id="id6"></span><h4>AzureDataLakeHook<a class="headerlink" href="#azuredatalakehook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_data_lake_hook.</code><code class="descname">AzureDataLakeHook</code><span class="sig-paren">(</span><em>azure_data_lake_conn_id='azure_data_lake_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p>
<p>Interacts with Azure Data Lake.</p>
<p>Client ID and client secret should be in the user and password parameters.
Tenant and account name should be set in the extra fields as
{“tenant”: “&lt;TENANT&gt;”, “account_name”: “&lt;ACCOUNT_NAME&gt;”}.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – Reference to the Azure Data Lake connection.</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.check_for_file">
<code class="descname">check_for_file</code><span class="sig-paren">(</span><em>file_path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.check_for_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.check_for_file" title="Permalink to this definition"></a></dt>
<dd><p>Check if a file exists on Azure Data Lake.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>file_path</strong> (<em>str</em>) – Path and name of the file.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">True if the file exists, False otherwise.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">bool</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.download_file">
<code class="descname">download_file</code><span class="sig-paren">(</span><em>local_path</em>, <em>remote_path</em>, <em>nthreads=64</em>, <em>overwrite=True</em>, <em>buffersize=4194304</em>, <em>blocksize=4194304</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.download_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.download_file" title="Permalink to this definition"></a></dt>
<dd><p>Download a file from Azure Data Lake.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>local_path</strong> (<em>str</em>) – local path. If downloading a single file, will write to this
specific file, unless it is an existing directory, in which case a file is
created within it. If downloading multiple files, this is the root
directory to write within. Will create directories as required.</li>
<li><strong>remote_path</strong> (<em>str</em>) – remote path/globstring to use to find remote files.
Recursive glob patterns using <cite>**</cite> are not supported.</li>
<li><strong>nthreads</strong> (<em>int</em>) – Number of threads to use. If None, uses the number of cores.</li>
<li><strong>overwrite</strong> (<em>bool</em>) – Whether to forcibly overwrite existing files/directories.
If False and remote path is a directory, will quit regardless if any files
would be overwritten or not. If True, only matching filenames are actually
overwritten.</li>
<li><strong>buffersize</strong> (<em>int</em>) – int [2**22]
Number of bytes for internal buffer. This block cannot be bigger than
a chunk and cannot be smaller than a block.</li>
<li><strong>blocksize</strong> (<em>int</em>) – int [2**22]
Number of bytes for a block. Within each chunk, we write a smaller
block for each API call. This block cannot be bigger than a chunk.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Return an AzureDLFileSystem object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.upload_file">
<code class="descname">upload_file</code><span class="sig-paren">(</span><em>local_path</em>, <em>remote_path</em>, <em>nthreads=64</em>, <em>overwrite=True</em>, <em>buffersize=4194304</em>, <em>blocksize=4194304</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.upload_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.upload_file" title="Permalink to this definition"></a></dt>
<dd><p>Upload a file to Azure Data Lake.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>local_path</strong> (<em>str</em>) – local path. Can be single file, directory (in which case,
upload recursively) or glob pattern. Recursive glob patterns using <cite>**</cite>
are not supported.</li>
<li><strong>remote_path</strong> (<em>str</em>) – Remote path to upload to; if multiple files, this is the
directory root to write within.</li>
<li><strong>nthreads</strong> (<em>int</em>) – Number of threads to use. If None, uses the number of cores.</li>
<li><strong>overwrite</strong> (<em>bool</em>) – Whether to forcibly overwrite existing files/directories.
If False and remote path is a directory, will quit regardless if any files
would be overwritten or not. If True, only matching filenames are actually
overwritten.</li>
<li><strong>buffersize</strong> (<em>int</em>) – int [2**22]
Number of bytes for internal buffer. This block cannot be bigger than
a chunk and cannot be smaller than a block.</li>
<li><strong>blocksize</strong> (<em>int</em>) – int [2**22]
Number of bytes for a block. Within each chunk, we write a smaller
block for each API call. This block cannot be bigger than a chunk.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
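<p>For illustration, a minimal sketch of uploading, checking and downloading a file with
this hook; the local and remote paths are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook

hook = AzureDataLakeHook(azure_data_lake_conn_id='azure_data_lake_default')

# upload a local file, then fetch it back (placeholder paths)
hook.upload_file(local_path='/tmp/report.csv',
                 remote_path='output/report.csv',
                 overwrite=True)
if hook.check_for_file('output/report.csv'):
    hook.download_file(local_path='/tmp/report_copy.csv',
                       remote_path='output/report.csv')
</pre></div>
</div>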
</div>
<div class="section" id="azuredatalakestoragelistoperator">
<span id="id7"></span><h4>AzureDataLakeStorageListOperator<a class="headerlink" href="#azuredatalakestoragelistoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.adls_list_operator.</code><code class="descname">AzureDataLakeStorageListOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/adls_list_operator.html#AzureDataLakeStorageListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>List all files from the specified path.</p>
<p>This operator returns a Python list with the names of the files, which can be used by
<cite>xcom</cite> in downstream tasks.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>path</strong> (<em>str</em>) – The Azure Data Lake path to find the objects. Supports glob
strings (templated)</li>
<li><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – The connection ID to use when
connecting to Azure Data Lake Storage.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt><strong>Example</strong>:</dt>
<dd><p class="first">The following Operator would list all the Parquet files from <code class="docutils literal notranslate"><span class="pre">folder/output/</span></code>
folder in the specified ADLS account</p>
<div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">adls_files</span> <span class="o">=</span> <span class="n">AzureDataLakeStorageListOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;adls_files&#39;</span><span class="p">,</span>
<span class="n">path</span><span class="o">=</span><span class="s1">&#39;folder/output/*.parquet&#39;</span><span class="p">,</span>
<span class="n">azure_data_lake_conn_id</span><span class="o">=</span><span class="s1">&#39;azure_data_lake_default&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="adlstogooglecloudstorageoperator">
<span id="id8"></span><h4>AdlsToGoogleCloudStorageOperator<a class="headerlink" href="#adlstogooglecloudstorageoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.adls_to_gcs.AdlsToGoogleCloudStorageOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.adls_to_gcs.</code><code class="descname">AdlsToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/adls_to_gcs.html#AdlsToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.adls_to_gcs.AdlsToGoogleCloudStorageOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator" title="airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator</span></code></a></p>
<p>Synchronizes an Azure Data Lake Storage path with a GCS bucket</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>src_adls</strong> (<em>str</em>) – The Azure Data Lake path to find the objects (templated)</li>
<li><strong>dest_gcs</strong> (<em>str</em>) – The Google Cloud Storage bucket and prefix to
store the objects. (templated)</li>
<li><strong>replace</strong> (<em>bool</em>) – If true, replaces same-named files in GCS</li>
<li><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – The connection ID to use when
connecting to Azure Data Lake Storage.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>str</em>) – The connection ID to use when
connecting to Google Cloud Storage.</li>
<li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt><strong>Examples</strong>:</dt>
<dd><p class="first">The following Operator would copy a single file named
<code class="docutils literal notranslate"><span class="pre">hello/world.avro</span></code> from ADLS to the GCS bucket <code class="docutils literal notranslate"><span class="pre">mybucket</span></code>. Its full
resulting gcs path will be <code class="docutils literal notranslate"><span class="pre">gs://mybucket/hello/world.avro</span></code></p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_single_file</span> <span class="o">=</span> <span class="n">AdlsToGoogleCloudStorageOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;copy_single_file&#39;</span><span class="p">,</span>
<span class="n">src_adls</span><span class="o">=</span><span class="s1">&#39;hello/world.avro&#39;</span><span class="p">,</span>
<span class="n">dest_gcs</span><span class="o">=</span><span class="s1">&#39;gs://mybucket&#39;</span><span class="p">,</span>
<span class="n">replace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">azure_data_lake_conn_id</span><span class="o">=</span><span class="s1">&#39;azure_data_lake_default&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">&#39;google_cloud_default&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
<p>The following Operator would copy all parquet files from ADLS
to the GCS bucket <code class="docutils literal notranslate"><span class="pre">mybucket</span></code>.</p>
<div class="last highlight-default notranslate"><div class="highlight"><pre><span></span> copy_all_files = AdlsToGoogleCloudStorageOperator(
task_id=&#39;copy_all_files&#39;,
src_adls=&#39;*.parquet&#39;,
dest_gcs=&#39;gs://mybucket&#39;,
replace=False,
azure_data_lake_conn_id=&#39;azure_data_lake_default&#39;,
google_cloud_storage_conn_id=&#39;google_cloud_default&#39;
)
The following Operator would copy all parquet files from ADLS
path ``/hello/world``to the GCS bucket ``mybucket``. ::
copy_world_files = AdlsToGoogleCloudStorageOperator(
task_id=&#39;copy_world_files&#39;,
src_adls=&#39;hello/world/*.parquet&#39;,
dest_gcs=&#39;gs://mybucket&#39;,
replace=False,
azure_data_lake_conn_id=&#39;azure_data_lake_default&#39;,
google_cloud_storage_conn_id=&#39;google_cloud_default&#39;
)
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
</div>
</div>
</div>
<div class="section" id="aws-amazon-web-services">
<span id="aws"></span><h2>AWS: Amazon Web Services<a class="headerlink" href="#aws-amazon-web-services" title="Permalink to this headline"></a></h2>
<p>Airflow has extensive support for Amazon Web Services. But note that the Hooks, Sensors and
Operators are in the contrib section.</p>
<div class="section" id="aws-emr">
<h3>AWS EMR<a class="headerlink" href="#aws-emr" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><a class="reference internal" href="#emraddstepsoperator"><span class="std std-ref">EmrAddStepsOperator</span></a> : Adds steps to an existing EMR JobFlow.</li>
<li><a class="reference internal" href="#emrcreatejobflowoperator"><span class="std std-ref">EmrCreateJobFlowOperator</span></a> : Creates an EMR JobFlow, reading the config from the EMR connection.</li>
<li><a class="reference internal" href="#emrterminatejobflowoperator"><span class="std std-ref">EmrTerminateJobFlowOperator</span></a> : Terminates an EMR JobFlow.</li>
<li><a class="reference internal" href="#emrhook"><span class="std std-ref">EmrHook</span></a> : Interact with AWS EMR.</li>
</ul>
<div class="section" id="emraddstepsoperator">
<span id="id9"></span><h4>EmrAddStepsOperator<a class="headerlink" href="#emraddstepsoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_add_steps_operator.</code><code class="descname">EmrAddStepsOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_add_steps_operator.html#EmrAddStepsOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>An operator that adds steps to an existing EMR job_flow.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>job_flow_id</strong> (<em>str</em>) – id of the JobFlow to add steps to. (templated)</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li>
<li><strong>steps</strong> (<em>list</em>) – boto3 style steps to be added to the jobflow. (templated)</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
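<p>For illustration, a minimal sketch that adds a single boto3-style step to an existing
job flow; the job flow id and the step definition are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>add_steps = EmrAddStepsOperator(
    task_id='add_steps',
    job_flow_id='j-XXXXXXXXXXXXX',  # placeholder job flow id
    aws_conn_id='aws_default',
    steps=[{
        'Name': 'calculate_pi',
        'ActionOnFailure': 'CONTINUE',
        'HadoopJarStep': {
            'Jar': 'command-runner.jar',
            'Args': ['/usr/lib/spark/bin/run-example', 'SparkPi', '10'],
        },
    }])
</pre></div>
</div>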
</div>
<div class="section" id="emrcreatejobflowoperator">
<span id="id10"></span><h4>EmrCreateJobFlowOperator<a class="headerlink" href="#emrcreatejobflowoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_create_job_flow_operator.</code><code class="descname">EmrCreateJobFlowOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_create_job_flow_operator.html#EmrCreateJobFlowOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates an EMR JobFlow, reading the config from the EMR connection.
A dictionary of JobFlow overrides can be passed that override
the config from the connection.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li>
<li><strong>emr_conn_id</strong> (<em>str</em>) – emr connection to use</li>
<li><strong>job_flow_overrides</strong> (<em>dict</em>) – boto3 style arguments to override
emr_connection extra. (templated)</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
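<p>For illustration, a minimal sketch that creates a job flow from the <cite>emr_default</cite>
connection, overriding only a couple of values; the override values are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>create_job_flow = EmrCreateJobFlowOperator(
    task_id='create_job_flow',
    aws_conn_id='aws_default',
    emr_conn_id='emr_default',
    job_flow_overrides={
        'Name': 'my-cluster',               # placeholder cluster name
        'Instances': {'InstanceCount': 3},  # boto3 run_job_flow style override
    })
</pre></div>
</div>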
</div>
<div class="section" id="emrterminatejobflowoperator">
<span id="id11"></span><h4>EmrTerminateJobFlowOperator<a class="headerlink" href="#emrterminatejobflowoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_terminate_job_flow_operator.</code><code class="descname">EmrTerminateJobFlowOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_terminate_job_flow_operator.html#EmrTerminateJobFlowOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Operator to terminate EMR JobFlows.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>job_flow_id</strong> (<em>str</em>) – id of the JobFlow to terminate. (templated)</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="emrhook">
<span id="id12"></span><h4>EmrHook<a class="headerlink" href="#emrhook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.emr_hook.EmrHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.emr_hook.</code><code class="descname">EmrHook</code><span class="sig-paren">(</span><em>emr_conn_id=None</em>, <em>region_name=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/emr_hook.html#EmrHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.emr_hook.EmrHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p>
<p>Interact with AWS EMR. emr_conn_id is only necessary for using the
create_job_flow method.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.emr_hook.EmrHook.create_job_flow">
<code class="descname">create_job_flow</code><span class="sig-paren">(</span><em>job_flow_overrides</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/emr_hook.html#EmrHook.create_job_flow"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.emr_hook.EmrHook.create_job_flow" title="Permalink to this definition"></a></dt>
<dd><p>Creates a job flow using the config from the EMR connection.
The keys of the JSON extra hash may be any of the arguments of the boto3
run_job_flow method.
Overrides for this config may be passed as the job_flow_overrides.</p>
</dd></dl>
</dd></dl>
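<p>For illustration, a minimal sketch of creating a job flow directly from the hook; the
override value is a placeholder and the remaining configuration is read from the
<cite>emr_default</cite> connection extra:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from airflow.contrib.hooks.emr_hook import EmrHook

hook = EmrHook(emr_conn_id='emr_default', region_name='us-east-1')
response = hook.create_job_flow({'Name': 'my-cluster'})
# the response follows the boto3 run_job_flow format
job_flow_id = response['JobFlowId']
</pre></div>
</div>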
</div>
</div>
<div class="section" id="aws-s3">
<h3>AWS S3<a class="headerlink" href="#aws-s3" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><a class="reference internal" href="#s3hook"><span class="std std-ref">S3Hook</span></a> : Interact with AWS S3.</li>
<li><a class="reference internal" href="#s3filetransformoperator"><span class="std std-ref">S3FileTransformOperator</span></a> : Copies data from a source S3 location to a temporary location on the local filesystem.</li>
<li><a class="reference internal" href="#s3listoperator"><span class="std std-ref">S3ListOperator</span></a> : Lists the files matching a key prefix from a S3 location.</li>
<li><a class="reference internal" href="#s3togooglecloudstorageoperator"><span class="std std-ref">S3ToGoogleCloudStorageOperator</span></a> : Syncs an S3 location with a Google Cloud Storage bucket.</li>
<li><a class="reference internal" href="#s3togooglecloudstoragetransferoperator"><span class="std std-ref">S3ToGoogleCloudStorageTransferOperator</span></a> : Syncs an S3 bucket with a Google Cloud Storage bucket using the GCP Storage Transfer Service.</li>
<li><a class="reference internal" href="#s3tohivetransfer"><span class="std std-ref">S3ToHiveTransfer</span></a> : Moves data from S3 to Hive. The operator downloads a file from S3, stores the file locally before loading it into a Hive table.</li>
</ul>
<div class="section" id="s3hook">
<span id="id13"></span><h4>S3Hook<a class="headerlink" href="#s3hook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.hooks.S3_hook.S3Hook">
<em class="property">class </em><code class="descclassname">airflow.hooks.S3_hook.</code><code class="descname">S3Hook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em>, <em>verify=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p>
<p>Interact with AWS S3, using the boto3 library.</p>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.check_for_bucket">
<code class="descname">check_for_bucket</code><span class="sig-paren">(</span><em>bucket_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Check if bucket_name exists.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.check_for_key">
<code class="descname">check_for_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_key" title="Permalink to this definition"></a></dt>
<dd><p>Checks if a key exists in a bucket</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.check_for_prefix">
<code class="descname">check_for_prefix</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix</em>, <em>delimiter</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_prefix"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_prefix" title="Permalink to this definition"></a></dt>
<dd><p>Checks that a prefix exists in a bucket</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li>
<li><strong>prefix</strong> (<em>str</em>) – a key prefix</li>
<li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.check_for_wildcard_key">
<code class="descname">check_for_wildcard_key</code><span class="sig-paren">(</span><em>wildcard_key</em>, <em>bucket_name=None</em>, <em>delimiter=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_wildcard_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_wildcard_key" title="Permalink to this definition"></a></dt>
<dd><p>Checks that a key matching a wildcard expression exists in a bucket</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>wildcard_key</strong> (<em>str</em>) – the path to the key</li>
<li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li>
<li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.copy_object">
<code class="descname">copy_object</code><span class="sig-paren">(</span><em>source_bucket_key</em>, <em>dest_bucket_key</em>, <em>source_bucket_name=None</em>, <em>dest_bucket_name=None</em>, <em>source_version_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.copy_object"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.copy_object" title="Permalink to this definition"></a></dt>
<dd><p>Creates a copy of an object that is already stored in S3.</p>
<p>Note: the S3 connection used here needs to have access to both
source and destination bucket/key.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_bucket_key</strong> (<em>str</em>) – <p>The key of the source object.</p>
<p>It can be either full s3:// style url or relative path from root level.</p>
<p>When it’s specified as a full s3:// url, please omit source_bucket_name.</p>
</li>
<li><strong>dest_bucket_key</strong> (<em>str</em>) – <p>The key of the object to copy to.</p>
<p>The convention to specify <cite>dest_bucket_key</cite> is the same
as <cite>source_bucket_key</cite>.</p>
</li>
<li><strong>source_bucket_name</strong> (<em>str</em>) – <p>Name of the S3 bucket where the source object is in.</p>
<p>It should be omitted when <cite>source_bucket_key</cite> is provided as a full s3:// url.</p>
</li>
<li><strong>dest_bucket_name</strong> (<em>str</em>) – <p>Name of the S3 bucket to where the object is copied.</p>
<p>It should be omitted when <cite>dest_bucket_key</cite> is provided as a full s3:// url.</p>
</li>
<li><strong>source_version_id</strong> (<em>str</em>) – Version ID of the source object (OPTIONAL)</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
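<p>For illustration, both ways of specifying the source and destination; the bucket and key
names are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from airflow.hooks.S3_hook import S3Hook

hook = S3Hook(aws_conn_id='aws_default')

# relative keys together with explicit bucket names
hook.copy_object(source_bucket_key='data/in.csv',
                 dest_bucket_key='data/out.csv',
                 source_bucket_name='source-bucket',
                 dest_bucket_name='dest-bucket')

# or full s3:// urls, in which case the bucket-name arguments are omitted
hook.copy_object(source_bucket_key='s3://source-bucket/data/in.csv',
                 dest_bucket_key='s3://dest-bucket/data/out.csv')
</pre></div>
</div>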
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.create_bucket">
<code class="descname">create_bucket</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>region_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.create_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.create_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Creates an Amazon S3 bucket.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket_name</strong> (<em>str</em>) – The name of the bucket</li>
<li><strong>region_name</strong> (<em>str</em>) – The name of the aws region in which to create the bucket.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.delete_objects">
<code class="descname">delete_objects</code><span class="sig-paren">(</span><em>bucket</em>, <em>keys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.delete_objects"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.delete_objects" title="Permalink to this definition"></a></dt>
<dd><table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>str</em>) – Name of the bucket in which you are going to delete object(s)</li>
<li><strong>keys</strong> (<em>str</em><em> or </em><em>list</em>) – <p>The key(s) to delete from S3 bucket.</p>
<p>When <code class="docutils literal notranslate"><span class="pre">keys</span></code> is a string, it’s supposed to be the key name of
the single object to delete.</p>
<p>When <code class="docutils literal notranslate"><span class="pre">keys</span></code> is a list, it’s supposed to be the list of the
keys to delete.</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
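<p>For illustration, deleting a single key and then a list of keys; the bucket and key
names are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from airflow.hooks.S3_hook import S3Hook

hook = S3Hook(aws_conn_id='aws_default')
hook.delete_objects(bucket='mybucket', keys='tmp/old.csv')
hook.delete_objects(bucket='mybucket', keys=['tmp/a.csv', 'tmp/b.csv'])
</pre></div>
</div>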
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.get_bucket">
<code class="descname">get_bucket</code><span class="sig-paren">(</span><em>bucket_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Returns a boto3.S3.Bucket object</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.get_key">
<code class="descname">get_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_key" title="Permalink to this definition"></a></dt>
<dd><p>Returns a boto3.s3.Object</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>key</strong> (<em>str</em>) – the path to the key</li>
<li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.get_wildcard_key">
<code class="descname">get_wildcard_key</code><span class="sig-paren">(</span><em>wildcard_key</em>, <em>bucket_name=None</em>, <em>delimiter=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_wildcard_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_wildcard_key" title="Permalink to this definition"></a></dt>
<dd><p>Returns a boto3.s3.Object object matching the wildcard expression</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>wildcard_key</strong> (<em>str</em>) – the path to the key</li>
<li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li>
<li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.list_keys">
<code class="descname">list_keys</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.list_keys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.list_keys" title="Permalink to this definition"></a></dt>
<dd><p>Lists keys in a bucket under prefix and not containing delimiter</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li>
<li><strong>prefix</strong> (<em>str</em>) – a key prefix</li>
<li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li>
<li><strong>page_size</strong> (<em>int</em>) – pagination size</li>
<li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li>
</ul>
</td>
</tr>
</tbody>
</table>
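<p><strong>Example</strong>: a minimal sketch of listing keys with the hook; the connection id
<code class="docutils literal notranslate"><span class="pre">aws_default</span></code> and the bucket <code class="docutils literal notranslate"><span class="pre">my-data-bucket</span></code> below are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.hooks.S3_hook import S3Hook

hook = S3Hook(aws_conn_id='aws_default')
# list every key under a date partition, fetching at most 100 keys per page
keys = hook.list_keys(
    bucket_name='my-data-bucket',
    prefix='logs/2019/01/01/',
    delimiter='/',
    page_size=100,
)
for key in keys or []:   # list_keys returns None when nothing matches
    print(key)
</pre></div>
</div>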
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.list_prefixes">
<code class="descname">list_prefixes</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.list_prefixes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.list_prefixes" title="Permalink to this definition"></a></dt>
<dd><p>Lists prefixes in a bucket under prefix</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li>
<li><strong>prefix</strong> (<em>str</em>) – a key prefix</li>
<li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li>
<li><strong>page_size</strong> (<em>int</em>) – pagination size</li>
<li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.load_bytes">
<code class="descname">load_bytes</code><span class="sig-paren">(</span><em>bytes_data</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_bytes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_bytes" title="Permalink to this definition"></a></dt>
<dd><p>Loads bytes to S3</p>
<p>This is provided as a convenience to drop bytes in S3. It uses the
boto infrastructure to ship a file to S3.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bytes_data</strong> (<em>bytes</em>) – bytes to set as content for the key.</li>
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li>
<li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key
if it already exists</li>
<li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side
by S3 and will be stored in an encrypted form while at rest in S3.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.load_file">
<code class="descname">load_file</code><span class="sig-paren">(</span><em>filename</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_file" title="Permalink to this definition"></a></dt>
<dd><p>Loads a local file to S3</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>filename</strong> (<em>str</em>) – name of the file to load.</li>
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li>
<li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key
if it already exists. If replace is False and the key exists, an
error will be raised.</li>
<li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side
by S3 and will be stored in an encrypted form while at rest in S3.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.load_file_obj">
<code class="descname">load_file_obj</code><span class="sig-paren">(</span><em>file_obj</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_file_obj"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_file_obj" title="Permalink to this definition"></a></dt>
<dd><p>Loads a file object to S3</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>file_obj</strong> (<em>file-like object</em>) – The file-like object to set as the content for the S3 key.</li>
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li>
<li><strong>replace</strong> (<em>bool</em>) – A flag that indicates whether to overwrite the key
if it already exists.</li>
<li><strong>encrypt</strong> (<em>bool</em>) – If True, S3 encrypts the file on the server,
and the file is stored in encrypted form at rest in S3.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.load_string">
<code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em>, <em>encoding='utf-8'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_string" title="Permalink to this definition"></a></dt>
<dd><p>Loads a string to S3</p>
<p>This is provided as a convenience to drop a string in S3. It uses the
boto infrastructure to ship a file to S3.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>string_data</strong> (<em>str</em>) – string to set as content for the key.</li>
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li>
<li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key
if it already exists</li>
<li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side
by S3 and will be stored in an encrypted form while at rest in S3.</li>
</ul>
</td>
</tr>
</tbody>
</table>
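<p><strong>Example</strong>: a minimal sketch of uploading a small string; the bucket and key
names below are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.hooks.S3_hook import S3Hook

hook = S3Hook(aws_conn_id='aws_default')
# replace=True overwrites an existing key; encrypt=True requests
# server-side encryption at rest in S3
hook.load_string(
    string_data='id,value\n1,42\n',
    key='reports/2019-01-01.csv',
    bucket_name='my-data-bucket',
    replace=True,
    encrypt=True,
)
</pre></div>
</div>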
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.read_key">
<code class="descname">read_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.read_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.read_key" title="Permalink to this definition"></a></dt>
<dd><p>Reads a key from S3</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.hooks.S3_hook.S3Hook.select_key">
<code class="descname">select_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em>, <em>expression='SELECT * FROM S3Object'</em>, <em>expression_type='SQL'</em>, <em>input_serialization=None</em>, <em>output_serialization=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.select_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.select_key" title="Permalink to this definition"></a></dt>
<dd><p>Reads a key with S3 Select.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li>
<li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li>
<li><strong>expression</strong> (<em>str</em>) – S3 Select expression</li>
<li><strong>expression_type</strong> (<em>str</em>) – S3 Select expression type</li>
<li><strong>input_serialization</strong> (<em>dict</em>) – S3 Select input data serialization format</li>
<li><strong>output_serialization</strong> (<em>dict</em>) – S3 Select output data serialization format</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">retrieved subset of original data by S3 Select</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">str</p>
</td>
</tr>
</tbody>
</table>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more details about S3 Select parameters:
<a class="reference external" href="http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.select_object_content">http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.select_object_content</a></p>
</div>
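<p><strong>Example</strong>: a minimal sketch that retrieves only the matching rows of a CSV
object with S3 Select; the key, bucket and column names below are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.hooks.S3_hook import S3Hook

hook = S3Hook(aws_conn_id='aws_default')
subset = hook.select_key(
    key='reports/2019-01-01.csv',
    bucket_name='my-data-bucket',
    expression="SELECT * FROM S3Object s WHERE s.id = '1'",
    input_serialization={'CSV': {'FileHeaderInfo': 'Use'}},
    output_serialization={'CSV': {}},
)
print(subset)  # the selected rows, returned as a string
</pre></div>
</div>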
</dd></dl>
</dd></dl>
</div>
<div class="section" id="s3filetransformoperator">
<span id="id14"></span><h4>S3FileTransformOperator<a class="headerlink" href="#s3filetransformoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.operators.s3_file_transform_operator.S3FileTransformOperator">
<em class="property">class </em><code class="descclassname">airflow.operators.s3_file_transform_operator.</code><code class="descname">S3FileTransformOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_file_transform_operator.html#S3FileTransformOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_file_transform_operator.S3FileTransformOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Copies data from a source S3 location to a temporary location on the
local filesystem. Runs a transformation on this file as specified by
the transformation script and uploads the output to a destination S3
location.</p>
<p>The locations of the source and the destination files in the local
filesystem are provided as the first and second arguments to the
transformation script. The transformation script is expected to read the
data from the source, transform it, and write the output to the local
destination file. The operator then takes over control and uploads the
local destination file to S3.</p>
<p>S3 Select is also available to filter the source contents. Users can
omit the transformation script if an S3 Select expression is specified.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_s3_key</strong> (<em>str</em>) – The key to be retrieved from S3. (templated)</li>
<li><strong>source_aws_conn_id</strong> (<em>str</em>) – source s3 connection</li>
<li><strong>source_verify</strong> (<em>bool</em><em> or </em><em>str</em>) – <p>Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:</p>
<ul>
<li><dl class="first docutils">
<dt><code class="docutils literal notranslate"><span class="pre">False</span></code>: do not validate SSL certificates. SSL will still be used</dt>
<dd>(unless use_ssl is False), but SSL certificates will not be
verified.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt><code class="docutils literal notranslate"><span class="pre">path/to/cert/bundle.pem</span></code>: A filename of the CA cert bundle to use.</dt>
<dd>You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.</dd>
</dl>
</li>
</ul>
<p>This is also applicable to <code class="docutils literal notranslate"><span class="pre">dest_verify</span></code>.</p>
</li>
<li><strong>dest_s3_key</strong> (<em>str</em>) – The key to be written to S3. (templated)</li>
<li><strong>dest_aws_conn_id</strong> (<em>str</em>) – destination s3 connection</li>
<li><strong>replace</strong> (<em>bool</em>) – Replace dest S3 key if it already exists</li>
<li><strong>transform_script</strong> (<em>str</em>) – location of the executable transformation script</li>
<li><strong>select_expression</strong> (<em>str</em>) – S3 Select expression</li>
</ul>
</td>
</tr>
</tbody>
</table>
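<p><strong>Example</strong>: a minimal sketch of wiring the operator into a DAG. The script path,
keys and <code class="docutils literal notranslate"><span class="pre">dag</span></code> object below are placeholders; the script must be executable and
is called with the local source and destination paths as its two arguments.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.operators.s3_file_transform_operator import S3FileTransformOperator

transform = S3FileTransformOperator(
    task_id='transform_customers',
    source_s3_key='s3://my-source-bucket/customers/{{ ds }}/raw.csv',
    dest_s3_key='s3://my-dest-bucket/customers/{{ ds }}/clean.csv',
    transform_script='/usr/local/bin/transform.py',
    replace=True,
    dag=dag,
)
</pre></div>
</div>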
</dd></dl>
</div>
<div class="section" id="s3listoperator">
<span id="id15"></span><h4>S3ListOperator<a class="headerlink" href="#s3listoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.s3_list_operator.S3ListOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_list_operator.</code><code class="descname">S3ListOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_list_operator.html#S3ListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>List all objects from the bucket with the given string prefix in name.</p>
<p>This operator returns a Python list with the names of the objects, which can be
used by <cite>xcom</cite> in downstream tasks.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>bucket</strong> (<em>string</em>) – The S3 bucket where to find the objects. (templated)</li>
<li><strong>prefix</strong> (<em>string</em>) – Prefix string that filters the objects whose names begin with
this prefix. (templated)</li>
<li><strong>delimiter</strong> (<em>string</em>) – the delimiter marks key hierarchy. (templated)</li>
<li><strong>aws_conn_id</strong> (<em>string</em>) – The connection ID to use when connecting to S3 storage.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Parame verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- False: do not validate SSL certificates. SSL will still be used</p>
<blockquote>
<div><p>(unless use_ssl is False), but SSL certificates will not be
verified.</p>
</div></blockquote>
<ul class="last simple">
<li><dl class="first docutils">
<dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.</dt>
<dd>You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.</dd>
</dl>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt><strong>Example</strong>:</dt>
<dd><p class="first">The following operator would list all the files
(excluding subfolders) from the S3
<code class="docutils literal notranslate"><span class="pre">customers/2018/04/</span></code> key in the <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket.</p>
<div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">s3_file</span> <span class="o">=</span> <span class="n">S3ListOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;list_3s_files&#39;</span><span class="p">,</span>
<span class="n">bucket</span><span class="o">=</span><span class="s1">&#39;data&#39;</span><span class="p">,</span>
<span class="n">prefix</span><span class="o">=</span><span class="s1">&#39;customers/2018/04/&#39;</span><span class="p">,</span>
<span class="n">delimiter</span><span class="o">=</span><span class="s1">&#39;/&#39;</span><span class="p">,</span>
<span class="n">aws_conn_id</span><span class="o">=</span><span class="s1">&#39;aws_customers_conn&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="s3togooglecloudstorageoperator">
<span id="id16"></span><h4>S3ToGoogleCloudStorageOperator<a class="headerlink" href="#s3togooglecloudstorageoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_to_gcs_operator.</code><code class="descname">S3ToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_to_gcs_operator.html#S3ToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator" title="airflow.contrib.operators.s3_list_operator.S3ListOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.s3_list_operator.S3ListOperator</span></code></a></p>
<p>Synchronizes an S3 key, possibly a prefix, with a Google Cloud Storage
destination path.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>bucket</strong> (<em>string</em>) – The S3 bucket where to find the objects. (templated)</li>
<li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose names begin with
this prefix. (templated)</li>
<li><strong>delimiter</strong> (<em>string</em>) – the delimiter marks key hierarchy. (templated)</li>
<li><strong>aws_conn_id</strong> (<em>string</em>) – The source S3 connection</li>
<li><strong>dest_gcs_conn_id</strong> (<em>string</em>) – The destination connection ID to use
when connecting to Google Cloud Storage.</li>
<li><strong>dest_gcs</strong> (<em>string</em>) – The destination Google Cloud Storage bucket and prefix
where you want to store the files. (templated)</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>replace</strong> (<em>bool</em>) – Whether you want to replace existing destination files
or not.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Parame verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- False: do not validate SSL certificates. SSL will still be used</p>
<blockquote>
<div><p>(unless use_ssl is False), but SSL certificates will not be
verified.</p>
</div></blockquote>
<ul class="last simple">
<li><dl class="first docutils">
<dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.</dt>
<dd>You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.</dd>
</dl>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p><strong>Example</strong>:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">s3_to_gcs_op</span> <span class="o">=</span> <span class="n">S3ToGoogleCloudStorageOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;s3_to_gcs_example&#39;</span><span class="p">,</span>
<span class="n">bucket</span><span class="o">=</span><span class="s1">&#39;my-s3-bucket&#39;</span><span class="p">,</span>
<span class="n">prefix</span><span class="o">=</span><span class="s1">&#39;data/customers-201804&#39;</span><span class="p">,</span>
<span class="n">dest_gcs_conn_id</span><span class="o">=</span><span class="s1">&#39;google_cloud_default&#39;</span><span class="p">,</span>
<span class="n">dest_gcs</span><span class="o">=</span><span class="s1">&#39;gs://my.gcs.bucket/some/customers/&#39;</span><span class="p">,</span>
<span class="n">replace</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">my</span><span class="o">-</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
<p>Note that <code class="docutils literal notranslate"><span class="pre">bucket</span></code>, <code class="docutils literal notranslate"><span class="pre">prefix</span></code>, <code class="docutils literal notranslate"><span class="pre">delimiter</span></code> and <code class="docutils literal notranslate"><span class="pre">dest_gcs</span></code> are
templated, so you can use variables in them if you wish.</p>
</dd></dl>
</div>
<div class="section" id="s3togooglecloudstoragetransferoperator">
<span id="id17"></span><h4>S3ToGoogleCloudStorageTransferOperator<a class="headerlink" href="#s3togooglecloudstoragetransferoperator" title="Permalink to this headline"></a></h4>
</div>
<div class="section" id="s3tohivetransfer">
<span id="id18"></span><h4>S3ToHiveTransfer<a class="headerlink" href="#s3tohivetransfer" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.operators.s3_to_hive_operator.S3ToHiveTransfer">
<em class="property">class </em><code class="descclassname">airflow.operators.s3_to_hive_operator.</code><code class="descname">S3ToHiveTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_hive_operator.html#S3ToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_hive_operator.S3ToHiveTransfer" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Moves data from S3 to Hive. The operator downloads a file from S3
and stores it locally before loading it into a Hive table.
If the <code class="docutils literal notranslate"><span class="pre">create</span></code> or <code class="docutils literal notranslate"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal notranslate"><span class="pre">True</span></code>,
<code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal notranslate"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated.
Hive data types are inferred from the cursor’s metadata.</p>
<p>Note that the table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code>
which isn’t the most efficient serialization format. If a
large amount of data is loaded and/or if the table gets
queried considerably, you may want to use this operator only to
stage the data into a temporary table before loading it into its
final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>s3_key</strong> (<em>str</em>) – The key to be retrieved from S3. (templated)</li>
<li><strong>field_dict</strong> (<em>dict</em>) – A dictionary of the fields name in the file
as keys and their Hive types as values</li>
<li><strong>hive_table</strong> (<em>str</em>) – target Hive table, use dot notation to target a
specific database. (templated)</li>
<li><strong>create</strong> (<em>bool</em>) – whether to create the table if it doesn’t exist</li>
<li><strong>recreate</strong> (<em>bool</em>) – whether to drop and recreate the table at every
execution</li>
<li><strong>partition</strong> (<em>dict</em>) – target partition as a dict of partition columns
and values. (templated)</li>
<li><strong>headers</strong> (<em>bool</em>) – whether the file contains column names on the first
line</li>
<li><strong>check_headers</strong> (<em>bool</em>) – whether the column names on the first line should be
checked against the keys of field_dict</li>
<li><strong>wildcard_match</strong> (<em>bool</em>) – whether the s3_key should be interpreted as a Unix
wildcard pattern</li>
<li><strong>delimiter</strong> (<em>str</em>) – field delimiter in the file</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – source s3 connection</li>
<li><strong>hive_cli_conn_id</strong> (<em>str</em>) – destination hive connection</li>
<li><strong>input_compressed</strong> (<em>bool</em>) – Boolean to determine if file decompression is
required to process headers</li>
<li><strong>tblproperties</strong> (<em>dict</em>) – TBLPROPERTIES of the hive table being created</li>
<li><strong>select_expression</strong> (<em>str</em>) – S3 Select expression</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Parame verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- False: do not validate SSL certificates. SSL will still be used</p>
<blockquote>
<div><p>(unless use_ssl is False), but SSL certificates will not be
verified.</p>
</div></blockquote>
<ul class="last simple">
<li><dl class="first docutils">
<dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.</dt>
<dd>You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.</dd>
</dl>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
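<p><strong>Example</strong>: a minimal sketch of staging a CSV file into Hive. The bucket, table
and <code class="docutils literal notranslate"><span class="pre">dag</span></code> object below are placeholders; <code class="docutils literal notranslate"><span class="pre">field_dict</span></code> should preserve column
order, so an <code class="docutils literal notranslate"><span class="pre">OrderedDict</span></code> is the safe choice.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from collections import OrderedDict

from airflow.operators.s3_to_hive_operator import S3ToHiveTransfer

stage_orders = S3ToHiveTransfer(
    task_id='stage_orders',
    s3_key='s3://my-data-bucket/orders/{{ ds }}/orders.csv',
    field_dict=OrderedDict([('order_id', 'BIGINT'), ('amount', 'DOUBLE')]),
    hive_table='staging.orders',
    partition={'ds': '{{ ds }}'},
    headers=True,
    check_headers=True,
    delimiter=',',
    aws_conn_id='aws_default',
    hive_cli_conn_id='hive_cli_default',
    dag=dag,
)
</pre></div>
</div>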
</dd></dl>
</div>
</div>
<div class="section" id="aws-ec2-container-service">
<h3>AWS EC2 Container Service<a class="headerlink" href="#aws-ec2-container-service" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><a class="reference internal" href="#ecsoperator"><span class="std std-ref">ECSOperator</span></a> : Execute a task on AWS EC2 Container Service.</li>
</ul>
<div class="section" id="ecsoperator">
<span id="id19"></span><h4>ECSOperator<a class="headerlink" href="#ecsoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.ecs_operator.ECSOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.ecs_operator.</code><code class="descname">ECSOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/ecs_operator.html#ECSOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.ecs_operator.ECSOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Execute a task on AWS EC2 Container Service</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>task_definition</strong> (<em>str</em>) – the task definition name on EC2 Container Service</li>
<li><strong>cluster</strong> (<em>str</em>) – the cluster name on EC2 Container Service</li>
<li><strong>overrides</strong> (<em>dict</em>) – the same parameter that boto3 will receive (templated):
<a class="reference external" href="http://boto3.readthedocs.org/en/latest/reference/services/ecs.html#ECS.Client.run_task">http://boto3.readthedocs.org/en/latest/reference/services/ecs.html#ECS.Client.run_task</a></li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – connection id of AWS credentials / region name. If None,
credential boto3 strategy will be used
(<a class="reference external" href="http://boto3.readthedocs.io/en/latest/guide/configuration.html">http://boto3.readthedocs.io/en/latest/guide/configuration.html</a>).</li>
<li><strong>region_name</strong> (<em>str</em>) – region name to use in AWS Hook.
Override the region_name in connection (if provided)</li>
<li><strong>launch_type</strong> (<em>str</em>) – the launch type on which to run your task (‘EC2’ or ‘FARGATE’)</li>
</ul>
</td>
</tr>
</tbody>
</table>
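<p><strong>Example</strong>: a minimal sketch of running a container task on an existing cluster;
the task definition, cluster, container name and <code class="docutils literal notranslate"><span class="pre">dag</span></code> object below are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.ecs_operator import ECSOperator

run_job = ECSOperator(
    task_id='run_my_container',
    task_definition='my-task-definition',
    cluster='my-ecs-cluster',
    overrides={
        'containerOverrides': [
            {'name': 'my-container',
             'command': ['python', 'job.py', '{{ ds }}']},
        ],
    },
    aws_conn_id='aws_default',
    region_name='us-east-1',
    launch_type='EC2',
    dag=dag,
)
</pre></div>
</div>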
</dd></dl>
</div>
</div>
<div class="section" id="aws-batch-service">
<h3>AWS Batch Service<a class="headerlink" href="#aws-batch-service" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><a class="reference internal" href="#awsbatchoperator"><span class="std std-ref">AWSBatchOperator</span></a> : Execute a task on AWS Batch Service.</li>
</ul>
<div class="section" id="awsbatchoperator">
<span id="id20"></span><h4>AWSBatchOperator<a class="headerlink" href="#awsbatchoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.awsbatch_operator.AWSBatchOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.awsbatch_operator.</code><code class="descname">AWSBatchOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/awsbatch_operator.html#AWSBatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.awsbatch_operator.AWSBatchOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Execute a job on AWS Batch Service</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>job_name</strong> (<em>str</em>) – the name for the job that will run on AWS Batch</li>
<li><strong>job_definition</strong> (<em>str</em>) – the job definition name on AWS Batch</li>
<li><strong>job_queue</strong> (<em>str</em>) – the queue name on AWS Batch</li>
<li><strong>overrides</strong> (<em>dict</em>) – the same parameter that boto3 will receive on
containerOverrides (templated):
<a class="reference external" href="http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job">http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job</a></li>
<li><strong>max_retries</strong> (<em>int</em>) – number of exponential backoff retries used while polling for
job completion (no boto3 waiter is available yet); 4200 corresponds to roughly 48 hours</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – connection id of AWS credentials / region name. If None,
credential boto3 strategy will be used
(<a class="reference external" href="http://boto3.readthedocs.io/en/latest/guide/configuration.html">http://boto3.readthedocs.io/en/latest/guide/configuration.html</a>).</li>
<li><strong>region_name</strong> (<em>str</em>) – region name to use in AWS Hook.
Override the region_name in connection (if provided)</li>
</ul>
</td>
</tr>
</tbody>
</table>
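<p><strong>Example</strong>: a minimal sketch of submitting a Batch job; the job names, queue and
<code class="docutils literal notranslate"><span class="pre">dag</span></code> object below are placeholders. The <code class="docutils literal notranslate"><span class="pre">overrides</span></code> dict is passed to boto3 as
<code class="docutils literal notranslate"><span class="pre">containerOverrides</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.awsbatch_operator import AWSBatchOperator

submit_job = AWSBatchOperator(
    task_id='submit_batch_job',
    job_name='nightly-aggregation',
    job_definition='my-job-definition',
    job_queue='my-job-queue',
    overrides={'command': ['python', 'aggregate.py', '{{ ds }}']},
    aws_conn_id='aws_default',
    region_name='us-east-1',
    dag=dag,
)
</pre></div>
</div>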
</dd></dl>
</div>
</div>
<div class="section" id="aws-redshift">
<h3>AWS RedShift<a class="headerlink" href="#aws-redshift" title="Permalink to this headline"></a></h3>
<ul class="simple">
<li><a class="reference internal" href="#awsredshiftclustersensor"><span class="std std-ref">AwsRedshiftClusterSensor</span></a> : Waits for a Redshift cluster to reach a specific status.</li>
<li><a class="reference internal" href="#redshifthook"><span class="std std-ref">RedshiftHook</span></a> : Interact with AWS Redshift, using the boto3 library.</li>
<li><a class="reference internal" href="#redshifttos3transfer"><span class="std std-ref">RedshiftToS3Transfer</span></a> : Executes an unload command to S3 as CSV with or without headers.</li>
<li><a class="reference internal" href="#s3toredshifttransfer"><span class="std std-ref">S3ToRedshiftTransfer</span></a> : Executes an copy command from S3 as CSV with or without headers.</li>
</ul>
<div class="section" id="awsredshiftclustersensor">
<span id="id21"></span><h4>AwsRedshiftClusterSensor<a class="headerlink" href="#awsredshiftclustersensor" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor">
<em class="property">class </em><code class="descclassname">airflow.contrib.sensors.aws_redshift_cluster_sensor.</code><code class="descname">AwsRedshiftClusterSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_redshift_cluster_sensor.html#AwsRedshiftClusterSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p>
<p>Waits for a Redshift cluster to reach a specific status.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>cluster_identifier</strong> (<em>str</em>) – The identifier for the cluster being pinged.</li>
<li><strong>target_status</strong> (<em>str</em>) – The cluster status desired.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor.poke">
<code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_redshift_cluster_sensor.html#AwsRedshiftClusterSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor.poke" title="Permalink to this definition"></a></dt>
<dd><p>Function that sensors deriving from this class should
override.</p>
</dd></dl>
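<p><strong>Example</strong>: a minimal sketch of waiting for a cluster to become available;
the cluster identifier and <code class="docutils literal notranslate"><span class="pre">dag</span></code> object below are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.sensors.aws_redshift_cluster_sensor import AwsRedshiftClusterSensor

wait_for_cluster = AwsRedshiftClusterSensor(
    task_id='wait_for_redshift',
    cluster_identifier='my-redshift-cluster',
    target_status='available',
    aws_conn_id='aws_default',
    poke_interval=60,        # check once a minute
    timeout=60 * 60,         # give up after an hour
    dag=dag,
)
</pre></div>
</div>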
</dd></dl>
</div>
<div class="section" id="redshifthook">
<span id="id22"></span><h4>RedshiftHook<a class="headerlink" href="#redshifthook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.redshift_hook.</code><code class="descname">RedshiftHook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em>, <em>verify=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p>
<p>Interact with AWS Redshift, using the boto3 library</p>
<dl class="method">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.cluster_status">
<code class="descname">cluster_status</code><span class="sig-paren">(</span><em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.cluster_status"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.cluster_status" title="Permalink to this definition"></a></dt>
<dd><p>Return status of a cluster</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.create_cluster_snapshot">
<code class="descname">create_cluster_snapshot</code><span class="sig-paren">(</span><em>snapshot_identifier</em>, <em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.create_cluster_snapshot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.create_cluster_snapshot" title="Permalink to this definition"></a></dt>
<dd><p>Creates a snapshot of a cluster</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>snapshot_identifier</strong> (<em>str</em>) – unique identifier for a snapshot of a cluster</li>
<li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.delete_cluster">
<code class="descname">delete_cluster</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>skip_final_cluster_snapshot=True</em>, <em>final_cluster_snapshot_identifier=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.delete_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.delete_cluster" title="Permalink to this definition"></a></dt>
<dd><p>Delete a cluster and optionally create a snapshot</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li>
<li><strong>skip_final_cluster_snapshot</strong> (<em>bool</em>) – determines cluster snapshot creation</li>
<li><strong>final_cluster_snapshot_identifier</strong> (<em>str</em>) – name of final cluster snapshot</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.describe_cluster_snapshots">
<code class="descname">describe_cluster_snapshots</code><span class="sig-paren">(</span><em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.describe_cluster_snapshots"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.describe_cluster_snapshots" title="Permalink to this definition"></a></dt>
<dd><p>Gets a list of snapshots for a cluster</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.restore_from_cluster_snapshot">
<code class="descname">restore_from_cluster_snapshot</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>snapshot_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.restore_from_cluster_snapshot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.restore_from_cluster_snapshot" title="Permalink to this definition"></a></dt>
<dd><p>Restores a cluster from its snapshot</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li>
<li><strong>snapshot_identifier</strong> (<em>str</em>) – unique identifier for a snapshot of a cluster</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
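<p><strong>Example</strong>: a minimal sketch of snapshotting a cluster and checking its status;
the identifiers below are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.redshift_hook import RedshiftHook

hook = RedshiftHook(aws_conn_id='aws_default')
hook.create_cluster_snapshot(
    snapshot_identifier='my-cluster-20190101',
    cluster_identifier='my-redshift-cluster',
)
# cluster_status returns the status string reported by the Redshift API
if hook.cluster_status('my-redshift-cluster') == 'available':
    print('cluster is available')
</pre></div>
</div>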
</dd></dl>
</div>
<div class="section" id="redshifttos3transfer">
<span id="id23"></span><h4>RedshiftToS3Transfer<a class="headerlink" href="#redshifttos3transfer" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer">
<em class="property">class </em><code class="descclassname">airflow.operators.redshift_to_s3_operator.</code><code class="descname">RedshiftToS3Transfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/redshift_to_s3_operator.html#RedshiftToS3Transfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Executes an UNLOAD command to s3 as a CSV with headers</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>schema</strong> (<em>string</em>) – reference to a specific schema in redshift database</li>
<li><strong>table</strong> (<em>string</em>) – reference to a specific table in redshift database</li>
<li><strong>s3_bucket</strong> (<em>string</em>) – reference to a specific S3 bucket</li>
<li><strong>s3_key</strong> (<em>string</em>) – reference to a specific S3 key</li>
<li><strong>redshift_conn_id</strong> (<em>string</em>) – reference to a specific redshift database</li>
<li><strong>aws_conn_id</strong> (<em>string</em>) – reference to a specific S3 connection</li>
<li><strong>unload_options</strong> (<em>list</em>) – reference to a list of UNLOAD options</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Parame verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- False: do not validate SSL certificates. SSL will still be used</p>
<blockquote>
<div><p>(unless use_ssl is False), but SSL certificates will not be
verified.</p>
</div></blockquote>
<ul class="last simple">
<li><dl class="first docutils">
<dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.</dt>
<dd>You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.</dd>
</dl>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
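<p><strong>Example</strong>: a minimal sketch of unloading a table to S3; the schema, table,
bucket and <code class="docutils literal notranslate"><span class="pre">dag</span></code> object below are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.operators.redshift_to_s3_operator import RedshiftToS3Transfer

unload_orders = RedshiftToS3Transfer(
    task_id='unload_orders',
    schema='public',
    table='orders',
    s3_bucket='my-data-bucket',
    s3_key='exports/orders',
    redshift_conn_id='redshift_default',
    aws_conn_id='aws_default',
    unload_options=['ALLOWOVERWRITE', 'PARALLEL OFF'],
    dag=dag,
)
</pre></div>
</div>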
</dd></dl>
</div>
<div class="section" id="s3toredshifttransfer">
<span id="id24"></span><h4>S3ToRedshiftTransfer<a class="headerlink" href="#s3toredshifttransfer" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer">
<em class="property">class </em><code class="descclassname">airflow.operators.s3_to_redshift_operator.</code><code class="descname">S3ToRedshiftTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_redshift_operator.html#S3ToRedshiftTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Executes an COPY command to load files from s3 to Redshift</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>schema</strong> (<em>string</em>) – reference to a specific schema in redshift database</li>
<li><strong>table</strong> (<em>string</em>) – reference to a specific table in redshift database</li>
<li><strong>s3_bucket</strong> (<em>string</em>) – reference to a specific S3 bucket</li>
<li><strong>s3_key</strong> (<em>string</em>) – reference to a specific S3 key</li>
<li><strong>redshift_conn_id</strong> (<em>string</em>) – reference to a specific redshift database</li>
<li><strong>aws_conn_id</strong> (<em>string</em>) – reference to a specific S3 connection</li>
<li><strong>copy_options</strong> (<em>list</em>) – reference to a list of COPY options</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Parame verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- False: do not validate SSL certificates. SSL will still be used</p>
<blockquote>
<div><p>(unless use_ssl is False), but SSL certificates will not be
verified.</p>
</div></blockquote>
<ul class="last simple">
<li><dl class="first docutils">
<dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.</dt>
<dd>You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.</dd>
</dl>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
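<p><strong>Example</strong>: a minimal sketch of copying CSV files from S3 into a Redshift table;
the schema, table, bucket and <code class="docutils literal notranslate"><span class="pre">dag</span></code> object below are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.operators.s3_to_redshift_operator import S3ToRedshiftTransfer

copy_orders = S3ToRedshiftTransfer(
    task_id='copy_orders',
    schema='public',
    table='orders',
    s3_bucket='my-data-bucket',
    s3_key='exports/orders',
    redshift_conn_id='redshift_default',
    aws_conn_id='aws_default',
    copy_options=['CSV', 'IGNOREHEADER 1'],
    dag=dag,
)
</pre></div>
</div>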
</dd></dl>
</div>
</div>
<div class="section" id="amazon-sagemaker">
<h3>Amazon SageMaker<a class="headerlink" href="#amazon-sagemaker" title="Permalink to this headline"></a></h3>
<p>For more instructions on using Amazon SageMaker in Airflow, please see <a class="reference external" href="https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/workflow/README.rst">the SageMaker Python SDK README</a>.</p>
<ul class="simple">
<li><span class="xref std std-ref">SageMakerHook</span> : Interact with Amazon SageMaker.</li>
<li><span class="xref std std-ref">SageMakerTrainingOperator</span> : Create a SageMaker training job.</li>
<li><span class="xref std std-ref">SageMakerTuningOperator</span> : Create a SageMaker tuning job.</li>
<li><span class="xref std std-ref">SageMakerModelOperator</span> : Create a SageMaker model.</li>
<li><span class="xref std std-ref">SageMakerTransformOperator</span> : Create a SageMaker transform job.</li>
<li><span class="xref std std-ref">SageMakerEndpointConfigOperator</span> : Create a SageMaker endpoint config.</li>
<li><span class="xref std std-ref">SageMakerEndpointOperator</span> : Create a SageMaker endpoint.</li>
</ul>
<div class="section" id="sagemakerhook">
<span id="id25"></span><h4>SageMakerHook<a class="headerlink" href="#sagemakerhook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.sagemaker_hook.</code><code class="descname">SageMakerHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p>
<p>Interact with Amazon SageMaker.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_s3_url">
<code class="descname">check_s3_url</code><span class="sig-paren">(</span><em>s3url</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_s3_url"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_s3_url" title="Permalink to this definition"></a></dt>
<dd><p>Check if an S3 URL exists</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>s3url</strong> (<em>str</em>) – S3 url</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">bool</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_status">
<code class="descname">check_status</code><span class="sig-paren">(</span><em>job_name</em>, <em>key</em>, <em>describe_function</em>, <em>check_interval</em>, <em>max_ingestion_time</em>, <em>non_terminal_states=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_status"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_status" title="Permalink to this definition"></a></dt>
<dd><p>Check status of a SageMaker job</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>job_name</strong> (<em>str</em>) – name of the job whose status to check</li>
<li><strong>key</strong> (<em>str</em>) – the key of the response dict
that points to the state</li>
<li><strong>describe_function</strong> (<em>python callable</em>) – the function used to retrieve the status</li>
<li><strong>args</strong> – the arguments for the function</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds between successive checks of
the SageMaker job’s status</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
<li><strong>non_terminal_states</strong> (<em>set</em>) – the set of nonterminal states</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">response of describe call after job is done</p>
</td>
</tr>
</tbody>
</table>
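<p><strong>Example</strong>: a minimal sketch of polling a training job until it finishes. The job
name below is a placeholder, and the <code class="docutils literal notranslate"><span class="pre">describe_training_job</span></code> method used as the
describe function is assumed to be available on the hook.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.sagemaker_hook import SageMakerHook

hook = SageMakerHook(aws_conn_id='aws_default')
# poll every 30 seconds; fail if the job runs longer than two hours
response = hook.check_status(
    job_name='my-training-job',
    key='TrainingJobStatus',
    describe_function=hook.describe_training_job,
    check_interval=30,
    max_ingestion_time=2 * 60 * 60,
)
</pre></div>
</div>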
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_config">
<code class="descname">check_training_config</code><span class="sig-paren">(</span><em>training_config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_training_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_config" title="Permalink to this definition"></a></dt>
<dd><p>Check if a training configuration is valid</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>training_config</strong> (<em>dict</em>) – training_config</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_status_with_log">
<code class="descname">check_training_status_with_log</code><span class="sig-paren">(</span><em>job_name</em>, <em>non_terminal_states</em>, <em>failed_states</em>, <em>wait_for_completion</em>, <em>check_interval</em>, <em>max_ingestion_time</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_training_status_with_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_status_with_log" title="Permalink to this definition"></a></dt>
<dd><p>Display the logs for a given training job, optionally tailing them until the
job is complete.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>job_name</strong> (<em>str</em>) – name of the training job to check status and display logs for</li>
<li><strong>non_terminal_states</strong> (<em>set</em>) – the set of non_terminal states</li>
<li><strong>failed_states</strong> (<em>set</em>) – the set of failed states</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether to keep looking for new log entries
until the job completes</li>
<li><strong>check_interval</strong> (<em>int</em>) – The interval in seconds between polling for new log entries and job completion</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_tuning_config">
<code class="descname">check_tuning_config</code><span class="sig-paren">(</span><em>tuning_config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_tuning_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_tuning_config" title="Permalink to this definition"></a></dt>
<dd><p>Check if a tuning configuration is valid</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>tuning_config</strong> (<em>dict</em>) – tuning_config</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.configure_s3_resources">
<code class="descname">configure_s3_resources</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.configure_s3_resources"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.configure_s3_resources" title="Permalink to this definition"></a></dt>
<dd><p>Extract the S3 operations from the configuration and execute them.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – config of SageMaker operation</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint">
<code class="descname">create_endpoint</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_endpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint" title="Permalink to this definition"></a></dt>
<dd><p>Create an endpoint</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>config</strong> (<em>dict</em>) – the config for endpoint</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval, in seconds, at which the operator
checks the status of the SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to endpoint creation</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint_config">
<code class="descname">create_endpoint_config</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_endpoint_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint_config" title="Permalink to this definition"></a></dt>
<dd><p>Create an endpoint config</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – the config for endpoint-config</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A response to endpoint config creation</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_model">
<code class="descname">create_model</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_model" title="Permalink to this definition"></a></dt>
<dd><p>Create a SageMaker model</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – the config for model</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A response to model creation</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_training_job">
<code class="descname">create_training_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>print_log=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_training_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_training_job" title="Permalink to this definition"></a></dt>
<dd><p>Create a training job</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>config</strong> (<em>dict</em>) – the config for training</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval, in seconds, at which the operator
checks the status of the SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to training job creation</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
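<p>For illustration, a minimal sketch of starting a training job from the hook; <code class="docutils literal notranslate"><span class="pre">training_config</span></code> is a placeholder assumed to follow the boto3 <code class="docutils literal notranslate"><span class="pre">CreateTrainingJob</span></code> request shape:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.sagemaker_hook import SageMakerHook

# Placeholder: fill with a boto3 CreateTrainingJob request dict.
training_config = {}

hook = SageMakerHook(aws_conn_id='aws_default')
response = hook.create_training_job(
    training_config,
    wait_for_completion=True,   # block until the job reaches a terminal state
    print_log=True,             # stream CloudWatch logs while waiting
    check_interval=30,
)
</pre></div>
</div>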
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_transform_job">
<code class="descname">create_transform_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_transform_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_transform_job" title="Permalink to this definition"></a></dt>
<dd><p>Create a transform job</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>config</strong> (<em>dict</em>) – the config for transform job</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval, in seconds, at which the operator
checks the status of the SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to transform job creation</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_tuning_job">
<code class="descname">create_tuning_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_tuning_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_tuning_job" title="Permalink to this definition"></a></dt>
<dd><p>Create a tuning job</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>config</strong> (<em>dict</em>) – the config for tuning</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval, in seconds, at which the operator
checks the status of the SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to tuning job creation</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint">
<code class="descname">describe_endpoint</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_endpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint" title="Permalink to this definition"></a></dt>
<dd><table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the endpoint</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the endpoint info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint_config">
<code class="descname">describe_endpoint_config</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_endpoint_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint_config" title="Permalink to this definition"></a></dt>
<dd><p>Return the endpoint config info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the endpoint config</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the endpoint config info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_model">
<code class="descname">describe_model</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_model" title="Permalink to this definition"></a></dt>
<dd><p>Return the SageMaker model info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the SageMaker model</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the model info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job">
<code class="descname">describe_training_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_training_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job" title="Permalink to this definition"></a></dt>
<dd><p>Return the training job info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the training job</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the training job info</td>
</tr>
</tbody>
</table>
</dd></dl>
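<p>For illustration, a minimal sketch; the job name is a placeholder, and <code class="docutils literal notranslate"><span class="pre">TrainingJobStatus</span></code> is a key of the boto3 <code class="docutils literal notranslate"><span class="pre">DescribeTrainingJob</span></code> response:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.sagemaker_hook import SageMakerHook

hook = SageMakerHook(aws_conn_id='aws_default')
info = hook.describe_training_job('my-training-job')  # placeholder job name
status = info['TrainingJobStatus']
</pre></div>
</div>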
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job_with_log">
<code class="descname">describe_training_job_with_log</code><span class="sig-paren">(</span><em>job_name</em>, <em>positions</em>, <em>stream_names</em>, <em>instance_count</em>, <em>state</em>, <em>last_description</em>, <em>last_describe_job_call</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_training_job_with_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job_with_log" title="Permalink to this definition"></a></dt>
<dd><p>Return the training job info associated with job_name and print CloudWatch logs</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_transform_job">
<code class="descname">describe_transform_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_transform_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_transform_job" title="Permalink to this definition"></a></dt>
<dd><p>Return the transform job info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the transform job</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the transform job info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_tuning_job">
<code class="descname">describe_tuning_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_tuning_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_tuning_job" title="Permalink to this definition"></a></dt>
<dd><p>Return the tuning job info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the tuning job</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the tuning job info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Establish an AWS connection for SageMaker</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-class docutils literal notranslate"><span class="pre">SageMaker.Client</span></code></a></td>
</tr>
</tbody>
</table>
</dd></dl>
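<p>Because the returned object is the boto3 SageMaker client, calls that the hook does not wrap can be made on it directly. A minimal sketch, assuming standard boto3 APIs such as <code class="docutils literal notranslate"><span class="pre">list_training_jobs</span></code>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.sagemaker_hook import SageMakerHook

client = SageMakerHook(aws_conn_id='aws_default').get_conn()

# list_training_jobs is a standard boto3 SageMaker API call.
jobs = client.list_training_jobs(MaxResults=10)
for summary in jobs['TrainingJobSummaries']:
    print(summary['TrainingJobName'], summary['TrainingJobStatus'])
</pre></div>
</div>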
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_log_conn">
<code class="descname">get_log_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.get_log_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_log_conn" title="Permalink to this definition"></a></dt>
<dd><p>Establish an AWS connection for retrieving logs during training</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><code class="xref py py-class docutils literal notranslate"><span class="pre">CloudWatchLog.Client</span></code></td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.log_stream">
<code class="descname">log_stream</code><span class="sig-paren">(</span><em>log_group</em>, <em>stream_name</em>, <em>start_time=0</em>, <em>skip=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.log_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.log_stream" title="Permalink to this definition"></a></dt>
<dd><p>A generator for log items in a single stream. This will yield all the
items that are available at the current moment.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>log_group</strong> (<em>str</em>) – The name of the log group.</li>
<li><strong>stream_name</strong> (<em>str</em>) – The name of the specific stream.</li>
<li><strong>start_time</strong> (<em>int</em>) – The time stamp value to start reading the logs from (default: 0).</li>
<li><strong>skip</strong> (<em>int</em>) – The number of log entries to skip at the start (default: 0).
This is for when there are multiple entries at the same timestamp.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><p class="first">dict</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last"><div class="line-block">
<div class="line">A CloudWatch log event with the following key-value pairs:</div>
<div class="line-block">
<div class="line">’timestamp’ (int): The time in milliseconds of the event.</div>
<div class="line">’message’ (str): The log event data.</div>
<div class="line">’ingestionTime’ (int): The time in milliseconds the event was ingested.</div>
</div>
</div>
</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
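<p>For illustration, a minimal sketch of tailing a single stream; the stream name is a placeholder, and the log group is the one SageMaker training jobs write to:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.sagemaker_hook import SageMakerHook

hook = SageMakerHook(aws_conn_id='aws_default')
for event in hook.log_stream(
        log_group='/aws/sagemaker/TrainingJobs',
        stream_name='my-training-job/algo-1-1234567890',  # placeholder stream name
        start_time=0,
        skip=0):
    print(event['timestamp'], event['message'])
</pre></div>
</div>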
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.multi_stream_iter">
<code class="descname">multi_stream_iter</code><span class="sig-paren">(</span><em>log_group</em>, <em>streams</em>, <em>positions=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.multi_stream_iter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.multi_stream_iter" title="Permalink to this definition"></a></dt>
<dd><p>Iterate over the available events coming from a set of log streams in a single log group
interleaving the events from each stream so they’re yielded in timestamp order.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>log_group</strong> (<em>str</em>) – The name of the log group.</li>
<li><strong>streams</strong> (<em>list</em>) – A list of the log stream names. The position of the stream in this list is
the stream number.</li>
<li><strong>positions</strong> (<em>list</em>) – A list of pairs of (timestamp, skip) which represents the last record
read from each stream.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A tuple of (stream number, cloudwatch log event).</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.tar_and_s3_upload">
<code class="descname">tar_and_s3_upload</code><span class="sig-paren">(</span><em>path</em>, <em>key</em>, <em>bucket</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.tar_and_s3_upload"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.tar_and_s3_upload" title="Permalink to this definition"></a></dt>
<dd><p>Tar a local file or directory and upload it to S3</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>path</strong> (<em>str</em>) – local file or directory</li>
<li><strong>key</strong> (<em>str</em>) – S3 key</li>
<li><strong>bucket</strong> (<em>str</em>) – S3 bucket</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
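<p>For illustration, a minimal sketch; the path, key and bucket below are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.sagemaker_hook import SageMakerHook

hook = SageMakerHook(aws_conn_id='aws_default')
hook.tar_and_s3_upload(
    path='/tmp/training_code',              # local file or directory to tar
    key='sagemaker/source/source.tar.gz',   # placeholder S3 key
    bucket='my-sagemaker-bucket',           # placeholder S3 bucket
)
</pre></div>
</div>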
<dl class="method">
<dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.update_endpoint">
<code class="descname">update_endpoint</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.update_endpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.update_endpoint" title="Permalink to this definition"></a></dt>
<dd><p>Update an endpoint</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>config</strong> (<em>dict</em>) – the config for endpoint</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval, in seconds, at which the operator
checks the status of the SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to endpoint update</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="sagemakertrainingoperator">
<span id="id26"></span><h4>SageMakerTrainingOperator<a class="headerlink" href="#sagemakertrainingoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.sagemaker_training_operator.SageMakerTrainingOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_training_operator.</code><code class="descname">SageMakerTrainingOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_training_operator.html#SageMakerTrainingOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_training_operator.SageMakerTrainingOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Initiate a SageMaker training job.</p>
<p>This operator returns the ARN of the training job created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a training job (templated).</p>
<p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_training_job()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether the operator should wait until the training job finishes.</li>
<li><strong>print_log</strong> (<em>bool</em>) – Whether the operator should print the CloudWatch log during training.</li>
<li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds,
at which the operator checks the status of the training job.</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails if the training job
doesn’t finish within max_ingestion_time seconds. If you set this parameter to None,
the operation does not timeout.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
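<p>For illustration, a minimal DAG sketch; <code class="docutils literal notranslate"><span class="pre">training_config</span></code> is a placeholder assumed to follow the boto3 <code class="docutils literal notranslate"><span class="pre">CreateTrainingJob</span></code> request shape:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.sagemaker_training_operator import SageMakerTrainingOperator

# Placeholder: fill with a boto3 CreateTrainingJob request dict.
training_config = {}

with DAG('sagemaker_training_example',
         start_date=datetime(2019, 1, 1),
         schedule_interval=None) as dag:
    train = SageMakerTrainingOperator(
        task_id='train_model',
        config=training_config,
        aws_conn_id='aws_default',
        wait_for_completion=True,
        print_log=True,
        check_interval=30,
    )
</pre></div>
</div>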
</div>
<div class="section" id="sagemakertuningoperator">
<span id="id27"></span><h4>SageMakerTuningOperator<a class="headerlink" href="#sagemakertuningoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.sagemaker_tuning_operator.SageMakerTuningOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_tuning_operator.</code><code class="descname">SageMakerTuningOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_tuning_operator.html#SageMakerTuningOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_tuning_operator.SageMakerTuningOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Initiate a SageMaker hyperparameter tuning job.</p>
<p>This operator returns the ARN of the tuning job created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a tuning job (templated).</p>
<p>For details of the configuration parameter see
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_hyper_parameter_tuning_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_hyper_parameter_tuning_job()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the tuning job finishes.</li>
<li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds,
that this operation waits to check the status of the tuning job.</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails
if the tuning job doesn’t finish within max_ingestion_time seconds. If you
set this parameter to None, the operation does not timeout.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
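<p>For illustration, a minimal sketch; <code class="docutils literal notranslate"><span class="pre">tuning_config</span></code> is a placeholder assumed to follow the boto3 <code class="docutils literal notranslate"><span class="pre">CreateHyperParameterTuningJob</span></code> request shape, and <code class="docutils literal notranslate"><span class="pre">dag</span></code> is an existing DAG object:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.sagemaker_tuning_operator import SageMakerTuningOperator

# Placeholder: fill with a boto3 CreateHyperParameterTuningJob request dict.
tuning_config = {}

tune = SageMakerTuningOperator(
    task_id='tune_model',
    config=tuning_config,
    aws_conn_id='aws_default',
    wait_for_completion=True,
    check_interval=30,
    dag=dag,  # assumes an existing DAG object
)
</pre></div>
</div>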
</div>
<div class="section" id="sagemakermodeloperator">
<span id="id28"></span><h4>SageMakerModelOperator<a class="headerlink" href="#sagemakermodeloperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.sagemaker_model_operator.SageMakerModelOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_model_operator.</code><code class="descname">SageMakerModelOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_model_operator.html#SageMakerModelOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_model_operator.SageMakerModelOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Create a SageMaker model.</p>
<p>This operator returns the ARN of the model created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create a model.</p>
<p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="sagemakertransformoperator">
<span id="id29"></span><h4>SageMakerTransformOperator<a class="headerlink" href="#sagemakertransformoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.sagemaker_transform_operator.SageMakerTransformOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_transform_operator.</code><code class="descname">SageMakerTransformOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_transform_operator.html#SageMakerTransformOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_transform_operator.SageMakerTransformOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Initiate a SageMaker transform job.</p>
<p>This operator returns the ARN of the transform job created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a transform job (templated).</p>
<p>If you need to create a SageMaker transform job based on an existing SageMaker model:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="n">transform_config</span>
</pre></div>
</div>
<p>If you need to create both a SageMaker model and a SageMaker transform job:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;Model&#39;</span><span class="p">:</span> <span class="n">model_config</span><span class="p">,</span>
<span class="s1">&#39;Transform&#39;</span><span class="p">:</span> <span class="n">transform_config</span>
<span class="p">}</span>
</pre></div>
</div>
<p>For details of the configuration parameter of transform_config see
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_transform_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_transform_job()</span></code></a></p>
<p>For details of the configuration parameter of model_config, See:
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the transform job finishes.</li>
<li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds,
that this operation waits to check the status of the transform job.</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails
if the transform job doesn’t finish within max_ingestion_time seconds. If you
set this parameter to None, the operation does not timeout.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
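<p>For illustration, a minimal sketch using the combined form shown above; both config dicts are placeholders and <code class="docutils literal notranslate"><span class="pre">dag</span></code> is an existing DAG object:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.sagemaker_transform_operator import SageMakerTransformOperator

model_config = {}      # placeholder: boto3 CreateModel request dict
transform_config = {}  # placeholder: boto3 CreateTransformJob request dict

transform = SageMakerTransformOperator(
    task_id='batch_transform',
    config={
        'Model': model_config,
        'Transform': transform_config,
    },
    aws_conn_id='aws_default',
    wait_for_completion=True,
    check_interval=30,
    dag=dag,  # assumes an existing DAG object
)
</pre></div>
</div>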
</div>
<div class="section" id="sagemakerendpointconfigoperator">
<span id="id30"></span><h4>SageMakerEndpointConfigOperator<a class="headerlink" href="#sagemakerendpointconfigoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.sagemaker_endpoint_config_operator.SageMakerEndpointConfigOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_endpoint_config_operator.</code><code class="descname">SageMakerEndpointConfigOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_endpoint_config_operator.html#SageMakerEndpointConfigOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_endpoint_config_operator.SageMakerEndpointConfigOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Create a SageMaker endpoint config.</p>
<p>This operator returns the ARN of the endpoint config created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create an endpoint config.</p>
<p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint_config()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="sagemakerendpointoperator">
<span id="id31"></span><h4>SageMakerEndpointOperator<a class="headerlink" href="#sagemakerendpointoperator" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.operators.sagemaker_endpoint_operator.SageMakerEndpointOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_endpoint_operator.</code><code class="descname">SageMakerEndpointOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_endpoint_operator.html#SageMakerEndpointOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_endpoint_operator.SageMakerEndpointOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Create a SageMaker endpoint.</p>
<p>This operator returns the ARN of the endpoint created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create an endpoint.</p>
<p>If you need to create a SageMaker endpoint based on an existing
SageMaker model and an existing SageMaker endpoint config:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="n">endpoint_configuration</span><span class="p">;</span>
</pre></div>
</div>
<p>If you need to create a SageMaker model, a SageMaker endpoint config and a SageMaker endpoint:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;Model&#39;</span><span class="p">:</span> <span class="n">model_configuration</span><span class="p">,</span>
<span class="s1">&#39;EndpointConfig&#39;</span><span class="p">:</span> <span class="n">endpoint_config_configuration</span><span class="p">,</span>
<span class="s1">&#39;Endpoint&#39;</span><span class="p">:</span> <span class="n">endpoint_configuration</span>
<span class="p">}</span>
</pre></div>
</div>
<p>For details of the configuration parameter of model_configuration see
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p>
<p>For details of the configuration parameter of endpoint_config_configuration see
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint_config()</span></code></a></p>
<p>For details of the configuration parameter of endpoint_configuration see
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether the operator should wait until the endpoint creation finishes.</li>
<li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, this is the time interval, in seconds, that this operation
waits before polling the status of the endpoint creation.</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, this operation fails if the endpoint creation doesn’t
finish within max_ingestion_time seconds. If you set this parameter to None it never times out.</li>
<li><strong>operation</strong> (<em>str</em>) – Whether to create an endpoint or update an endpoint. Must be either ‘create’ or ‘update’.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
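<p>For illustration, a minimal sketch using the combined form shown above; the three configuration dicts are the placeholders from the snippet above and <code class="docutils literal notranslate"><span class="pre">dag</span></code> is an existing DAG object:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.sagemaker_endpoint_operator import SageMakerEndpointOperator

model_configuration = {}            # placeholder: boto3 CreateModel request dict
endpoint_config_configuration = {}  # placeholder: boto3 CreateEndpointConfig request dict
endpoint_configuration = {}         # placeholder: boto3 CreateEndpoint request dict

deploy = SageMakerEndpointOperator(
    task_id='deploy_endpoint',
    config={
        'Model': model_configuration,
        'EndpointConfig': endpoint_config_configuration,
        'Endpoint': endpoint_configuration,
    },
    operation='create',  # or 'update' for an existing endpoint
    aws_conn_id='aws_default',
    wait_for_completion=True,
    check_interval=30,
    dag=dag,  # assumes an existing DAG object
)
</pre></div>
</div>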
</div>
</div>
<div class="section" id="id32">
<h3>Amazon SageMaker<a class="headerlink" href="#id32" title="Permalink to this headline"></a></h3>
<p>For more instructions on using Amazon SageMaker in Airflow, please see <a class="reference external" href="https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/workflow/README.rst">the SageMaker Python SDK README</a>.</p>
<ul class="simple">
<li><span class="xref std std-ref">SageMakerHook</span> : Interact with Amazon SageMaker.</li>
<li><span class="xref std std-ref">SageMakerTrainingOperator</span> : Create a SageMaker training job.</li>
<li><span class="xref std std-ref">SageMakerTuningOperator</span> : Create a SageMaker tuning job.</li>
<li><span class="xref std std-ref">SageMakerModelOperator</span> : Create a SageMaker model.</li>
<li><span class="xref std std-ref">SageMakerTransformOperator</span> : Create a SageMaker transform job.</li>
<li><span class="xref std std-ref">SageMakerEndpointConfigOperator</span> : Create a SageMaker endpoint config.</li>
<li><span class="xref std std-ref">SageMakerEndpointOperator</span> : Create a SageMaker endpoint.</li>
</ul>
<div class="section" id="id34">
<span id="id35"></span><h4>SageMakerHook<a class="headerlink" href="#id34" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt>
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.sagemaker_hook.</code><code class="descname">SageMakerHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p>
<p>Interact with Amazon SageMaker.</p>
<dl class="method">
<dt>
<code class="descname">check_s3_url</code><span class="sig-paren">(</span><em>s3url</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_s3_url"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Check if an S3 URL exists</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>s3url</strong> (<em>str</em>) – S3 url</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">bool</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">check_status</code><span class="sig-paren">(</span><em>job_name</em>, <em>key</em>, <em>describe_function</em>, <em>check_interval</em>, <em>max_ingestion_time</em>, <em>non_terminal_states=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_status"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Check status of a SageMaker job</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>job_name</strong> (<em>str</em>) – name of the job whose status should be checked</li>
<li><strong>key</strong> (<em>str</em>) – the key of the response dict
that points to the state</li>
<li><strong>describe_function</strong> (<em>callable</em>) – the function used to retrieve the status</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval, in seconds, at which the operator
checks the status of the SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
<li><strong>non_terminal_states</strong> (<em>set</em>) – the set of nonterminal states</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">response of describe call after job is done</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">check_training_config</code><span class="sig-paren">(</span><em>training_config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_training_config"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Check if a training configuration is valid</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>training_config</strong> (<em>dict</em>) – training_config</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">check_training_status_with_log</code><span class="sig-paren">(</span><em>job_name</em>, <em>non_terminal_states</em>, <em>failed_states</em>, <em>wait_for_completion</em>, <em>check_interval</em>, <em>max_ingestion_time</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_training_status_with_log"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Display the logs for a given training job, optionally tailing them until the
job is complete.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>job_name</strong> (<em>str</em>) – name of the training job to check status and display logs for</li>
<li><strong>non_terminal_states</strong> (<em>set</em>) – the set of non_terminal states</li>
<li><strong>failed_states</strong> (<em>set</em>) – the set of failed states</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether to keep looking for new log entries
until the job completes</li>
<li><strong>check_interval</strong> (<em>int</em>) – The interval in seconds between polling for new log entries and job completion</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">check_tuning_config</code><span class="sig-paren">(</span><em>tuning_config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_tuning_config"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Check if a tuning configuration is valid</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>tuning_config</strong> (<em>dict</em>) – tuning_config</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">configure_s3_resources</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.configure_s3_resources"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Extract the S3 operations from the configuration and execute them.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – config of SageMaker operation</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">create_endpoint</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_endpoint"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Create an endpoint</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>config</strong> (<em>dict</em>) – the config for endpoint</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – whether the program should keep running until the job finishes</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds at which the operator
will check the status of any SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to endpoint creation</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">create_endpoint_config</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_endpoint_config"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Create an endpoint config</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – the config for endpoint-config</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A response to endpoint config creation</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">create_model</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_model"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Create a model</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – the config for model</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A response to model creation</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">create_training_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>print_log=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_training_job"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Create a training job</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>config</strong> (<em>dict</em>) – the config for training</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – whether the program should keep running until the job finishes</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds at which the operator
will check the status of any SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to training job creation</p>
</td>
</tr>
</tbody>
</table>
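<p>A minimal sketch of calling this method directly, for example from a <code class="docutils literal notranslate"><span class="pre">PythonOperator</span></code> callable. The connection id, job name, image, role ARN and S3 locations below are placeholders, and only a subset of the keys accepted by the underlying <code class="docutils literal notranslate"><span class="pre">create_training_job()</span></code> API is shown:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.sagemaker_hook import SageMakerHook

# Placeholder config following the boto3 create_training_job() request syntax;
# see the SageMaker.Client.create_training_job() reference for the full key set.
training_config = {
    'TrainingJobName': 'example-training-job',
    'AlgorithmSpecification': {
        'TrainingImage': '123456789012.dkr.ecr.us-east-1.amazonaws.com/example:latest',
        'TrainingInputMode': 'File',
    },
    'RoleArn': 'arn:aws:iam::123456789012:role/example-sagemaker-role',
    'OutputDataConfig': {'S3OutputPath': 's3://example-bucket/sagemaker/output/'},
    'ResourceConfig': {
        'InstanceCount': 1,
        'InstanceType': 'ml.m4.xlarge',
        'VolumeSizeInGB': 10,
    },
    'StoppingCondition': {'MaxRuntimeInSeconds': 3600},
}

hook = SageMakerHook(aws_conn_id='aws_default')
response = hook.create_training_job(
    training_config,
    wait_for_completion=True,
    print_log=True,
    check_interval=60)
</pre></div>
</div>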
</dd></dl>
<dl class="method">
<dt>
<code class="descname">create_transform_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_transform_job"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Create a transform job</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>config</strong> (<em>dict</em>) – the config for transform job</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – whether the program should keep running until the job finishes</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds at which the operator
will check the status of any SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to transform job creation</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">create_tuning_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_tuning_job"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Create a tuning job</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>config</strong> (<em>dict</em>) – the config for tuning</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – whether the program should keep running until the job finishes</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds at which the operator
will check the status of any SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to tuning job creation</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">describe_endpoint</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_endpoint"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Return the endpoint info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the endpoint</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the endpoint info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">describe_endpoint_config</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_endpoint_config"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Return the endpoint config info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the endpoint config</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the endpoint config info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">describe_model</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_model"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Return the SageMaker model info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the SageMaker model</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the model info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">describe_training_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_training_job"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Return the training job info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the training job</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the training job info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">describe_training_job_with_log</code><span class="sig-paren">(</span><em>job_name</em>, <em>positions</em>, <em>stream_names</em>, <em>instance_count</em>, <em>state</em>, <em>last_description</em>, <em>last_describe_job_call</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_training_job_with_log"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Return the training job info associated with job_name and print CloudWatch logs</p>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">describe_transform_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_transform_job"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Return the transform job info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the transform job</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the transform job info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">describe_tuning_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_tuning_job"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Return the tuning job info associated with the name</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the tuning job</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the tuning job info</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.get_conn"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Establish an AWS connection for SageMaker</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-class docutils literal notranslate"><span class="pre">SageMaker.Client</span></code></a></td>
</tr>
</tbody>
</table>
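<p>The returned client is a regular boto3 SageMaker client, so any of its methods can be used directly. A short sketch, assuming an <code class="docutils literal notranslate"><span class="pre">aws_default</span></code> connection is configured:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.sagemaker_hook import SageMakerHook

sagemaker_client = SageMakerHook(aws_conn_id='aws_default').get_conn()

# List the ten most recently returned training jobs via the boto3 client.
response = sagemaker_client.list_training_jobs(MaxResults=10)
for job in response['TrainingJobSummaries']:
    print(job['TrainingJobName'], job['TrainingJobStatus'])
</pre></div>
</div>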
</dd></dl>
<dl class="method">
<dt>
<code class="descname">get_log_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.get_log_conn"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Establish an AWS connection for retrieving logs during training</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><code class="xref py py-class docutils literal notranslate"><span class="pre">CloudWatchLog.Client</span></code></td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">log_stream</code><span class="sig-paren">(</span><em>log_group</em>, <em>stream_name</em>, <em>start_time=0</em>, <em>skip=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.log_stream"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>A generator for log items in a single stream. This will yield all the
items that are available at the current moment.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>log_group</strong> (<em>str</em>) – The name of the log group.</li>
<li><strong>stream_name</strong> (<em>str</em>) – The name of the specific stream.</li>
<li><strong>start_time</strong> (<em>int</em>) – The time stamp value to start reading the logs from (default: 0).</li>
<li><strong>skip</strong> (<em>int</em>) – The number of log entries to skip at the start (default: 0).
This is for when there are multiple entries at the same timestamp.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><p class="first">dict</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last"><div class="line-block">
<div class="line">A CloudWatch log event with the following key-value pairs:</div>
<div class="line-block">
<div class="line">‘timestamp’ (int): The time in milliseconds of the event.</div>
<div class="line">‘message’ (str): The log event data.</div>
<div class="line">‘ingestionTime’ (int): The time in milliseconds the event was ingested.</div>
</div>
</div>
</p>
</td>
</tr>
</tbody>
</table>
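<p>A sketch of consuming the generator. The log group shown is the one SageMaker uses for training jobs; the stream name and connection id are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.sagemaker_hook import SageMakerHook

hook = SageMakerHook(aws_conn_id='aws_default')

# Print every event currently available in one CloudWatch log stream.
for event in hook.log_stream(log_group='/aws/sagemaker/TrainingJobs',
                             stream_name='example-training-job/algo-1-1546300800'):
    print(event['timestamp'], event['message'])
</pre></div>
</div>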
</dd></dl>
<dl class="method">
<dt>
<code class="descname">multi_stream_iter</code><span class="sig-paren">(</span><em>log_group</em>, <em>streams</em>, <em>positions=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.multi_stream_iter"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Iterate over the available events coming from a set of log streams in a single log group,
interleaving the events from each stream so they’re yielded in timestamp order.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>log_group</strong> (<em>str</em>) – The name of the log group.</li>
<li><strong>streams</strong> (<em>list</em>) – A list of the log stream names. The position of the stream in this list is
the stream number.</li>
<li><strong>positions</strong> (<em>list</em>) – A list of pairs of (timestamp, skip) which represents the last record
read from each stream.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A tuple of (stream number, cloudwatch log event).</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt>
<code class="descname">tar_and_s3_upload</code><span class="sig-paren">(</span><em>path</em>, <em>key</em>, <em>bucket</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.tar_and_s3_upload"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Tar the local file or directory and upload it to S3</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>path</strong> (<em>str</em>) – local file or directory</li>
<li><strong>key</strong> (<em>str</em>) – s3 key</li>
<li><strong>bucket</strong> (<em>str</em>) – s3 bucket</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
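<p>For example (the path, key and bucket below are placeholders), a local training script can be packaged and staged on S3 before a training job is created:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.sagemaker_hook import SageMakerHook

hook = SageMakerHook(aws_conn_id='aws_default')

# Tars /tmp/train.py and uploads the archive to
# s3://example-bucket/sagemaker/source/sourcedir.tar.gz
hook.tar_and_s3_upload(path='/tmp/train.py',
                       key='sagemaker/source/sourcedir.tar.gz',
                       bucket='example-bucket')
</pre></div>
</div>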
</dd></dl>
<dl class="method">
<dt>
<code class="descname">update_endpoint</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.update_endpoint"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Update an endpoint</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>config</strong> (<em>dict</em>) – the config for endpoint</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – whether the program should keep running until the job finishes</li>
<li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds at which the operator
will check the status of any SageMaker job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any
SageMaker jobs that run longer than this will fail. Setting this to
None implies no timeout for any SageMaker job.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to endpoint update</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="id36">
<span id="id37"></span><h4>SageMakerTrainingOperator<a class="headerlink" href="#id36" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt>
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_training_operator.</code><code class="descname">SageMakerTrainingOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_training_operator.html#SageMakerTrainingOperator"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Initiate a SageMaker training job.</p>
<p>This operator returns the ARN of the training job created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a training job (templated).</p>
<p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_training_job()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether the operator should wait until the training job finishes.</li>
<li><strong>print_log</strong> (<em>bool</em>) – whether the operator should print the CloudWatch log during training</li>
<li><strong>check_interval</strong> (<em>int</em>) – if wait is set to True, this is the time interval
in seconds at which the operator will check the status of the training job</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails if the training job
doesn’t finish within max_ingestion_time seconds. If you set this parameter to None,
the operation does not timeout.</li>
</ul>
</td>
</tr>
</tbody>
</table>
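<p>A minimal sketch of wiring the operator into a DAG. The DAG id, connection id and truncated <code class="docutils literal notranslate"><span class="pre">training_config</span></code> are placeholders; the config must follow the <code class="docutils literal notranslate"><span class="pre">create_training_job()</span></code> request syntax linked above:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.sagemaker_training_operator import SageMakerTrainingOperator

training_config = {
    'TrainingJobName': 'example-training-job',
    # ... remaining keys as in the boto3 create_training_job() request ...
}

dag = DAG('example_sagemaker_training',
          start_date=datetime(2019, 1, 1),
          schedule_interval=None)

train_model = SageMakerTrainingOperator(
    task_id='train_model',
    config=training_config,
    aws_conn_id='aws_default',
    wait_for_completion=True,
    print_log=True,
    check_interval=60,
    dag=dag)
</pre></div>
</div>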
</dd></dl>
</div>
<div class="section" id="id38">
<span id="id39"></span><h4>SageMakerTuningOperator<a class="headerlink" href="#id38" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt>
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_tuning_operator.</code><code class="descname">SageMakerTuningOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_tuning_operator.html#SageMakerTuningOperator"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Initiate a SageMaker hyperparameter tuning job.</p>
<p>This operator returns the ARN of the tuning job created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a tuning job (templated).</p>
<p>For details of the configuration parameter see
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_hyper_parameter_tuning_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_hyper_parameter_tuning_job()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the tuning job finishes.</li>
<li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds,
that this operation waits to check the status of the tuning job.</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails
if the tuning job doesn’t finish within max_ingestion_time seconds. If you
set this parameter to None, the operation does not timeout.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="id40">
<span id="id41"></span><h4>SageMakerModelOperator<a class="headerlink" href="#id40" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt>
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_model_operator.</code><code class="descname">SageMakerModelOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_model_operator.html#SageMakerModelOperator"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Create a SageMaker model.</p>
<p>This operator returns the ARN of the model created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create a model.</p>
<p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="id42">
<span id="id43"></span><h4>SageMakerTransformOperator<a class="headerlink" href="#id42" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt>
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_transform_operator.</code><code class="descname">SageMakerTransformOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_transform_operator.html#SageMakerTransformOperator"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Initiate a SageMaker transform job.</p>
<p>This operator returns the ARN of the model created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a transform job (templated).</p>
<p>If you need to create a SageMaker transform job based on an existing SageMaker model:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="n">transform_config</span>
</pre></div>
</div>
<p>If you need to create both SageMaker model and SageMaker Transform job:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;Model&#39;</span><span class="p">:</span> <span class="n">model_config</span><span class="p">,</span>
<span class="s1">&#39;Transform&#39;</span><span class="p">:</span> <span class="n">transform_config</span>
<span class="p">}</span>
</pre></div>
</div>
<p>For details of the configuration parameter of transform_config see
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_transform_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_transform_job()</span></code></a></p>
<p>For details of the configuration parameter of model_config, See:
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>string</em>) – The AWS connection ID to use.</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the transform job finishes.</li>
<li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds,
that this operation waits to check the status of the transform job.</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails
if the transform job doesn’t finish within max_ingestion_time seconds. If you
set this parameter to None, the operation does not timeout.</li>
</ul>
</td>
</tr>
</tbody>
</table>
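<p>A sketch of the combined form, registering a model and then running a transform job against it. All names, images and S3 locations are placeholders, only a subset of the keys the two boto3 calls accept is shown, and the task is assumed to be assigned to a DAG elsewhere (for example inside a <code class="docutils literal notranslate"><span class="pre">with</span> <span class="pre">DAG(...)</span></code> block):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.sagemaker_transform_operator import SageMakerTransformOperator

# Follows the boto3 create_model() request syntax.
model_config = {
    'ModelName': 'example-model',
    'PrimaryContainer': {
        'Image': '123456789012.dkr.ecr.us-east-1.amazonaws.com/example:latest',
        'ModelDataUrl': 's3://example-bucket/model/model.tar.gz',
    },
    'ExecutionRoleArn': 'arn:aws:iam::123456789012:role/example-sagemaker-role',
}

# Follows the boto3 create_transform_job() request syntax.
transform_config = {
    'TransformJobName': 'example-transform-job',
    'ModelName': 'example-model',
    'TransformInput': {
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri': 's3://example-bucket/transform/input/',
            },
        },
    },
    'TransformOutput': {'S3OutputPath': 's3://example-bucket/transform/output/'},
    'TransformResources': {'InstanceCount': 1, 'InstanceType': 'ml.m4.xlarge'},
}

transform = SageMakerTransformOperator(
    task_id='sagemaker_transform',
    config={'Model': model_config, 'Transform': transform_config},
    aws_conn_id='aws_default',
    wait_for_completion=True,
    check_interval=60)
</pre></div>
</div>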
</dd></dl>
</div>
<div class="section" id="id44">
<span id="id45"></span><h4>SageMakerEndpointConfigOperator<a class="headerlink" href="#id44" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt>
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_endpoint_config_operator.</code><code class="descname">SageMakerEndpointConfigOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_endpoint_config_operator.html#SageMakerEndpointConfigOperator"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Create a SageMaker endpoint config.</p>
<p>This operator returns the ARN of the endpoint config created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create an endpoint config.</p>
<p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint_config()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="id46">
<span id="id47"></span><h4>SageMakerEndpointOperator<a class="headerlink" href="#id46" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt>
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_endpoint_operator.</code><code class="descname">SageMakerEndpointOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_endpoint_operator.html#SageMakerEndpointOperator"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p>
<p>Create a SageMaker endpoint.</p>
<p>This operator returns the ARN of the endpoint created in Amazon SageMaker.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create an endpoint.</p>
<p>If you need to create a SageMaker endpoint based on an existing
SageMaker model and an existing SageMaker endpoint config:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="n">endpoint_configuration</span>
</pre></div>
</div>
<p>If you need to create all of SageMaker model, SageMaker endpoint-config and SageMaker endpoint:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;Model&#39;</span><span class="p">:</span> <span class="n">model_configuration</span><span class="p">,</span>
<span class="s1">&#39;EndpointConfig&#39;</span><span class="p">:</span> <span class="n">endpoint_config_configuration</span><span class="p">,</span>
<span class="s1">&#39;Endpoint&#39;</span><span class="p">:</span> <span class="n">endpoint_configuration</span>
<span class="p">}</span>
</pre></div>
</div>
<p>For details of the configuration parameter of model_configuration see
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p>
<p>For details of the configuration parameter of endpoint_config_configuration see
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint_config()</span></code></a></p>
<p>For details of the configuration parameter of endpoint_configuration see
<a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint()</span></code></a></p>
</li>
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
<li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether the operator should wait until the endpoint creation finishes.</li>
<li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, this is the time interval, in seconds, that this operation
waits before polling the status of the endpoint creation.</li>
<li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, this operation fails if the endpoint creation doesn’t
finish within max_ingestion_time seconds. If you set this parameter to None it never times out.</li>
<li><strong>operation</strong> (<em>str</em>) – Whether to create an endpoint or update an endpoint. Must be either ‘create’ or ‘update’.</li>
</ul>
</td>
</tr>
</tbody>
</table>
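<p>A short sketch of the simplest case, deploying an endpoint from an existing endpoint config. The names and connection id are placeholders, and the task is assumed to be assigned to a DAG elsewhere:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.sagemaker_endpoint_operator import SageMakerEndpointOperator

# Follows the boto3 create_endpoint() request syntax.
endpoint_configuration = {
    'EndpointName': 'example-endpoint',
    'EndpointConfigName': 'example-endpoint-config',
}

deploy_endpoint = SageMakerEndpointOperator(
    task_id='deploy_endpoint',
    config=endpoint_configuration,
    aws_conn_id='aws_default',
    operation='create',
    wait_for_completion=True,
    check_interval=60)
</pre></div>
</div>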
</dd></dl>
</div>
</div>
</div>
<div class="section" id="databricks">
<span id="id48"></span><h2>Databricks<a class="headerlink" href="#databricks" title="Permalink to this headline"></a></h2>
<p><a class="reference external" href="https://databricks.com/">Databricks</a> has contributed an Airflow operator which enables
submitting runs to the Databricks platform. Internally the operator talks to the
<code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> <a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">endpoint</a>.</p>
<div class="section" id="databrickssubmitrunoperator">
<h3>DatabricksSubmitRunOperator<a class="headerlink" href="#databrickssubmitrunoperator" title="Permalink to this headline"></a></h3>
<dl class="class">
<dt id="airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.databricks_operator.</code><code class="descname">DatabricksSubmitRunOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/databricks_operator.html#DatabricksSubmitRunOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Submits a Spark job run to Databricks using the
<a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">api/2.0/jobs/runs/submit</a>
API endpoint.</p>
<p>There are two ways to instantiate this operator.</p>
<p>In the first way, you can take the JSON payload that you typically use
to call the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint and pass it directly
to our <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> through the <code class="docutils literal notranslate"><span class="pre">json</span></code> parameter.
For example</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">json</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;new_cluster&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;spark_version&#39;</span><span class="p">:</span> <span class="s1">&#39;2.1.0-db3-scala2.11&#39;</span><span class="p">,</span>
<span class="s1">&#39;num_workers&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="p">},</span>
<span class="s1">&#39;notebook_task&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;notebook_path&#39;</span><span class="p">:</span> <span class="s1">&#39;/Users/airflow@example.com/PrepareData&#39;</span><span class="p">,</span>
<span class="p">},</span>
<span class="p">}</span>
<span class="n">notebook_run</span> <span class="o">=</span> <span class="n">DatabricksSubmitRunOperator</span><span class="p">(</span><span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;notebook_run&#39;</span><span class="p">,</span> <span class="n">json</span><span class="o">=</span><span class="n">json</span><span class="p">)</span>
</pre></div>
</div>
<p>Another way to accomplish the same thing is to use the named parameters
of the <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> directly. Note that there is exactly
one named parameter for each top level parameter in the <code class="docutils literal notranslate"><span class="pre">runs/submit</span></code>
endpoint. In this method, your code would look like this:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">new_cluster</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;spark_version&#39;</span><span class="p">:</span> <span class="s1">&#39;2.1.0-db3-scala2.11&#39;</span><span class="p">,</span>
<span class="s1">&#39;num_workers&#39;</span><span class="p">:</span> <span class="mi">2</span>
<span class="p">}</span>
<span class="n">notebook_task</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;notebook_path&#39;</span><span class="p">:</span> <span class="s1">&#39;/Users/airflow@example.com/PrepareData&#39;</span><span class="p">,</span>
<span class="p">}</span>
<span class="n">notebook_run</span> <span class="o">=</span> <span class="n">DatabricksSubmitRunOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;notebook_run&#39;</span><span class="p">,</span>
<span class="n">new_cluster</span><span class="o">=</span><span class="n">new_cluster</span><span class="p">,</span>
<span class="n">notebook_task</span><span class="o">=</span><span class="n">notebook_task</span><span class="p">)</span>
</pre></div>
</div>
<p>In the case where both the json parameter <strong>AND</strong> the named parameters
are provided, they will be merged together. If there are conflicts during the merge,
the named parameters will take precedence and override the top level <code class="docutils literal notranslate"><span class="pre">json</span></code> keys.</p>
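<p>For example (the cluster id and run names below are placeholders), a <code class="docutils literal notranslate"><span class="pre">run_name</span></code> passed as a named parameter overrides the <code class="docutils literal notranslate"><span class="pre">run_name</span></code> key in the <code class="docutils literal notranslate"><span class="pre">json</span></code> payload:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>json = {
    'notebook_task': {
        'notebook_path': '/Users/airflow@example.com/PrepareData',
    },
    'run_name': 'name-from-json',
}
notebook_run = DatabricksSubmitRunOperator(
    task_id='notebook_run',
    json=json,
    existing_cluster_id='1234-567890-abc123',
    # The named parameter takes precedence over the json key, so the
    # submitted run is named 'name-from-named-parameter'.
    run_name='name-from-named-parameter')
</pre></div>
</div>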
<dl class="docutils">
<dt>Currently the named parameters that <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> supports are</dt>
<dd><ul class="first last simple">
<li><code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">notebook_task</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">new_cluster</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">libraries</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">run_name</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">timeout_seconds</span></code></li>
</ul>
</dd>
</dl>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>json</strong> (<em>dict</em>) – <p>A JSON object containing API parameters which will be passed
directly to the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint. The other named parameters
(i.e. <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code>, <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code>..) to this operator will
be merged with this json dictionary if they are provided.
If there are conflicts during the merge, the named parameters will
take precedence and override the top level json keys. (templated)</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more information about templating see <a class="reference internal" href="concepts.html#jinja-templating"><span class="std std-ref">Jinja Templating</span></a>.
<a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">https://docs.databricks.com/api/latest/jobs.html#runs-submit</a></p>
</div>
</li>
<li><strong>spark_jar_task</strong> (<em>dict</em>) – <p>The main class and parameters for the JAR task. Note that
the actual JAR is specified in the <code class="docutils literal notranslate"><span class="pre">libraries</span></code>.
<em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code> should be specified.
This field will be templated.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask">https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask</a></p>
</div>
</li>
<li><strong>notebook_task</strong> (<em>dict</em>) – <p>The notebook path and parameters for the notebook task.
<em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code> should be specified.
This field will be templated.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask">https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask</a></p>
</div>
</li>
<li><strong>new_cluster</strong> (<em>dict</em>) – <p>Specs for a new cluster on which this task will be run.
<em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">new_cluster</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code> should be specified.
This field will be templated.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster">https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster</a></p>
</div>
</li>
<li><strong>existing_cluster_id</strong> (<em>string</em>) – ID for existing cluster on which to run this task.
<em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">new_cluster</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code> should be specified.
This field will be templated.</li>
<li><strong>libraries</strong> (<em>list of dicts</em>) – <p>Libraries which this run will use.
This field will be templated.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary">https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary</a></p>
</div>
</li>
<li><strong>run_name</strong> (<em>string</em>) – The run name used for this task.
By default this will be set to the Airflow <code class="docutils literal notranslate"><span class="pre">task_id</span></code>. This <code class="docutils literal notranslate"><span class="pre">task_id</span></code> is a
required parameter of the superclass <code class="docutils literal notranslate"><span class="pre">BaseOperator</span></code>.
This field will be templated.</li>
<li><strong>timeout_seconds</strong> (<em>int32</em>) – The timeout for this run. By default a value of 0 is used
which means to have no timeout.
This field will be templated.</li>
<li><strong>databricks_conn_id</strong> (<em>string</em>) – The name of the Airflow connection to use.
By default and in the common case this will be <code class="docutils literal notranslate"><span class="pre">databricks_default</span></code>. To use
token based authentication, provide the key <code class="docutils literal notranslate"><span class="pre">token</span></code> in the extra field for the
connection.</li>
<li><strong>polling_period_seconds</strong> (<em>int</em>) – Controls the rate which we poll for the result of
this run. By default the operator will poll every 30 seconds.</li>
<li><strong>databricks_retry_limit</strong> (<em>int</em>) – Number of times to retry if the Databricks backend is
unreachable. Its value must be greater than or equal to 1.</li>
<li><strong>databricks_retry_delay</strong> (<em>float</em>) – Number of seconds to wait between retries (it
might be a floating point number).</li>
<li><strong>do_xcom_push</strong> (<em>boolean</em>) – Whether we should push run_id and run_page_url to xcom.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
</div>
<div class="section" id="gcp-google-cloud-platform">
<span id="gcp"></span><h2>GCP: Google Cloud Platform<a class="headerlink" href="#gcp-google-cloud-platform" title="Permalink to this headline"></a></h2>
<p>Airflow has extensive support for the Google Cloud Platform. Note, however, that most Hooks and
Operators are in the contrib section, which means they have a <em>beta</em> status and
can have breaking changes between minor releases.</p>
<p>See the <a class="reference internal" href="howto/manage-connections.html#connection-type-gcp"><span class="std std-ref">GCP connection type</span></a> documentation to
configure connections to GCP.</p>
<div class="section" id="id49">
<h3>Logging<a class="headerlink" href="#id49" title="Permalink to this headline"></a></h3>
<p>Airflow can be configured to read and write task logs in Google Cloud Storage.
See <a class="reference internal" href="howto/write-logs.html#write-logs-gcp"><span class="std std-ref">Writing Logs to Google Cloud Storage</span></a>.</p>
</div>
<div class="section" id="googlecloudbasehook">
<h3>GoogleCloudBaseHook<a class="headerlink" href="#googlecloudbasehook" title="Permalink to this headline"></a></h3>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_api_base_hook.</code><code class="descname">GoogleCloudBaseHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_api_base_hook.html#GoogleCloudBaseHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p>
<p>A base hook for Google Cloud-related hooks. Google Cloud has a shared REST
API client that is built in the same way no matter which service you use.
This class helps construct and authorize the credentials needed to then
call googleapiclient.discovery.build() to actually discover and build a client
for a Google Cloud service.</p>
<p>The class also contains some miscellaneous helper functions.</p>
<p>All hooks derived from this base hook use the ‘Google Cloud Platform’ connection
type. Three ways of authentication are supported:</p>
<p>Default credentials: Only the ‘Project Id’ is required. You’ll need to
have set up default credentials, for example via the
<code class="docutils literal notranslate"><span class="pre">GOOGLE_APPLICATION_CREDENTIALS</span></code> environment variable or from the metadata
server on Google Compute Engine.</p>
<p>JSON key file: Specify ‘Project Id’, ‘Keyfile Path’ and ‘Scope’.</p>
<p>Legacy P12 key files are not supported.</p>
<p>JSON data provided in the UI: Specify ‘Keyfile JSON’.</p>
<dl class="staticmethod">
<dt id="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.fallback_to_default_project_id">
<em class="property">static </em><code class="descname">fallback_to_default_project_id</code><span class="sig-paren">(</span><em>func</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_api_base_hook.html#GoogleCloudBaseHook.fallback_to_default_project_id"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.fallback_to_default_project_id" title="Permalink to this definition"></a></dt>
<dd><p>Decorator that provides a fallback for the Google Cloud Platform project id. If
the project id is None it will be replaced with the project_id of the
service account the Hook is authenticated with. The project id can be specified
either via the project_id kwarg or via the first positional argument.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>func</strong> – function to wrap</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">result of the function call</td>
</tr>
</tbody>
</table>
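<p>A sketch of how the decorator is typically applied; the hook subclass and method below are hypothetical:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook

class ExampleGcpHook(GoogleCloudBaseHook):

    @GoogleCloudBaseHook.fallback_to_default_project_id
    def get_project(self, project_id=None):
        # If the caller passed project_id=None, the decorator has already
        # replaced it with the project id of the authenticated service account.
        return project_id
</pre></div>
</div>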
</dd></dl>
</dd></dl>
</div>
<div class="section" id="bigquery">
<span id="id50"></span><h3>BigQuery<a class="headerlink" href="#bigquery" title="Permalink to this headline"></a></h3>
<div class="section" id="bigquery-operators">
<h4>BigQuery Operators<a class="headerlink" href="#bigquery-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#bigquerycheckoperator"><span class="std std-ref">BigQueryCheckOperator</span></a> : Performs checks against a SQL query that will return a single row with different values.</li>
<li><a class="reference internal" href="#bigqueryvaluecheckoperator"><span class="std std-ref">BigQueryValueCheckOperator</span></a> : Performs a simple value check using SQL code.</li>
<li><a class="reference internal" href="#bigqueryintervalcheckoperator"><span class="std std-ref">BigQueryIntervalCheckOperator</span></a> : Checks that the values of metrics given as SQL expressions are within a certain tolerance of the ones from days_back before.</li>
<li><a class="reference internal" href="#bigquerygetdataoperator"><span class="std std-ref">BigQueryGetDataOperator</span></a> : Fetches the data from a BigQuery table and returns data in a Python list.</li>
<li><a class="reference internal" href="#bigquerycreateemptydatasetoperator"><span class="std std-ref">BigQueryCreateEmptyDatasetOperator</span></a> : Creates an empty BigQuery dataset.</li>
<li><a class="reference internal" href="#bigquerycreateemptytableoperator"><span class="std std-ref">BigQueryCreateEmptyTableOperator</span></a> : Creates a new, empty table in the specified BigQuery dataset optionally with schema.</li>
<li><a class="reference internal" href="#bigquerycreateexternaltableoperator"><span class="std std-ref">BigQueryCreateExternalTableOperator</span></a> : Creates a new, external table in the dataset with the data in Google Cloud Storage.</li>
<li><a class="reference internal" href="#bigquerydeletedatasetoperator"><span class="std std-ref">BigQueryDeleteDatasetOperator</span></a> : Deletes an existing BigQuery dataset.</li>
<li><a class="reference internal" href="#bigquerytabledeleteoperator"><span class="std std-ref">BigQueryTableDeleteOperator</span></a> : Deletes an existing BigQuery table.</li>
<li><a class="reference internal" href="#bigqueryoperator"><span class="std std-ref">BigQueryOperator</span></a> : Executes BigQuery SQL queries in a specific BigQuery database.</li>
<li><a class="reference internal" href="#bigquerytobigqueryoperator"><span class="std std-ref">BigQueryToBigQueryOperator</span></a> : Copy a BigQuery table to another BigQuery table.</li>
<li><a class="reference internal" href="#bigquerytocloudstorageoperator"><span class="std std-ref">BigQueryToCloudStorageOperator</span></a> : Transfers a BigQuery table to a Google Cloud Storage bucket</li>
</ul>
<div class="section" id="bigquerycheckoperator">
<span id="id51"></span><h5>BigQueryCheckOperator<a class="headerlink" href="#bigquerycheckoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.CheckOperator" title="airflow.operators.check_operator.CheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.CheckOperator</span></code></a></p>
<p>Performs checks against BigQuery. The <code class="docutils literal notranslate"><span class="pre">BigQueryCheckOperator</span></code> expects
a sql query that will return a single row. Each value on that
first row is evaluated using python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the
values return <code class="docutils literal notranslate"><span class="pre">False</span></code> the check is failed and errors out.</p>
<p>Note that Python bool casting evals the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">False</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">0</span></code></li>
<li>Empty string (<code class="docutils literal notranslate"><span class="pre">&quot;&quot;</span></code>)</li>
<li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li>
<li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li>
</ul>
<p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if
the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft a much more complex query that could,
for instance, check that the table has the same number of rows as
the upstream source table, or that the count of today’s partition is
greater than yesterday’s partition, or that a set of metrics are less
than 3 standard deviations from the 7-day average.</p>
<p>This operator can be used as a data quality check in your pipeline, and
depending on where you put it in your DAG, you can choose to stop the
critical path, preventing dubious data from being published, or run it
on the side and receive email alerts without stopping the progress of
the DAG.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to the BigQuery database</li>
<li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true)
or standard SQL (false).</li>
</ul>
</td>
</tr>
</tbody>
</table>
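<p>A minimal usage sketch (the task id, SQL, connection id, and <code class="docutils literal notranslate"><span class="pre">dag</span></code> object are placeholders):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
check_row_count = BigQueryCheckOperator(
    task_id='check_row_count',
    sql='SELECT COUNT(*) FROM my_dataset.my_table',
    use_legacy_sql=False,
    bigquery_conn_id='my_gcp_conn',
    dag=dag)
</pre></div>
</div>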
</dd></dl>
</div>
<div class="section" id="bigqueryvaluecheckoperator">
<span id="id52"></span><h5>BigQueryValueCheckOperator<a class="headerlink" href="#bigqueryvaluecheckoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryValueCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.ValueCheckOperator" title="airflow.operators.check_operator.ValueCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.ValueCheckOperator</span></code></a></p>
<p>Performs a simple value check using SQL code.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li>
<li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true)
or standard SQL (false).</li>
</ul>
</td>
</tr>
</tbody>
</table>
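<p>A minimal usage sketch; the expected-value argument comes from the
<code class="docutils literal notranslate"><span class="pre">ValueCheckOperator</span></code> base class, and all names below are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
check_expected_count = BigQueryValueCheckOperator(
    task_id='check_expected_count',
    sql='SELECT COUNT(*) FROM my_dataset.my_table',
    pass_value=1000,           # expected value, inherited from ValueCheckOperator
    use_legacy_sql=False,
    dag=dag)
</pre></div>
</div>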
</dd></dl>
</div>
<div class="section" id="bigqueryintervalcheckoperator">
<span id="id53"></span><h5>BigQueryIntervalCheckOperator<a class="headerlink" href="#bigqueryintervalcheckoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryIntervalCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryIntervalCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.IntervalCheckOperator" title="airflow.operators.check_operator.IntervalCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.IntervalCheckOperator</span></code></a></p>
<p>Checks that the values of metrics given as SQL expressions are within
a certain tolerance of the ones from days_back before.</p>
<p>This operator constructs a query like the following:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">SELECT</span> <span class="p">{</span><span class="n">metrics_threshold_dict_key</span><span class="p">}</span> <span class="n">FROM</span> <span class="p">{</span><span class="n">table</span><span class="p">}</span>
<span class="n">WHERE</span> <span class="p">{</span><span class="n">date_filter_column</span><span class="p">}</span><span class="o">=&lt;</span><span class="n">date</span><span class="o">&gt;</span>
</pre></div>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>table</strong> (<em>str</em>) – the table name</li>
<li><strong>days_back</strong> (<em>int</em>) – number of days between ds and the ds we want to check
against. Defaults to 7 days</li>
<li><strong>metrics_threshold</strong> (<em>dict</em>) – a dictionary of ratios indexed by metrics; for
example, ‘COUNT(*)’: 1.5 would require a 50 percent or smaller difference
between the current day and the prior days_back.</li>
<li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true)
or standard SQL (false).</li>
</ul>
</td>
</tr>
</tbody>
</table>
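<p>A minimal usage sketch; the thresholds keyword is inherited from
<code class="docutils literal notranslate"><span class="pre">IntervalCheckOperator</span></code> (verify the exact keyword name against your installed
version), and the other names are placeholders:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
check_vs_week_ago = BigQueryIntervalCheckOperator(
    task_id='check_vs_week_ago',
    table='my_dataset.my_table',
    days_back=7,
    metrics_thresholds={'COUNT(*)': 1.5},  # at most 50% difference vs. 7 days ago
    use_legacy_sql=False,
    dag=dag)
</pre></div>
</div>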
</dd></dl>
</div>
<div class="section" id="bigquerygetdataoperator">
<span id="id54"></span><h5>BigQueryGetDataOperator<a class="headerlink" href="#bigquerygetdataoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_get_data.</code><code class="descname">BigQueryGetDataOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_get_data.html#BigQueryGetDataOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Fetches the data from a BigQuery table (alternatively fetch data for selected columns)
and returns data in a python list. The number of elements in the returned list will
be equal to the number of rows fetched. Each element in the list will again be a list
where element would represent the columns values for that row.</p>
<p><strong>Example Result</strong>: <code class="docutils literal notranslate"><span class="pre">[['Tony',</span> <span class="pre">'10'],</span> <span class="pre">['Mike',</span> <span class="pre">'20'],</span> <span class="pre">['Steve',</span> <span class="pre">'15']]</span></code></p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">If you pass fields to <code class="docutils literal notranslate"><span class="pre">selected_fields</span></code> which are in different order than the
order of columns already in
BQ table, the data will still be in the order of BQ table.
For example if the BQ table has 3 columns as
<code class="docutils literal notranslate"><span class="pre">[A,B,C]</span></code> and you pass ‘B,A’ in the <code class="docutils literal notranslate"><span class="pre">selected_fields</span></code>
the data would still be of the form <code class="docutils literal notranslate"><span class="pre">'A,B'</span></code>.</p>
</div>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">get_data</span> <span class="o">=</span> <span class="n">BigQueryGetDataOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;get_data_from_bq&#39;</span><span class="p">,</span>
<span class="n">dataset_id</span><span class="o">=</span><span class="s1">&#39;test_dataset&#39;</span><span class="p">,</span>
<span class="n">table_id</span><span class="o">=</span><span class="s1">&#39;Transaction_partitions&#39;</span><span class="p">,</span>
<span class="n">max_results</span><span class="o">=</span><span class="s1">&#39;100&#39;</span><span class="p">,</span>
<span class="n">selected_fields</span><span class="o">=</span><span class="s1">&#39;DATE&#39;</span><span class="p">,</span>
<span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>dataset_id</strong> (<em>string</em>) – The dataset ID of the requested table. (templated)</li>
<li><strong>table_id</strong> (<em>string</em>) – The table ID of the requested table. (templated)</li>
<li><strong>max_results</strong> (<em>string</em>) – The maximum number of records (rows) to be fetched
from the table. (templated)</li>
<li><strong>selected_fields</strong> (<em>string</em>) – List of fields to return (comma-separated). If
unspecified, all fields are returned.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="bigquerycreateemptytableoperator">
<span id="id55"></span><h5>BigQueryCreateEmptyTableOperator<a class="headerlink" href="#bigquerycreateemptytableoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateEmptyTableOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateEmptyTableOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates a new, empty table in the specified BigQuery dataset,
optionally with schema.</p>
<p>The schema to be used for the BigQuery table may be specified in one of
two ways. You may either directly pass the schema fields in, or you may
point the operator to a Google Cloud Storage object name. The object in
Google Cloud Storage must be a JSON file with the schema fields in it.
You can also create a table without a schema.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The project to create the table into. (templated)</li>
<li><strong>dataset_id</strong> (<em>string</em>) – The dataset to create the table into. (templated)</li>
<li><strong>table_id</strong> (<em>string</em>) – The Name of the table to be created. (templated)</li>
<li><strong>schema_fields</strong> (<em>list</em>) – <p>If set, the schema field list as defined here:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</a></p>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;emp_name&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;STRING&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;REQUIRED&quot;</span><span class="p">},</span>
<span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;salary&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;INTEGER&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;NULLABLE&quot;</span><span class="p">}]</span>
</pre></div>
</div>
</li>
<li><strong>gcs_schema_object</strong> (<em>string</em>) – Full path to the JSON file containing
schema (templated). For
example: <code class="docutils literal notranslate"><span class="pre">gs://test-bucket/dir1/dir2/employee_schema.json</span></code></li>
<li><strong>time_partitioning</strong> (<em>dict</em>) – <p>configure optional time partitioning fields i.e.
partition by field, type and expiration as per API specifications.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning">https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning</a></p>
</div>
</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google
cloud storage hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to
work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>labels</strong> (<em>dict</em>) – <p>a dictionary containing labels for the table, passed to BigQuery</p>
<p><strong>Example (with schema JSON in GCS)</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateTable</span> <span class="o">=</span> <span class="n">BigQueryCreateEmptyTableOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;BigQueryCreateEmptyTableOperator_task&#39;</span><span class="p">,</span>
<span class="n">dataset_id</span><span class="o">=</span><span class="s1">&#39;ODS&#39;</span><span class="p">,</span>
<span class="n">table_id</span><span class="o">=</span><span class="s1">&#39;Employees&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="s1">&#39;internal-gcp-project&#39;</span><span class="p">,</span>
<span class="n">gcs_schema_object</span><span class="o">=</span><span class="s1">&#39;gs://schema-bucket/employee_schema.json&#39;</span><span class="p">,</span>
<span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
<p><strong>Corresponding Schema file</strong> (<code class="docutils literal notranslate"><span class="pre">employee_schema.json</span></code>):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[</span>
<span class="p">{</span>
<span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;NULLABLE&quot;</span><span class="p">,</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;emp_name&quot;</span><span class="p">,</span>
<span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;STRING&quot;</span>
<span class="p">},</span>
<span class="p">{</span>
<span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;REQUIRED&quot;</span><span class="p">,</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;salary&quot;</span><span class="p">,</span>
<span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;INTEGER&quot;</span>
<span class="p">}</span>
<span class="p">]</span>
</pre></div>
</div>
<p><strong>Example (with schema in the DAG)</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateTable</span> <span class="o">=</span> <span class="n">BigQueryCreateEmptyTableOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;BigQueryCreateEmptyTableOperator_task&#39;</span><span class="p">,</span>
<span class="n">dataset_id</span><span class="o">=</span><span class="s1">&#39;ODS&#39;</span><span class="p">,</span>
<span class="n">table_id</span><span class="o">=</span><span class="s1">&#39;Employees&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="s1">&#39;internal-gcp-project&#39;</span><span class="p">,</span>
<span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;emp_name&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;STRING&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;REQUIRED&quot;</span><span class="p">},</span>
<span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;salary&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;INTEGER&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;NULLABLE&quot;</span><span class="p">}],</span>
<span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="bigquerycreateexternaltableoperator">
<span id="id56"></span><h5>BigQueryCreateExternalTableOperator<a class="headerlink" href="#bigquerycreateexternaltableoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateExternalTableOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateExternalTableOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates a new external table in the dataset with the data in Google Cloud
Storage.</p>
<p>The schema to be used for the BigQuery table may be specified in one of
two ways. You may either directly pass the schema fields in, or you may
point the operator to a Google Cloud Storage object name. The object in
Google Cloud Storage must be a JSON file with the schema fields in it.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The bucket to point the external table to. (templated)</li>
<li><strong>source_objects</strong> (<em>list</em>) – List of Google cloud storage URIs to point
table to. (templated)
If source_format is ‘DATASTORE_BACKUP’, the list must only contain a single URI.</li>
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The dotted (&lt;project&gt;.)&lt;dataset&gt;.&lt;table&gt;
BigQuery table to load data into (templated). If &lt;project&gt; is not included,
project will be the project defined in the connection json.</li>
<li><strong>schema_fields</strong> (<em>list</em>) – <p>If set, the schema field list as defined here:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</a></p>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;emp_name&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;STRING&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;REQUIRED&quot;</span><span class="p">},</span>
<span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;salary&quot;</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;INTEGER&quot;</span><span class="p">,</span> <span class="s2">&quot;mode&quot;</span><span class="p">:</span> <span class="s2">&quot;NULLABLE&quot;</span><span class="p">}]</span>
</pre></div>
</div>
<p>Should not be set when source_format is ‘DATASTORE_BACKUP’.</p>
</li>
<li><strong>schema_object</strong> (<em>string</em>) – If set, a GCS object path pointing to a .json file that
contains the schema for the table. (templated)</li>
<li><strong>source_format</strong> (<em>string</em>) – File format of the data.</li>
<li><strong>compression</strong> (<em>string</em>) – [Optional] The compression type of the data source.
Possible values include GZIP and NONE.
The default value is NONE.
This setting is ignored for Google Cloud Bigtable,
Google Cloud Datastore backups and Avro formats.</li>
<li><strong>skip_leading_rows</strong> (<em>int</em>) – Number of rows to skip when loading from a CSV.</li>
<li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use for the CSV.</li>
<li><strong>max_bad_records</strong> (<em>int</em>) – The maximum number of bad records that BigQuery can
ignore when running the job.</li>
<li><strong>quote_character</strong> (<em>string</em>) – The value that is used to quote data sections in a CSV file.</li>
<li><strong>allow_quoted_newlines</strong> (<em>boolean</em>) – Whether to allow quoted newlines (true) or not (false).</li>
<li><strong>allow_jagged_rows</strong> (<em>bool</em>) – Accept rows that are missing trailing optional columns.
The missing values are treated as nulls. If false, records with missing trailing
columns are treated as bad records, and if there are too many bad records, an
invalid error is returned in the job result. Only applicable to CSV, ignored
for other formats.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google
cloud storage hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to
work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>src_fmt_configs</strong> (<em>dict</em>) – configure optional fields specific to the source format</li>
<li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the table, passed to BigQuery</li>
</ul>
</td>
</tr>
</tbody>
</table>
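<p>A minimal usage sketch (bucket, object paths, table names, and connection ids are placeholders):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
create_external_table = BigQueryCreateExternalTableOperator(
    task_id='create_external_sales_table',
    bucket='my-bucket',
    source_objects=['data/sales/*.csv'],
    destination_project_dataset_table='my_dataset.external_sales',
    source_format='CSV',
    skip_leading_rows=1,
    schema_object='schemas/sales_schema.json',
    bigquery_conn_id='my_gcp_conn',
    google_cloud_storage_conn_id='my_gcp_conn',
    dag=dag)
</pre></div>
</div>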
</dd></dl>
</div>
<div class="section" id="bigquerycreateemptydatasetoperator">
<span id="id57"></span><h5>BigQueryCreateEmptyDatasetOperator<a class="headerlink" href="#bigquerycreateemptydatasetoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyDatasetOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateEmptyDatasetOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateEmptyDatasetOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyDatasetOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>
This operator is used to create a new dataset for your project in BigQuery.
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>str</em>) – The name of the project where we want to create the dataset.
Not needed if projectId is provided in dataset_reference.</li>
<li><strong>dataset_id</strong> (<em>str</em>) – The id of the dataset. Not needed
if datasetId is provided in dataset_reference.</li>
<li><strong>dataset_reference</strong> – Dataset reference that could be provided with request body.
More info:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource</a></li>
</ul>
</td>
</tr>
</tbody>
</table>
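<p>A minimal usage sketch (project, dataset, and task names are placeholders):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
create_staging_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id='create_staging_dataset',
    project_id='my-project',
    dataset_id='staging',
    dag=dag)
</pre></div>
</div>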
</dd></dl>
</div>
<div class="section" id="bigquerydeletedatasetoperator">
<span id="id58"></span><h5>BigQueryDeleteDatasetOperator<a class="headerlink" href="#bigquerydeletedatasetoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryDeleteDatasetOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryDeleteDatasetOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>
This operator deletes an existing dataset from your project in BigQuery.
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The project id of the dataset.</li>
<li><strong>dataset_id</strong> (<em>string</em>) – The dataset to be deleted.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">delete_temp_data</span> <span class="o">=</span> <span class="n">BigQueryDeleteDatasetOperator</span><span class="p">(</span><span class="n">dataset_id</span> <span class="o">=</span> <span class="s1">&#39;temp-dataset&#39;</span><span class="p">,</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="s1">&#39;temp-project&#39;</span><span class="p">,</span>
<span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">&#39;_my_gcp_conn_&#39;</span><span class="p">,</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;Deletetemp&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
</div>
<div class="section" id="bigquerytabledeleteoperator">
<span id="id59"></span><h5>BigQueryTableDeleteOperator<a class="headerlink" href="#bigquerytabledeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_table_delete_operator.</code><code class="descname">BigQueryTableDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_table_delete_operator.html#BigQueryTableDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Deletes BigQuery tables</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>deletion_dataset_table</strong> (<em>string</em>) – A dotted
(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; that indicates which table
will be deleted. (templated)</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>ignore_if_missing</strong> (<em>boolean</em>) – if True, then return success even if the
requested table does not exist.</li>
</ul>
</td>
</tr>
</tbody>
</table>
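<p>A minimal usage sketch (the table reference and task id are placeholders):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
delete_staging_table = BigQueryTableDeleteOperator(
    task_id='delete_staging_table',
    deletion_dataset_table='my-project.staging.tmp_events',
    ignore_if_missing=True,
    dag=dag)
</pre></div>
</div>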
</dd></dl>
</div>
<div class="section" id="bigqueryoperator">
<span id="id60"></span><h5>BigQueryOperator<a class="headerlink" href="#bigqueryoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_operator.BigQueryOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Executes BigQuery SQL queries in a specific BigQuery database</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bql</strong> (<em>Can receive a str representing a SQL statement,
a list of str (SQL statements), or a reference to a template file.
Template references are recognized by str ending in '.sql'.</em>) – (Deprecated. Use <cite>sql</cite> parameter instead) the SQL code to be
executed (templated)</li>
<li><strong>sql</strong> (<em>Can receive a str representing a SQL statement,
a list of str (SQL statements), or a reference to a template file.
Template references are recognized by str ending in '.sql'.</em>) – the SQL code to be executed (templated)</li>
<li><strong>destination_dataset_table</strong> (<em>string</em>) – A dotted
(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; that, if set, will store the results
of the query. (templated)</li>
<li><strong>write_disposition</strong> (<em>string</em>) – Specifies the action that occurs if the destination table
already exists. (default: ‘WRITE_EMPTY’)</li>
<li><strong>create_disposition</strong> (<em>string</em>) – Specifies whether the job is allowed to create new tables.
(default: ‘CREATE_IF_NEEDED’)</li>
<li><strong>allow_large_results</strong> (<em>boolean</em>) – Whether to allow large results.</li>
<li><strong>flatten_results</strong> (<em>boolean</em>) – If true and query uses legacy SQL dialect, flattens
all nested and repeated fields in the query results. <code class="docutils literal notranslate"><span class="pre">allow_large_results</span></code>
must be <code class="docutils literal notranslate"><span class="pre">true</span></code> if this is set to <code class="docutils literal notranslate"><span class="pre">false</span></code>. For standard SQL queries, this
flag is ignored and results are never flattened.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>udf_config</strong> (<em>list</em>) – The User Defined Function configuration for the query.
See <a class="reference external" href="https://cloud.google.com/bigquery/user-defined-functions">https://cloud.google.com/bigquery/user-defined-functions</a> for details.</li>
<li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) or standard SQL (false).</li>
<li><strong>maximum_billing_tier</strong> (<em>integer</em>) – Positive integer that serves as a multiplier
of the basic price.
Defaults to None, in which case it uses the value set in the project.</li>
<li><strong>maximum_bytes_billed</strong> (<em>float</em>) – Limits the bytes billed for this job.
Queries that will have bytes billed beyond this limit will fail
(without incurring a charge). If unspecified, this will be
set to your project default.</li>
<li><strong>api_resource_configs</strong> (<em>dict</em>) – a dictionary of ‘configuration’ params
to apply to the Google BigQuery Jobs API:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs</a>,
for example, {‘query’: {‘useQueryCache’: False}}. You can use it
to provide params that are not exposed as explicit BigQueryOperator
arguments.</li>
<li><strong>schema_update_options</strong> (<em>tuple</em>) – Allows the schema of the destination
table to be updated as a side effect of the load job.</li>
<li><strong>query_params</strong> (<em>dict</em>) – a dictionary containing query parameter types and
values, passed to BigQuery.</li>
<li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query,
passed to BigQuery</li>
<li><strong>priority</strong> (<em>string</em>) – Specifies a priority for the query.
Possible values include INTERACTIVE and BATCH.
The default value is INTERACTIVE.</li>
<li><strong>time_partitioning</strong> (<em>dict</em>) – configure optional time partitioning fields i.e.
partition by field, type and expiration as per API specifications.</li>
<li><strong>cluster_fields</strong> (<em>list of str</em>) – Request that the result of this query be stored sorted
by one or more columns. This is only available in conjunction with
time_partitioning. The order of columns given determines the sort order.</li>
<li><strong>location</strong> (<em>str</em>) – The geographic location of the job. Required except for
US and EU. See details at
<a class="reference external" href="https://cloud.google.com/bigquery/docs/locations#specifying_your_location">https://cloud.google.com/bigquery/docs/locations#specifying_your_location</a></li>
</ul>
</td>
</tr>
</tbody>
</table>
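<p>A minimal usage sketch (SQL file, destination table, and connection id are placeholders;
a string ending in ‘.sql’ is treated as a template file reference):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
aggregate_sales = BigQueryOperator(
    task_id='aggregate_sales',
    sql='sql/aggregate_sales.sql',   # or an inline SQL string / list of strings
    destination_dataset_table='my-project.reporting.daily_sales',
    write_disposition='WRITE_TRUNCATE',
    create_disposition='CREATE_IF_NEEDED',
    use_legacy_sql=False,
    bigquery_conn_id='my_gcp_conn',
    dag=dag)
</pre></div>
</div>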
</dd></dl>
</div>
<div class="section" id="bigquerytobigqueryoperator">
<span id="id61"></span><h5>BigQueryToBigQueryOperator<a class="headerlink" href="#bigquerytobigqueryoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_to_bigquery.</code><code class="descname">BigQueryToBigQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_bigquery.html#BigQueryToBigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Copies data from one BigQuery table to another.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more details about these parameters:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy">https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_project_dataset_tables</strong> (<em>list|string</em>) – One or more
dotted (project:|project.)&lt;dataset&gt;.&lt;table&gt; BigQuery tables to use as the
source data. If &lt;project&gt; is not included, project will be the
project defined in the connection json. Use a list if there are multiple
source tables. (templated)</li>
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The destination BigQuery
table. Format is: (project:|project.)&lt;dataset&gt;.&lt;table&gt; (templated)</li>
<li><strong>write_disposition</strong> (<em>string</em>) – The write disposition if the table already exists.</li>
<li><strong>create_disposition</strong> (<em>string</em>) – The create disposition if the table doesn’t exist.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query,
passed to BigQuery</li>
</ul>
</td>
</tr>
</tbody>
</table>
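<p>A minimal usage sketch (source and destination tables are placeholders):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
copy_events_table = BigQueryToBigQueryOperator(
    task_id='copy_events_table',
    source_project_dataset_tables='my-project.raw.events',
    destination_project_dataset_table='my-project.backup.events',
    write_disposition='WRITE_TRUNCATE',
    create_disposition='CREATE_IF_NEEDED',
    dag=dag)
</pre></div>
</div>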
</dd></dl>
</div>
<div class="section" id="bigquerytocloudstorageoperator">
<span id="id66"></span><h5>BigQueryToCloudStorageOperator<a class="headerlink" href="#bigquerytocloudstorageoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_to_gcs.</code><code class="descname">BigQueryToCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_gcs.html#BigQueryToCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Transfers a BigQuery table to a Google Cloud Storage bucket.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more details about these parameters:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs">https://cloud.google.com/bigquery/docs/reference/v2/jobs</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_project_dataset_table</strong> (<em>string</em>) – The dotted
<code class="docutils literal notranslate"><span class="pre">(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt;</span></code> BigQuery table to use as the
source data. If &lt;project&gt; is not included, project will be the project
defined in the connection json. (templated)</li>
<li><strong>destination_cloud_storage_uris</strong> (<em>list</em>) – The destination Google Cloud
Storage URI (e.g. gs://some-bucket/some-file.txt). (templated) Follows
convention defined here:
https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple</li>
<li><strong>compression</strong> (<em>string</em>) – Type of compression to use.</li>
<li><strong>export_format</strong> (<em>string</em>) – File format to export.</li>
<li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use when extracting to a CSV.</li>
<li><strong>print_header</strong> (<em>boolean</em>) – Whether to print a header for a CSV file extract.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query,
passed to BigQuery</li>
</ul>
</td>
</tr>
</tbody>
</table>
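<p>A minimal usage sketch (table, bucket, and URIs are placeholders):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
export_events_to_gcs = BigQueryToCloudStorageOperator(
    task_id='export_events_to_gcs',
    source_project_dataset_table='my-project.raw.events',
    destination_cloud_storage_uris=['gs://my-bucket/exports/events-*.csv'],
    export_format='CSV',
    field_delimiter=',',
    print_header=True,
    dag=dag)
</pre></div>
</div>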
</dd></dl>
</div>
</div>
<div class="section" id="bigqueryhook">
<h4>BigQueryHook<a class="headerlink" href="#bigqueryhook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.bigquery_hook.</code><code class="descname">BigQueryHook</code><span class="sig-paren">(</span><em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>use_legacy_sql=True</em>, <em>location=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a>, <a class="reference internal" href="code.html#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p>
<p>Interact with BigQuery. This hook uses the Google Cloud Platform
connection.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a BigQuery PEP 249 connection object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df">
<code class="descname">get_pandas_df</code><span class="sig-paren">(</span><em>sql</em>, <em>parameters=None</em>, <em>dialect=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Pandas DataFrame for the results produced by a BigQuery
query. The DbApiHook method must be overridden because Pandas
doesn’t support PEP 249 connections, except for SQLite. See:</p>
<p><a class="reference external" href="https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447">https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447</a>
<a class="reference external" href="https://github.com/pydata/pandas/issues/6900">https://github.com/pydata/pandas/issues/6900</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>sql</strong> (<em>string</em>) – The BigQuery SQL to execute.</li>
<li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with (not
used, leave to override superclass method)</li>
<li><strong>dialect</strong> (<em>string in {'legacy'</em><em>, </em><em>'standard'}</em>) – Dialect of BigQuery SQL – legacy SQL or standard SQL;
defaults to <cite>self.use_legacy_sql</cite> if not specified</li>
</ul>
</td>
</tr>
</tbody>
</table>
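<p>A minimal usage sketch, e.g. inside a <code class="docutils literal notranslate"><span class="pre">PythonOperator</span></code> callable
(connection id and table names are placeholders):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
from airflow.contrib.hooks.bigquery_hook import BigQueryHook

def summarize_sales(**context):
    hook = BigQueryHook(bigquery_conn_id='my_gcp_conn', use_legacy_sql=False)
    df = hook.get_pandas_df(
        'SELECT region, SUM(amount) AS total FROM my_dataset.sales GROUP BY region')
    return df.to_dict()
</pre></div>
</div>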
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service">
<code class="descname">get_service</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_service"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service" title="Permalink to this definition"></a></dt>
<dd><p>Returns a BigQuery service object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows">
<code class="descname">insert_rows</code><span class="sig-paren">(</span><em>table</em>, <em>rows</em>, <em>target_fields=None</em>, <em>commit_every=1000</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.insert_rows"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows" title="Permalink to this definition"></a></dt>
<dd><p>Insertion is currently unsupported. Theoretically, you could use
BigQuery’s streaming API to insert rows into a table, but this hasn’t
been implemented.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists">
<code class="descname">table_exists</code><span class="sig-paren">(</span><em>project_id</em>, <em>dataset_id</em>, <em>table_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.table_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists" title="Permalink to this definition"></a></dt>
<dd><p>Checks for the existence of a table in Google BigQuery.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google cloud project in which to look for the
table. The connection supplied to the hook must provide access to
the specified project.</li>
<li><strong>dataset_id</strong> (<em>string</em>) – The name of the dataset in which to look for the
table.</li>
<li><strong>table_id</strong> (<em>string</em>) – The name of the table to check the existence of.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="cloud-spanner">
<h3>Cloud Spanner<a class="headerlink" href="#cloud-spanner" title="Permalink to this headline"></a></h3>
<div class="section" id="cloud-spanner-operators">
<h4>Cloud Spanner Operators<a class="headerlink" href="#cloud-spanner-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#cloudspannerinstancedatabasedeleteoperator"><span class="std std-ref">CloudSpannerInstanceDatabaseDeleteOperator</span></a> : deletes an existing database from
a Google Cloud Spanner instance or returns success if the database is missing.</li>
<li><a class="reference internal" href="#cloudspannerinstancedatabasedeployoperator"><span class="std std-ref">CloudSpannerInstanceDatabaseDeployOperator</span></a> : creates a new database in a Google
Cloud instance or returns success if the database already exists.</li>
<li><a class="reference internal" href="#cloudspannerinstancedatabaseupdateoperator"><span class="std std-ref">CloudSpannerInstanceDatabaseUpdateOperator</span></a> : updates the structure of a
Google Cloud Spanner database.</li>
<li><a class="reference internal" href="#cloudspannerinstancedatabasequeryoperator"><span class="std std-ref">CloudSpannerInstanceDatabaseQueryOperator</span></a> : executes an arbitrary DML query
(INSERT, UPDATE, DELETE).</li>
<li><a class="reference internal" href="#cloudspannerinstancedeployoperator"><span class="std std-ref">CloudSpannerInstanceDeployOperator</span></a> : creates a new Google Cloud Spanner instance,
or if an instance with the same name exists, updates the instance.</li>
<li><a class="reference internal" href="#cloudspannerinstancedeleteoperator"><span class="std std-ref">CloudSpannerInstanceDeleteOperator</span></a> : deletes a Google Cloud Spanner instance.</li>
</ul>
<div class="section" id="cloudspannerinstancedatabasedeleteoperator">
<span id="id67"></span><h5>CloudSpannerInstanceDatabaseDeleteOperator<a class="headerlink" href="#cloudspannerinstancedatabasedeleteoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="cloudspannerinstancedatabasedeployoperator">
<span id="id68"></span><h5>CloudSpannerInstanceDatabaseDeployOperator<a class="headerlink" href="#cloudspannerinstancedatabasedeployoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="cloudspannerinstancedatabaseupdateoperator">
<span id="id69"></span><h5>CloudSpannerInstanceDatabaseUpdateOperator<a class="headerlink" href="#cloudspannerinstancedatabaseupdateoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="cloudspannerinstancedatabasequeryoperator">
<span id="id70"></span><h5>CloudSpannerInstanceDatabaseQueryOperator<a class="headerlink" href="#cloudspannerinstancedatabasequeryoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="cloudspannerinstancedeployoperator">
<span id="id71"></span><h5>CloudSpannerInstanceDeployOperator<a class="headerlink" href="#cloudspannerinstancedeployoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="cloudspannerinstancedeleteoperator">
<span id="id72"></span><h5>CloudSpannerInstanceDeleteOperator<a class="headerlink" href="#cloudspannerinstancedeleteoperator" title="Permalink to this headline"></a></h5>
</div>
</div>
<div class="section" id="cloudspannerhook">
<h4>CloudSpannerHook<a class="headerlink" href="#cloudspannerhook" title="Permalink to this headline"></a></h4>
</div>
</div>
<div class="section" id="cloud-sql">
<h3>Cloud SQL<a class="headerlink" href="#cloud-sql" title="Permalink to this headline"></a></h3>
<div class="section" id="cloud-sql-operators">
<h4>Cloud SQL Operators<a class="headerlink" href="#cloud-sql-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#cloudsqlinstancedatabasedeleteoperator"><span class="std std-ref">CloudSqlInstanceDatabaseDeleteOperator</span></a> : deletes a database from a Cloud SQL
instance.</li>
<li><a class="reference internal" href="#cloudsqlinstancedatabasecreateoperator"><span class="std std-ref">CloudSqlInstanceDatabaseCreateOperator</span></a> : creates a new database inside a Cloud
SQL instance.</li>
<li><a class="reference internal" href="#cloudsqlinstancedatabasepatchoperator"><span class="std std-ref">CloudSqlInstanceDatabasePatchOperator</span></a> : updates a database inside a Cloud
SQL instance.</li>
<li><a class="reference internal" href="#cloudsqlinstancedeleteoperator"><span class="std std-ref">CloudSqlInstanceDeleteOperator</span></a> : delete a Cloud SQL instance.</li>
<li><a class="reference internal" href="#cloudsqlinstanceexportoperator"><span class="std std-ref">CloudSqlInstanceExportOperator</span></a> : exports data from a Cloud SQL instance.</li>
<li><a class="reference internal" href="#cloudsqlinstanceimportoperator"><span class="std std-ref">CloudSqlInstanceImportOperator</span></a> : imports data into a Cloud SQL instance.</li>
<li><a class="reference internal" href="#cloudsqlinstancecreateoperator"><span class="std std-ref">CloudSqlInstanceCreateOperator</span></a> : create a new Cloud SQL instance.</li>
<li><a class="reference internal" href="#cloudsqlinstancepatchoperator"><span class="std std-ref">CloudSqlInstancePatchOperator</span></a> : patch a Cloud SQL instance.</li>
<li><a class="reference internal" href="#cloudsqlqueryoperator"><span class="std std-ref">CloudSqlQueryOperator</span></a> : run query in a Cloud SQL instance.</li>
</ul>
<div class="section" id="cloudsqlinstancedatabasedeleteoperator">
<span id="id73"></span><h5>CloudSqlInstanceDatabaseDeleteOperator<a class="headerlink" href="#cloudsqlinstancedatabasedeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabaseDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Deletes a database from a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>database</strong> (<em>str</em>) – Name of the database to be deleted in the instance.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing,
the default project_id from the GCP connection is used.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
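<p>A minimal usage sketch (not part of the generated API reference): the DAG, instance and database names are placeholders, and <code class="docutils literal notranslate"><span class="pre">project_id</span></code> is omitted so the default from the GCP connection is used.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime
from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceDatabaseDeleteOperator

with DAG('example_cloudsql_db_delete', schedule_interval=None,
         start_date=datetime(2019, 1, 1)) as dag:
    delete_db = CloudSqlInstanceDatabaseDeleteOperator(
        task_id='delete_db',
        instance='my-cloudsql-instance',  # instance ID without the project ID
        database='my_database',           # database to delete
        # project_id omitted: the default from the GCP connection is used
    )
</pre></div>
</div>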
</div>
<div class="section" id="cloudsqlinstancedatabasecreateoperator">
<span id="id74"></span><h5>CloudSqlInstanceDatabaseCreateOperator<a class="headerlink" href="#cloudsqlinstancedatabasecreateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabaseCreateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Creates a new database inside a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a></li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing,
the default project_id from the GCP connection is used.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
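<p>A minimal usage sketch (not part of the generated API reference): the body below is a hypothetical database resource following the linked <em>databases/insert</em> request-body documentation, and all names are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime
from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceDatabaseCreateOperator

# Minimal database resource per the databases/insert request body (placeholder values).
db_create_body = {
    'instance': 'my-cloudsql-instance',
    'name': 'my_database',
    'project': 'my-project',
}

with DAG('example_cloudsql_db_create', schedule_interval=None,
         start_date=datetime(2019, 1, 1)) as dag:
    create_db = CloudSqlInstanceDatabaseCreateOperator(
        task_id='create_db',
        instance='my-cloudsql-instance',
        body=db_create_body,
    )
</pre></div>
</div>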
</div>
<div class="section" id="cloudsqlinstancedatabasepatchoperator">
<span id="id75"></span><h5>CloudSqlInstanceDatabasePatchOperator<a class="headerlink" href="#cloudsqlinstancedatabasepatchoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabasePatchOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabasePatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Updates a resource containing information about a database inside a Cloud SQL
instance using patch semantics.
See: <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>database</strong> (<em>str</em>) – Name of the database to be updated in the instance.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body</a></li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
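<p>A minimal usage sketch (not part of the generated API reference): with patch semantics only the fields to change are supplied. The body below is a hypothetical partial database resource; see the linked <em>databases/patch</em> request-body documentation for the available fields.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime
from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceDatabasePatchOperator

# Partial database resource: only the fields being changed (placeholder values).
db_patch_body = {
    'charset': 'utf16',
    'collation': 'utf16_general_ci',
}

with DAG('example_cloudsql_db_patch', schedule_interval=None,
         start_date=datetime(2019, 1, 1)) as dag:
    patch_db = CloudSqlInstanceDatabasePatchOperator(
        task_id='patch_db',
        instance='my-cloudsql-instance',
        database='my_database',
        body=db_patch_body,
    )
</pre></div>
</div>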
</div>
<div class="section" id="cloudsqlinstancedeleteoperator">
<span id="id76"></span><h5>CloudSqlInstanceDeleteOperator<a class="headerlink" href="#cloudsqlinstancedeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Deletes a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing,
the default project_id from the GCP connection is used.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="cloudsqlinstanceexportoperator">
<span id="id77"></span><h5>CloudSqlInstanceExportOperator<a class="headerlink" href="#cloudsqlinstanceexportoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceExportOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceExportOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceExportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceExportOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Exports data from a Cloud SQL instance to a Cloud Storage bucket as a SQL dump
or CSV file.</p>
<p>Note: This operator is idempotent. If executed multiple times with the same
export file URI, the export file in GCS will simply be overwritten.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body</a></li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing,
the default project_id from the GCP connection is used.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
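<p>A minimal usage sketch (not part of the generated API reference): the body is a hypothetical <em>exportContext</em> following the linked <em>instances/export</em> request-body documentation, and the bucket, file and database names are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime
from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceExportOperator

# Export request body (placeholder values): dump one database to a GCS object.
export_body = {
    'exportContext': {
        'fileType': 'sql',
        'uri': 'gs://my-bucket/my_database_dump.sql',
        'databases': ['my_database'],
    }
}

with DAG('example_cloudsql_export', schedule_interval=None,
         start_date=datetime(2019, 1, 1)) as dag:
    export_task = CloudSqlInstanceExportOperator(
        task_id='export_task',
        instance='my-cloudsql-instance',
        body=export_body,
    )
</pre></div>
</div>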
</div>
<div class="section" id="cloudsqlinstanceimportoperator">
<span id="id78"></span><h5>CloudSqlInstanceImportOperator<a class="headerlink" href="#cloudsqlinstanceimportoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceImportOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceImportOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceImportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceImportOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Imports data into a Cloud SQL instance from a SQL dump or CSV file in Cloud Storage.</p>
<p>CSV IMPORT:</p>
<p>This operator is NOT idempotent for a CSV import. If the same file is imported
multiple times, the imported data will be duplicated in the database.
Moreover, if there are any unique constraints, the duplicate import may result in an
error.</p>
<p>SQL IMPORT:</p>
<p>This operator is idempotent for a SQL import if the dump file was also exported by Cloud SQL.
The exported SQL contains ‘DROP TABLE IF EXISTS’ statements for all tables
to be imported.</p>
<p>If the import file was generated in a different way, idempotence is not guaranteed.
It has to be ensured at the SQL file level.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/import#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/import#request-body</a></li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing,
the default project_id from the GCP connection is used.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
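<p>A minimal usage sketch (not part of the generated API reference): the body is a hypothetical <em>importContext</em> following the import request-body documentation, loading a previously exported SQL dump; all names are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime
from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceImportOperator

# Import request body (placeholder values): load a SQL dump from GCS.
import_body = {
    'importContext': {
        'fileType': 'sql',
        'uri': 'gs://my-bucket/my_database_dump.sql',
        'database': 'my_database',
    }
}

with DAG('example_cloudsql_import', schedule_interval=None,
         start_date=datetime(2019, 1, 1)) as dag:
    import_task = CloudSqlInstanceImportOperator(
        task_id='import_task',
        instance='my-cloudsql-instance',
        body=import_body,
    )
</pre></div>
</div>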
</div>
<div class="section" id="cloudsqlinstancecreateoperator">
<span id="id79"></span><h5>CloudSqlInstanceCreateOperator<a class="headerlink" href="#cloudsqlinstancecreateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceCreateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Creates a new Cloud SQL instance.
If an instance with the same name exists, no action will be taken and
the operator will succeed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL insert API, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body</a></li>
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing,
the default project_id from the GCP connection is used.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
<li><strong>validate_body</strong> (<em>bool</em>) – True if body should be validated, False otherwise.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
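<p>A minimal usage sketch (not part of the generated API reference): the body is a hypothetical instance resource following the linked <em>instances/insert</em> request-body documentation; the instance name, region, database version and machine tier are placeholders.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from datetime import datetime
from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlInstanceCreateOperator

# Minimal instance resource (placeholder values).
instance_body = {
    'name': 'my-cloudsql-instance',
    'region': 'europe-west1',
    'databaseVersion': 'MYSQL_5_7',
    'settings': {
        'tier': 'db-n1-standard-1',
    },
}

with DAG('example_cloudsql_instance_create', schedule_interval=None,
         start_date=datetime(2019, 1, 1)) as dag:
    create_instance = CloudSqlInstanceCreateOperator(
        task_id='create_instance',
        instance='my-cloudsql-instance',
        body=instance_body,
    )
</pre></div>
</div>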
</div>
<div class="section" id="cloudsqlinstancepatchoperator">
<span id="id80"></span><h5>CloudSqlInstancePatchOperator<a class="headerlink" href="#cloudsqlinstancepatchoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstancePatchOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstancePatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p>
<p>Updates settings of a Cloud SQL instance.</p>
<p>Caution: This is a partial update, so only the values included for the settings will be
updated.</p>
<p>In the request body, supply the relevant portions of an instance resource, according
to the rules of patch semantics.
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL patch API, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body</a></li>
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing,
the default project_id from the GCP connection is used.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="cloudsqlqueryoperator">
<span id="id81"></span><h5>CloudSqlQueryOperator<a class="headerlink" href="#cloudsqlqueryoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlQueryOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlQueryOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Performs DML or DDL query on an existing Cloud Sql instance. It optionally uses
cloud-sql-proxy to establish secure connection with the database.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>sql</strong> (<em>str</em><em> or </em><em>[</em><em>str</em><em>]</em>) – SQL query or list of queries to run (should be DML or DDL queries –
this operator does not return any data from the database,
so it is useless to pass it DQL queries). Note that it is the responsibility of the
author of the queries to make sure that the queries are idempotent. For example,
you can use CREATE TABLE IF NOT EXISTS to create a table.</li>
<li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – (optional) the parameters to render the SQL query with.</li>
<li><strong>autocommit</strong> (<em>bool</em>) – if True, each command is automatically committed.
(default value: False)</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform for
cloud-sql-proxy authentication.</li>
<li><strong>gcp_cloudsql_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud SQL;
its schema should be gcpcloudsql://.
See <code class="xref py py-class docutils literal notranslate"><span class="pre">CloudSqlDatabaseHook</span></code> for
details on how to define a gcpcloudsql:// connection.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
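<p>A minimal usage sketch (not part of the generated API reference): the <em>gcpcloudsql://</em> connection is supplied here via an environment variable, which is one of the standard ways to define Airflow connections; all credentials, IDs and addresses are placeholders, and the query parameters follow the components described for <code class="docutils literal notranslate"><span class="pre">CloudSqlDatabaseHook</span></code> below.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import os
from datetime import datetime
from airflow import DAG
from airflow.contrib.operators.gcp_sql_operator import CloudSqlQueryOperator

# Placeholder gcpcloudsql:// connection defined via an environment variable.
os.environ['AIRFLOW_CONN_MY_CLOUDSQL_DB'] = (
    'gcpcloudsql://user:password@127.0.0.1:3306/my_database?'
    'database_type=mysql&amp;'
    'project_id=my-project&amp;'
    'location=europe-west1&amp;'
    'instance=my-cloudsql-instance&amp;'
    'use_proxy=True&amp;'
    'sql_proxy_use_tcp=False'
)

with DAG('example_cloudsql_query', schedule_interval=None,
         start_date=datetime(2019, 1, 1)) as dag:
    run_query = CloudSqlQueryOperator(
        task_id='run_query',
        gcp_cloudsql_conn_id='my_cloudsql_db',
        sql='CREATE TABLE IF NOT EXISTS my_table (id INT)',  # idempotent DDL
    )
</pre></div>
</div>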
</div>
</div>
<div class="section" id="cloud-sql-hooks">
<h4>Cloud SQL Hooks<a class="headerlink" href="#cloud-sql-hooks" title="Permalink to this headline"></a></h4>
<span class="target" id="cloudsqlhook"></span><dl class="class">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_sql_hook.</code><code class="descname">CloudSqlHook</code><span class="sig-paren">(</span><em>api_version</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<p>Hook for Google Cloud SQL APIs.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_database">
<code class="descname">create_database</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.create_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_database" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new database inside a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a>.</li>
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set
to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_instance">
<code class="descname">create_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.create_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_instance" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL insert API, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body</a>.</li>
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set
to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_database">
<code class="descname">delete_database</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.delete_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_database" title="Permalink to this definition"></a></dt>
<dd><p>Deletes a database from a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>database</strong> (<em>str</em>) – Name of the database to be deleted in the instance.</li>
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set
to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_instance">
<code class="descname">delete_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.delete_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_instance" title="Permalink to this definition"></a></dt>
<dd><p>Deletes a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set
to None or missing, the default project_id from the GCP connection is used.</li>
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.export_instance">
<code class="descname">export_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.export_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.export_instance" title="Permalink to this definition"></a></dt>
<dd><p>Exports data from a Cloud SQL instance to a Cloud Storage bucket as a SQL dump
or CSV file.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>instance</strong> (<em>str</em>) – Database instance ID of the Cloud SQL instance. This does not include the
project ID.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body</a></li>
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set
to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves connection to Cloud SQL.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Cloud SQL services object.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_database">
<code class="descname">get_database</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_database" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves a database resource from a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>database</strong> (<em>str</em>) – Name of the database in the instance.</li>
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set
to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A Cloud SQL database resource, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource</a>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_instance">
<code class="descname">get_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_instance" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves a resource containing information about a Cloud SQL instance.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set
to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A Cloud SQL instance resource.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
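<p>A minimal sketch of using the hook directly from a Python callable (not part of the generated API reference): the API version and instance name are placeholders, and the default GCP connection is assumed to provide credentials and the default project.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from airflow.contrib.hooks.gcp_sql_hook import CloudSqlHook

def print_instance_info(**context):
    # api_version and the instance name below are placeholders; project_id is
    # omitted so the default from the GCP connection is used.
    hook = CloudSqlHook(api_version='v1beta4')
    instance = hook.get_instance(instance='my-cloudsql-instance')
    print(instance.get('state'), instance.get('databaseVersion'))
</pre></div>
</div>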
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.import_instance">
<code class="descname">import_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.import_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.import_instance" title="Permalink to this definition"></a></dt>
<dd><p>Imports data into a Cloud SQL instance from a SQL dump or CSV file in
Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the
project ID.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/import#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/import#request-body</a></li>
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set
to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_database">
<code class="descname">patch_database</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.patch_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_database" title="Permalink to this definition"></a></dt>
<dd><p>Updates a database resource inside a Cloud SQL instance.</p>
<p>This method supports patch semantics.
See <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li>
<li><strong>database</strong> (<em>str</em>) – Name of the database to be updated in the instance.</li>
<li><strong>body</strong> (<em>dict</em>) – The request body, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a>.</li>
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set
to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_instance">
<code class="descname">patch_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.patch_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_instance" title="Permalink to this definition"></a></dt>
<dd><p>Updates settings of a Cloud SQL instance.</p>
<p>Caution: This is not a partial update, so you must include values for
all the settings that you want to retain.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL patch API, as described in
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body</a>.</li>
<li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li>
<li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set
to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<span class="target" id="cloudsqldatabasehook"></span><dl class="class">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_sql_hook.</code><code class="descname">CloudSqlDatabaseHook</code><span class="sig-paren">(</span><em>gcp_cloudsql_conn_id='google_cloud_sql_default'</em>, <em>default_gcp_project_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p>
<p>Serves DB connection configuration for Google Cloud SQL (Connections
of <em>gcpcloudsql://</em> type).</p>
<p>The hook is a “meta” one: it does not perform an actual connection.
It is there to retrieve all the parameters configured in the gcpcloudsql:// connection,
start/stop the Cloud SQL Proxy if needed, dynamically create a Postgres or MySQL
connection in the Connection table and return an actual Postgres or MySQL hook.
The returned Postgres/MySQL hooks use a direct connection or the Cloud SQL
Proxy socket/TCP, as configured.</p>
<p>Main parameters of the hook are retrieved from the standard URI components:</p>
<ul class="simple">
<li><strong>user</strong> - User name to authenticate to the database (from login of the URI).</li>
<li><strong>password</strong> - Password to authenticate to the database (from password of the URI).</li>
<li><strong>public_ip</strong> - IP to connect to for public connection (from host of the URI).</li>
<li><strong>public_port</strong> - Port to connect to for public connection (from port of the URI).</li>
<li><strong>database</strong> - Database to connect to (from schema of the URI).</li>
</ul>
<p>Remaining parameters are retrieved from the extras (URI query parameters):</p>
<ul class="simple">
<li><dl class="first docutils">
<dt><strong>project_id</strong> - Optional, Google Cloud Platform project where the Cloud SQL</dt>
<dd>instance exists. If missing, the default project id passed to the hook is used.</dd>
</dl>
</li>
<li><strong>instance</strong> - Name of the instance of the Cloud SQL database instance.</li>
<li><strong>location</strong> - The location of the Cloud SQL instance (for example europe-west1).</li>
<li><strong>database_type</strong> - The type of the database instance (MySQL or Postgres).</li>
<li><strong>use_proxy</strong> - (default False) Whether SQL proxy should be used to connect to Cloud
SQL DB.</li>
<li><strong>use_ssl</strong> - (default False) Whether SSL should be used to connect to Cloud SQL DB.
You cannot use proxy and SSL together.</li>
<li><strong>sql_proxy_use_tcp</strong> - (default False) If set to true, TCP is used to connect via
proxy, otherwise UNIX sockets are used.</li>
<li><strong>sql_proxy_binary_path</strong> - Optional path to Cloud SQL Proxy binary. If the binary
is not specified or the binary is not present, it is automatically downloaded.</li>
<li><strong>sql_proxy_version</strong> - Specific version of the proxy to download (for example
v1.13). If not specified, the latest version is downloaded.</li>
<li><strong>sslcert</strong> - Path to client certificate to authenticate when SSL is used.</li>
<li><strong>sslkey</strong> - Path to client private key to authenticate when SSL is used.</li>
<li><strong>sslrootcert</strong> - Path to server’s certificate to authenticate when SSL is used.</li>
</ul>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>gcp_cloudsql_conn_id</strong> (<em>str</em>) – The connection ID of the <em>gcpcloudsql://</em> connection</li>
<li><strong>default_gcp_project_id</strong> (<em>str</em>) – Default project id used if project_id not specified
in the connection URL</li>
</ul>
</td>
</tr>
</tbody>
</table>
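<p>An illustrative <em>gcpcloudsql://</em> URI (not part of the generated API reference) built from the components described above; all values are placeholders, and this variant shows a direct (non-proxy) Postgres connection. The <code class="docutils literal notranslate"><span class="pre">CloudSqlQueryOperator</span></code> normally drives the create/retrieve/delete connection lifecycle of this hook.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import os
from airflow.contrib.hooks.gcp_sql_hook import CloudSqlDatabaseHook

# Placeholder gcpcloudsql:// connection defined via an environment variable:
# login, password, public IP, port and database come from the URI components,
# the remaining parameters from the query string (extras).
os.environ['AIRFLOW_CONN_MY_GCPCLOUDSQL'] = (
    'gcpcloudsql://user:password@35.200.1.1:5432/my_database?'
    'database_type=postgres&amp;'
    'project_id=my-project&amp;'
    'location=europe-west1&amp;'
    'instance=my-cloudsql-instance&amp;'
    'use_proxy=False&amp;'
    'use_ssl=False'
)

hook = CloudSqlDatabaseHook(gcp_cloudsql_conn_id='my_gcpcloudsql')
</pre></div>
</div>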
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.cleanup_database_hook">
<code class="descname">cleanup_database_hook</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.cleanup_database_hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.cleanup_database_hook" title="Permalink to this definition"></a></dt>
<dd><p>Clean up database hook after it was used.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.create_connection">
<code class="descname">create_connection</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.create_connection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.create_connection" title="Permalink to this definition"></a></dt>
<dd><p>Creates a connection in the Connection table, according to whether it uses
the proxy, TCP, UNIX sockets or SSL. The connection ID is randomly generated.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>session</strong> – Session of the SQL Alchemy ORM (automatically generated with
decorator).</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.delete_connection">
<code class="descname">delete_connection</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.delete_connection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.delete_connection" title="Permalink to this definition"></a></dt>
<dd><p>Delete the dynamically created connection from the Connection table.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>session</strong> – Session of the SQL Alchemy ORM (automatically generated with
decorator).</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.free_reserved_port">
<code class="descname">free_reserved_port</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.free_reserved_port"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.free_reserved_port" title="Permalink to this definition"></a></dt>
<dd><p>Frees the reserved TCP port, making it immediately available for use by the Cloud SQL Proxy.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_database_hook">
<code class="descname">get_database_hook</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.get_database_hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_database_hook" title="Permalink to this definition"></a></dt>
<dd><p>Retrieve database hook. This is the actual Postgres or MySQL database hook
that uses proxy or connects directly to the Google Cloud SQL database.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_sqlproxy_runner">
<code class="descname">get_sqlproxy_runner</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.get_sqlproxy_runner"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_sqlproxy_runner" title="Permalink to this definition"></a></dt>
<dd><p>Retrieve Cloud SQL Proxy runner. It is used to manage the proxy
lifecycle per task.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">The Cloud SQL Proxy runner.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner" title="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner">CloudSqlProxyRunner</a></td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.reserve_free_tcp_port">
<code class="descname">reserve_free_tcp_port</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.reserve_free_tcp_port"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.reserve_free_tcp_port" title="Permalink to this definition"></a></dt>
<dd><p>Reserves a free TCP port to be used by the Cloud SQL Proxy.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.retrieve_connection">
<code class="descname">retrieve_connection</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.retrieve_connection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.retrieve_connection" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves the dynamically created connection from the Connection table.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>session</strong> – Session of the SQL Alchemy ORM (automatically generated with
decorator).</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<span class="target" id="cloudsqlproxyrunner"></span><dl class="class">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_sql_hook.</code><code class="descname">CloudSqlProxyRunner</code><span class="sig-paren">(</span><em>path_prefix</em>, <em>instance_specification</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>project_id=None</em>, <em>sql_proxy_version=None</em>, <em>sql_proxy_binary_path=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlProxyRunner"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p>
<p>Downloads and runs cloud-sql-proxy as a subprocess of the Python process.</p>
<p>The cloud-sql-proxy needs to be downloaded and started before we can connect
to the Google Cloud SQL instance over a database connection. It establishes a
secure tunnel to the database and authorizes using the GCP credentials
passed in the configuration.</p>
<p>More details about the proxy can be found here:
<a class="reference external" href="https://cloud.google.com/sql/docs/mysql/sql-proxy">https://cloud.google.com/sql/docs/mysql/sql-proxy</a></p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.get_proxy_version">
<code class="descname">get_proxy_version</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlProxyRunner.get_proxy_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.get_proxy_version" title="Permalink to this definition"></a></dt>
<dd><p>Returns the version of the Cloud SQL Proxy.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.get_socket_path">
<code class="descname">get_socket_path</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlProxyRunner.get_socket_path"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.get_socket_path" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves the UNIX socket path used by Cloud SQL Proxy.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">The dynamically generated path for the socket created by the proxy.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">str</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.start_proxy">
<code class="descname">start_proxy</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlProxyRunner.start_proxy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.start_proxy" title="Permalink to this definition"></a></dt>
<dd><p>Starts the Cloud SQL Proxy.</p>
<p>Remember to stop the proxy once you have started it.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.stop_proxy">
<code class="descname">stop_proxy</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlProxyRunner.stop_proxy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.stop_proxy" title="Permalink to this definition"></a></dt>
<dd><p>Stops the running proxy.</p>
<p>You should stop the proxy when you are done using it.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="cloud-bigtable">
<h3>Cloud Bigtable<a class="headerlink" href="#cloud-bigtable" title="Permalink to this headline"></a></h3>
<div class="section" id="cloud-bigtable-operators">
<h4>Cloud Bigtable Operators<a class="headerlink" href="#cloud-bigtable-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#bigtableinstancecreateoperator"><span class="std std-ref">BigtableInstanceCreateOperator</span></a> : creates a Cloud Bigtable instance.</li>
<li><a class="reference internal" href="#bigtableinstancedeleteoperator"><span class="std std-ref">BigtableInstanceDeleteOperator</span></a> : deletes a Google Cloud Bigtable instance.</li>
<li><a class="reference internal" href="#bigtableclusterupdateoperator"><span class="std std-ref">BigtableClusterUpdateOperator</span></a> : updates the number of nodes in a Google Cloud Bigtable cluster.</li>
<li><a class="reference internal" href="#bigtabletablecreateoperator"><span class="std std-ref">BigtableTableCreateOperator</span></a> : creates a table in a Google Cloud Bigtable instance.</li>
<li><a class="reference internal" href="#bigtabletabledeleteoperator"><span class="std std-ref">BigtableTableDeleteOperator</span></a> : deletes a table in a Google Cloud Bigtable instance.</li>
<li><a class="reference internal" href="#bigtabletablewaitforreplicationsensor"><span class="std std-ref">BigtableTableWaitForReplicationSensor</span></a> : (sensor) waits for a table to be fully replicated.</li>
</ul>
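<p>The operator subsections below do not list constructor arguments on this page, so the following sketch is purely illustrative; the parameter names (for example <code class="docutils literal notranslate"><span class="pre">instance_id</span></code>, <code class="docutils literal notranslate"><span class="pre">main_cluster_id</span></code>) are assumptions and should be verified against the operator source:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Assumed parameter names - verify against
# airflow.contrib.operators.gcp_bigtable_operator before use.
from airflow.contrib.operators.gcp_bigtable_operator import (
    BigtableInstanceCreateOperator, BigtableTableCreateOperator)

create_instance = BigtableInstanceCreateOperator(
    task_id='bigtable_create_instance',
    project_id='my-project',
    instance_id='my-bt-instance',
    main_cluster_id='my-bt-cluster',
    main_cluster_zone='europe-west1-b',
)
create_table = BigtableTableCreateOperator(
    task_id='bigtable_create_table',
    project_id='my-project',
    instance_id='my-bt-instance',
    table_id='my-table',
)
create_instance &gt;&gt; create_table
</pre></div>
</div>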
<div class="section" id="bigtableinstancecreateoperator">
<span id="id82"></span><h5>BigtableInstanceCreateOperator<a class="headerlink" href="#bigtableinstancecreateoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="bigtableinstancedeleteoperator">
<span id="id83"></span><h5>BigtableInstanceDeleteOperator<a class="headerlink" href="#bigtableinstancedeleteoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="bigtableclusterupdateoperator">
<span id="id84"></span><h5>BigtableClusterUpdateOperator<a class="headerlink" href="#bigtableclusterupdateoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="bigtabletablecreateoperator">
<span id="id85"></span><h5>BigtableTableCreateOperator<a class="headerlink" href="#bigtabletablecreateoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="bigtabletabledeleteoperator">
<span id="id86"></span><h5>BigtableTableDeleteOperator<a class="headerlink" href="#bigtabletabledeleteoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="bigtabletablewaitforreplicationsensor">
<span id="id87"></span><h5>BigtableTableWaitForReplicationSensor<a class="headerlink" href="#bigtabletablewaitforreplicationsensor" title="Permalink to this headline"></a></h5>
</div>
</div>
<div class="section" id="cloud-bigtable-hook">
<span id="bigtablehook"></span><h4>Cloud Bigtable Hook<a class="headerlink" href="#cloud-bigtable-hook" title="Permalink to this headline"></a></h4>
</div>
</div>
<div class="section" id="compute-engine">
<h3>Compute Engine<a class="headerlink" href="#compute-engine" title="Permalink to this headline"></a></h3>
<div class="section" id="compute-engine-operators">
<h4>Compute Engine Operators<a class="headerlink" href="#compute-engine-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#gceinstancestartoperator"><span class="std std-ref">GceInstanceStartOperator</span></a> : start an existing Google Compute Engine instance.</li>
<li><a class="reference internal" href="#gceinstancestopoperator"><span class="std std-ref">GceInstanceStopOperator</span></a> : stop an existing Google Compute Engine instance.</li>
<li><a class="reference internal" href="#gcesetmachinetypeoperator"><span class="std std-ref">GceSetMachineTypeOperator</span></a> : change the machine type for a stopped instance.</li>
<li><a class="reference internal" href="#gceinstancetemplatecopyoperator"><span class="std std-ref">GceInstanceTemplateCopyOperator</span></a> : copy the Instance Template, applying
specified changes.</li>
<li><a class="reference internal" href="#gceinstancegroupmanagerupdatetemplateoperator"><span class="std std-ref">GceInstanceGroupManagerUpdateTemplateOperator</span></a> : patch the Instance Group Manager,
replacing source Instance Template URL with the destination one.</li>
</ul>
<p>The operators share a common base operator:</p>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceBaseOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceBaseOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Abstract base operator for Google Compute Engine operators to inherit from.</p>
</dd></dl>
<p>They also use <a class="reference internal" href="#gcehook"><span class="std std-ref">Compute Engine Hook</span></a> to communicate with Google Cloud Platform.</p>
<div class="section" id="gceinstancestartoperator">
<span id="id88"></span><h5>GceInstanceStartOperator<a class="headerlink" href="#gceinstancestartoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceStartOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStartOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></a></p>
<p>Starts an instance in Google Compute Engine.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID where the Compute
Engine Instance exists. If set to None or missing, the default project_id from the GCP connection is
used.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional, The connection ID used to connect to Google Cloud
Platform. Defaults to ‘google_cloud_default’.</li>
<li><strong>api_version</strong> (<em>str</em>) – Optional, API version used (for example v1 or beta). Defaults
to v1.</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Optional. If set to False, body validation is not performed.
Defaults to False.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
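<p>A minimal sketch of how the documented parameters might be used inside a DAG; the zone and instance names are placeholders, and the stop operator from the next section is included only to show chaining:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.gcp_compute_operator import (
    GceInstanceStartOperator, GceInstanceStopOperator)

# Placeholder zone and instance name; project_id falls back to the
# default project of the GCP connection when omitted.
start_instance = GceInstanceStartOperator(
    task_id='gce_instance_start',
    zone='europe-west1-b',
    resource_id='my-instance',
)
stop_instance = GceInstanceStopOperator(
    task_id='gce_instance_stop',
    zone='europe-west1-b',
    resource_id='my-instance',
)
start_instance &gt;&gt; stop_instance
</pre></div>
</div>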
</div>
<div class="section" id="gceinstancestopoperator">
<span id="id89"></span><h5>GceInstanceStopOperator<a class="headerlink" href="#gceinstancestopoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceStopOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></a></p>
<p>Stops an instance in Google Compute Engine.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID where the Compute
Engine Instance exists. If set to None or missing, the default project_id from the GCP connection is
used.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional, The connection ID used to connect to Google Cloud
Platform. Defaults to ‘google_cloud_default’.</li>
<li><strong>api_version</strong> (<em>str</em>) – Optional, API version used (for example v1 or beta). Defaults
to v1.</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Optional. If set to False, body validation is not performed.
Defaults to False.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="gcesetmachinetypeoperator">
<span id="id90"></span><h5>GceSetMachineTypeOperator<a class="headerlink" href="#gcesetmachinetypeoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceSetMachineTypeOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceSetMachineTypeOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></a></p>
<dl class="docutils">
<dt>Changes the machine type for a stopped instance to the machine type specified in</dt>
<dd>the request.</dd>
</dl>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li>
<li><strong>body</strong> (<em>dict</em>) – Body required by the Compute Engine setMachineType API, as described in
<a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType#request-body">https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType#request-body</a></li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID where the Compute
Engine Instance exists. If set to None or missing, the default project_id from the GCP connection
is used.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional, The connection ID used to connect to Google Cloud
Platform. Defaults to ‘google_cloud_default’.</li>
<li><strong>api_version</strong> (<em>str</em>) – Optional, API version used (for example v1 or beta). Defaults
to v1.</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Optional. If set to False, body validation is not performed.
Defaults to False.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
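<p>A hedged sketch of setting a machine type; the body follows the setMachineType request body linked above, and the zone, instance and machine type values are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.gcp_compute_operator import GceSetMachineTypeOperator

# Placeholder names; machineType uses the partial URL form expected by
# the Compute Engine setMachineType API.
set_machine_type = GceSetMachineTypeOperator(
    task_id='gce_set_machine_type',
    zone='europe-west1-b',
    resource_id='my-instance',
    body={'machineType': 'zones/europe-west1-b/machineTypes/n1-standard-1'},
)
</pre></div>
</div>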
</div>
<div class="section" id="gceinstancetemplatecopyoperator">
<span id="id91"></span><h5>GceInstanceTemplateCopyOperator<a class="headerlink" href="#gceinstancetemplatecopyoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceTemplateCopyOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceTemplateCopyOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceTemplateCopyOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceTemplateCopyOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></a></p>
<p>Copies the instance template, applying specified changes.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Instance Template</li>
<li><strong>body_patch</strong> (<em>dict</em>) – Patch to the body of the instanceTemplates object, following RFC 7386
PATCH semantics. The body_patch content follows
<a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates">https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates</a>.
The name field is required, as the template must be renamed;
all the other fields are optional. It is important to follow PATCH semantics:
arrays are replaced fully, so if you need to update an array you should
provide the whole target array as the patch element.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID where the Compute
Engine Instance exists. If set to None or missing, the default project_id from the GCP connection
is used.</li>
<li><strong>request_id</strong> (<em>str</em>) – Optional, unique request_id that you might add to achieve
full idempotence (for example, when the client call times out, repeating the request
with the same request_id will not create a new instance template).
It should be in UUID format as defined in RFC 4122.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional, The connection ID used to connect to Google Cloud
Platform. Defaults to ‘google_cloud_default’.</li>
<li><strong>api_version</strong> (<em>str</em>) – Optional, API version used (for example v1 or beta). Defaults
to v1.</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Optional. If set to False, body validation is not performed.
Defaults to False.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
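<p>An illustrative sketch of copying a template; only the <code class="docutils literal notranslate"><span class="pre">name</span></code> field is required in <code class="docutils literal notranslate"><span class="pre">body_patch</span></code>, and all values below are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.gcp_compute_operator import GceInstanceTemplateCopyOperator

# resource_id is the existing template to copy; body_patch follows the
# instanceTemplates resource with PATCH semantics described above.
copy_template = GceInstanceTemplateCopyOperator(
    task_id='gce_instance_template_copy',
    resource_id='my-template',
    body_patch={
        'name': 'my-template-copy',
        'description': 'Copy created by Airflow',
    },
)
</pre></div>
</div>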
</div>
<div class="section" id="gceinstancegroupmanagerupdatetemplateoperator">
<span id="id92"></span><h5>GceInstanceGroupManagerUpdateTemplateOperator<a class="headerlink" href="#gceinstancegroupmanagerupdatetemplateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceGroupManagerUpdateTemplateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceGroupManagerUpdateTemplateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceGroupManagerUpdateTemplateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceGroupManagerUpdateTemplateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></a></p>
<p>Patches the Instance Group Manager, replacing the source template URL with the
destination one. API v1 does not have update/patch operations for the Instance
Group Manager, so you must use the beta or a newer API version. Beta is the default.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Instance Group Manager</li>
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the Instance Group Manager exists.</li>
<li><strong>source_template</strong> (<em>str</em>) – URL of the template to replace.</li>
<li><strong>destination_template</strong> (<em>str</em>) – URL of the target template.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID where the Compute
Engine Instance exists. If set to None or missing, the default project_id from the GCP connection is
used.</li>
<li><strong>request_id</strong> (<em>str</em>) – Optional, unique request_id that you might add to achieve
full idempotence (for example, when the client call times out, repeating the request
with the same request_id will not create a new instance template).
It should be in UUID format as defined in RFC 4122.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional, The connection ID used to connect to Google Cloud
Platform. Defaults to ‘google_cloud_default’.</li>
<li><strong>api_version</strong> (<em>str</em>) – Optional, API version used (for example v1 or beta). Defaults
to beta, since API v1 cannot patch the Instance Group Manager.</li>
<li><strong>validate_body</strong> (<em>bool</em>) – Optional. If set to False, body validation is not performed.
Defaults to False.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
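<p>An illustrative sketch of swapping templates in an Instance Group Manager; the template URLs and resource names below are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.gcp_compute_operator import (
    GceInstanceGroupManagerUpdateTemplateOperator)

# Placeholder URLs - the operator replaces source_template with
# destination_template wherever the Instance Group Manager references it.
update_igm_template = GceInstanceGroupManagerUpdateTemplateOperator(
    task_id='gce_igm_update_template',
    zone='europe-west1-b',
    resource_id='my-instance-group-manager',
    source_template=('https://www.googleapis.com/compute/beta/projects/'
                     'my-project/global/instanceTemplates/my-template'),
    destination_template=('https://www.googleapis.com/compute/beta/projects/'
                          'my-project/global/instanceTemplates/my-template-copy'),
)
</pre></div>
</div>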
</div>
</div>
<div class="section" id="compute-engine-hook">
<span id="gcehook"></span><h4>Compute Engine Hook<a class="headerlink" href="#compute-engine-hook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_compute_hook.</code><code class="descname">GceHook</code><span class="sig-paren">(</span><em>api_version='v1'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<p>Hook for Google Compute Engine APIs.</p>
<p>All the methods in the hook where project_id is used must be called with
keyword arguments rather than positional.</p>
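<p>For example, a hedged sketch of calling the hook directly with keyword arguments (zone and instance names are placeholders):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.gcp_compute_hook import GceHook

hook = GceHook(api_version='v1', gcp_conn_id='google_cloud_default')
# project_id is omitted, so the default project of the GCP connection is used.
hook.start_instance(zone='europe-west1-b', resource_id='my-instance')
hook.stop_instance(zone='europe-west1-b', resource_id='my-instance')
</pre></div>
</div>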
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves connection to Google Compute Engine.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Compute Engine services object</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.get_instance_group_manager">
<code class="descname">get_instance_group_manager</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.get_instance_group_manager"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.get_instance_group_manager" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves Instance Group Manager by project_id, zone and resource_id.
Must be called with keyword arguments rather than positional.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the Instance Group Manager exists</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Instance Group Manager</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the
Compute Engine Instance exists. If set to None or missing,
the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Instance group manager representation as object according to
<a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/beta/instanceGroupManagers">https://cloud.google.com/compute/docs/reference/rest/beta/instanceGroupManagers</a></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.get_instance_template">
<code class="descname">get_instance_template</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.get_instance_template"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.get_instance_template" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves instance template by project_id and resource_id.
Must be called with keyword arguments rather than positional.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>resource_id</strong> (<em>str</em>) – Name of the instance template</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the
Compute Engine Instance exists. If set to None or missing,
the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Instance template representation as object according to
<a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates">https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates</a></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.insert_instance_template">
<code class="descname">insert_instance_template</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.insert_instance_template"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.insert_instance_template" title="Permalink to this definition"></a></dt>
<dd><p>Inserts an instance template using the specified body.
Must be called with keyword arguments rather than positional.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>body</strong> (<em>dict</em>) – Instance template representation as object according to
<a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates">https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates</a></li>
<li><strong>request_id</strong> (<em>str</em>) – Optional, unique request_id that you might add to achieve
full idempotence (for example, when the client call times out, repeating the request
with the same request_id will not create a new instance template).
It should be in UUID format as defined in RFC 4122.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the
Compute Engine Instance exists. If set to None or missing,
the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.patch_instance_group_manager">
<code class="descname">patch_instance_group_manager</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.patch_instance_group_manager"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.patch_instance_group_manager" title="Permalink to this definition"></a></dt>
<dd><p>Patches Instance Group Manager with the specified body.
Must be called with keyword arguments rather than positional.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the Instance Group Manager exists</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Instance Group Manager</li>
<li><strong>body</strong> (<em>dict</em>) – Instance Group Manager representation as json-merge-patch object
according to
<a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/beta/instanceTemplates/patch">https://cloud.google.com/compute/docs/reference/rest/beta/instanceTemplates/patch</a></li>
<li><strong>request_id</strong> (<em>str</em>) – Optional, unique request_id that you might add to achieve
full idempotence (for example, when the client call times out, repeating the request
with the same request_id will not create a new instance template).
It should be in UUID format as defined in RFC 4122.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the
Compute Engine Instance exists. If set to None or missing,
the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.set_machine_type">
<code class="descname">set_machine_type</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.set_machine_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.set_machine_type" title="Permalink to this definition"></a></dt>
<dd><p>Sets machine type of an instance defined by project_id, zone and resource_id.
Must be called with keyword arguments rather than positional.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource</li>
<li><strong>body</strong> (<em>dict</em>) – Body required by the Compute Engine setMachineType API,
as described in
<a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType">https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType</a></li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the
Compute Engine Instance exists. If set to None or missing,
the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.start_instance">
<code class="descname">start_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.start_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.start_instance" title="Permalink to this definition"></a></dt>
<dd><p>Starts an existing instance defined by project_id, zone and resource_id.
Must be called with keyword arguments rather than positional.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the
Compute Engine Instance exists. If set to None or missing,
the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.stop_instance">
<code class="descname">stop_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.stop_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.stop_instance" title="Permalink to this definition"></a></dt>
<dd><p>Stops an instance defined by project_id, zone and resource_id.
Must be called with keyword arguments rather than positional.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists</li>
<li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the
Compute Engine Instance exists. If set to None or missing,
the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">members:</th><td class="field-body"></td>
</tr>
</tbody>
</table>
</div>
</div>
<div class="section" id="cloud-functions">
<h3>Cloud Functions<a class="headerlink" href="#cloud-functions" title="Permalink to this headline"></a></h3>
<div class="section" id="cloud-functions-operators">
<h4>Cloud Functions Operators<a class="headerlink" href="#cloud-functions-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#gcffunctiondeployoperator"><span class="std std-ref">GcfFunctionDeployOperator</span></a> : deploy Google Cloud Function to Google Cloud Platform</li>
<li><a class="reference internal" href="#gcffunctiondeleteoperator"><span class="std std-ref">GcfFunctionDeleteOperator</span></a> : delete Google Cloud Function in Google Cloud Platform</li>
</ul>
<p>They also use <a class="reference internal" href="#gcfhook"><span class="std std-ref">Cloud Functions Hook</span></a> to communicate with Google Cloud Platform.</p>
<div class="section" id="gcffunctiondeployoperator">
<span id="id93"></span><h5>GcfFunctionDeployOperator<a class="headerlink" href="#gcffunctiondeployoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_function_operator.</code><code class="descname">GcfFunctionDeployOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeployOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates a function in Google Cloud Functions.
If a function with this name already exists, it will be updated.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>location</strong> (<em>str</em>) – Google Cloud Platform region where the function should be created.</li>
<li><strong>body</strong> (<em>dict</em><em> or </em><em>google.cloud.functions.v1.CloudFunction</em>) – Body of the Cloud Functions definition. The body must be a
Cloud Functions dictionary as described in:
<a class="reference external" href="https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions">https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions</a>
. Different API versions require different variants of the Cloud Functions
dictionary.</li>
<li><strong>project_id</strong> (<em>str</em>) – (Optional) Google Cloud Platform project ID where the function
should be created.</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – (Optional) The connection ID used to connect to Google Cloud
Platform - default ‘google_cloud_default’.</li>
<li><strong>api_version</strong> (<em>str</em>) – (Optional) API version used (for example v1 - default - or
v1beta1).</li>
<li><strong>zip_path</strong> (<em>str</em>) – Path to zip file containing source code of the function. If the path
is set, the sourceUploadUrl should not be specified in the body or it should
be empty. Then the zip file will be uploaded using the upload URL generated
via generateUploadUrl from the Cloud Functions API.</li>
<li><strong>validate_body</strong> (<em>bool</em>) – If set to False, body validation is not performed.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
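<p>A hedged sketch of a deployment task; the location, function names and runtime below are placeholders, and the body fields follow the projects.locations.functions resource linked above:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.gcp_function_operator import GcfFunctionDeployOperator

# Placeholder values; because zip_path is set, sourceUploadUrl is left out
# of the body and the source is uploaded via generateUploadUrl.
deploy_function = GcfFunctionDeployOperator(
    task_id='gcf_deploy',
    location='europe-west1',
    body={
        'name': 'projects/my-project/locations/europe-west1/functions/my-function',
        'entryPoint': 'helloWorld',
        'runtime': 'nodejs8',
        'httpsTrigger': {},
    },
    zip_path='/path/to/function.zip',
)
</pre></div>
</div>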
</div>
<div class="section" id="gcffunctiondeleteoperator">
<span id="id94"></span><h5>GcfFunctionDeleteOperator<a class="headerlink" href="#gcffunctiondeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_function_operator.</code><code class="descname">GcfFunctionDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Deletes the specified function from Google Cloud Functions.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>name</strong> (<em>str</em>) – A fully-qualified function name, matching
the pattern: <cite>^projects/[^/]+/locations/[^/]+/functions/[^/]+$</cite></li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID to use to connect to Google Cloud Platform.</li>
<li><strong>api_version</strong> (<em>str</em>) – API version used (for example v1 or v1beta1).</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
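<p>A minimal sketch of a delete task; the fully-qualified name follows the pattern documented above and uses placeholder project, location and function names:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.operators.gcp_function_operator import GcfFunctionDeleteOperator

# Placeholder name matching ^projects/[^/]+/locations/[^/]+/functions/[^/]+$
delete_function = GcfFunctionDeleteOperator(
    task_id='gcf_delete',
    name='projects/my-project/locations/europe-west1/functions/my-function',
)
</pre></div>
</div>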
</div>
</div>
<div class="section" id="cloud-functions-hook">
<span id="gcfhook"></span><h4>Cloud Functions Hook<a class="headerlink" href="#cloud-functions-hook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_function_hook.</code><code class="descname">GcfHook</code><span class="sig-paren">(</span><em>api_version</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<p>Hook for the Google Cloud Functions APIs.</p>
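<p>A brief sketch of direct hook usage, with a placeholder function name; note that <code class="docutils literal notranslate"><span class="pre">api_version</span></code> has no default and must be provided:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.gcp_function_hook import GcfHook

hook = GcfHook(api_version='v1')
# Placeholder fully-qualified function name.
function = hook.get_function(
    'projects/my-project/locations/europe-west1/functions/my-function')
</pre></div>
</div>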
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.create_new_function">
<code class="descname">create_new_function</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.create_new_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.create_new_function" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new function in Cloud Functions in the location specified in the body.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>location</strong> (<em>str</em>) – The location of the function.</li>
<li><strong>body</strong> (<em>dict</em>) – The body required by the Cloud Functions insert API.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the function belongs.
If set to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.delete_function">
<code class="descname">delete_function</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.delete_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.delete_function" title="Permalink to this definition"></a></dt>
<dd><p>Deletes the specified Cloud Function.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – The name of the function.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves the connection to Cloud Functions.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Cloud Function services object.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.get_function">
<code class="descname">get_function</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.get_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.get_function" title="Permalink to this definition"></a></dt>
<dd><p>Returns the Cloud Function with the given name.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – Name of the function.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A Cloud Functions object representing the function.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.update_function">
<code class="descname">update_function</code><span class="sig-paren">(</span><em>name</em>, <em>body</em>, <em>update_mask</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.update_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.update_function" title="Permalink to this definition"></a></dt>
<dd><p>Updates the Cloud Function according to the specified update mask.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>name</strong> (<em>str</em>) – The name of the function.</li>
<li><strong>body</strong> (<em>dict</em>) – The body required by the cloud function patch API.</li>
<li><strong>update_mask</strong> (<em>[</em><em>str</em><em>]</em>) – The update mask: an array of fields that should be patched.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.upload_function_zip">
<code class="descname">upload_function_zip</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.upload_function_zip"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.upload_function_zip" title="Permalink to this definition"></a></dt>
<dd><p>Uploads the zip file with the function sources.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>location</strong> (<em>str</em>) – The location where the function is created.</li>
<li><strong>zip_path</strong> (<em>str</em>) – The path of the valid .zip file to upload.</li>
<li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the function belongs.
If set to None or missing, the default project_id from the GCP connection is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The upload URL that was returned by generateUploadUrl method.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="cloud-dataflow">
<h3>Cloud DataFlow<a class="headerlink" href="#cloud-dataflow" title="Permalink to this headline"></a></h3>
<div class="section" id="dataflow-operators">
<h4>DataFlow Operators<a class="headerlink" href="#dataflow-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#dataflowjavaoperator"><span class="std std-ref">DataFlowJavaOperator</span></a> : launching Cloud Dataflow jobs written in Java.</li>
<li><a class="reference internal" href="#dataflowtemplateoperator"><span class="std std-ref">DataflowTemplateOperator</span></a> : launching a templated Cloud DataFlow batch job.</li>
<li><a class="reference internal" href="#dataflowpythonoperator"><span class="std std-ref">DataFlowPythonOperator</span></a> : launching Cloud Dataflow jobs written in python.</li>
</ul>
<div class="section" id="dataflowjavaoperator">
<span id="id95"></span><h5>DataFlowJavaOperator<a class="headerlink" href="#dataflowjavaoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataFlowJavaOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowJavaOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Java Cloud DataFlow batch job. The parameters of the operation
will be passed to the job.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more detail on job submission have a look at the reference:
<a class="reference external" href="https://cloud.google.com/dataflow/pipelines/specifying-exec-params">https://cloud.google.com/dataflow/pipelines/specifying-exec-params</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>jar</strong> (<em>string</em>) – The reference to a self-executing DataFlow jar.</li>
<li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job options.</li>
<li><strong>options</strong> (<em>dict</em>) – Map of job specific options.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud
Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google
Cloud Platform for the dataflow job status while the job is in the
JOB_STATE_RUNNING state.</li>
<li><strong>job_class</strong> (<em>string</em>) – The name of the dataflow job class to be executed; it
is often not the main class configured in the dataflow jar file.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p>Both <code class="docutils literal notranslate"><span class="pre">jar</span></code> and <code class="docutils literal notranslate"><span class="pre">options</span></code> are templated, so you can use variables in them.</p>
<p>Note that both
<code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> and <code class="docutils literal notranslate"><span class="pre">options</span></code> are merged to form the pipeline
execution parameters; <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> is expected to hold
high-level options, such as project and zone information, that
apply to all dataflow operators in the DAG.</p>
<p>It is good practice to define dataflow_* parameters such as the project, zone and
staging location in the default_args of the DAG.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;dataflow_default_options&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;project&#39;</span><span class="p">:</span> <span class="s1">&#39;my-gcp-project&#39;</span><span class="p">,</span>
<span class="s1">&#39;zone&#39;</span><span class="p">:</span> <span class="s1">&#39;europe-west1-d&#39;</span><span class="p">,</span>
<span class="s1">&#39;stagingLocation&#39;</span><span class="p">:</span> <span class="s1">&#39;gs://my-staging-bucket/staging/&#39;</span>
<span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<p>You need to pass the path to your dataflow as a file reference with the <code class="docutils literal notranslate"><span class="pre">jar</span></code>
parameter, the jar needs to be a self executing jar (see documentation here:
<a class="reference external" href="https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar">https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar</a>).
Use <code class="docutils literal notranslate"><span class="pre">options</span></code> to pass on options to your job.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataFlowJavaOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;datapflow_example&#39;</span><span class="p">,</span>
<span class="n">jar</span><span class="o">=</span><span class="s1">&#39;{{var.value.gcp_dataflow_base}}pipeline/build/libs/pipeline-example-1.0.jar&#39;</span><span class="p">,</span>
<span class="n">options</span><span class="o">=</span><span class="p">{</span>
<span class="s1">&#39;autoscalingAlgorithm&#39;</span><span class="p">:</span> <span class="s1">&#39;BASIC&#39;</span><span class="p">,</span>
<span class="s1">&#39;maxNumWorkers&#39;</span><span class="p">:</span> <span class="s1">&#39;50&#39;</span><span class="p">,</span>
<span class="s1">&#39;start&#39;</span><span class="p">:</span> <span class="s1">&#39;{{ds}}&#39;</span><span class="p">,</span>
<span class="s1">&#39;partitionType&#39;</span><span class="p">:</span> <span class="s1">&#39;DAY&#39;</span><span class="p">,</span>
<span class="s1">&#39;labels&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;foo&#39;</span> <span class="p">:</span> <span class="s1">&#39;bar&#39;</span><span class="p">}</span>
<span class="p">},</span>
<span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">&#39;gcp-airflow-service-account&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">my</span><span class="o">-</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;owner&#39;</span><span class="p">:</span> <span class="s1">&#39;airflow&#39;</span><span class="p">,</span>
<span class="s1">&#39;depends_on_past&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;start_date&#39;</span><span class="p">:</span>
<span class="p">(</span><span class="mi">2016</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span>
<span class="s1">&#39;email&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;alex@vanboxel.be&#39;</span><span class="p">],</span>
<span class="s1">&#39;email_on_failure&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;email_on_retry&#39;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
<span class="s1">&#39;retries&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
<span class="s1">&#39;retry_delay&#39;</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">minutes</span><span class="o">=</span><span class="mi">30</span><span class="p">),</span>
<span class="s1">&#39;dataflow_default_options&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;project&#39;</span><span class="p">:</span> <span class="s1">&#39;my-gcp-project&#39;</span><span class="p">,</span>
<span class="s1">&#39;zone&#39;</span><span class="p">:</span> <span class="s1">&#39;us-central1-f&#39;</span><span class="p">,</span>
<span class="s1">&#39;stagingLocation&#39;</span><span class="p">:</span> <span class="s1">&#39;gs://bucket/tmp/dataflow/staging/&#39;</span><span class="p">,</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="n">dag</span> <span class="o">=</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">&#39;test-dag&#39;</span><span class="p">,</span> <span class="n">default_args</span><span class="o">=</span><span class="n">default_args</span><span class="p">)</span>
<span class="n">task</span> <span class="o">=</span> <span class="n">DataFlowJavaOperator</span><span class="p">(</span>
<span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">&#39;gcp_default&#39;</span><span class="p">,</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;normalize-cal&#39;</span><span class="p">,</span>
<span class="n">jar</span><span class="o">=</span><span class="s1">&#39;{{var.value.gcp_dataflow_base}}pipeline-ingress-cal-normalize-1.0.jar&#39;</span><span class="p">,</span>
<span class="n">options</span><span class="o">=</span><span class="p">{</span>
<span class="s1">&#39;autoscalingAlgorithm&#39;</span><span class="p">:</span> <span class="s1">&#39;BASIC&#39;</span><span class="p">,</span>
<span class="s1">&#39;maxNumWorkers&#39;</span><span class="p">:</span> <span class="s1">&#39;50&#39;</span><span class="p">,</span>
<span class="s1">&#39;start&#39;</span><span class="p">:</span> <span class="s1">&#39;{{ds}}&#39;</span><span class="p">,</span>
<span class="s1">&#39;partitionType&#39;</span><span class="p">:</span> <span class="s1">&#39;DAY&#39;</span>
<span class="p">},</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="dataflowtemplateoperator">
<span id="id96"></span><h5>DataflowTemplateOperator<a class="headerlink" href="#dataflowtemplateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataflowTemplateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataflowTemplateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Templated Cloud DataFlow batch job. The parameters of the operation
will be passed to the job.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>template</strong> (<em>string</em>) – The reference to the DataFlow template.</li>
<li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job environment options.</li>
<li><strong>parameters</strong> (<em>dict</em>) – Map of job specific parameters for the template.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud
Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google
Cloud Platform for the dataflow job status while the job is in the
JOB_STATE_RUNNING state.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p>It is good practice to define dataflow_* parameters such as the project, zone and
staging location in the default_args of the DAG.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters</a>
<a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment</a></p>
</div>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;dataflow_default_options&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;project&#39;</span><span class="p">:</span> <span class="s1">&#39;my-gcp-project&#39;</span>
<span class="s1">&#39;zone&#39;</span><span class="p">:</span> <span class="s1">&#39;europe-west1-d&#39;</span><span class="p">,</span>
<span class="s1">&#39;tempLocation&#39;</span><span class="p">:</span> <span class="s1">&#39;gs://my-staging-bucket/staging/&#39;</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
<p>You need to pass the path to your dataflow template as a file reference with the
<code class="docutils literal notranslate"><span class="pre">template</span></code> parameter. Use <code class="docutils literal notranslate"><span class="pre">parameters</span></code> to pass on parameters to your job.
Use <code class="docutils literal notranslate"><span class="pre">environment</span></code> to pass on runtime environment variables to your job.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataflowTemplateOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;datapflow_example&#39;</span><span class="p">,</span>
<span class="n">template</span><span class="o">=</span><span class="s1">&#39;{{var.value.gcp_dataflow_base}}&#39;</span><span class="p">,</span>
<span class="n">parameters</span><span class="o">=</span><span class="p">{</span>
<span class="s1">&#39;inputFile&#39;</span><span class="p">:</span> <span class="s2">&quot;gs://bucket/input/my_input.txt&quot;</span><span class="p">,</span>
<span class="s1">&#39;outputFile&#39;</span><span class="p">:</span> <span class="s2">&quot;gs://bucket/output/my_output.txt&quot;</span>
<span class="p">},</span>
<span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">&#39;gcp-airflow-service-account&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">my</span><span class="o">-</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
<p><code class="docutils literal notranslate"><span class="pre">template</span></code>, <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> and <code class="docutils literal notranslate"><span class="pre">parameters</span></code> are templated so you can
use variables in them.</p>
<p>Note that <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> is expected to hold high-level options,
such as project information, that apply to all dataflow operators in the DAG.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters</a>
<a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment</a>
For more detail on job template execution have a look at the reference:
<a class="reference external" href="https://cloud.google.com/dataflow/docs/templates/executing-templates">https://cloud.google.com/dataflow/docs/templates/executing-templates</a></p>
</div>
</dd></dl>
</div>
<div class="section" id="dataflowpythonoperator">
<span id="id97"></span><h5>DataFlowPythonOperator<a class="headerlink" href="#dataflowpythonoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataFlowPythonOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowPythonOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Launch Cloud Dataflow jobs written in Python. Note that both
dataflow_default_options and options are merged to form the pipeline
execution parameters; dataflow_default_options is expected to hold
high-level options, such as project and zone information, that
apply to all dataflow operators in the DAG.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more detail on job submission have a look at the reference:
<a class="reference external" href="https://cloud.google.com/dataflow/pipelines/specifying-exec-params">https://cloud.google.com/dataflow/pipelines/specifying-exec-params</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>py_file</strong> (<em>string</em>) – Reference to the python dataflow pipeline file.py, e.g.,
/some/local/file/path/to/your/python/pipeline/file.</li>
<li><strong>py_options</strong> – Additional python options.</li>
<li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job options.</li>
<li><strong>options</strong> (<em>dict</em>) – Map of job specific options.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud
Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google
Cloud Platform for the dataflow job status while the job is in the
JOB_STATE_RUNNING state.</li>
</ul>
</td>
</tr>
</tbody>
</table>
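<p>A minimal usage sketch; the pipeline path, task id and options below are
illustrative assumptions, not values required by the operator:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
# Hedged sketch: assumes 'dag' is an existing DAG object and that the pipeline
# file path points at a real Beam pipeline (both hypothetical here).
t1 = DataFlowPythonOperator(
    task_id='dataflow_python_example',
    py_file='/path/to/your/pipeline.py',  # hypothetical local pipeline file
    options={
        'autoscalingAlgorithm': 'BASIC',
        'maxNumWorkers': '50',
        'start': '{{ds}}',
    },
    gcp_conn_id='google_cloud_default',
    dag=dag)
</pre></div>
</div>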
<dl class="method">
<dt id="airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowPythonOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>Execute the python dataflow job.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="dataflowhook">
<h4>DataFlowHook<a class="headerlink" href="#dataflowhook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_dataflow_hook.</code><code class="descname">DataFlowHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataflow_hook.html#DataFlowHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataflow_hook.html#DataFlowHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Google Cloud Dataflow service object.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="cloud-dataproc">
<h3>Cloud DataProc<a class="headerlink" href="#cloud-dataproc" title="Permalink to this headline"></a></h3>
<div class="section" id="dataproc-operators">
<h4>DataProc Operators<a class="headerlink" href="#dataproc-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#dataprocclustercreateoperator"><span class="std std-ref">DataprocClusterCreateOperator</span></a> : Create a new cluster on Google Cloud Dataproc.</li>
<li><a class="reference internal" href="#dataprocclusterdeleteoperator"><span class="std std-ref">DataprocClusterDeleteOperator</span></a> : Delete a cluster on Google Cloud Dataproc.</li>
<li><a class="reference internal" href="#dataprocclusterscaleoperator"><span class="std std-ref">DataprocClusterScaleOperator</span></a> : Scale up or down a cluster on Google Cloud Dataproc.</li>
<li><a class="reference internal" href="#dataprocpigoperator"><span class="std std-ref">DataProcPigOperator</span></a> : Start a Pig query Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprochiveoperator"><span class="std std-ref">DataProcHiveOperator</span></a> : Start a Hive query Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprocsparksqloperator"><span class="std std-ref">DataProcSparkSqlOperator</span></a> : Start a Spark SQL query Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprocsparkoperator"><span class="std std-ref">DataProcSparkOperator</span></a> : Start a Spark Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprochadoopoperator"><span class="std std-ref">DataProcHadoopOperator</span></a> : Start a Hadoop Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprocpysparkoperator"><span class="std std-ref">DataProcPySparkOperator</span></a> : Start a PySpark Job on a Cloud DataProc cluster.</li>
<li><a class="reference internal" href="#dataprocworkflowtemplateinstantiateoperator"><span class="std std-ref">DataprocWorkflowTemplateInstantiateOperator</span></a> : Instantiate a WorkflowTemplate on Google Cloud Dataproc.</li>
<li><a class="reference internal" href="#dataprocworkflowtemplateinstantiateinlineoperator"><span class="std std-ref">DataprocWorkflowTemplateInstantiateInlineOperator</span></a> : Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc.</li>
</ul>
<div class="section" id="dataprocclustercreateoperator">
<span id="id98"></span><h5>DataprocClusterCreateOperator<a class="headerlink" href="#dataprocclustercreateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterCreateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Create a new cluster on Google Cloud Dataproc. The operator will wait until the
creation is successful or an error occurs in the creation process.</p>
<p>The parameters allow you to configure the cluster. Please refer to</p>
<p><a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters">https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters</a></p>
<p>for a detailed explanation of the different parameters. Most of the configuration
parameters detailed in the link are available as parameters to this operator.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster to create. (templated)</li>
<li><strong>project_id</strong> (<em>str</em>) – The ID of the google cloud project in which
to create the cluster. (templated)</li>
<li><strong>num_workers</strong> (<em>int</em>) – The # of workers to spin up. If set to zero, the
cluster is spun up in single-node mode</li>
<li><strong>storage_bucket</strong> (<em>string</em>) – The storage bucket to use; setting it to None lets dataproc
generate a custom one for you</li>
<li><strong>init_actions_uris</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – List of GCS URIs containing
dataproc initialization scripts</li>
<li><strong>init_action_timeout</strong> (<em>string</em>) – Amount of time executable scripts in
init_actions_uris have to complete</li>
<li><strong>metadata</strong> (<em>dict</em>) – dict of key-value google compute engine metadata entries
to add to all instances</li>
<li><strong>image_version</strong> (<em>string</em>) – the version of software inside the Dataproc cluster</li>
<li><strong>custom_image</strong> (<em>string</em>) – custom Dataproc image; for more info see
<a class="reference external" href="https://cloud.google.com/dataproc/docs/guides/dataproc-images">https://cloud.google.com/dataproc/docs/guides/dataproc-images</a></li>
<li><strong>properties</strong> (<em>dict</em>) – dict of properties to set on
config files (e.g. spark-defaults.conf), see
<a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig">https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig</a></li>
<li><strong>master_machine_type</strong> (<em>string</em>) – Compute engine machine type to use for the master node</li>
<li><strong>master_disk_type</strong> (<em>string</em>) – Type of the boot disk for the master node
(default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>).
Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or
<code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</li>
<li><strong>master_disk_size</strong> (<em>int</em>) – Disk size for the master node</li>
<li><strong>worker_machine_type</strong> (<em>string</em>) – Compute engine machine type to use for the worker nodes</li>
<li><strong>worker_disk_type</strong> (<em>string</em>) – Type of the boot disk for the worker node
(default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>).
Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or
<code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</li>
<li><strong>worker_disk_size</strong> (<em>int</em>) – Disk size for the worker nodes</li>
<li><strong>num_preemptible_workers</strong> (<em>int</em>) – The # of preemptible worker nodes to spin up</li>
<li><strong>labels</strong> (<em>dict</em>) – dict of labels to add to the cluster</li>
<li><strong>zone</strong> (<em>string</em>) – The zone where the cluster will be located. (templated)</li>
<li><strong>network_uri</strong> (<em>string</em>) – The network uri to be used for machine communication, cannot be
specified with subnetwork_uri</li>
<li><strong>subnetwork_uri</strong> (<em>string</em>) – The subnetwork uri to be used for machine communication,
cannot be specified with network_uri</li>
<li><strong>internal_ip_only</strong> (<em>bool</em>) – If true, all instances in the cluster will only
have internal IP addresses. This can only be enabled for subnetwork
enabled networks</li>
<li><strong>tags</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – The GCE tags to add to all instances</li>
<li><strong>region</strong> – leave as ‘global’, might become relevant in the future. (templated)</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>service_account</strong> (<em>string</em>) – The service account of the dataproc instances.</li>
<li><strong>service_account_scopes</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – The URIs of service account scopes to be included.</li>
<li><strong>idle_delete_ttl</strong> (<em>int</em>) – The longest duration that the cluster is kept alive while
staying idle. Passing this threshold causes the cluster to be auto-deleted.
A duration in seconds.</li>
<li><strong>auto_delete_time</strong> (<em>datetime.datetime</em>) – The time when the cluster will be auto-deleted.</li>
<li><strong>auto_delete_ttl</strong> (<em>int</em>) – The lifetime of the cluster; the cluster will be
auto-deleted at the end of this duration.
A duration in seconds. (If auto_delete_time is set, this parameter is ignored.)</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Type:</th><td class="field-body"><p class="first last">custom_image: string</p>
</td>
</tr>
</tbody>
</table>
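<p>A minimal usage sketch; the project, cluster name, zone and machine types below are
illustrative assumptions rather than required values:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
# Hedged sketch: assumes 'dag' is an existing DAG object and that the project,
# bucket and zone below exist (all hypothetical here).
create_cluster = DataprocClusterCreateOperator(
    task_id='create_dataproc_cluster',
    project_id='my-gcp-project',
    cluster_name='analytics-cluster-{{ ds_nodash }}',
    num_workers=2,
    zone='europe-west1-d',
    master_machine_type='n1-standard-2',
    worker_machine_type='n1-standard-2',
    storage_bucket='my-dataproc-staging-bucket',
    dag=dag)
</pre></div>
</div>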
</dd></dl>
</div>
<div class="section" id="dataprocclusterscaleoperator">
<span id="id99"></span><h5>DataprocClusterScaleOperator<a class="headerlink" href="#dataprocclusterscaleoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterScaleOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Scale a cluster on Google Cloud Dataproc up or down.
The operator will wait until the cluster is re-scaled.</p>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataprocClusterScaleOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;dataproc_scale&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="s1">&#39;my-project&#39;</span><span class="p">,</span>
<span class="n">cluster_name</span><span class="o">=</span><span class="s1">&#39;cluster-1&#39;</span><span class="p">,</span>
<span class="n">num_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
<span class="n">num_preemptible_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
<span class="n">graceful_decommission_timeout</span><span class="o">=</span><span class="s1">&#39;1h&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more detail on about scaling clusters have a look at the reference:
<a class="reference external" href="https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters">https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the cluster to scale. (templated)</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which
the cluster runs. (templated)</li>
<li><strong>region</strong> (<em>string</em>) – The region for the dataproc cluster. (templated)</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>num_workers</strong> (<em>int</em>) – The new number of workers</li>
<li><strong>num_preemptible_workers</strong> (<em>int</em>) – The new number of preemptible workers</li>
<li><strong>graceful_decommission_timeout</strong> (<em>string</em>) – Timeout for graceful YARN decommissioning.
Maximum value is 1d</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="dataprocclusterdeleteoperator">
<span id="id100"></span><h5>DataprocClusterDeleteOperator<a class="headerlink" href="#dataprocclusterdeleteoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Delete a cluster on Google Cloud Dataproc. The operator will wait until the
cluster is destroyed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the cluster to delete. (templated)</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which
the cluster runs. (templated)</li>
<li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future. (templated)</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
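<p>A minimal usage sketch; the project and cluster name are illustrative assumptions:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
# Hedged sketch: assumes 'dag' is an existing DAG object and that the named
# cluster exists (both hypothetical here).
delete_cluster = DataprocClusterDeleteOperator(
    task_id='delete_dataproc_cluster',
    project_id='my-gcp-project',
    cluster_name='analytics-cluster-{{ ds_nodash }}',
    region='global',
    dag=dag)
</pre></div>
</div>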
</dd></dl>
</div>
<div class="section" id="dataprocpigoperator">
<span id="id101"></span><h5>DataProcPigOperator<a class="headerlink" href="#dataprocpigoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcPigOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Pig query Job on a Cloud DataProc cluster. The parameters of the operation
will be passed to the cluster.</p>
<p>It is good practice to define dataproc_* parameters such as the cluster name and UDFs
in the default_args of the DAG.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;cluster_name&#39;</span><span class="p">:</span> <span class="s1">&#39;cluster-1&#39;</span><span class="p">,</span>
<span class="s1">&#39;dataproc_pig_jars&#39;</span><span class="p">:</span> <span class="p">[</span>
<span class="s1">&#39;gs://example/udf/jar/datafu/1.2.0/datafu.jar&#39;</span><span class="p">,</span>
<span class="s1">&#39;gs://example/udf/jar/gpig/1.2/gpig.jar&#39;</span>
<span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
<p>You can pass a pig script as a string or as a file reference. Use variables to pass
values to the pig script that are resolved on the cluster, or use the parameters to
be resolved in the script as template parameters.</p>
<p><strong>Example</strong>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataProcPigOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;dataproc_pig&#39;</span><span class="p">,</span>
<span class="n">query</span><span class="o">=</span><span class="s1">&#39;a_pig_script.pig&#39;</span><span class="p">,</span>
<span class="n">variables</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;out&#39;</span><span class="p">:</span> <span class="s1">&#39;gs://example/output/{{ds}}&#39;</span><span class="p">},</span>
<span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span>
</pre></div>
</div>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more detail on about job submission have a look at the reference:
<a class="reference external" href="https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs">https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>query</strong> (<em>string</em>) – The query or reference to the query
file (pg or pig extension). (templated)</li>
<li><strong>query_uri</strong> (<em>string</em>) – The uri of a pig script on Cloud Storage.</li>
<li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query. (templated)</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This
name by default is the task_id appended with the execution date, but can
be templated. The name will always be appended with a random number to
avoid name clashes. (templated)</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li>
<li><strong>dataproc_pig_properties</strong> (<em>dict</em>) – Map for the Pig properties. Ideal to put in
default arguments</li>
<li><strong>dataproc_pig_jars</strong> (<em>list</em>) – URIs of jars provisioned in Cloud Storage (for example, for
UDFs and libs); ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li>
<li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states.
Any states in this list will result in an error being raised and failure of the
task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure,
pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only
<code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to
<code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API.
This is useful for identifying or linking to the job in the Google Cloud Console
Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with
an 8 character random string.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
</div>
<div class="section" id="dataprochiveoperator">
<span id="id102"></span><h5>DataProcHiveOperator<a class="headerlink" href="#dataprochiveoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcHiveOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Hive query Job on a Cloud DataProc cluster.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>query</strong> (<em>string</em>) – The query or reference to the query file (q extension).</li>
<li><strong>query_uri</strong> (<em>string</em>) – The uri of a hive script on Cloud Storage.</li>
<li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query.</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This name by default
is the task_id appended with the execution date, but can be templated. The
name will always be appended with a random number to avoid name clashes.</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster.</li>
<li><strong>dataproc_hive_properties</strong> (<em>dict</em>) – Map for the Hive properties. Ideal to put in
default arguments</li>
<li><strong>dataproc_hive_jars</strong> (<em>list</em>) – URIs of jars provisioned in Cloud Storage (for example, for
UDFs and libs); ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li>
<li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states.
Any states in this list will result in an error being raised and failure of the
task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure,
pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only
<code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to
<code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API.
This is useful for identifying or linking to the job in the Google Cloud Console
Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with
an 8 character random string.</p>
</td>
</tr>
</tbody>
</table>
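<p>A minimal usage sketch; the query, cluster name and region are illustrative assumptions:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
# Hedged sketch: assumes 'dag' is an existing DAG object and that 'cluster-1'
# is a running Dataproc cluster (both hypothetical here).
hive_task = DataProcHiveOperator(
    task_id='dataproc_hive_example',
    query="SELECT COUNT(*) FROM my_dataset.events WHERE ds = '{{ ds }}'",
    cluster_name='cluster-1',
    region='global',
    dag=dag)
</pre></div>
</div>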
</dd></dl>
</div>
<div class="section" id="dataprocsparksqloperator">
<span id="id103"></span><h5>DataProcSparkSqlOperator<a class="headerlink" href="#dataprocsparksqloperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcSparkSqlOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Spark SQL query Job on a Cloud DataProc cluster.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>query</strong> (<em>string</em>) – The query or reference to the query file (q extension). (templated)</li>
<li><strong>query_uri</strong> (<em>string</em>) – The uri of a spark sql script on Cloud Storage.</li>
<li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query. (templated)</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This
name by default is the task_id appended with the execution date, but can
be templated. The name will always be appended with a random number to
avoid name clashes. (templated)</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li>
<li><strong>dataproc_spark_properties</strong> (<em>dict</em>) – Map for the Spark SQL properties. Ideal to put in
default arguments</li>
<li><strong>dataproc_spark_jars</strong> (<em>list</em>) – URIs of jars provisioned in Cloud Storage (for example,
for UDFs and libs); ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li>
<li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states.
Any states in this list will result in an error being raised and failure of the
task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure,
pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only
<code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to
<code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API.
This is useful for identifying or linking to the job in the Google Cloud Console
Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with
an 8 character random string.</p>
</td>
</tr>
</tbody>
</table>
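<p>A minimal usage sketch; the query and cluster name are illustrative assumptions:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
# Hedged sketch: assumes 'dag' is an existing DAG object and that 'cluster-1'
# is a running Dataproc cluster (both hypothetical here).
spark_sql_task = DataProcSparkSqlOperator(
    task_id='dataproc_spark_sql_example',
    query='SHOW DATABASES;',
    cluster_name='cluster-1',
    region='global',
    dag=dag)
</pre></div>
</div>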
</dd></dl>
</div>
<div class="section" id="dataprocsparkoperator">
<span id="id104"></span><h5>DataProcSparkOperator<a class="headerlink" href="#dataprocsparkoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcSparkOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Spark Job on a Cloud DataProc cluster.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>main_jar</strong> (<em>string</em>) – URI of the job jar provisioned on Cloud Storage. (use this or
the main_class, not both together).</li>
<li><strong>main_class</strong> (<em>string</em>) – Name of the job class. (use this or the main_jar, not both
together).</li>
<li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li>
<li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work
directory. Should be stored in Cloud Storage.</li>
<li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This
name by default is the task_id appended with the execution date, but can
be templated. The name will always be appended with a random number to
avoid name clashes. (templated)</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li>
<li><strong>dataproc_spark_properties</strong> (<em>dict</em>) – Map for the Spark properties. Ideal to put in
default arguments</li>
<li><strong>dataproc_spark_jars</strong> (<em>list</em>) – URIs of jars provisioned in Cloud Storage (for example,
for UDFs and libs); ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li>
<li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states.
Any states in this list will result in an error being raised and failure of the
task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure,
pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only
<code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to
<code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API.
This is useful for identifying or linking to the job in the Google Cloud Console
Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with
an 8 character random string.</p>
</td>
</tr>
</tbody>
</table>
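<p>A minimal usage sketch; the jar, arguments and cluster name are illustrative assumptions:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
# Hedged sketch: assumes 'dag' is an existing DAG object, that the jar exists in
# Cloud Storage and that 'cluster-1' is a running Dataproc cluster (all hypothetical).
spark_task = DataProcSparkOperator(
    task_id='dataproc_spark_example',
    main_jar='gs://my-bucket/jars/spark-job.jar',  # use main_jar or main_class, not both
    arguments=['{{ ds }}'],
    cluster_name='cluster-1',
    region='global',
    dag=dag)
</pre></div>
</div>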
</dd></dl>
</div>
<div class="section" id="dataprochadoopoperator">
<span id="id105"></span><h5>DataProcHadoopOperator<a class="headerlink" href="#dataprochadoopoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcHadoopOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Hadoop Job on a Cloud DataProc cluster.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>main_jar</strong> (<em>string</em>) – URI of the job jar provisioned on Cloud Storage. (use this or
the main_class, not both together).</li>
<li><strong>main_class</strong> (<em>string</em>) – Name of the job class. (use this or the main_jar, not both
together).</li>
<li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li>
<li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work
directory. Should be stored in Cloud Storage.</li>
<li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This
name by default is the task_id appended with the execution date, but can
be templated. The name will always be appended with a random number to
avoid name clashes. (templated)</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li>
<li><strong>dataproc_hadoop_properties</strong> (<em>dict</em>) – Map for the Hadoop properties. Ideal to put in
default arguments.</li>
<li><strong>dataproc_hadoop_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (for example,
for UDFs and libs); ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li>
<li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states.
Any states in this list will result in an error being raised and failure of the
task. E.g., if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure,
pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only
<code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to
<code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API.
This is useful for identifying or linking to the job in the Google Cloud Console
Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with
an 8 character random string.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
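<p>As a hedged example, the snippet below submits the stock Hadoop wordcount example jar to an
existing cluster; the jar path, bucket names and DAG id are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import DataProcHadoopOperator

# Placeholder DAG; values below are examples only.
dag = DAG('example_dataproc_hadoop', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

hadoop_task = DataProcHadoopOperator(
    task_id='run_hadoop_job',
    main_jar='file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar',
    arguments=['wordcount', 'gs://my-bucket/input/', 'gs://my-bucket/output/'],
    cluster_name='my-cluster',
    region='global',
    dag=dag)
</pre></div>
</div>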
</div>
<div class="section" id="dataprocpysparkoperator">
<span id="id106"></span><h5>DataProcPySparkOperator<a class="headerlink" href="#dataprocpysparkoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcPySparkOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a PySpark Job on a Cloud DataProc cluster.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>main</strong> (<em>string</em>) – [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main
Python file to use as the driver. Must be a .py file.</li>
<li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li>
<li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work
directory. Should be stored in Cloud Storage.</li>
<li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li>
<li><strong>pyfiles</strong> (<em>list</em>) – List of Python files to pass to the PySpark framework.
Supported file types: .py, .egg, and .zip</li>
<li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This
name by default is the task_id appended with the execution date, but can
be templated. The name will always be appended with a random number to
avoid name clashes. (templated)</li>
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster.</li>
<li><strong>dataproc_pyspark_properties</strong> (<em>dict</em>) – Map for the PySpark properties. Ideal to put in
default arguments.</li>
<li><strong>dataproc_pyspark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (for example,
for UDFs and libs); ideal to put in default arguments.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li>
<li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states.
Any states in this list will result in an error being raised and failure of the
task. E.g., if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure,
pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only
<code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to
<code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API.
This is useful for identifying or linking to the job in the Google Cloud Console
Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with
an 8 character random string.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
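<p>A minimal sketch of a PySpark submission follows; the GCS paths, cluster name and DAG id
are placeholder values.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import DataProcPySparkOperator

# Placeholder DAG; values below are examples only.
dag = DAG('example_dataproc_pyspark', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

pyspark_task = DataProcPySparkOperator(
    task_id='run_pyspark_job',
    main='gs://my-bucket/jobs/job.py',            # required driver .py file
    pyfiles=['gs://my-bucket/jobs/helpers.zip'],  # extra Python dependencies
    arguments=['--date', '{{ ds }}'],
    cluster_name='my-cluster',
    region='global',
    dag=dag)
</pre></div>
</div>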
</div>
<div class="section" id="dataprocworkflowtemplateinstantiateoperator">
<span id="id107"></span><h5>DataprocWorkflowTemplateInstantiateOperator<a class="headerlink" href="#dataprocworkflowtemplateinstantiateoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateInstantiateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator</span></code></a></p>
<p>Instantiate a WorkflowTemplate on Google Cloud Dataproc. The operator will wait
until the WorkflowTemplate is finished executing.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">Please refer to:
<a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>template_id</strong> (<em>string</em>) – The id of the template. (templated)</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which
the template runs</li>
<li><strong>region</strong> (<em>string</em>) – leave as ‘global’; might become relevant in the future.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
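<p>For illustration, a minimal sketch that instantiates an existing workflow template;
the project id, template id and DAG id are placeholder values.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import (
    DataprocWorkflowTemplateInstantiateOperator)

# Placeholder DAG; values below are examples only.
dag = DAG('example_dataproc_workflow', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

instantiate_template = DataprocWorkflowTemplateInstantiateOperator(
    task_id='instantiate_workflow_template',
    project_id='my-gcp-project',
    region='global',
    template_id='my-workflow-template',
    dag=dag)
</pre></div>
</div>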
</div>
<div class="section" id="dataprocworkflowtemplateinstantiateinlineoperator">
<span id="id108"></span><h5>DataprocWorkflowTemplateInstantiateInlineOperator<a class="headerlink" href="#dataprocworkflowtemplateinstantiateinlineoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateInstantiateInlineOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateInlineOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator</span></code></a></p>
<p>Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc. The operator will
wait until the WorkflowTemplate is finished executing.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">Please refer to:
<a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>template</strong> (<em>map</em>) – The template contents. (templated)</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which
the template runs</li>
<li><strong>region</strong> (<em>string</em>) – leave as ‘global’; might become relevant in the future.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
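<p>A hedged sketch of an inline instantiation is shown below; the template payload is an
abbreviated example following the WorkflowTemplate REST resource linked above, and the project
id, cluster name and job are placeholder values.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import (
    DataprocWorkflowTemplateInstantiateInlineOperator)

# Placeholder DAG; the inline template below is a trimmed example payload.
dag = DAG('example_dataproc_workflow_inline', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

template = {
    'placement': {
        'managedCluster': {
            'clusterName': 'ephemeral-cluster',
            'config': {},
        },
    },
    'jobs': [
        {'stepId': 'say_hello',
         'pigJob': {'queryList': {'queries': ['sh echo hello']}}},
    ],
}

instantiate_inline = DataprocWorkflowTemplateInstantiateInlineOperator(
    task_id='instantiate_inline_workflow',
    project_id='my-gcp-project',
    region='global',
    template=template,
    dag=dag)
</pre></div>
</div>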
</div>
</div>
</div>
<div class="section" id="cloud-datastore">
<h3>Cloud Datastore<a class="headerlink" href="#cloud-datastore" title="Permalink to this headline"></a></h3>
<div class="section" id="datastore-operators">
<h4>Datastore Operators<a class="headerlink" href="#datastore-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#datastoreexportoperator"><span class="std std-ref">DatastoreExportOperator</span></a> : Export entities from Google Cloud Datastore to Cloud Storage.</li>
<li><a class="reference internal" href="#datastoreimportoperator"><span class="std std-ref">DatastoreImportOperator</span></a> : Import entities from Cloud Storage to Google Cloud Datastore.</li>
</ul>
<div class="section" id="datastoreexportoperator">
<span id="id109"></span><h5>DatastoreExportOperator<a class="headerlink" href="#datastoreexportoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.datastore_export_operator.</code><code class="descname">DatastoreExportOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_export_operator.html#DatastoreExportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Export entities from Google Cloud Datastore to Cloud Storage</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – name of the Cloud Storage bucket to back up data to</li>
<li><strong>namespace</strong> (<em>str</em>) – optional namespace path in the specified Cloud Storage bucket
to back up data to. If this namespace does not exist in GCS, it will be created.</li>
<li><strong>datastore_conn_id</strong> (<em>string</em>) – the name of the Datastore connection id to use</li>
<li><strong>cloud_storage_conn_id</strong> (<em>string</em>) – the name of the Cloud Storage connection id used to
write the backup</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>entity_filter</strong> (<em>dict</em>) – description of what data from the project is included in the
export, refer to
<a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter">https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter</a></li>
<li><strong>labels</strong> (<em>dict</em>) – client-assigned labels for cloud storage</li>
<li><strong>polling_interval_in_seconds</strong> (<em>int</em>) – number of seconds to wait before polling for
execution status again</li>
<li><strong>overwrite_existing</strong> (<em>bool</em>) – if the storage bucket + namespace is not empty, it will be
emptied prior to exports. This enables overwriting existing backups.</li>
<li><strong>xcom_push</strong> (<em>bool</em>) – push operation name to xcom for reference</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
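<p>A minimal usage sketch follows; the bucket and namespace are placeholder values.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.datastore_export_operator import DatastoreExportOperator

# Placeholder DAG; values below are examples only.
dag = DAG('example_datastore_export', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

export_entities = DatastoreExportOperator(
    task_id='export_datastore_entities',
    bucket='my-backup-bucket',
    namespace='nightly/{{ ds_nodash }}',
    overwrite_existing=True,
    dag=dag)
</pre></div>
</div>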
</div>
<div class="section" id="datastoreimportoperator">
<span id="id110"></span><h5>DatastoreImportOperator<a class="headerlink" href="#datastoreimportoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.datastore_import_operator.</code><code class="descname">DatastoreImportOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_import_operator.html#DatastoreImportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Import entities from Cloud Storage to Google Cloud Datastore</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – container in Cloud Storage to store data</li>
<li><strong>file</strong> (<em>string</em>) – path of the backup metadata file in the specified Cloud Storage bucket.
It should have the extension .overall_export_metadata</li>
<li><strong>namespace</strong> (<em>str</em>) – optional namespace of the backup metadata file in
the specified Cloud Storage bucket.</li>
<li><strong>entity_filter</strong> (<em>dict</em>) – description of what data from the project is included in
the export, refer to
<a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter">https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter</a></li>
<li><strong>labels</strong> (<em>dict</em>) – client-assigned labels for cloud storage</li>
<li><strong>datastore_conn_id</strong> (<em>string</em>) – the name of the connection id to use</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>polling_interval_in_seconds</strong> (<em>int</em>) – number of seconds to wait before polling for
execution status again</li>
<li><strong>xcom_push</strong> (<em>bool</em>) – push operation name to xcom for reference</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
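<p>A minimal usage sketch follows; the bucket, metadata file and namespace are placeholder
values and would normally point at the output of a previous export.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.datastore_import_operator import DatastoreImportOperator

# Placeholder DAG; values below are examples only.
dag = DAG('example_datastore_import', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

import_entities = DatastoreImportOperator(
    task_id='import_datastore_entities',
    bucket='my-backup-bucket',
    file='nightly/20190101/backup.overall_export_metadata',
    namespace='nightly/20190101',
    dag=dag)
</pre></div>
</div>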
</div>
</div>
<div class="section" id="datastorehook">
<h4>DatastoreHook<a class="headerlink" href="#datastorehook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.datastore_hook.</code><code class="descname">DatastoreHook</code><span class="sig-paren">(</span><em>datastore_conn_id='google_cloud_datastore_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<p>Interact with Google Cloud Datastore. This hook uses the Google Cloud Platform
connection.</p>
<p>This object is not thread safe. If you want to make multiple requests
simultaneously, you will need to create a hook per thread.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.allocate_ids">
<code class="descname">allocate_ids</code><span class="sig-paren">(</span><em>partialKeys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.allocate_ids"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.allocate_ids" title="Permalink to this definition"></a></dt>
<dd><p>Allocate IDs for incomplete keys.
see <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>partialKeys</strong> – a list of partial keys</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">a list of full keys.</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.begin_transaction">
<code class="descname">begin_transaction</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.begin_transaction"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.begin_transaction" title="Permalink to this definition"></a></dt>
<dd><p>Get a new transaction handle</p>
<blockquote>
<div><div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction</a></p>
</div>
</div></blockquote>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">a transaction handle</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.commit">
<code class="descname">commit</code><span class="sig-paren">(</span><em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.commit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.commit" title="Permalink to this definition"></a></dt>
<dd><p>Commit a transaction, optionally creating, deleting or modifying some entities.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>body</strong> – the body of the commit request</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the response body of the commit request</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.delete_operation">
<code class="descname">delete_operation</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.delete_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.delete_operation" title="Permalink to this definition"></a></dt>
<dd><p>Deletes the long-running operation</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> – the name of the operation resource</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.export_to_storage_bucket">
<code class="descname">export_to_storage_bucket</code><span class="sig-paren">(</span><em>bucket</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.export_to_storage_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.export_to_storage_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Export entities from Cloud Datastore to Cloud Storage for backup</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><em>version='v1'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Google Cloud Datastore service object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.get_operation">
<code class="descname">get_operation</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.get_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.get_operation" title="Permalink to this definition"></a></dt>
<dd><p>Gets the latest state of a long-running operation</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> – the name of the operation resource</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.import_from_storage_bucket">
<code class="descname">import_from_storage_bucket</code><span class="sig-paren">(</span><em>bucket</em>, <em>file</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.import_from_storage_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.import_from_storage_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Import a backup from Cloud Storage to Cloud Datastore</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.lookup">
<code class="descname">lookup</code><span class="sig-paren">(</span><em>keys</em>, <em>read_consistency=None</em>, <em>transaction=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.lookup"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.lookup" title="Permalink to this definition"></a></dt>
<dd><p>Lookup some entities by key</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>keys</strong> – the keys to lookup</li>
<li><strong>read_consistency</strong> – the read consistency to use: default, strong or eventual.
Cannot be used with a transaction.</li>
<li><strong>transaction</strong> – the transaction to use, if any.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">the response body of the lookup request.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.poll_operation_until_done">
<code class="descname">poll_operation_until_done</code><span class="sig-paren">(</span><em>name</em>, <em>polling_interval_in_seconds</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.poll_operation_until_done"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.poll_operation_until_done" title="Permalink to this definition"></a></dt>
<dd><p>Poll backup operation state until it’s completed</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.rollback">
<code class="descname">rollback</code><span class="sig-paren">(</span><em>transaction</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.rollback"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.rollback" title="Permalink to this definition"></a></dt>
<dd><p>Roll back a transaction</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>transaction</strong> – the transaction to roll back</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.run_query">
<code class="descname">run_query</code><span class="sig-paren">(</span><em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.run_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.run_query" title="Permalink to this definition"></a></dt>
<dd><p>Run a query for entities.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>body</strong> – the body of the query request</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the batch of query results.</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
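<p>As a hedged illustration of how the methods above fit together (for example, inside a
<cite>PythonOperator</cite> callable), the kind name, keys and connection id below are placeholders.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.datastore_hook import DatastoreHook

# One hook per thread, as noted above; connection id is the documented default.
hook = DatastoreHook(datastore_conn_id='google_cloud_datastore_default')

# Query entities of a single (placeholder) kind; the body follows the
# Datastore runQuery request format linked in run_query().
results = hook.run_query({'query': {'kind': [{'name': 'Task'}]}})

# Allocate ids for incomplete keys, then look the entities up by key.
keys = hook.allocate_ids([{'path': [{'kind': 'Task'}]}])
entities = hook.lookup(keys)
</pre></div>
</div>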
</div>
</div>
<div class="section" id="cloud-ml-engine">
<h3>Cloud ML Engine<a class="headerlink" href="#cloud-ml-engine" title="Permalink to this headline"></a></h3>
<div class="section" id="cloud-ml-engine-operators">
<h4>Cloud ML Engine Operators<a class="headerlink" href="#cloud-ml-engine-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#mlenginebatchpredictionoperator"><span class="std std-ref">MLEngineBatchPredictionOperator</span></a> : Start a Cloud ML Engine batch prediction job.</li>
<li><a class="reference internal" href="#mlenginemodeloperator"><span class="std std-ref">MLEngineModelOperator</span></a> : Manages a Cloud ML Engine model.</li>
<li><a class="reference internal" href="#mlenginetrainingoperator"><span class="std std-ref">MLEngineTrainingOperator</span></a> : Start a Cloud ML Engine training job.</li>
<li><a class="reference internal" href="#mlengineversionoperator"><span class="std std-ref">MLEngineVersionOperator</span></a> : Manages a Cloud ML Engine model version.</li>
</ul>
<div class="section" id="mlenginebatchpredictionoperator">
<span id="id111"></span><h5>MLEngineBatchPredictionOperator<a class="headerlink" href="#mlenginebatchpredictionoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineBatchPredictionOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineBatchPredictionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Start a Google Cloud ML Engine prediction job.</p>
<p>NOTE: For model origin, users should choose exactly one of the
three options below:</p>
<ol class="arabic simple">
<li>Populate ‘uri’ field only, which should be a GCS location that
points to a tensorflow savedModel directory.</li>
<li>Populate ‘model_name’ field only, which refers to an existing
model, and the default version of the model will be used.</li>
<li>Populate both ‘model_name’ and ‘version_name’ fields, which
refer to a specific version of a specific model.</li>
</ol>
<p>In options 2 and 3, both model and version name should contain the
minimal identifier. For instance, call</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">MLEngineBatchPredictionOperator</span><span class="p">(</span>
<span class="o">...</span><span class="p">,</span>
<span class="n">model_name</span><span class="o">=</span><span class="s1">&#39;my_model&#39;</span><span class="p">,</span>
<span class="n">version_name</span><span class="o">=</span><span class="s1">&#39;my_version&#39;</span><span class="p">,</span>
<span class="o">...</span><span class="p">)</span>
</pre></div>
</div>
<p>if the desired model version is
“projects/my_project/models/my_model/versions/my_version”.</p>
<p>See <a class="reference external" href="https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs">https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs</a>
for further documentation on the parameters.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name where the
prediction job is submitted. (templated)</li>
<li><strong>job_id</strong> (<em>string</em>) – A unique id for the prediction job on Google Cloud
ML Engine. (templated)</li>
<li><strong>data_format</strong> (<em>string</em>) – The format of the input data.
It will default to ‘DATA_FORMAT_UNSPECIFIED’ if it is not provided
or is not one of [“TEXT”, “TF_RECORD”, “TF_RECORD_GZIP”].</li>
<li><strong>input_paths</strong> (<em>list of string</em>) – A list of GCS paths of input data for batch
prediction. Accepts the wildcard operator <code class="docutils literal notranslate"><span class="pre">*</span></code>, but only at the end. (templated)</li>
<li><strong>output_path</strong> (<em>string</em>) – The GCS path where the prediction results are
written to. (templated)</li>
<li><strong>region</strong> (<em>string</em>) – The Google Compute Engine region to run the
prediction job in. (templated)</li>
<li><strong>model_name</strong> (<em>string</em>) – The Google Cloud ML Engine model to use for prediction.
If version_name is not provided, the default version of this
model will be used.
Should not be None if version_name is provided.
Should be None if uri is provided. (templated)</li>
<li><strong>version_name</strong> (<em>string</em>) – The Google Cloud ML Engine model version to use for
prediction.
Should be None if uri is provided. (templated)</li>
<li><strong>uri</strong> (<em>string</em>) – The GCS path of the saved model to use for prediction.
Should be None if model_name is provided.
It should be a GCS path pointing to a tensorflow SavedModel. (templated)</li>
<li><strong>max_worker_count</strong> (<em>int</em>) – The maximum number of workers to be used
for parallel processing. Defaults to 10 if not specified.</li>
<li><strong>runtime_version</strong> (<em>string</em>) – The Google Cloud ML Engine runtime version to use
for batch prediction.</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID used for connection to Google
Cloud Platform.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must
have domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>Raises:</dt>
<dd><code class="docutils literal notranslate"><span class="pre">ValueError</span></code>: if a unique model/version origin cannot be determined.</dd>
</dl>
</dd></dl>
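<p>A minimal sketch of submitting a batch prediction against a deployed model is shown below;
the project, model, GCS paths and DAG id are placeholder values.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.mlengine_operator import MLEngineBatchPredictionOperator

# Placeholder DAG; values below are examples only.
dag = DAG('example_mlengine_batch_prediction', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

batch_prediction = MLEngineBatchPredictionOperator(
    task_id='run_batch_prediction',
    project_id='my-gcp-project',
    job_id='batch_prediction_{{ ds_nodash }}',
    region='us-central1',
    data_format='TEXT',
    input_paths=['gs://my-bucket/prediction/input*'],
    output_path='gs://my-bucket/prediction/output',
    model_name='my_model',   # option 2 above: the default version of the model
    dag=dag)
</pre></div>
</div>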
</div>
<div class="section" id="mlenginemodeloperator">
<span id="id114"></span><h5>MLEngineModelOperator<a class="headerlink" href="#mlenginemodeloperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineModelOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineModelOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineModelOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineModelOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Operator for managing a Google Cloud ML Engine model.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name to which MLEngine
model belongs. (templated)</li>
<li><strong>model</strong> (<em>dict</em>) – <p>A dictionary containing the information about the model.
If the <cite>operation</cite> is <cite>create</cite>, then the <cite>model</cite> parameter should
contain all the information about this model such as <cite>name</cite>.</p>
<p>If the <cite>operation</cite> is <cite>get</cite>, the <cite>model</cite> parameter
should contain the <cite>name</cite> of the model.</p>
</li>
<li><strong>operation</strong> (<em>string</em>) – <p>The operation to perform. Available operations are:</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">create</span></code>: Creates a new model as provided by the <cite>model</cite> parameter.</li>
<li><code class="docutils literal notranslate"><span class="pre">get</span></code>: Gets a particular model where the name is specified in <cite>model</cite>.</li>
</ul>
</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
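<p>For illustration, the sketch below creates a model; the project and model name are
placeholder values.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.mlengine_operator import MLEngineModelOperator

# Placeholder DAG; values below are examples only.
dag = DAG('example_mlengine_model', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

create_model = MLEngineModelOperator(
    task_id='create_model',
    project_id='my-gcp-project',
    operation='create',
    model={'name': 'my_model'},   # for operation='get', only 'name' is needed
    dag=dag)
</pre></div>
</div>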
</div>
<div class="section" id="mlenginetrainingoperator">
<span id="id115"></span><h5>MLEngineTrainingOperator<a class="headerlink" href="#mlenginetrainingoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineTrainingOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineTrainingOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Operator for launching an MLEngine training job.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name within which MLEngine
training job should run (templated).</li>
<li><strong>job_id</strong> (<em>string</em>) – A unique templated id for the submitted Google MLEngine
training job. (templated)</li>
<li><strong>package_uris</strong> (<em>list</em>) – A list of package locations for the MLEngine training job,
which should include the main training program + any additional
dependencies. (templated)</li>
<li><strong>training_python_module</strong> (<em>string</em>) – The Python module name to run within MLEngine
training job after installing ‘package_uris’ packages. (templated)</li>
<li><strong>training_args</strong> (<em>list</em>) – A list of templated command line arguments to pass to
the MLEngine training program. (templated)</li>
<li><strong>region</strong> (<em>string</em>) – The Google Compute Engine region to run the MLEngine training
job in (templated).</li>
<li><strong>scale_tier</strong> (<em>string</em>) – Resource tier for MLEngine training job. (templated)</li>
<li><strong>runtime_version</strong> (<em>string</em>) – The Google Cloud ML runtime version to use for
training. (templated)</li>
<li><strong>python_version</strong> (<em>string</em>) – The version of Python used in training. (templated)</li>
<li><strong>job_dir</strong> (<em>string</em>) – A Google Cloud Storage path in which to store training
outputs and other data needed for training. (templated)</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>mode</strong> (<em>string</em>) – Can be one of ‘DRY_RUN’/’CLOUD’. In ‘DRY_RUN’ mode, no real
training job will be launched, but the MLEngine training job request
will be printed out. In ‘CLOUD’ mode, a real MLEngine training job
creation request will be issued.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
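<p>A hedged sketch of submitting a training job follows; the project, bucket, package and
module names are placeholder values.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.mlengine_operator import MLEngineTrainingOperator

# Placeholder DAG; values below are examples only.
dag = DAG('example_mlengine_training', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

training = MLEngineTrainingOperator(
    task_id='submit_training_job',
    project_id='my-gcp-project',
    job_id='training_{{ ds_nodash }}',
    package_uris=['gs://my-bucket/packages/trainer-0.1.tar.gz'],
    training_python_module='trainer.task',
    training_args=['--epochs', '10'],
    region='us-central1',
    scale_tier='BASIC',
    job_dir='gs://my-bucket/training/job-dir',
    mode='CLOUD',                 # use 'DRY_RUN' to only print the request
    dag=dag)
</pre></div>
</div>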
</div>
<div class="section" id="mlengineversionoperator">
<span id="id116"></span><h5>MLEngineVersionOperator<a class="headerlink" href="#mlengineversionoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineVersionOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineVersionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Operator for managing a Google Cloud ML Engine version.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name to which MLEngine
model belongs.</li>
<li><strong>model_name</strong> (<em>string</em>) – The name of the Google Cloud ML Engine model that the version
belongs to. (templated)</li>
<li><strong>version_name</strong> (<em>string</em>) – A name to use for the version being operated upon.
If not None and the <cite>version</cite> argument is None or does not have a value for
the <cite>name</cite> key, then this will be populated in the payload for the
<cite>name</cite> key. (templated)</li>
<li><strong>version</strong> (<em>dict</em>) – A dictionary containing the information about the version.
If the <cite>operation</cite> is <cite>create</cite>, <cite>version</cite> should contain all the
information about this version such as name and deploymentUri.
If the <cite>operation</cite> is <cite>get</cite> or <cite>delete</cite>, the <cite>version</cite> parameter
should contain the <cite>name</cite> of the version.
If it is None, the only <cite>operation</cite> possible would be <cite>list</cite>. (templated)</li>
<li><strong>operation</strong> (<em>string</em>) – <p>The operation to perform. Available operations are:</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">create</span></code>: Creates a new version in the model specified by <cite>model_name</cite>,
in which case the <cite>version</cite> parameter should contain all the
information to create that version
(e.g. <cite>name</cite>, <cite>deploymentUri</cite>).</li>
<li><code class="docutils literal notranslate"><span class="pre">get</span></code>: Gets full information of a particular version in the model
specified by <cite>model_name</cite>.
The name of the version should be specified in the <cite>version</cite>
parameter.</li>
<li><code class="docutils literal notranslate"><span class="pre">list</span></code>: Lists all available versions of the model specified
by <cite>model_name</cite>.</li>
<li><code class="docutils literal notranslate"><span class="pre">delete</span></code>: Deletes the version specified in <cite>version</cite> parameter from the
model specified by <cite>model_name</cite>).
The name of the version should be specified in the <cite>version</cite>
parameter.</li>
</ul>
</li>
<li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
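<p>The sketch below creates a new version from an exported SavedModel; the project, model,
version name and deployment URI are placeholder values.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.mlengine_operator import MLEngineVersionOperator

# Placeholder DAG; values below are examples only.
dag = DAG('example_mlengine_version', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

create_version = MLEngineVersionOperator(
    task_id='create_version',
    project_id='my-gcp-project',
    model_name='my_model',
    operation='create',
    version={
        'name': 'my_version',
        'deploymentUri': 'gs://my-bucket/models/export/saved_model',
    },
    dag=dag)
</pre></div>
</div>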
</div>
</div>
<div class="section" id="cloud-ml-engine-hook">
<h4>Cloud ML Engine Hook<a class="headerlink" href="#cloud-ml-engine-hook" title="Permalink to this headline"></a></h4>
<div class="section" id="mlenginehook">
<span id="id117"></span><h5>MLEngineHook<a class="headerlink" href="#mlenginehook" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_mlengine_hook.</code><code class="descname">MLEngineHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_job">
<code class="descname">create_job</code><span class="sig-paren">(</span><em>project_id</em>, <em>job</em>, <em>use_existing_job_fn=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_job" title="Permalink to this definition"></a></dt>
<dd><p>Launches an MLEngine job and waits for it to reach a terminal state.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project id within which MLEngine
job will be launched.</li>
<li><strong>job</strong> (<em>dict</em>) – <p>MLEngine Job object that should be provided to the MLEngine
API, such as:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="s1">&#39;jobId&#39;</span><span class="p">:</span> <span class="s1">&#39;my_job_id&#39;</span><span class="p">,</span>
<span class="s1">&#39;trainingInput&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;scaleTier&#39;</span><span class="p">:</span> <span class="s1">&#39;STANDARD_1&#39;</span><span class="p">,</span>
<span class="o">...</span>
<span class="p">}</span>
<span class="p">}</span>
</pre></div>
</div>
</li>
<li><strong>use_existing_job_fn</strong> (<em>function</em>) – In case an MLEngine job with the same
job_id already exists, this method (if provided) decides whether
to use the existing job, continue waiting for it to finish,
and return the job object. It should accept an MLEngine job
object and return a boolean value indicating whether it is OK to
reuse the existing job. If ‘use_existing_job_fn’ is not provided,
the existing MLEngine job is reused by default.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The MLEngine job object if the job successfully reach a
terminal state (which might be FAILED or CANCELLED state).</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_model">
<code class="descname">create_model</code><span class="sig-paren">(</span><em>project_id</em>, <em>model</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_model" title="Permalink to this definition"></a></dt>
<dd><p>Create a Model. Blocks until finished.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_version">
<code class="descname">create_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_spec</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_version" title="Permalink to this definition"></a></dt>
<dd><p>Creates the Version on Google Cloud ML Engine.</p>
<p>Returns the operation if the version was created successfully and
raises an error otherwise.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.delete_version">
<code class="descname">delete_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.delete_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.delete_version" title="Permalink to this definition"></a></dt>
<dd><p>Deletes the given version of a model. Blocks until finished.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Google MLEngine service object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_model">
<code class="descname">get_model</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.get_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_model" title="Permalink to this definition"></a></dt>
<dd><p>Gets a Model. Blocks until finished.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.list_versions">
<code class="descname">list_versions</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.list_versions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.list_versions" title="Permalink to this definition"></a></dt>
<dd><p>Lists all available versions of a model. Blocks until finished.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.set_default_version">
<code class="descname">set_default_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.set_default_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.set_default_version" title="Permalink to this definition"></a></dt>
<dd><p>Sets a version to be the default. Blocks until finished.</p>
</dd></dl>
</dd></dl>
</div>
</div>
</div>
<div class="section" id="cloud-storage">
<h3>Cloud Storage<a class="headerlink" href="#cloud-storage" title="Permalink to this headline"></a></h3>
<div class="section" id="storage-operators">
<h4>Storage Operators<a class="headerlink" href="#storage-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#filetogooglecloudstorageoperator"><span class="std std-ref">FileToGoogleCloudStorageOperator</span></a> : Uploads a file to Google Cloud Storage.</li>
<li><a class="reference internal" href="#id119"><span class="std std-ref">GoogleCloudStorageCreateBucketOperator</span></a> : Creates a new ACL entry on the specified bucket.</li>
<li><a class="reference internal" href="#googlecloudstoragecreatebucketoperator"><span class="std std-ref">GoogleCloudStorageBucketCreateAclEntryOperator</span></a> : Creates a new cloud storage bucket.</li>
<li><a class="reference internal" href="#googlecloudstoragedownloadoperator"><span class="std std-ref">GoogleCloudStorageDownloadOperator</span></a> : Downloads a file from Google Cloud Storage.</li>
<li><a class="reference internal" href="#googlecloudstoragelistoperator"><span class="std std-ref">GoogleCloudStorageListOperator</span></a> : List all objects from the bucket with the give string prefix and delimiter in name.</li>
<li><a class="reference internal" href="#id123"><span class="std std-ref">GoogleCloudStorageToBigQueryOperator</span></a> : Creates a new ACL entry on the specified object.</li>
<li><a class="reference internal" href="#googlecloudstoragetobigqueryoperator"><span class="std std-ref">GoogleCloudStorageObjectCreateAclEntryOperator</span></a> : Loads files from Google cloud storage into BigQuery.</li>
<li><a class="reference internal" href="#googlecloudstoragetogooglecloudstorageoperator"><span class="std std-ref">GoogleCloudStorageToGoogleCloudStorageOperator</span></a> : Copies objects from a bucket to another, with renaming if requested.</li>
<li><a class="reference internal" href="#googlecloudstoragetogooglecloudstoragetransferoperator"><span class="std std-ref">GoogleCloudStorageToGoogleCloudStorageTransferOperator</span></a> : Copies objects from a bucket to another using Google Transfer service.</li>
<li><a class="reference internal" href="#mysqltogooglecloudstorageoperator"><span class="std std-ref">MySqlToGoogleCloudStorageOperator</span></a>: Copy data from any MySQL Database to Google cloud storage in JSON format.</li>
</ul>
<div class="section" id="filetogooglecloudstorageoperator">
<span id="id118"></span><h5>FileToGoogleCloudStorageOperator<a class="headerlink" href="#filetogooglecloudstorageoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.file_to_gcs.</code><code class="descname">FileToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_gcs.html#FileToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Uploads a file to Google Cloud Storage.
Optionally can compress the file for upload.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>src</strong> (<em>string</em>) – Path to the local file. (templated)</li>
<li><strong>dst</strong> (<em>string</em>) – Destination path within the specified bucket. (templated)</li>
<li><strong>bucket</strong> (<em>string</em>) – The bucket to upload to. (templated)</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The Airflow connection ID to upload with</li>
<li><strong>mime_type</strong> (<em>string</em>) – The mime-type string</li>
<li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any</li>
<li><strong>gzip</strong> (<em>bool</em>) – Allows for file to be compressed and uploaded as gzip</li>
</ul>
</td>
</tr>
</tbody>
</table>
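<p>A minimal usage sketch; the task id, file path, bucket name and the surrounding <code class="docutils literal notranslate"><span class="pre">dag</span></code> object are illustrative assumptions:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from airflow.contrib.operators.file_to_gcs import FileToGoogleCloudStorageOperator

# Upload a local CSV file into the 'data' bucket, gzip disabled.
upload_file = FileToGoogleCloudStorageOperator(
    task_id='upload_file',
    src='/tmp/sales.csv',
    dst='sales/sales.csv',
    bucket='data',
    mime_type='text/csv',
    gzip=False,
    google_cloud_storage_conn_id='google_cloud_default',
    dag=dag)
</pre></div>
</div>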
<dl class="method">
<dt id="airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator.execute">
<code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_gcs.html#FileToGoogleCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator.execute" title="Permalink to this definition"></a></dt>
<dd><p>Uploads the file to Google cloud storage</p>
</dd></dl>
</dd></dl>
</div>
<div class="section" id="googlecloudstoragebucketcreateaclentryoperator">
<span id="googlecloudstoragecreatebucketoperator"></span><h5>GoogleCloudStorageBucketCreateAclEntryOperator<a class="headerlink" href="#googlecloudstoragebucketcreateaclentryoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_acl_operator.GoogleCloudStorageBucketCreateAclEntryOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_acl_operator.</code><code class="descname">GoogleCloudStorageBucketCreateAclEntryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_acl_operator.html#GoogleCloudStorageBucketCreateAclEntryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_acl_operator.GoogleCloudStorageBucketCreateAclEntryOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates a new ACL entry on the specified bucket.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>str</em>) – Name of a bucket.</li>
<li><strong>entity</strong> (<em>str</em>) – The entity holding the permission, in one of the following forms:
user-userId, user-email, group-groupId, group-email, domain-domain,
project-team-projectId, allUsers, allAuthenticatedUsers</li>
<li><strong>role</strong> (<em>str</em>) – The access permission for the entity.
Acceptable values are: “OWNER”, “READER”, “WRITER”.</li>
<li><strong>user_project</strong> (<em>str</em>) – (Optional) The project to be billed for this request.
Required for Requester Pays buckets.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>str</em>) – The connection ID to use when
connecting to Google Cloud Storage.</li>
</ul>
</td>
</tr>
</tbody>
</table>
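<p>A minimal usage sketch; the bucket name, entity and <code class="docutils literal notranslate"><span class="pre">dag</span></code> object are illustrative, and the <code class="docutils literal notranslate"><span class="pre">role</span></code> value is one of the acceptable values listed above:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from airflow.contrib.operators.gcs_acl_operator import GoogleCloudStorageBucketCreateAclEntryOperator

# Grant a single user read access to the 'data' bucket.
grant_bucket_read = GoogleCloudStorageBucketCreateAclEntryOperator(
    task_id='grant_bucket_read',
    bucket='data',
    entity='user-analyst@example.com',
    role='READER',
    google_cloud_storage_conn_id='google_cloud_default',
    dag=dag)
</pre></div>
</div>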
</dd></dl>
</div>
<div class="section" id="id119">
<span id="id120"></span><h5>GoogleCloudStorageCreateBucketOperator<a class="headerlink" href="#id119" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_operator.</code><code class="descname">GoogleCloudStorageCreateBucketOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_operator.html#GoogleCloudStorageCreateBucketOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates a new bucket. Google Cloud Storage uses a flat namespace,
so you can’t create a bucket with a name that is already in use.</p>
<blockquote>
<div><div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more information, see Bucket Naming Guidelines:
<a class="reference external" href="https://cloud.google.com/storage/docs/bucketnaming.html#requirements">https://cloud.google.com/storage/docs/bucketnaming.html#requirements</a></p>
</div>
</div></blockquote>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket_name</strong> (<em>string</em>) – The name of the bucket. (templated)</li>
<li><strong>storage_class</strong> (<em>string</em>) – <p>This defines how objects in the bucket are stored
and determines the SLA and the cost of storage (templated). Values include</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">REGIONAL</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">STANDARD</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">NEARLINE</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">COLDLINE</span></code>.</li>
</ul>
<p>If this value is not specified when the bucket is
created, it will default to STANDARD.</p>
</li>
<li><strong>location</strong> (<em>string</em>) – <p>The location of the bucket. (templated)
Object data for objects in the bucket resides in physical storage
within this region. Defaults to US.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://developers.google.com/storage/docs/bucket-locations">https://developers.google.com/storage/docs/bucket-locations</a></p>
</div>
</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the GCP Project. (templated)</li>
<li><strong>labels</strong> (<em>dict</em>) – User-provided labels, in key/value pairs.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when
connecting to Google cloud storage.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must
have domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt><strong>Example</strong>:</dt>
<dd><p class="first">The following Operator would create a new bucket <code class="docutils literal notranslate"><span class="pre">test-bucket</span></code>
with <code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code> storage class in <code class="docutils literal notranslate"><span class="pre">EU</span></code> region</p>
<div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateBucket</span> <span class="o">=</span> <span class="n">GoogleCloudStorageCreateBucketOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;CreateNewBucket&#39;</span><span class="p">,</span>
<span class="n">bucket_name</span><span class="o">=</span><span class="s1">&#39;test-bucket&#39;</span><span class="p">,</span>
<span class="n">storage_class</span><span class="o">=</span><span class="s1">&#39;MULTI_REGIONAL&#39;</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="s1">&#39;EU&#39;</span><span class="p">,</span>
<span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;env&#39;</span><span class="p">:</span> <span class="s1">&#39;dev&#39;</span><span class="p">,</span> <span class="s1">&#39;team&#39;</span><span class="p">:</span> <span class="s1">&#39;airflow&#39;</span><span class="p">},</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">&#39;airflow-service-account&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="googlecloudstoragedownloadoperator">
<span id="id121"></span><h5>GoogleCloudStorageDownloadOperator<a class="headerlink" href="#googlecloudstoragedownloadoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_download_operator.</code><code class="descname">GoogleCloudStorageDownloadOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_download_operator.html#GoogleCloudStorageDownloadOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Downloads a file from Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is. (templated)</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to download in the Google cloud
storage bucket. (templated)</li>
<li><strong>filename</strong> (<em>string</em>) – The file path on the local file system (where the
operator is being executed) that the file should be downloaded to. (templated)
If no filename passed, the downloaded data will not be stored on the local file
system.</li>
<li><strong>store_to_xcom_key</strong> (<em>string</em>) – If this param is set, the operator will push
the contents of the downloaded file to XCom with the key set in this
parameter. If not set, the downloaded data will not be pushed to XCom. (templated)</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when
connecting to Google cloud storage.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
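<p>A minimal usage sketch; the bucket, object and local path, as well as the <code class="docutils literal notranslate"><span class="pre">dag</span></code> object, are illustrative. Setting <code class="docutils literal notranslate"><span class="pre">store_to_xcom_key</span></code> instead of (or in addition to) <code class="docutils literal notranslate"><span class="pre">filename</span></code> would push the contents to XCom:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from airflow.contrib.operators.gcs_download_operator import GoogleCloudStorageDownloadOperator

# Download one object from the 'data' bucket to the local filesystem.
download_report = GoogleCloudStorageDownloadOperator(
    task_id='download_report',
    bucket='data',
    object='sales/sales-2017/january.avro',
    filename='/tmp/january.avro',
    google_cloud_storage_conn_id='google_cloud_default',
    dag=dag)
</pre></div>
</div>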
</dd></dl>
</div>
<div class="section" id="googlecloudstoragelistoperator">
<span id="id122"></span><h5>GoogleCloudStorageListOperator<a class="headerlink" href="#googlecloudstoragelistoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_list_operator.</code><code class="descname">GoogleCloudStorageListOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_list_operator.html#GoogleCloudStorageListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Lists all objects from the bucket with the given string prefix and delimiter in name.</p>
<p>This operator returns a python list with the names of objects which can be used by
<cite>xcom</cite> in the downstream task.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket to find the objects. (templated)</li>
<li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose name begin with
this prefix. (templated)</li>
<li><strong>delimiter</strong> (<em>string</em>) – The delimiter by which you want to filter the objects. (templated)
For example, to list the CSV files in a directory in GCS you would use
delimiter=’.csv’.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when
connecting to Google cloud storage.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt><strong>Example</strong>:</dt>
<dd><p class="first">The following Operator would list all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code>
folder in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket.</p>
<div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">GCS_Files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageListOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;GCS_Files&#39;</span><span class="p">,</span>
<span class="n">bucket</span><span class="o">=</span><span class="s1">&#39;data&#39;</span><span class="p">,</span>
<span class="n">prefix</span><span class="o">=</span><span class="s1">&#39;sales/sales-2017/&#39;</span><span class="p">,</span>
<span class="n">delimiter</span><span class="o">=</span><span class="s1">&#39;.avro&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span>
<span class="p">)</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="googlecloudstorageobjectcreateaclentryoperator">
<span id="googlecloudstoragetobigqueryoperator"></span><h5>GoogleCloudStorageObjectCreateAclEntryOperator<a class="headerlink" href="#googlecloudstorageobjectcreateaclentryoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_acl_operator.GoogleCloudStorageObjectCreateAclEntryOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_acl_operator.</code><code class="descname">GoogleCloudStorageObjectCreateAclEntryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_acl_operator.html#GoogleCloudStorageObjectCreateAclEntryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_acl_operator.GoogleCloudStorageObjectCreateAclEntryOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Creates a new ACL entry on the specified object.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>str</em>) – Name of a bucket.</li>
<li><strong>object_name</strong> (<em>str</em>) – Name of the object. For information about how to URL encode object
names to be path safe, see:
<a class="reference external" href="https://cloud.google.com/storage/docs/json_api/#encoding">https://cloud.google.com/storage/docs/json_api/#encoding</a></li>
<li><strong>entity</strong> (<em>str</em>) – The entity holding the permission, in one of the following forms:
user-userId, user-email, group-groupId, group-email, domain-domain,
project-team-projectId, allUsers, allAuthenticatedUsers</li>
<li><strong>role</strong> (<em>str</em>) – The access permission for the entity.
Acceptable values are: “OWNER”, “READER”.</li>
<li><strong>generation</strong> (<em>str</em>) – (Optional) If present, selects a specific revision of this object
(as opposed to the latest version, the default).</li>
<li><strong>user_project</strong> (<em>str</em>) – (Optional) The project to be billed for this request.
Required for Requester Pays buckets.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>str</em>) – The connection ID to use when
connecting to Google Cloud Storage.</li>
</ul>
</td>
</tr>
</tbody>
</table>
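<p>A minimal usage sketch; the bucket, object, entity and <code class="docutils literal notranslate"><span class="pre">dag</span></code> object are illustrative assumptions:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from airflow.contrib.operators.gcs_acl_operator import GoogleCloudStorageObjectCreateAclEntryOperator

# Grant a group read access to a single object in the 'data' bucket.
grant_object_read = GoogleCloudStorageObjectCreateAclEntryOperator(
    task_id='grant_object_read',
    bucket='data',
    object_name='sales/sales-2017/january.avro',
    entity='group-analytics@example.com',
    role='READER',
    google_cloud_storage_conn_id='google_cloud_default',
    dag=dag)
</pre></div>
</div>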
</dd></dl>
</div>
<div class="section" id="id123">
<span id="id124"></span><h5>GoogleCloudStorageToBigQueryOperator<a class="headerlink" href="#id123" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_bq.</code><code class="descname">GoogleCloudStorageToBigQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_bq.html#GoogleCloudStorageToBigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Loads files from Google cloud storage into BigQuery.</p>
<p>The schema to be used for the BigQuery table may be specified in one of
two ways. You may either directly pass the schema fields in, or you may
point the operator to a Google cloud storage object name. The object in
Google cloud storage must be a JSON file with the schema fields in it.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The bucket to load from. (templated)</li>
<li><strong>source_objects</strong> (<em>list of str</em>) – List of Google cloud storage URIs to load from. (templated)
If source_format is ‘DATASTORE_BACKUP’, the list must only contain a single URI.</li>
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The dotted (&lt;project&gt;.)&lt;dataset&gt;.&lt;table&gt;
BigQuery table to load data into. If &lt;project&gt; is not included,
project will be the project defined in the connection json. (templated)</li>
<li><strong>schema_fields</strong> (<em>list</em>) – If set, the schema field list as defined here:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load">https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load</a>
Should not be set when source_format is ‘DATASTORE_BACKUP’.</li>
<li><strong>schema_object</strong> (<em>string</em>) – If set, a GCS object path pointing to a .json file that
contains the schema for the table. (templated)</li>
<li><strong>source_format</strong> (<em>string</em>) – File format to export.</li>
<li><strong>compression</strong> (<em>string</em>) – [Optional] The compression type of the data source.
Possible values include GZIP and NONE.
The default value is NONE.
This setting is ignored for Google Cloud Bigtable,
Google Cloud Datastore backups and Avro formats.</li>
<li><strong>create_disposition</strong> (<em>string</em>) – The create disposition if the table doesn’t exist.</li>
<li><strong>skip_leading_rows</strong> (<em>int</em>) – Number of rows to skip when loading from a CSV.</li>
<li><strong>write_disposition</strong> (<em>string</em>) – The write disposition if the table already exists.</li>
<li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use when loading from a CSV.</li>
<li><strong>max_bad_records</strong> (<em>int</em>) – The maximum number of bad records that BigQuery can
ignore when running the job.</li>
<li><strong>quote_character</strong> (<em>string</em>) – The value that is used to quote data sections in a CSV file.</li>
<li><strong>ignore_unknown_values</strong> (<em>bool</em>) – [Optional] Indicates if BigQuery should allow
extra values that are not represented in the table schema.
If true, the extra values are ignored. If false, records with extra columns
are treated as bad records, and if there are too many bad records, an
invalid error is returned in the job result.</li>
<li><strong>allow_quoted_newlines</strong> (<em>bool</em>) – Whether to allow quoted newlines (true) or not (false).</li>
<li><strong>allow_jagged_rows</strong> (<em>bool</em>) – Accept rows that are missing trailing optional columns.
The missing values are treated as nulls. If false, records with missing trailing
columns are treated as bad records, and if there are too many bad records, an
invalid error is returned in the job result. Only applicable to CSV, ignored
for other formats.</li>
<li><strong>max_id_key</strong> (<em>string</em>) – If set, the name of a column in the BigQuery table
that’s to be loaded. This will be used to select the MAX value from
BigQuery after the load occurs. The results will be returned by the
execute() command, which in turn gets stored in XCom for future
operators to use. This can be helpful with incremental loads–during
future executions, you can pick up from the max ID.</li>
<li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google
cloud storage hook.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to
work, the service account making the request must have domain-wide
delegation enabled.</li>
<li><strong>schema_update_options</strong> (<em>list</em>) – Allows the schema of the destination
table to be updated as a side effect of the load job.</li>
<li><strong>src_fmt_configs</strong> (<em>dict</em>) – configure optional fields specific to the source format</li>
<li><strong>external_table</strong> (<em>bool</em>) – Flag to specify if the destination table should be
a BigQuery external table. Default Value is False.</li>
<li><strong>time_partitioning</strong> (<em>dict</em>) – configure optional time partitioning fields i.e.
partition by field, type and expiration as per API specifications.
Note that ‘field’ is not available in concurrency with
dataset.table$partition.</li>
<li><strong>cluster_fields</strong> (<em>list of str</em>) – Request that the result of this load be stored sorted
by one or more columns. This is only available in conjunction with
time_partitioning. The order of columns given determines the sort order.
Not applicable for external tables.</li>
</ul>
</td>
</tr>
</tbody>
</table>
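<p>A minimal usage sketch loading CSV files into a BigQuery table; the bucket, object pattern, table name, schema and <code class="docutils literal notranslate"><span class="pre">dag</span></code> object are illustrative, and the schema could equally be supplied via <code class="docutils literal notranslate"><span class="pre">schema_object</span></code>:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from airflow.contrib.operators.gcs_to_bq import GoogleCloudStorageToBigQueryOperator

# Load all 2017 sales CSV files into a BigQuery table, replacing its contents.
load_sales = GoogleCloudStorageToBigQueryOperator(
    task_id='load_sales',
    bucket='data',
    source_objects=['sales/sales-2017/*.csv'],
    destination_project_dataset_table='my_dataset.sales_2017',
    schema_fields=[
        {'name': 'date', 'type': 'DATE', 'mode': 'NULLABLE'},
        {'name': 'amount', 'type': 'FLOAT', 'mode': 'NULLABLE'},
    ],
    source_format='CSV',
    skip_leading_rows=1,
    write_disposition='WRITE_TRUNCATE',
    bigquery_conn_id='bigquery_default',
    google_cloud_storage_conn_id='google_cloud_default',
    dag=dag)
</pre></div>
</div>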
</dd></dl>
</div>
<div class="section" id="googlecloudstoragetogooglecloudstorageoperator">
<span id="id125"></span><h5>GoogleCloudStorageToGoogleCloudStorageOperator<a class="headerlink" href="#googlecloudstoragetogooglecloudstorageoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_gcs.</code><code class="descname">GoogleCloudStorageToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_gcs.html#GoogleCloudStorageToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Copies objects from a bucket to another, with renaming if requested.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_bucket</strong> (<em>string</em>) – The source Google cloud storage bucket where the
object is. (templated)</li>
<li><strong>source_object</strong> (<em>string</em>) – <p>The source name of the object to copy in the Google cloud
storage bucket. (templated)
If wildcards are used in this argument:</p>
<blockquote>
<div>You can use only one wildcard for objects (filenames) within your
bucket. The wildcard can appear inside the object name or at the
end of the object name. Appending a wildcard to the bucket name is
unsupported.</div></blockquote>
</li>
<li><strong>destination_bucket</strong> (<em>string</em>) – The destination Google cloud storage bucket
where the object should be. (templated)</li>
<li><strong>destination_object</strong> (<em>string</em>) – The destination name of the object in the
destination Google cloud storage bucket. (templated)
If a wildcard is supplied in the source_object argument, this is the
prefix that will be prepended to the final destination objects’ paths.
Note that the source path’s part before the wildcard will be removed;
if it needs to be retained it should be appended to destination_object.
For example, with prefix <code class="docutils literal notranslate"><span class="pre">foo/*</span></code> and destination_object <code class="docutils literal notranslate"><span class="pre">blah/</span></code>, the
file <code class="docutils literal notranslate"><span class="pre">foo/baz</span></code> will be copied to <code class="docutils literal notranslate"><span class="pre">blah/baz</span></code>; to retain the prefix write
the destination_object as e.g. <code class="docutils literal notranslate"><span class="pre">blah/foo</span></code>, in which case the copied file
will be named <code class="docutils literal notranslate"><span class="pre">blah/foo/baz</span></code>.</li>
<li><strong>move_object</strong> (<em>bool</em>) – When move object is True, the object is moved instead
of copied to the new location. This is the equivalent of a mv command
as opposed to a cp command.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when
connecting to Google cloud storage.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt><strong>Examples</strong>:</dt>
<dd><p class="first">The following Operator would copy a single file named
<code class="docutils literal notranslate"><span class="pre">sales/sales-2017/january.avro</span></code> in the <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the file named
<code class="docutils literal notranslate"><span class="pre">copied_sales/2017/january-backup.avro`</span> <span class="pre">in</span> <span class="pre">the</span> <span class="pre">``data_backup</span></code> bucket</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_single_file</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;copy_single_file&#39;</span><span class="p">,</span>
<span class="n">source_bucket</span><span class="o">=</span><span class="s1">&#39;data&#39;</span><span class="p">,</span>
<span class="n">source_object</span><span class="o">=</span><span class="s1">&#39;sales/sales-2017/january.avro&#39;</span><span class="p">,</span>
<span class="n">destination_bucket</span><span class="o">=</span><span class="s1">&#39;data_backup&#39;</span><span class="p">,</span>
<span class="n">destination_object</span><span class="o">=</span><span class="s1">&#39;copied_sales/2017/january-backup.avro&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span>
<span class="p">)</span>
</pre></div>
</div>
<p>The following Operator would copy all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code>
folder (i.e. with names starting with that prefix) in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the
<code class="docutils literal notranslate"><span class="pre">copied_sales/2017</span></code> folder in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;copy_files&#39;</span><span class="p">,</span>
<span class="n">source_bucket</span><span class="o">=</span><span class="s1">&#39;data&#39;</span><span class="p">,</span>
<span class="n">source_object</span><span class="o">=</span><span class="s1">&#39;sales/sales-2017/*.avro&#39;</span><span class="p">,</span>
<span class="n">destination_bucket</span><span class="o">=</span><span class="s1">&#39;data_backup&#39;</span><span class="p">,</span>
<span class="n">destination_object</span><span class="o">=</span><span class="s1">&#39;copied_sales/2017/&#39;</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span>
<span class="p">)</span>
</pre></div>
</div>
<p>The following Operator would move all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code>
folder (i.e. with names starting with that prefix) in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the
same folder in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket, deleting the original files in the
process.</p>
<div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">move_files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;move_files&#39;</span><span class="p">,</span>
<span class="n">source_bucket</span><span class="o">=</span><span class="s1">&#39;data&#39;</span><span class="p">,</span>
<span class="n">source_object</span><span class="o">=</span><span class="s1">&#39;sales/sales-2017/*.avro&#39;</span><span class="p">,</span>
<span class="n">destination_bucket</span><span class="o">=</span><span class="s1">&#39;data_backup&#39;</span><span class="p">,</span>
<span class="n">move_object</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span>
<span class="p">)</span>
</pre></div>
</div>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="googlecloudstoragetogooglecloudstoragetransferoperator">
<span id="id126"></span><h5>GoogleCloudStorageToGoogleCloudStorageTransferOperator<a class="headerlink" href="#googlecloudstoragetogooglecloudstoragetransferoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.gcs_to_gcs_transfer_operator.GoogleCloudStorageToGoogleCloudStorageTransferOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_gcs_transfer_operator.</code><code class="descname">GoogleCloudStorageToGoogleCloudStorageTransferOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_gcs_transfer_operator.html#GoogleCloudStorageToGoogleCloudStorageTransferOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_gcs_transfer_operator.GoogleCloudStorageToGoogleCloudStorageTransferOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Copies objects from a bucket to another using the GCP Storage Transfer
Service.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_bucket</strong> (<em>str</em>) – The source Google cloud storage bucket where the
object is. (templated)</li>
<li><strong>destination_bucket</strong> (<em>str</em>) – The destination Google cloud storage bucket
where the object should be. (templated)</li>
<li><strong>project_id</strong> (<em>str</em>) – The ID of the Google Cloud Platform Console project that
owns the job</li>
<li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional connection ID to use when connecting to Google Cloud
Storage.</li>
<li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
<li><strong>description</strong> (<em>str</em>) – Optional transfer service job description</li>
<li><strong>schedule</strong> (<em>dict</em>) – Optional transfer service schedule; see
<a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs</a>.
If not set, run transfer job once as soon as the operator runs</li>
<li><strong>object_conditions</strong> (<em>dict</em>) – Optional transfer service object conditions; see
<a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#ObjectConditions">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#ObjectConditions</a></li>
<li><strong>transfer_options</strong> (<em>dict</em>) – Optional transfer service transfer options; see
<a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#TransferOptions">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#TransferOptions</a></li>
<li><strong>wait</strong> (<em>bool</em>) – Wait for transfer to finish; defaults to <cite>True</cite></li>
</ul>
</td>
</tr>
</tbody>
</table>
<p><strong>Example</strong>:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">gcs_to_gcs_transfer_op</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageTransferOperator</span><span class="p">(</span>
<span class="n">task_id</span><span class="o">=</span><span class="s1">&#39;gcs_to_gcs_transfer_example&#39;</span><span class="p">,</span>
<span class="n">source_bucket</span><span class="o">=</span><span class="s1">&#39;my-source-bucket&#39;</span><span class="p">,</span>
<span class="n">destination_bucket</span><span class="o">=</span><span class="s1">&#39;my-destination-bucket&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="s1">&#39;my-gcp-project&#39;</span><span class="p">,</span>
<span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
</div>
<div class="section" id="mysqltogooglecloudstorageoperator">
<span id="id127"></span><h5>MySqlToGoogleCloudStorageOperator<a class="headerlink" href="#mysqltogooglecloudstorageoperator" title="Permalink to this headline"></a></h5>
<dl class="class">
<dt id="airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.mysql_to_gcs.</code><code class="descname">MySqlToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mysql_to_gcs.html#MySqlToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Copy data from MySQL to Google cloud storage in JSON format.</p>
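<p>A minimal usage sketch. The parameter names shown (<code class="docutils literal notranslate"><span class="pre">sql</span></code>, <code class="docutils literal notranslate"><span class="pre">bucket</span></code>, <code class="docutils literal notranslate"><span class="pre">filename</span></code>, <code class="docutils literal notranslate"><span class="pre">schema_filename</span></code>, <code class="docutils literal notranslate"><span class="pre">mysql_conn_id</span></code>) are assumed from this operator’s constructor in this release, and all values are illustrative:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from airflow.contrib.operators.mysql_to_gcs import MySqlToGoogleCloudStorageOperator

# Export a MySQL table to GCS as JSON, writing a BigQuery schema file alongside it.
export_orders = MySqlToGoogleCloudStorageOperator(
    task_id='export_orders',
    sql='SELECT * FROM orders',
    bucket='data',
    filename='exports/orders/orders.json',
    schema_filename='exports/orders/orders_schema.json',
    mysql_conn_id='mysql_default',
    google_cloud_storage_conn_id='google_cloud_default',
    dag=dag)
</pre></div>
</div>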
<dl class="classmethod">
<dt id="airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator.type_map">
<em class="property">classmethod </em><code class="descname">type_map</code><span class="sig-paren">(</span><em>mysql_type</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mysql_to_gcs.html#MySqlToGoogleCloudStorageOperator.type_map"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator.type_map" title="Permalink to this definition"></a></dt>
<dd><p>Helper function that maps from MySQL fields to BigQuery fields. Used
when a schema_filename is set.</p>
</dd></dl>
</dd></dl>
</div>
</div>
<div class="section" id="googlecloudstoragehook">
<h4>GoogleCloudStorageHook<a class="headerlink" href="#googlecloudstoragehook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcs_hook.</code><code class="descname">GoogleCloudStorageHook</code><span class="sig-paren">(</span><em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<p>Interact with Google Cloud Storage. This hook uses the Google Cloud Platform
connection.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy">
<code class="descname">copy</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket=None</em>, <em>destination_object=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.copy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy" title="Permalink to this definition"></a></dt>
<dd><p>Copies an object from a bucket to another, with renaming if requested.</p>
<p>destination_bucket or destination_object can be omitted, in which case
source bucket/object is used, but not both.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_bucket</strong> (<em>string</em>) – The bucket of the object to copy from.</li>
<li><strong>source_object</strong> (<em>string</em>) – The object to copy.</li>
<li><strong>destination_bucket</strong> (<em>string</em>) – The destination bucket the object should be copied to.
Can be omitted; then the same bucket is used.</li>
<li><strong>destination_object</strong> (<em>string</em>) – The (renamed) path of the object if given.
Can be omitted; then the same name is used.</li>
</ul>
</td>
</tr>
</tbody>
</table>
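<p>A minimal sketch of calling the hook directly, for example from a <code class="docutils literal notranslate"><span class="pre">PythonOperator</span></code> callable; the bucket and object names are illustrative, and the same pattern applies to the other hook methods below:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

# Copy a single object to another bucket under a new name.
hook = GoogleCloudStorageHook(google_cloud_storage_conn_id='google_cloud_default')
hook.copy(
    source_bucket='data',
    source_object='sales/sales-2017/january.avro',
    destination_bucket='data_backup',
    destination_object='copied_sales/2017/january-backup.avro')
</pre></div>
</div>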
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket">
<code class="descname">create_bucket</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>storage_class='MULTI_REGIONAL'</em>, <em>location='US'</em>, <em>project_id=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.create_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new bucket. Google Cloud Storage uses a flat namespace, so
you can’t create a bucket with a name that is already in use.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last">For more information, see Bucket Naming Guidelines:
<a class="reference external" href="https://cloud.google.com/storage/docs/bucketnaming.html#requirements">https://cloud.google.com/storage/docs/bucketnaming.html#requirements</a></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>bucket_name</strong> (<em>string</em>) – The name of the bucket.</li>
<li><strong>storage_class</strong> (<em>string</em>) – <p>This defines how objects in the bucket are stored
and determines the SLA and the cost of storage. Values include</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">REGIONAL</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">STANDARD</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">NEARLINE</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">COLDLINE</span></code>.</li>
</ul>
<p>If this value is not specified when the bucket is
created, it will default to STANDARD.</p>
</li>
<li><strong>location</strong> (<em>string</em>) – <p>The location of the bucket.
Object data for objects in the bucket resides in physical storage
within this region. Defaults to US.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<p class="last"><a class="reference external" href="https://developers.google.com/storage/docs/bucket-locations">https://developers.google.com/storage/docs/bucket-locations</a></p>
</div>
</li>
<li><strong>project_id</strong> (<em>string</em>) – The ID of the GCP Project.</li>
<li><strong>labels</strong> (<em>dict</em>) – User-provided labels, in key/value pairs.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">If successful, it returns the <code class="docutils literal notranslate"><span class="pre">id</span></code> of the bucket.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete">
<code class="descname">delete</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>generation=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.delete"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete" title="Permalink to this definition"></a></dt>
<dd><p>Delete an object if versioning is not enabled for the bucket, or if generation
parameter is used.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>bucket</strong> (<em>string</em>) – name of the bucket, where the object resides</li>
<li><strong>object</strong> (<em>string</em>) – name of the object to delete</li>
<li><strong>generation</strong> (<em>string</em>) – if present, permanently delete the object of this generation</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if succeeded</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download">
<code class="descname">download</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.download"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download" title="Permalink to this definition"></a></dt>
<dd><p>Get a file from Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The bucket to fetch from.</li>
<li><strong>object</strong> (<em>string</em>) – The object to fetch.</li>
<li><strong>filename</strong> (<em>string</em>) – If set, a local file path where the file should be written to.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists">
<code class="descname">exists</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists" title="Permalink to this definition"></a></dt>
<dd><p>Checks for the existence of a file in Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud
storage bucket.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Returns a Google Cloud Storage service object.</p>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c">
<code class="descname">get_crc32c</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_crc32c"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c" title="Permalink to this definition"></a></dt>
<dd><p>Gets the CRC32c checksum of an object in Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud
storage bucket.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash">
<code class="descname">get_md5hash</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_md5hash"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash" title="Permalink to this definition"></a></dt>
<dd><p>Gets the MD5 hash of an object in Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud
storage bucket.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size">
<code class="descname">get_size</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_size"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size" title="Permalink to this definition"></a></dt>
<dd><p>Gets the size of a file in Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud storage bucket.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
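<p>The three metadata helpers above (<code class="docutils literal notranslate"><span class="pre">get_size</span></code>, <code class="docutils literal notranslate"><span class="pre">get_crc32c</span></code>, <code class="docutils literal notranslate"><span class="pre">get_md5hash</span></code>) share the same call shape; a combined sketch with placeholder names:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook(google_cloud_storage_conn_id='google_cloud_default')

# Fetch size and checksums of the same object for a simple integrity log.
size = hook.get_size(bucket='my-bucket', object='data/report.csv')
crc32c = hook.get_crc32c(bucket='my-bucket', object='data/report.csv')
md5 = hook.get_md5hash(bucket='my-bucket', object='data/report.csv')
print(size, crc32c, md5)
</pre></div></div>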
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_bucket_acl">
<code class="descname">insert_bucket_acl</code><span class="sig-paren">(</span><em>bucket</em>, <em>entity</em>, <em>role</em>, <em>user_project</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.insert_bucket_acl"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_bucket_acl" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new ACL entry on the specified bucket.
See: <a class="reference external" href="https://cloud.google.com/storage/docs/json_api/v1/bucketAccessControls/insert">https://cloud.google.com/storage/docs/json_api/v1/bucketAccessControls/insert</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>str</em>) – Name of a bucket.</li>
<li><strong>entity</strong> (<em>str</em>) – The entity holding the permission, in one of the following forms:
user-userId, user-email, group-groupId, group-email, domain-domain,
project-team-projectId, allUsers, allAuthenticatedUsers.
See: <a class="reference external" href="https://cloud.google.com/storage/docs/access-control/lists#scopes">https://cloud.google.com/storage/docs/access-control/lists#scopes</a></li>
<li><strong>role</strong> (<em>str</em>) – The access permission for the entity.
Acceptable values are: “OWNER”, “READER”, “WRITER”.</li>
<li><strong>user_project</strong> (<em>str</em>) – (Optional) The project to be billed for this request.
Required for Requester Pays buckets.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_object_acl">
<code class="descname">insert_object_acl</code><span class="sig-paren">(</span><em>bucket</em>, <em>object_name</em>, <em>entity</em>, <em>role</em>, <em>generation</em>, <em>user_project</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.insert_object_acl"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_object_acl" title="Permalink to this definition"></a></dt>
<dd><p>Creates a new ACL entry on the specified object.
See: <a class="reference external" href="https://cloud.google.com/storage/docs/json_api/v1/objectAccessControls/insert">https://cloud.google.com/storage/docs/json_api/v1/objectAccessControls/insert</a></p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>str</em>) – Name of a bucket.</li>
<li><strong>object_name</strong> (<em>str</em>) – Name of the object. For information about how to URL encode
object names to be path safe, see:
<a class="reference external" href="https://cloud.google.com/storage/docs/json_api/#encoding">https://cloud.google.com/storage/docs/json_api/#encoding</a></li>
<li><strong>entity</strong> (<em>str</em>) – The entity holding the permission, in one of the following forms:
user-userId, user-email, group-groupId, group-email, domain-domain,
project-team-projectId, allUsers, allAuthenticatedUsers
See: <a class="reference external" href="https://cloud.google.com/storage/docs/access-control/lists#scopes">https://cloud.google.com/storage/docs/access-control/lists#scopes</a></li>
<li><strong>role</strong> (<em>str</em>) – The access permission for the entity.
Acceptable values are: “OWNER”, “READER”.</li>
<li><strong>generation</strong> (<em>str</em>) – (Optional) If present, selects a specific revision of this
object (as opposed to the latest version, the default).</li>
<li><strong>user_project</strong> (<em>str</em>) – (Optional) The project to be billed for this request.
Required for Requester Pays buckets.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
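<p>A hedged sketch covering both ACL helpers. The entity values and names are illustrative only; <code class="docutils literal notranslate"><span class="pre">user_project</span></code> and <code class="docutils literal notranslate"><span class="pre">generation</span></code> are passed as <code class="docutils literal notranslate"><span class="pre">None</span></code> on the assumption that the bucket is not a Requester Pays bucket and the latest object generation is meant:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook(google_cloud_storage_conn_id='google_cloud_default')

# Grant read access on the whole bucket to a group (group-email entity form).
hook.insert_bucket_acl(bucket='my-bucket',
                       entity='group-analytics@example.com',
                       role='READER',
                       user_project=None)

# Grant read access on a single object to all authenticated users.
hook.insert_object_acl(bucket='my-bucket',
                       object_name='data/report.csv',
                       entity='allAuthenticatedUsers',
                       role='READER',
                       generation=None,
                       user_project=None)
</pre></div></div>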
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after">
<code class="descname">is_updated_after</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>ts</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.is_updated_after"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after" title="Permalink to this definition"></a></dt>
<dd><p>Checks whether an object in Google Cloud Storage was updated after the given timestamp.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li>
<li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud
storage bucket.</li>
<li><strong>ts</strong> (<em>datetime</em>) – The timestamp to check against.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
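<p>For instance, checking whether an object changed in the last day (placeholder names; the cutoff value is arbitrary):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from datetime import datetime, timedelta

from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook(google_cloud_storage_conn_id='google_cloud_default')

# Has the object been updated within the last 24 hours?
cutoff = datetime.utcnow() - timedelta(days=1)
updated = hook.is_updated_after(bucket='my-bucket',
                                object='data/report.csv',
                                ts=cutoff)
</pre></div></div>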
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list">
<code class="descname">list</code><span class="sig-paren">(</span><em>bucket</em>, <em>versions=None</em>, <em>maxResults=None</em>, <em>prefix=None</em>, <em>delimiter=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.list"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list" title="Permalink to this definition"></a></dt>
<dd><p>Lists all objects in the bucket whose names begin with the given string prefix.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>bucket</strong> (<em>string</em>) – bucket name</li>
<li><strong>versions</strong> (<em>boolean</em>) – if true, list all versions of the objects</li>
<li><strong>maxResults</strong> (<em>integer</em>) – max count of items to return in a single page of responses</li>
<li><strong>prefix</strong> (<em>string</em>) – prefix string which filters objects whose names begin with
this prefix</li>
<li><strong>delimiter</strong> (<em>string</em>) – filters objects based on the delimiter (e.g. ‘.csv’)</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">a stream of object names matching the filtering criteria</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
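<p>A sketch listing objects by prefix and delimiter (placeholder bucket and prefix):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook(google_cloud_storage_conn_id='google_cloud_default')

# All CSV objects under the "exports/" prefix.
names = hook.list(bucket='my-bucket', prefix='exports/', delimiter='.csv')
for name in names:
    print(name)
</pre></div></div>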
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite">
<code class="descname">rewrite</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket</em>, <em>destination_object=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.rewrite"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite" title="Permalink to this definition"></a></dt>
<dd><p>Has the same functionality as copy, except that it will also work on files
over 5 TB, as well as when copying between locations and/or storage
classes.</p>
<p>destination_object can be omitted, in which case source_object is used.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>source_bucket</strong> (<em>string</em>) – The bucket of the object to copy from.</li>
<li><strong>source_object</strong> (<em>string</em>) – The object to copy.</li>
<li><strong>destination_bucket</strong> (<em>string</em>) – The destination bucket the object should be copied to.</li>
<li><strong>destination_object</strong> – The (renamed) path of the object if given.
Can be omitted; then the same name is used.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
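<p>For example, copying a large object into another bucket while keeping its name (placeholder bucket and object names):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook(google_cloud_storage_conn_id='google_cloud_default')

# destination_object is omitted, so the source object name is reused.
hook.rewrite(source_bucket='my-source-bucket',
             source_object='data/huge_file.bin',
             destination_bucket='my-archive-bucket')
</pre></div></div>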
<dl class="method">
<dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload">
<code class="descname">upload</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename</em>, <em>mime_type='application/octet-stream'</em>, <em>gzip=False</em>, <em>multipart=False</em>, <em>num_retries=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.upload"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload" title="Permalink to this definition"></a></dt>
<dd><p>Uploads a local file to Google Cloud Storage.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>bucket</strong> (<em>string</em>) – The bucket to upload to.</li>
<li><strong>object</strong> (<em>string</em>) – The object name to set when uploading the local file.</li>
<li><strong>filename</strong> (<em>string</em>) – The local file path to the file to be uploaded.</li>
<li><strong>mime_type</strong> (<em>str</em>) – The MIME type to set when uploading the file.</li>
<li><strong>gzip</strong> (<em>bool</em>) – Option to compress file for upload</li>
<li><strong>multipart</strong> (<em>bool</em><em> or </em><em>int</em>) – If True, the upload will be split into multiple HTTP requests. The
default size is 256MiB per request. Pass a number instead of True to
specify the request size, which must be a multiple of 262144 (256KiB).</li>
<li><strong>num_retries</strong> (<em>int</em>) – The number of times to attempt to re-upload the file (or individual
chunks, in the case of multipart uploads). Retries are attempted
with exponential backoff.</li>
</ul>
</td>
</tr>
</tbody>
</table>
</dd></dl>
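<p>A sketch of a gzip-compressed upload with retries; the paths, names and MIME type are placeholders:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

hook = GoogleCloudStorageHook(google_cloud_storage_conn_id='google_cloud_default')

# Upload a local CSV, compressing it on the way and retrying failed attempts twice.
hook.upload(bucket='my-bucket',
            object='data/report.csv',
            filename='/tmp/report.csv',
            mime_type='text/csv',
            gzip=True,
            num_retries=2)
</pre></div></div>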
</dd></dl>
</div>
<div class="section" id="gcptransferservicehook">
<h4>GCPTransferServiceHook<a class="headerlink" href="#gcptransferservicehook" title="Permalink to this headline"></a></h4>
<dl class="class">
<dt id="airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook">
<em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_transfer_hook.</code><code class="descname">GCPTransferServiceHook</code><span class="sig-paren">(</span><em>api_version='v1'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_transfer_hook.html#GCPTransferServiceHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p>
<p>Hook for GCP Storage Transfer Service.</p>
<dl class="method">
<dt id="airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook.get_conn">
<code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_transfer_hook.html#GCPTransferServiceHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook.get_conn" title="Permalink to this definition"></a></dt>
<dd><p>Retrieves connection to Google Storage Transfer service.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Storage Transfer service object</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td>
</tr>
</tbody>
</table>
</dd></dl>
</dd></dl>
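<p>A minimal sketch of obtaining the service object. The follow-up call is an assumption based on the public Storage Transfer JSON API, and the project id is a placeholder:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from airflow.contrib.hooks.gcp_transfer_hook import GCPTransferServiceHook

hook = GCPTransferServiceHook(gcp_conn_id='google_cloud_default')
service = hook.get_conn()

# Subsequent calls follow the Storage Transfer JSON API; for example,
# listing transfer jobs for a project (filter format per that API).
response = service.transferJobs().list(
    filter='{"project_id": "my-gcp-project"}').execute()
print(response.get('transferJobs', []))
</pre></div></div>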
</div>
</div>
<div class="section" id="google-kubernetes-engine">
<h3>Google Kubernetes Engine<a class="headerlink" href="#google-kubernetes-engine" title="Permalink to this headline"></a></h3>
<div class="section" id="google-kubernetes-engine-cluster-operators">
<h4>Google Kubernetes Engine Cluster Operators<a class="headerlink" href="#google-kubernetes-engine-cluster-operators" title="Permalink to this headline"></a></h4>
<ul class="simple">
<li><a class="reference internal" href="#id128"><span class="std std-ref">GKEClusterDeleteOperator</span></a> : Deletes a Kubernetes Cluster in Google Cloud Platform</li>
<li><a class="reference internal" href="#id129"><span class="std std-ref">GKEPodOperator</span></a> : Executes a task in a Kubernetes pod in the specified Google Kubernetes Engine cluster</li>
</ul>
<div class="section" id="gkeclustercreateoperator">
<h5>GKEClusterCreateOperator<a class="headerlink" href="#gkeclustercreateoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="gkeclusterdeleteoperator">
<span id="id128"></span><h5>GKEClusterDeleteOperator<a class="headerlink" href="#gkeclusterdeleteoperator" title="Permalink to this headline"></a></h5>
</div>
<div class="section" id="gkepodoperator">
<span id="id129"></span><h5>GKEPodOperator<a class="headerlink" href="#gkepodoperator" title="Permalink to this headline"></a></h5>
</div>
</div>
<div class="section" id="google-kubernetes-engine-hook">
<span id="id130"></span><h4>Google Kubernetes Engine Hook<a class="headerlink" href="#google-kubernetes-engine-hook" title="Permalink to this headline"></a></h4>
</div>
</div>
</div>
<div class="section" id="qubole">
<span id="id131"></span><h2>Qubole<a class="headerlink" href="#qubole" title="Permalink to this headline"></a></h2>
<p>Apache Airflow has a native operator and hooks to talk to <a class="reference external" href="https://qubole.com/">Qubole</a>,
which lets you submit your big data jobs directly to Qubole from Apache Airflow.</p>
<div class="section" id="quboleoperator">
<h3>QuboleOperator<a class="headerlink" href="#quboleoperator" title="Permalink to this headline"></a></h3>
<dl class="class">
<dt id="airflow.contrib.operators.qubole_operator.QuboleOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.qubole_operator.</code><code class="descname">QuboleOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/qubole_operator.html#QuboleOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.qubole_operator.QuboleOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p>
<p>Execute tasks (commands) on QDS (<a class="reference external" href="https://qubole.com">https://qubole.com</a>).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</td>
</tr>
</tbody>
</table>
<dl class="docutils">
<dt>kwargs:</dt>
<dd><table class="first docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">command_type:</th><td class="field-body">type of command to be executed, e.g. hivecmd, shellcmd, hadoopcmd</td>
</tr>
<tr class="field-even field"><th class="field-name">tags:</th><td class="field-body">array of tags to be assigned with the command</td>
</tr>
<tr class="field-odd field"><th class="field-name">cluster_label:</th><td class="field-body">cluster label on which the command will be executed</td>
</tr>
<tr class="field-even field"><th class="field-name">name:</th><td class="field-body">name to be given to command</td>
</tr>
<tr class="field-odd field"><th class="field-name">notify:</th><td class="field-body">whether to send email on command completion or not (default is False)</td>
</tr>
</tbody>
</table>
<p><strong>Arguments specific to command types</strong></p>
<dl class="last docutils">
<dt>hivecmd:</dt>
<dd><table class="first last docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">query:</th><td class="field-body">inline query statement</td>
</tr>
<tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr>
<tr class="field-even field"><td>&#160;</td><td class="field-body">s3 location containing query statement</td>
</tr>
<tr class="field-odd field"><th class="field-name">sample_size:</th><td class="field-body">size of sample in bytes on which to run query</td>
</tr>
<tr class="field-even field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td>
</tr>
</tbody>
</table>
</dd>
<dt>prestocmd:</dt>
<dd><table class="first last docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">query:</th><td class="field-body">inline query statement</td>
</tr>
<tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr>
<tr class="field-even field"><td>&#160;</td><td class="field-body">s3 location containing query statement</td>
</tr>
<tr class="field-odd field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td>
</tr>
</tbody>
</table>
</dd>
<dt>hadoopcmd:</dt>
<dd><table class="first last docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">sub_command:</th><td class="field-body">must be one of these [“jar”, “s3distcp”, “streaming”] followed by
1 or more args</td>
</tr>
</tbody>
</table>
</dd>
<dt>shellcmd:</dt>
<dd><table class="first last docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">script:</th><td class="field-body">inline command with args</td>
</tr>
<tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr>
<tr class="field-even field"><td>&#160;</td><td class="field-body">s3 location containing query statement</td>
</tr>
<tr class="field-odd field"><th class="field-name">files:</th><td class="field-body">list of files in s3 bucket as file1,file2 format. These files will be
copied into the working directory where the qubole command is being
executed.</td>
</tr>
<tr class="field-even field"><th class="field-name">archives:</th><td class="field-body">list of archives in s3 bucket as archive1,archive2 format. These
will be unarchived into the working directory where the qubole command is
being executed</td>
</tr>
<tr class="field-odd field"><th class="field-name">parameters:</th><td class="field-body">any extra args which need to be passed to script (only when
script_location is supplied)</td>
</tr>
</tbody>
</table>
</dd>
<dt>pigcmd:</dt>
<dd><table class="first last docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">script:</th><td class="field-body">inline query statement (latin_statements)</td>
</tr>
<tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr>
<tr class="field-even field"><td>&#160;</td><td class="field-body">s3 location containing pig query</td>
</tr>
<tr class="field-odd field"><th class="field-name">parameters:</th><td class="field-body">any extra args which need to be passed to script (only when
script_location is supplied)</td>
</tr>
</tbody>
</table>
</dd>
<dt>sparkcmd:</dt>
<dd><table class="first last docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">program:</th><td class="field-body">the complete Spark Program in Scala, SQL, Command, R, or Python</td>
</tr>
<tr class="field-even field"><th class="field-name">cmdline:</th><td class="field-body">spark-submit command line; all required information must be specified
in cmdline itself.</td>
</tr>
<tr class="field-odd field"><th class="field-name">sql:</th><td class="field-body">inline sql query</td>
</tr>
<tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr>
<tr class="field-even field"><td>&#160;</td><td class="field-body">s3 location containing query statement</td>
</tr>
<tr class="field-odd field"><th class="field-name">language:</th><td class="field-body">language of the program, Scala, SQL, Command, R, or Python</td>
</tr>
<tr class="field-even field"><th class="field-name">app_id:</th><td class="field-body">ID of a Spark job server app</td>
</tr>
<tr class="field-odd field"><th class="field-name">arguments:</th><td class="field-body">spark-submit command line arguments</td>
</tr>
<tr class="field-even field"><th class="field-name" colspan="2">user_program_arguments:</th></tr>
<tr class="field-even field"><td>&#160;</td><td class="field-body">arguments that the user program takes in</td>
</tr>
<tr class="field-odd field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td>
</tr>
</tbody>
</table>
</dd>
<dt>dbtapquerycmd:</dt>
<dd><table class="first last docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">db_tap_id:</th><td class="field-body">data store ID of the target database, in Qubole.</td>
</tr>
<tr class="field-even field"><th class="field-name">query:</th><td class="field-body">inline query statement</td>
</tr>
<tr class="field-odd field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td>
</tr>
</tbody>
</table>
</dd>
<dt>dbexportcmd:</dt>
<dd><table class="first last docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">mode:</th><td class="field-body">1 (simple), 2 (advanced)</td>
</tr>
<tr class="field-even field"><th class="field-name">hive_table:</th><td class="field-body">Name of the hive table</td>
</tr>
<tr class="field-odd field"><th class="field-name">partition_spec:</th><td class="field-body">partition specification for Hive table.</td>
</tr>
<tr class="field-even field"><th class="field-name">dbtap_id:</th><td class="field-body">data store ID of the target database, in Qubole.</td>
</tr>
<tr class="field-odd field"><th class="field-name">db_table:</th><td class="field-body">name of the db table</td>
</tr>
<tr class="field-even field"><th class="field-name">db_update_mode:</th><td class="field-body">allowinsert or updateonly</td>
</tr>
<tr class="field-odd field"><th class="field-name">db_update_keys:</th><td class="field-body">columns used to determine the uniqueness of rows</td>
</tr>
<tr class="field-even field"><th class="field-name">export_dir:</th><td class="field-body">HDFS/S3 location from which data will be exported.</td>
</tr>
<tr class="field-odd field"><th class="field-name" colspan="2">fields_terminated_by:</th></tr>
<tr class="field-odd field"><td>&#160;</td><td class="field-body">hex of the char used as column separator in the dataset</td>
</tr>
</tbody>
</table>
</dd>
<dt>dbimportcmd:</dt>
<dd><table class="first last docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">mode:</th><td class="field-body">1 (simple), 2 (advanced)</td>
</tr>
<tr class="field-even field"><th class="field-name">hive_table:</th><td class="field-body">Name of the hive table</td>
</tr>
<tr class="field-odd field"><th class="field-name">dbtap_id:</th><td class="field-body">data store ID of the target database, in Qubole.</td>
</tr>
<tr class="field-even field"><th class="field-name">db_table:</th><td class="field-body">name of the db table</td>
</tr>
<tr class="field-odd field"><th class="field-name">where_clause:</th><td class="field-body">where clause, if any</td>
</tr>
<tr class="field-even field"><th class="field-name">parallelism:</th><td class="field-body">number of parallel db connections to use for extracting data</td>
</tr>
<tr class="field-odd field"><th class="field-name">extract_query:</th><td class="field-body">SQL query to extract data from db. $CONDITIONS must be part
of the where clause.</td>
</tr>
<tr class="field-even field"><th class="field-name">boundary_query:</th><td class="field-body">Query used to get the range of row IDs to be extracted</td>
</tr>
<tr class="field-odd field"><th class="field-name">split_column:</th><td class="field-body">Column used as row ID to split data into ranges (mode 2)</td>
</tr>
</tbody>
</table>
</dd>
</dl>
</dd>
</dl>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p>The following fields are template-supported: <code class="docutils literal notranslate"><span class="pre">query</span></code>, <code class="docutils literal notranslate"><span class="pre">script_location</span></code>,
<code class="docutils literal notranslate"><span class="pre">sub_command</span></code>, <code class="docutils literal notranslate"><span class="pre">script</span></code>, <code class="docutils literal notranslate"><span class="pre">files</span></code>, <code class="docutils literal notranslate"><span class="pre">archives</span></code>, <code class="docutils literal notranslate"><span class="pre">program</span></code>, <code class="docutils literal notranslate"><span class="pre">cmdline</span></code>,
<code class="docutils literal notranslate"><span class="pre">sql</span></code>, <code class="docutils literal notranslate"><span class="pre">where_clause</span></code>, <code class="docutils literal notranslate"><span class="pre">extract_query</span></code>, <code class="docutils literal notranslate"><span class="pre">boundary_query</span></code>, <code class="docutils literal notranslate"><span class="pre">macros</span></code>,
<code class="docutils literal notranslate"><span class="pre">tags</span></code>, <code class="docutils literal notranslate"><span class="pre">name</span></code>, <code class="docutils literal notranslate"><span class="pre">parameters</span></code>, <code class="docutils literal notranslate"><span class="pre">dbtap_id</span></code>, <code class="docutils literal notranslate"><span class="pre">hive_table</span></code>, <code class="docutils literal notranslate"><span class="pre">db_table</span></code>,
<code class="docutils literal notranslate"><span class="pre">split_column</span></code>, <code class="docutils literal notranslate"><span class="pre">note_id</span></code>, <code class="docutils literal notranslate"><span class="pre">db_update_keys</span></code>, <code class="docutils literal notranslate"><span class="pre">export_dir</span></code>,
<code class="docutils literal notranslate"><span class="pre">partition_spec</span></code>, <code class="docutils literal notranslate"><span class="pre">qubole_conn_id</span></code>, <code class="docutils literal notranslate"><span class="pre">arguments</span></code>, <code class="docutils literal notranslate"><span class="pre">user_program_arguments</span></code>.</p>
<blockquote class="last">
<div>You can also use <code class="docutils literal notranslate"><span class="pre">.txt</span></code> files for template-driven use cases.</div></blockquote>
</div>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">In QuboleOperator there is a default handler for task failures and retries,
which generally kills the command running at QDS for the corresponding task
instance. You can override this behavior by providing your own failure and retry
handler in the task definition.</p>
</div>
</dd></dl>
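<p>A minimal DAG sketch submitting a Hive command through <code class="docutils literal notranslate"><span class="pre">QuboleOperator</span></code>; the connection id, cluster label, query and dates are assumptions for illustration:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.qubole_operator import QuboleOperator

dag = DAG('example_qubole_hive', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

# Submits a Hive query to QDS on the labelled cluster.
run_hive_query = QuboleOperator(
    task_id='run_hive_query',
    command_type='hivecmd',
    query='SHOW TABLES',
    cluster_label='default',
    qubole_conn_id='qubole_default',
    dag=dag)
</pre></div></div>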
</div>
<div class="section" id="qubolepartitionsensor">
<h3>QubolePartitionSensor<a class="headerlink" href="#qubolepartitionsensor" title="Permalink to this headline"></a></h3>
<dl class="class">
<dt id="airflow.contrib.sensors.qubole_sensor.QubolePartitionSensor">
<em class="property">class </em><code class="descclassname">airflow.contrib.sensors.qubole_sensor.</code><code class="descname">QubolePartitionSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/qubole_sensor.html#QubolePartitionSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.qubole_sensor.QubolePartitionSensor" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.sensors.qubole_sensor.QuboleSensor" title="airflow.contrib.sensors.qubole_sensor.QuboleSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.qubole_sensor.QuboleSensor</span></code></a></p>
<p>Wait for a Hive partition to show up in QHS (Qubole Hive Service)
and check for its presence via QDS APIs</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</li>
<li><strong>data</strong> (<em>a JSON object</em>) – a JSON object containing the payload whose presence needs to be checked.
Check this <a class="reference external" href="https://github.com/apache/airflow/blob/master/airflow/contrib/example_dags/example_qubole_sensor.py">example</a> for a sample payload
structure.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">Both <code class="docutils literal notranslate"><span class="pre">data</span></code> and <code class="docutils literal notranslate"><span class="pre">qubole_conn_id</span></code> fields support templating. You can
also use <code class="docutils literal notranslate"><span class="pre">.txt</span></code> files for template-driven use cases.</p>
</div>
</dd></dl>
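<p>A sketch of waiting for a Hive partition. The payload shape follows the linked example DAG; the schema, table and column names are assumptions:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.sensors.qubole_sensor import QubolePartitionSensor

dag = DAG('example_qubole_partition_sensor',
          start_date=datetime(2019, 1, 1), schedule_interval=None)

# Pokes QDS every minute until the partition for the execution date shows up.
wait_for_partition = QubolePartitionSensor(
    task_id='wait_for_partition',
    poke_interval=60,
    timeout=600,
    data={'schema': 'default',
          'table': 'my_partitioned_table',
          'columns': [{'column': 'ds', 'values': ['{{ ds }}']}]},
    dag=dag)
</pre></div></div>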
</div>
<div class="section" id="qubolefilesensor">
<h3>QuboleFileSensor<a class="headerlink" href="#qubolefilesensor" title="Permalink to this headline"></a></h3>
<dl class="class">
<dt id="airflow.contrib.sensors.qubole_sensor.QuboleFileSensor">
<em class="property">class </em><code class="descclassname">airflow.contrib.sensors.qubole_sensor.</code><code class="descname">QuboleFileSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/qubole_sensor.html#QuboleFileSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.qubole_sensor.QuboleFileSensor" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.sensors.qubole_sensor.QuboleSensor" title="airflow.contrib.sensors.qubole_sensor.QuboleSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.qubole_sensor.QuboleSensor</span></code></a></p>
<p>Wait for a file or folder to be present in cloud storage
and check for its presence via QDS APIs</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</li>
<li><strong>data</strong> (<em>a JSON object</em>) – <p>a JSON object containing the payload whose presence needs to be checked.
Check this <a class="reference external" href="https://github.com/apache/airflow/blob/master/airflow/contrib/example_dags/example_qubole_sensor.py">example</a> for a sample payload
structure.</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">Both <code class="docutils literal notranslate"><span class="pre">data</span></code> and <code class="docutils literal notranslate"><span class="pre">qubole_conn_id</span></code> fields support templating. You can
also use <code class="docutils literal notranslate"><span class="pre">.txt</span></code> files for template-driven use cases.</p>
</div>
</dd></dl>
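<p>Analogously, a sketch waiting on a file in cloud storage; the bucket and key are placeholders, and the payload shape follows the linked example DAG:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.sensors.qubole_sensor import QuboleFileSensor

dag = DAG('example_qubole_file_sensor',
          start_date=datetime(2019, 1, 1), schedule_interval=None)

# Pokes QDS until the listed S3 object exists.
wait_for_file = QuboleFileSensor(
    task_id='wait_for_file',
    poke_interval=60,
    timeout=600,
    data={'files': ['s3://my-bucket/incoming/data.csv']},
    dag=dag)
</pre></div></div>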
</div>
<div class="section" id="qubolecheckoperator">
<h3>QuboleCheckOperator<a class="headerlink" href="#qubolecheckoperator" title="Permalink to this headline"></a></h3>
<dl class="class">
<dt id="airflow.contrib.operators.qubole_check_operator.QuboleCheckOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.qubole_check_operator.</code><code class="descname">QuboleCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/qubole_check_operator.html#QuboleCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.qubole_check_operator.QuboleCheckOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.CheckOperator" title="airflow.operators.check_operator.CheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.CheckOperator</span></code></a>, <a class="reference internal" href="#airflow.contrib.operators.qubole_operator.QuboleOperator" title="airflow.contrib.operators.qubole_operator.QuboleOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.qubole_operator.QuboleOperator</span></code></a></p>
<p>Performs checks against Qubole Commands. <code class="docutils literal notranslate"><span class="pre">QuboleCheckOperator</span></code> expects
a command that will be executed on QDS.
By default, each value on the first row of the result of this Qubole Command
is evaluated using Python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the
values return <code class="docutils literal notranslate"><span class="pre">False</span></code>, the check fails and errors out.</p>
<p>Note that Python bool casting evaluates the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">False</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">0</span></code></li>
<li>Empty string (<code class="docutils literal notranslate"><span class="pre">&quot;&quot;</span></code>)</li>
<li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li>
<li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li>
</ul>
<p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if
the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft a much more complex query that could,
for instance, check that the table has the same number of rows as
the source table upstream, or that the count of today’s partition is
greater than yesterday’s partition, or that a set of metrics are less
than 3 standard deviations from the 7-day average.</p>
<p>This operator can be used as a data quality check in your pipeline, and
depending on where you put it in your DAG, you have the choice to
stop the critical path, preventing it from
publishing dubious data, or place it on the side and receive email alerts
without stopping the progress of the DAG.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</td>
</tr>
</tbody>
</table>
<p>kwargs:</p>
<blockquote>
<div><p>Arguments specific to Qubole command can be referred from QuboleOperator docs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name" colspan="2">results_parser_callable:</th></tr>
<tr class="field-odd field"><td>&#160;</td><td class="field-body">An optional parameter that gives users more flexibility in parsing
the results of the Qubole command. It is a Python callable which
can hold the logic to parse the list of rows returned by the Qubole command.
By default, only the values on the first row are used for performing checks.
This callable should return the list of records on
which the checks have to be performed.</td>
</tr>
</tbody>
</table>
</div></blockquote>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">All fields in common with template fields of
QuboleOperator and CheckOperator are template-supported.</p>
</div>
</dd></dl>
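<p>A sketch of the <code class="docutils literal notranslate"><span class="pre">COUNT(*)</span></code> check described above; the connection defaults, cluster label and table name are assumptions:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.qubole_check_operator import QuboleCheckOperator

dag = DAG('example_qubole_check', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

# Fails the task if the table is empty, since COUNT(*) == 0 casts to False.
check_not_empty = QuboleCheckOperator(
    task_id='check_not_empty',
    command_type='hivecmd',
    query='SELECT COUNT(*) FROM my_table',
    cluster_label='default',
    dag=dag)
</pre></div></div>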
</div>
<div class="section" id="qubolevaluecheckoperator">
<h3>QuboleValueCheckOperator<a class="headerlink" href="#qubolevaluecheckoperator" title="Permalink to this headline"></a></h3>
<dl class="class">
<dt id="airflow.contrib.operators.qubole_check_operator.QuboleValueCheckOperator">
<em class="property">class </em><code class="descclassname">airflow.contrib.operators.qubole_check_operator.</code><code class="descname">QuboleValueCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/qubole_check_operator.html#QuboleValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.qubole_check_operator.QuboleValueCheckOperator" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.ValueCheckOperator" title="airflow.operators.check_operator.ValueCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.ValueCheckOperator</span></code></a>, <a class="reference internal" href="#airflow.contrib.operators.qubole_operator.QuboleOperator" title="airflow.contrib.operators.qubole_operator.QuboleOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.qubole_operator.QuboleOperator</span></code></a></p>
<p>Performs a simple value check using Qubole command.
By default, each value on the first row of this
Qubole command is compared with a pre-defined value.
The check fails and errors out if the output of the command
is not within the permissible limit of expected value.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</li>
<li><strong>pass_value</strong> (<em>str/int/float</em>) – Expected value of the query results.</li>
<li><strong>tolerance</strong> (<em>int/float</em>) – Defines the permissible pass_value range, for example if
tolerance is 2, the Qubole command output can be anything between
-2*pass_value and 2*pass_value, without the operator erring out.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p>kwargs:</p>
<blockquote>
<div><p>Arguments specific to Qubole command can be referred from QuboleOperator docs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name" colspan="2">results_parser_callable:</th></tr>
<tr class="field-odd field"><td>&#160;</td><td class="field-body">An optional parameter that gives users more flexibility in parsing
the results of the Qubole command. It is a Python callable which
can hold the logic to parse the list of rows returned by the Qubole command.
By default, only the values on the first row are used for performing checks.
This callable should return the list of records on
which the checks have to be performed.</td>
</tr>
</tbody>
</table>
</div></blockquote>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">All fields in common with template fields of
QuboleOperator and ValueCheckOperator are template-supported.</p>
</div>
</dd></dl>
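<p>A sketch of a value check with a tolerance band; <code class="docutils literal notranslate"><span class="pre">pass_value</span></code>, <code class="docutils literal notranslate"><span class="pre">tolerance</span></code> and the other names are assumptions for illustration:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span>from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.qubole_check_operator import QuboleValueCheckOperator

dag = DAG('example_qubole_value_check', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

# Fails if the row count strays outside the tolerance band around pass_value.
check_row_count = QuboleValueCheckOperator(
    task_id='check_row_count',
    command_type='hivecmd',
    query='SELECT COUNT(*) FROM my_table',
    pass_value=100,
    tolerance=2,
    cluster_label='default',
    dag=dag)
</pre></div></div>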
</div>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="metrics.html" class="btn btn-neutral float-right" title="Metrics" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="api.html" class="btn btn-neutral" title="Experimental Rest API" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/language_data.js"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>