| |
| |
| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <title>Integration — Airflow Documentation</title> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> |
| <link rel="stylesheet" href="_static/pygments.css" type="text/css" /> |
| <link rel="index" title="Index" href="genindex.html" /> |
| <link rel="search" title="Search" href="search.html" /> |
| <link rel="next" title="Lineage" href="lineage.html" /> |
| <link rel="prev" title="Experimental Rest API" href="api.html" /> |
| |
| |
| <script src="_static/js/modernizr.min.js"></script> |
| |
| </head> |
| |
| <body class="wy-body-for-nav"> |
| |
| |
| <div class="wy-grid-for-nav"> |
| |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search"> |
| |
| |
| |
| <a href="index.html" class="icon icon-home"> Airflow |
| |
| |
| |
| </a> |
| |
| |
| |
| |
| |
| |
| |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" /> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| |
| |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| |
| |
| |
| |
| |
| |
| <ul class="current"> |
| <li class="toctree-l1"><a class="reference internal" href="project.html">Project</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="start.html">Quick Start</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="tutorial.html">Tutorial</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="howto/index.html">How-to Guides</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="ui.html">UI / Screenshots</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="concepts.html">Concepts</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="profiling.html">Data Profiling</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="cli.html">Command Line Interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="scheduler.html">Scheduling &amp; Triggers</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="plugins.html">Plugins</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="security.html">Security</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="timezone.html">Time zones</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="api.html">Experimental Rest API</a></li> |
| <li class="toctree-l1 current"><a class="current reference internal" href="#">Integration</a><ul> |
| <li class="toctree-l2"><a class="reference internal" href="#reverse-proxy">Reverse Proxy</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="#azure-microsoft-azure">Azure: Microsoft Azure</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#azure-blob-storage">Azure Blob Storage</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#wasbblobsensor">WasbBlobSensor</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#wasbprefixsensor">WasbPrefixSensor</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#filetowasboperator">FileToWasbOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#wasbhook">WasbHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#azure-file-share">Azure File Share</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#azurefilesharehook">AzureFileShareHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#logging">Logging</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#azure-data-lake">Azure Data Lake</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#azuredatalakehook">AzureDataLakeHook</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="#aws-amazon-web-services">AWS: Amazon Web Services</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#aws-emr">AWS EMR</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#emraddstepsoperator">EmrAddStepsOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#emrcreatejobflowoperator">EmrCreateJobFlowOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#emrterminatejobflowoperator">EmrTerminateJobFlowOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#emrhook">EmrHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#aws-s3">AWS S3</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#s3hook">S3Hook</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3filetransformoperator">S3FileTransformOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3listoperator">S3ListOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3togooglecloudstorageoperator">S3ToGoogleCloudStorageOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3tohivetransfer">S3ToHiveTransfer</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#aws-ec2-container-service">AWS EC2 Container Service</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#ecsoperator">ECSOperator</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#aws-batch-service">AWS Batch Service</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#awsbatchoperator">AWSBatchOperator</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#aws-redshift">AWS RedShift</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#awsredshiftclustersensor">AwsRedshiftClusterSensor</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#redshifthook">RedshiftHook</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#redshifttos3transfer">RedshiftToS3Transfer</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3toredshifttransfer">S3ToRedshiftTransfer</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="#databricks">Databricks</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#databrickssubmitrunoperator">DatabricksSubmitRunOperator</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="#gcp-google-cloud-platform">GCP: Google Cloud Platform</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#id22">Logging</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#bigquery">BigQuery</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#bigquery-operators">BigQuery Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#bigqueryhook">BigQueryHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-sql">Cloud SQL</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-sql-operators">Cloud SQL Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-sql-hook">Cloud SQL Hook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#compute-engine">Compute Engine</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#compute-engine-operators">Compute Engine Operators</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-functions">Cloud Functions</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-functions-operators">Cloud Functions Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-functions-hook">Cloud Functions Hook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-dataflow">Cloud DataFlow</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#dataflow-operators">DataFlow Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#dataflowhook">DataFlowHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-dataproc">Cloud DataProc</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#dataproc-operators">DataProc Operators</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-datastore">Cloud Datastore</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#datastore-operators">Datastore Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datastorehook">DatastoreHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-ml-engine">Cloud ML Engine</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-ml-engine-operators">Cloud ML Engine Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-ml-engine-hook">Cloud ML Engine Hook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-storage">Cloud Storage</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#storage-operators">Storage Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#googlecloudstoragehook">GoogleCloudStorageHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#google-kubernetes-engine">Google Kubernetes Engine</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#google-kubernetes-engine-cluster-operators">Google Kubernetes Engine Cluster Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#google-kubernetes-engine-hook">Google Kubernetes Engine Hook</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"><a class="reference internal" href="lineage.html">Lineage</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="faq.html">FAQ</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="code.html">API Reference</a></li> |
| </ul> |
| |
| |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" aria-label="top navigation"> |
| |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="index.html">Airflow</a> |
| |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| |
| <div class="rst-content"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| |
| <ul class="wy-breadcrumbs"> |
| |
| <li><a href="index.html">Docs</a> »</li> |
| |
| <li>Integration</li> |
| |
| |
| <li class="wy-breadcrumbs-aside"> |
| |
| |
| <a href="_sources/integration.rst.txt" rel="nofollow"> View page source</a> |
| |
| |
| </li> |
| |
| </ul> |
| |
| |
| <hr/> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <div class="section" id="integration"> |
| <h1>Integration<a class="headerlink" href="#integration" title="Permalink to this headline">¶</a></h1> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#reverseproxy"><span class="std std-ref">Reverse Proxy</span></a></li> |
| <li><a class="reference internal" href="#azure"><span class="std std-ref">Azure: Microsoft Azure</span></a></li> |
| <li><a class="reference internal" href="#aws"><span class="std std-ref">AWS: Amazon Web Services</span></a></li> |
| <li><a class="reference internal" href="#databricks"><span class="std std-ref">Databricks</span></a></li> |
| <li><a class="reference internal" href="#gcp"><span class="std std-ref">GCP: Google Cloud Platform</span></a></li> |
| </ul> |
| <div class="section" id="reverse-proxy"> |
| <span id="reverseproxy"></span><h2>Reverse Proxy<a class="headerlink" href="#reverse-proxy" title="Permalink to this headline">¶</a></h2> |
| <p>Airflow can be set up behind a reverse proxy, with the ability to set its endpoint with great |
| flexibility.</p> |
| <p>For example, you can configure your reverse proxy to get:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">lab</span><span class="o">.</span><span class="n">mycompany</span><span class="o">.</span><span class="n">com</span><span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">airflow</span><span class="o">/</span> |
| </pre></div> |
| </div> |
| <p>To do so, you need to set the following setting in your <cite>airflow.cfg</cite>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">base_url</span> <span class="o">=</span> <span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="n">my_host</span><span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">airflow</span> |
| </pre></div> |
| </div> |
| <p>Additionally if you use Celery Executor, you can get Flower in <cite>/myorg/flower</cite> with:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">flower_url_prefix</span> <span class="o">=</span> <span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">flower</span> |
| </pre></div> |
| </div> |
| <p>Your reverse proxy (e.g. nginx) should be configured as follows:</p> |
| <ul> |
| <li><p class="first">pass the URL and HTTP headers as-is to the Airflow webserver, without any rewrite, for example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>server { |
| listen 80; |
| server_name lab.mycompany.com; |
| |
| location /myorg/airflow/ { |
| proxy_pass http://localhost:8080; |
| proxy_set_header Host $host; |
| proxy_redirect off; |
| proxy_http_version 1.1; |
| proxy_set_header Upgrade $http_upgrade; |
| proxy_set_header Connection "upgrade"; |
| } |
| } |
| </pre></div> |
| </div> |
| </li> |
| <li><p class="first">rewrite the url for the flower endpoint:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>server { |
| listen 80; |
| server_name lab.mycompany.com; |
| |
| location /myorg/flower/ { |
| rewrite ^/myorg/flower/(.*)$ /$1 break; # remove prefix from http header |
| proxy_pass http://localhost:5555; |
| proxy_set_header Host $host; |
| proxy_redirect off; |
| proxy_http_version 1.1; |
| proxy_set_header Upgrade $http_upgrade; |
| proxy_set_header Connection "upgrade"; |
| } |
| } |
| </pre></div> |
| </div> |
| </li> |
| </ul> |
| <p>To ensure that Airflow generates URLs with the correct scheme when |
| running behind a TLS-terminating proxy, you should configure the proxy |
| to set the <cite>X-Forwarded-Proto</cite> header, and enable the <cite>ProxyFix</cite> |
| middleware in your <cite>airflow.cfg</cite>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">enable_proxy_fix</span> <span class="o">=</span> <span class="kc">True</span> |
| </pre></div> |
| </div> |
| <p>Note: you should only enable the <cite>ProxyFix</cite> middleware when running |
| Airflow behind a trusted proxy (AWS ELB, nginx, etc.).</p> |
| </div> |
| <div class="section" id="azure-microsoft-azure"> |
| <span id="azure"></span><h2>Azure: Microsoft Azure<a class="headerlink" href="#azure-microsoft-azure" title="Permalink to this headline">¶</a></h2> |
| <p>Airflow has limited support for Microsoft Azure: interfaces exist only for Azure Blob |
| Storage and Azure Data Lake. The hook, sensors and operator for Blob Storage and |
| the Azure Data Lake hook are in the contrib section.</p> |
| <div class="section" id="azure-blob-storage"> |
| <h3>Azure Blob Storage<a class="headerlink" href="#azure-blob-storage" title="Permalink to this headline">¶</a></h3> |
| <p>All classes communicate via the Windows Azure Storage Blob protocol. Make sure that an |
| Airflow connection of type <cite>wasb</cite> exists. Authorization can be done by supplying a |
| login (=Storage account name) and password (=KEY), or login and SAS token in the extra |
| field (see connection <cite>wasb_default</cite> for an example).</p> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#wasbblobsensor"><span class="std std-ref">WasbBlobSensor</span></a>: Checks if a blob is present on Azure Blob storage.</li> |
| <li><a class="reference internal" href="#wasbprefixsensor"><span class="std std-ref">WasbPrefixSensor</span></a>: Checks if blobs matching a prefix are present on Azure Blob storage.</li> |
| <li><a class="reference internal" href="#filetowasboperator"><span class="std std-ref">FileToWasbOperator</span></a>: Uploads a local file to a container as a blob.</li> |
| <li><a class="reference internal" href="#wasbhook"><span class="std std-ref">WasbHook</span></a>: Interface with Azure Blob Storage.</li> |
| </ul> |
| <div class="section" id="wasbblobsensor"> |
| <span id="id1"></span><h4>WasbBlobSensor<a class="headerlink" href="#wasbblobsensor" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.wasb_sensor.WasbBlobSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.wasb_sensor.</code><code class="descname">WasbBlobSensor</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>wasb_conn_id='wasb_default'</em>, <em>check_options=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbBlobSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbBlobSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a blob to arrive on Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li> |
| <li><strong>check_options</strong> (<em>dict</em>) – Optional keyword arguments that |
| <cite>WasbHook.check_for_blob()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.wasb_sensor.WasbBlobSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbBlobSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbBlobSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that sensors derived from this class should override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="wasbprefixsensor"> |
| <span id="id2"></span><h4>WasbPrefixSensor<a class="headerlink" href="#wasbprefixsensor" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.wasb_sensor.</code><code class="descname">WasbPrefixSensor</code><span class="sig-paren">(</span><em>container_name</em>, <em>prefix</em>, <em>wasb_conn_id='wasb_default'</em>, <em>check_options=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbPrefixSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for blobs matching a prefix to arrive on Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>prefix</strong> (<em>str</em>) – Prefix of the blob.</li> |
| <li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li> |
| <li><strong>check_options</strong> (<em>dict</em>) – Optional keyword arguments that |
| <cite>WasbHook.check_for_prefix()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbPrefixSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that sensors derived from this class should override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="filetowasboperator"> |
| <span id="id3"></span><h4>FileToWasbOperator<a class="headerlink" href="#filetowasboperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.file_to_wasb.FileToWasbOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.file_to_wasb.</code><code class="descname">FileToWasbOperator</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>wasb_conn_id='wasb_default'</em>, <em>load_options=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_wasb.html#FileToWasbOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_wasb.FileToWasbOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Uploads a file to Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to load. (templated)</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container. (templated)</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob. (templated)</li> |
| <li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li> |
| <li><strong>load_options</strong> (<em>dict</em>) – Optional keyword arguments that |
| <cite>WasbHook.load_file()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.file_to_wasb.FileToWasbOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_wasb.html#FileToWasbOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_wasb.FileToWasbOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure Blob Storage.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="wasbhook"> |
| <span id="id4"></span><h4>WasbHook<a class="headerlink" href="#wasbhook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.wasb_hook.</code><code class="descname">WasbHook</code><span class="sig-paren">(</span><em>wasb_conn_id='wasb_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure Blob Storage through the wasb:// protocol.</p> |
| <p>Additional options passed in the ‘extra’ field of the connection will be |
| passed to the <cite>BlockBlobService()</cite> constructor. For example, authenticate |
| using a SAS token by adding {“sas_token”: “YOUR_TOKEN”}.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.check_for_blob"> |
| <code class="descname">check_for_blob</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.check_for_blob"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.check_for_blob" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a blob exists on Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.exists()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if the blob exists, False otherwise.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>Return type: bool</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.check_for_prefix"> |
| <code class="descname">check_for_prefix</code><span class="sig-paren">(</span><em>container_name</em>, <em>prefix</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.check_for_prefix"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.check_for_prefix" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a prefix exists on Azure Blob storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>prefix</strong> (<em>str</em>) – Prefix of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.list_blobs()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if blobs matching the prefix exist, False otherwise.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>Return type: bool</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.delete_file"> |
| <code class="descname">delete_file</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>is_prefix=False</em>, <em>ignore_if_missing=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.delete_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.delete_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete a file from Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li>
<li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li>
<li><strong>is_prefix</strong> (<em>bool</em>) – If blob_name is a prefix, delete all matching files</li>
<li><strong>ignore_if_missing</strong> (<em>bool</em>) – If True, then return success even if the
blob does not exist.</li>
<li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that
<cite>BlockBlobService.create_blob_from_path()</cite> takes.</li>
</ul>
</td>
</tr>
</tbody>
</table>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the BlockBlobService object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.get_file"> |
| <code class="descname">get_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.get_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.get_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Download a file from Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to download.</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.create_blob_from_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.load_file"> |
| <code class="descname">load_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to load.</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.create_blob_from_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.load_string"> |
| <code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.load_string" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a string to Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>string_data</strong> (<em>str</em>) – String to load.</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.create_blob_from_text()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.read_file"> |
| <code class="descname">read_file</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.read_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.read_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Read a file from Azure Blob Storage and return as a string.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.create_blob_from_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="azure-file-share"> |
| <h3>Azure File Share<a class="headerlink" href="#azure-file-share" title="Permalink to this headline">¶</a></h3> |
<p>Cloud variant of an SMB file share. Make sure that an Airflow connection of
| type <cite>wasb</cite> exists. Authorization can be done by supplying a login (=Storage account name) |
| and password (=Storage account key), or login and SAS token in the extra field |
| (see connection <cite>wasb_default</cite> for an example).</p> |
| <div class="section" id="azurefilesharehook"> |
| <h4>AzureFileShareHook<a class="headerlink" href="#azurefilesharehook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_fileshare_hook.</code><code class="descname">AzureFileShareHook</code><span class="sig-paren">(</span><em>wasb_conn_id='wasb_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure FileShare Storage.</p> |
| <p>Additional options passed in the ‘extra’ field of the connection will be |
| passed to the <cite>FileService()</cite> constructor.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_directory"> |
| <code class="descname">check_for_directory</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.check_for_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a directory exists on Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.exists()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if the directory exists, False otherwise.</p>
| </td> |
| </tr> |
| </tbody> |
| </table> |
<p><strong>Return type:</strong> bool</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_file"> |
| <code class="descname">check_for_file</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.check_for_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a file exists on Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.exists()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if the file exists, False otherwise.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
<p><strong>Return type:</strong> bool</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.create_directory"> |
| <code class="descname">create_directory</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.create_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.create_directory" title="Permalink to this definition">¶</a></dt> |
<dd><p>Create a new directory on an Azure File Share.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_directory()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A list of files and directories</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
<p><strong>Return type:</strong> list</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the FileService object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file"> |
| <code class="descname">get_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Download a file from Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Where to store the file.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.get_file_to_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file_to_stream"> |
| <code class="descname">get_file_to_stream</code><span class="sig-paren">(</span><em>stream</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_file_to_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file_to_stream" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Download a file from Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>stream</strong> (<em>file-like object</em>) – A filehandle to store the file to.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.get_file_to_stream()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.list_directories_and_files"> |
| <code class="descname">list_directories_and_files</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.list_directories_and_files"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.list_directories_and_files" title="Permalink to this definition">¶</a></dt> |
<dd><p>Return the list of directories and files stored on an Azure File Share.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.list_directories_and_files()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A list of files and directories</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
<p><strong>Return type:</strong> list</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_file"> |
| <code class="descname">load_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to load.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_file_from_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_stream"> |
| <code class="descname">load_stream</code><span class="sig-paren">(</span><em>stream</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>count</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_stream" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a stream to Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>stream</strong> (<em>file-like</em>) – Opened file/stream to upload as the file content.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>count</strong> (<em>int</em>) – Size of the stream in bytes</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_file_from_stream()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_string"> |
| <code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_string" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a string to Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>string_data</strong> (<em>str</em>) – String to load.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_file_from_text()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="logging"> |
| <h3>Logging<a class="headerlink" href="#logging" title="Permalink to this headline">¶</a></h3> |
| <p>Airflow can be configured to read and write task logs in Azure Blob Storage. |
| See <a class="reference internal" href="howto/write-logs.html#write-logs-azure"><span class="std std-ref">Writing Logs to Azure Blob Storage</span></a>.</p> |
| </div> |
| <div class="section" id="azure-data-lake"> |
| <h3>Azure Data Lake<a class="headerlink" href="#azure-data-lake" title="Permalink to this headline">¶</a></h3> |
<p>AzureDataLakeHook communicates via a REST API compatible with WebHDFS. Make sure that an
Airflow connection of type <cite>azure_data_lake</cite> exists. Authorization can be done by supplying a
login (=Client ID), password (=Client Secret) and extra fields tenant (Tenant) and account_name (Account Name)
(see connection <cite>azure_data_lake_default</cite> for an example).</p>
| <ul class="simple"> |
| <li><a class="reference internal" href="#azuredatalakehook"><span class="std std-ref">AzureDataLakeHook</span></a>: Interface with Azure Data Lake.</li> |
| </ul> |
| <div class="section" id="azuredatalakehook"> |
| <span id="id5"></span><h4>AzureDataLakeHook<a class="headerlink" href="#azuredatalakehook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_data_lake_hook.</code><code class="descname">AzureDataLakeHook</code><span class="sig-paren">(</span><em>azure_data_lake_conn_id='azure_data_lake_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure Data Lake.</p> |
| <p>Client ID and client secret should be in user and password parameters. |
| Tenant and account name should be extra field as |
{“tenant”: “&lt;TENANT&gt;”, “account_name”: “&lt;ACCOUNT_NAME&gt;”}.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – Reference to the Azure Data Lake connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.check_for_file"> |
| <code class="descname">check_for_file</code><span class="sig-paren">(</span><em>file_path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.check_for_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.check_for_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a file exists on Azure Data Lake.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>file_path</strong> (<em>str</em>) – Path and name of the file.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">True if the file exists, False otherwise.</td> |
| </tr> |
| </tbody> |
| </table> |
<p><strong>Return type:</strong> bool</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.download_file"> |
| <code class="descname">download_file</code><span class="sig-paren">(</span><em>local_path</em>, <em>remote_path</em>, <em>nthreads=64</em>, <em>overwrite=True</em>, <em>buffersize=4194304</em>, <em>blocksize=4194304</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.download_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.download_file" title="Permalink to this definition">¶</a></dt> |
<dd><p>Download a file from Azure Data Lake.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>local_path</strong> (<em>str</em>) – local path. If downloading a single file, will write to this |
| specific file, unless it is an existing directory, in which case a file is |
| created within it. If downloading multiple files, this is the root |
| directory to write within. Will create directories as required.</li> |
| <li><strong>remote_path</strong> (<em>str</em>) – remote path/globstring to use to find remote files. |
| Recursive glob patterns using <cite>**</cite> are not supported.</li> |
| <li><strong>nthreads</strong> (<em>int</em>) – Number of threads to use. If None, uses the number of cores.</li> |
| <li><strong>overwrite</strong> (<em>bool</em>) – Whether to forcibly overwrite existing files/directories. |
| If False and remote path is a directory, will quit regardless if any files |
| would be overwritten or not. If True, only matching filenames are actually |
| overwritten.</li> |
| <li><strong>buffersize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for internal buffer. This block cannot be bigger than |
| a chunk and cannot be smaller than a block.</li> |
| <li><strong>blocksize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for a block. Within each chunk, we write a smaller |
| block for each API call. This block cannot be bigger than a chunk.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return a AzureDLFileSystem object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.upload_file"> |
| <code class="descname">upload_file</code><span class="sig-paren">(</span><em>local_path</em>, <em>remote_path</em>, <em>nthreads=64</em>, <em>overwrite=True</em>, <em>buffersize=4194304</em>, <em>blocksize=4194304</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.upload_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.upload_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure Data Lake.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>local_path</strong> (<em>str</em>) – local path. Can be single file, directory (in which case, |
| upload recursively) or glob pattern. Recursive glob patterns using <cite>**</cite> |
| are not supported.</li> |
| <li><strong>remote_path</strong> (<em>str</em>) – Remote path to upload to; if multiple files, this is the |
directory root to write within.</li>
| <li><strong>nthreads</strong> (<em>int</em>) – Number of threads to use. If None, uses the number of cores.</li> |
| <li><strong>overwrite</strong> (<em>bool</em>) – Whether to forcibly overwrite existing files/directories. |
| If False and remote path is a directory, will quit regardless if any files |
| would be overwritten or not. If True, only matching filenames are actually |
| overwritten.</li> |
| <li><strong>buffersize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for internal buffer. This block cannot be bigger than |
| a chunk and cannot be smaller than a block.</li> |
| <li><strong>blocksize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for a block. Within each chunk, we write a smaller |
| block for each API call. This block cannot be bigger than a chunk.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| <div class="section" id="aws-amazon-web-services"> |
| <span id="aws"></span><h2>AWS: Amazon Web Services<a class="headerlink" href="#aws-amazon-web-services" title="Permalink to this headline">¶</a></h2> |
| <p>Airflow has extensive support for Amazon Web Services. But note that the Hooks, Sensors and |
| Operators are in the contrib section.</p> |
| <div class="section" id="aws-emr"> |
| <h3>AWS EMR<a class="headerlink" href="#aws-emr" title="Permalink to this headline">¶</a></h3> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#emraddstepsoperator"><span class="std std-ref">EmrAddStepsOperator</span></a> : Adds steps to an existing EMR JobFlow.</li> |
| <li><a class="reference internal" href="#emrcreatejobflowoperator"><span class="std std-ref">EmrCreateJobFlowOperator</span></a> : Creates an EMR JobFlow, reading the config from the EMR connection.</li> |
| <li><a class="reference internal" href="#emrterminatejobflowoperator"><span class="std std-ref">EmrTerminateJobFlowOperator</span></a> : Terminates an EMR JobFlow.</li> |
| <li><a class="reference internal" href="#emrhook"><span class="std std-ref">EmrHook</span></a> : Interact with AWS EMR.</li> |
| </ul> |
| <div class="section" id="emraddstepsoperator"> |
| <span id="id6"></span><h4>EmrAddStepsOperator<a class="headerlink" href="#emraddstepsoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_add_steps_operator.</code><code class="descname">EmrAddStepsOperator</code><span class="sig-paren">(</span><em>job_flow_id</em>, <em>aws_conn_id='s3_default'</em>, <em>steps=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_add_steps_operator.html#EmrAddStepsOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>An operator that adds steps to an existing EMR job_flow.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_flow_id</strong> – id of the JobFlow to add steps to. (templated)</li> |
<li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li>
| <li><strong>steps</strong> (<em>list</em>) – boto3 style steps to be added to the jobflow. (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_add_steps_operator.html#EmrAddStepsOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="emrcreatejobflowoperator"> |
| <span id="id7"></span><h4>EmrCreateJobFlowOperator<a class="headerlink" href="#emrcreatejobflowoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_create_job_flow_operator.</code><code class="descname">EmrCreateJobFlowOperator</code><span class="sig-paren">(</span><em>aws_conn_id='s3_default'</em>, <em>emr_conn_id='emr_default'</em>, <em>job_flow_overrides=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_create_job_flow_operator.html#EmrCreateJobFlowOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates an EMR JobFlow, reading the config from the EMR connection. |
| A dictionary of JobFlow overrides can be passed that override |
| the config from the connection.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li>
| <li><strong>emr_conn_id</strong> (<em>str</em>) – emr connection to use</li> |
| <li><strong>job_flow_overrides</strong> – boto3 style arguments to override |
| emr_connection extra. (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_create_job_flow_operator.html#EmrCreateJobFlowOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="emrterminatejobflowoperator"> |
| <span id="id8"></span><h4>EmrTerminateJobFlowOperator<a class="headerlink" href="#emrterminatejobflowoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_terminate_job_flow_operator.</code><code class="descname">EmrTerminateJobFlowOperator</code><span class="sig-paren">(</span><em>job_flow_id</em>, <em>aws_conn_id='s3_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_terminate_job_flow_operator.html#EmrTerminateJobFlowOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator to terminate EMR JobFlows.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_flow_id</strong> – id of the JobFlow to terminate. (templated)</li> |
<li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_terminate_job_flow_operator.html#EmrTerminateJobFlowOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="emrhook"> |
| <span id="id9"></span><h4>EmrHook<a class="headerlink" href="#emrhook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.emr_hook.EmrHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.emr_hook.</code><code class="descname">EmrHook</code><span class="sig-paren">(</span><em>emr_conn_id=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/emr_hook.html#EmrHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.emr_hook.EmrHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
<p>Interact with AWS EMR. emr_conn_id is only necessary for using the
| create_job_flow method.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.emr_hook.EmrHook.create_job_flow"> |
| <code class="descname">create_job_flow</code><span class="sig-paren">(</span><em>job_flow_overrides</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/emr_hook.html#EmrHook.create_job_flow"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.emr_hook.EmrHook.create_job_flow" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a job flow using the config from the EMR connection. |
| Keys of the json extra hash may have the arguments of the boto3 |
| run_job_flow method. |
| Overrides for this config may be passed as the job_flow_overrides.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="aws-s3"> |
| <h3>AWS S3<a class="headerlink" href="#aws-s3" title="Permalink to this headline">¶</a></h3> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#s3hook"><span class="std std-ref">S3Hook</span></a> : Interact with AWS S3.</li> |
| <li><a class="reference internal" href="#s3filetransformoperator"><span class="std std-ref">S3FileTransformOperator</span></a> : Copies data from a source S3 location to a temporary location on the local filesystem.</li> |
| <li><a class="reference internal" href="#s3listoperator"><span class="std std-ref">S3ListOperator</span></a> : Lists the files matching a key prefix from a S3 location.</li> |
| <li><a class="reference internal" href="#s3togooglecloudstorageoperator"><span class="std std-ref">S3ToGoogleCloudStorageOperator</span></a> : Syncs an S3 location with a Google Cloud Storage bucket.</li> |
| <li><a class="reference internal" href="#s3tohivetransfer"><span class="std std-ref">S3ToHiveTransfer</span></a> : Moves data from S3 to Hive. The operator downloads a file from S3, stores the file locally before loading it into a Hive table.</li> |
| </ul> |
| <div class="section" id="s3hook"> |
| <span id="id10"></span><h4>S3Hook<a class="headerlink" href="#s3hook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.hooks.S3_hook.S3Hook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.S3_hook.</code><code class="descname">S3Hook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS S3, using the boto3 library.</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_bucket"> |
| <code class="descname">check_for_bucket</code><span class="sig-paren">(</span><em>bucket_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if bucket_name exists.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_key"> |
| <code class="descname">check_for_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if a key exists in a bucket</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_prefix"> |
| <code class="descname">check_for_prefix</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix</em>, <em>delimiter</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_prefix"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_prefix" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks that a prefix exists in a bucket</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_wildcard_key"> |
| <code class="descname">check_for_wildcard_key</code><span class="sig-paren">(</span><em>wildcard_key</em>, <em>bucket_name=None</em>, <em>delimiter=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_wildcard_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_wildcard_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks that a key matching a wildcard expression exists in a bucket</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.copy_object"> |
| <code class="descname">copy_object</code><span class="sig-paren">(</span><em>source_bucket_key</em>, <em>dest_bucket_key</em>, <em>source_bucket_name=None</em>, <em>dest_bucket_name=None</em>, <em>source_version_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.copy_object"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.copy_object" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a copy of an object that is already stored in S3.</p> |
| <p>Note: the S3 connection used here needs to have access to both |
| source and destination bucket/key.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket_key</strong> (<em>str</em>) – <p>The key of the source object.</p> |
| <p>It can be either full s3:// style url or relative path from root level.</p> |
| <p>When it’s specified as a full s3:// url, please omit source_bucket_name.</p> |
| </li> |
| <li><strong>dest_bucket_key</strong> (<em>str</em>) – <p>The key of the object to copy to.</p> |
| <p>The convention to specify <cite>dest_bucket_key</cite> is the same |
| as <cite>source_bucket_key</cite>.</p> |
| </li> |
| <li><strong>source_bucket_name</strong> (<em>str</em>) – <p>Name of the S3 bucket where the source object is in.</p> |
| <p>It should be omitted when <cite>source_bucket_key</cite> is provided as a full s3:// url.</p> |
| </li> |
| <li><strong>dest_bucket_name</strong> (<em>str</em>) – <p>Name of the S3 bucket to where the object is copied.</p> |
| <p>It should be omitted when <cite>dest_bucket_key</cite> is provided as a full s3:// url.</p> |
| </li> |
| <li><strong>source_version_id</strong> (<em>str</em>) – Version ID of the source object (OPTIONAL)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.create_bucket"> |
| <code class="descname">create_bucket</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>region_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.create_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.create_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates an Amazon S3 bucket.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – The name of the bucket</li> |
| <li><strong>region_name</strong> (<em>str</em>) – The name of the aws region in which to create the bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.delete_objects"> |
| <code class="descname">delete_objects</code><span class="sig-paren">(</span><em>bucket</em>, <em>keys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.delete_objects"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.delete_objects" title="Permalink to this definition">¶</a></dt> |
| <dd><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>str</em>) – Name of the bucket in which you are going to delete object(s)</li> |
| <li><strong>keys</strong> (<em>str</em><em> or </em><em>list</em>) – <p>The key(s) to delete from S3 bucket.</p> |
| <p>When <code class="docutils literal notranslate"><span class="pre">keys</span></code> is a string, it’s supposed to be the key name of |
| the single object to delete.</p> |
| <p>When <code class="docutils literal notranslate"><span class="pre">keys</span></code> is a list, it’s supposed to be the list of the |
| keys to delete.</p> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.get_bucket"> |
| <code class="descname">get_bucket</code><span class="sig-paren">(</span><em>bucket_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boto3.S3.Bucket object</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.get_key"> |
| <code class="descname">get_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boto3.s3.Object</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>str</em>) – the path to the key</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.get_wildcard_key"> |
| <code class="descname">get_wildcard_key</code><span class="sig-paren">(</span><em>wildcard_key</em>, <em>bucket_name=None</em>, <em>delimiter=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_wildcard_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_wildcard_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boto3.s3.Object object matching the wildcard expression</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>wildcard_key</strong> (<em>str</em>) – the path to the key</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.list_keys"> |
| <code class="descname">list_keys</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.list_keys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.list_keys" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lists keys in a bucket under prefix and not containing delimiter</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>prefix</strong> (<em>str</em>) – a key prefix</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li> |
| <li><strong>page_size</strong> (<em>int</em>) – pagination size</li> |
| <li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.list_prefixes"> |
| <code class="descname">list_prefixes</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.list_prefixes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.list_prefixes" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lists prefixes in a bucket under prefix</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>prefix</strong> (<em>str</em>) – a key prefix</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li> |
| <li><strong>page_size</strong> (<em>int</em>) – pagination size</li> |
| <li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_bytes"> |
| <code class="descname">load_bytes</code><span class="sig-paren">(</span><em>bytes_data</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_bytes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_bytes" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads bytes to S3</p> |
| <p>This is provided as a convenience to drop a string in S3. It uses the |
| boto infrastructure to ship a file to s3.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bytes_data</strong> (<em>bytes</em>) – bytes to set as content for the key.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key |
| if it already exists</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side |
| by S3 and will be stored in an encrypted form while at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_file"> |
| <code class="descname">load_file</code><span class="sig-paren">(</span><em>filename</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a local file to S3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>filename</strong> (<em>str</em>) – name of the file to load.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key |
| if it already exists. If replace is False and the key exists, an |
| error will be raised.</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side |
| by S3 and will be stored in an encrypted form while at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_file_obj"> |
| <code class="descname">load_file_obj</code><span class="sig-paren">(</span><em>file_obj</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_file_obj"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_file_obj" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a file object to S3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_obj</strong> (<em>file-like object</em>) – The file-like object to set as the content for the S3 key.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag that indicates whether to overwrite the key |
| if it already exists.</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, S3 encrypts the file on the server, |
| and the file is stored in encrypted form at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_string"> |
| <code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em>, <em>encoding='utf-8'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_string" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a string to S3</p> |
| <p>This is provided as a convenience to drop a string in S3. It uses the |
| boto infrastructure to ship a file to s3.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>string_data</strong> (<em>str</em>) – string to set as content for the key.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key |
| if it already exists</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side |
| by S3 and will be stored in an encrypted form while at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.read_key"> |
| <code class="descname">read_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.read_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.read_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Reads a key from S3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.select_key"> |
| <code class="descname">select_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em>, <em>expression='SELECT * FROM S3Object'</em>, <em>expression_type='SQL'</em>, <em>input_serialization={'CSV': {}}</em>, <em>output_serialization={'CSV': {}}</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.select_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.select_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Reads a key with S3 Select.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li> |
| <li><strong>expression</strong> (<em>str</em>) – S3 Select expression</li> |
| <li><strong>expression_type</strong> (<em>str</em>) – S3 Select expression type</li> |
| <li><strong>input_serialization</strong> (<em>dict</em>) – S3 Select input data serialization format</li> |
| <li><strong>output_serialization</strong> (<em>dict</em>) – S3 Select output data serialization format</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">retrieved subset of original data by S3 Select</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">str</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more details about S3 Select parameters: |
| <a class="reference external" href="http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.select_object_content">http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.select_object_content</a></p> |
| </div> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="s3filetransformoperator"> |
| <span id="id11"></span><h4>S3FileTransformOperator<a class="headerlink" href="#s3filetransformoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.operators.s3_file_transform_operator.S3FileTransformOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.s3_file_transform_operator.</code><code class="descname">S3FileTransformOperator</code><span class="sig-paren">(</span><em>source_s3_key</em>, <em>dest_s3_key</em>, <em>transform_script=None</em>, <em>select_expression=None</em>, <em>source_aws_conn_id='aws_default'</em>, <em>dest_aws_conn_id='aws_default'</em>, <em>replace=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_file_transform_operator.html#S3FileTransformOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_file_transform_operator.S3FileTransformOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies data from a source S3 location to a temporary location on the |
| local filesystem. Runs a transformation on this file as specified by |
| the transformation script and uploads the output to a destination S3 |
| location.</p> |
| <p>The locations of the source and the destination files in the local |
| filesystem are provided as the first and second arguments to the |
| transformation script. The transformation script is expected to read the |
| data from source, transform it and write the output to the local |
| destination file. The operator then takes over control and uploads the |
| local destination file to S3.</p> |
| <p>S3 Select is also available to filter the source contents. Users can |
| omit the transformation script if an S3 Select expression is specified.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_s3_key</strong> (<em>str</em>) – The key to be retrieved from S3. (templated)</li> |
| <li><strong>source_aws_conn_id</strong> (<em>str</em>) – source s3 connection</li> |
| <li><strong>dest_s3_key</strong> (<em>str</em>) – The key to be written to S3. (templated)</li> |
| <li><strong>dest_aws_conn_id</strong> (<em>str</em>) – destination s3 connection</li> |
| <li><strong>replace</strong> (<em>bool</em>) – Replace dest S3 key if it already exists</li> |
| <li><strong>transform_script</strong> (<em>str</em>) – location of the executable transformation script</li> |
| <li><strong>select_expression</strong> (<em>str</em>) – S3 Select expression</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.operators.s3_file_transform_operator.S3FileTransformOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_file_transform_operator.html#S3FileTransformOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_file_transform_operator.S3FileTransformOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="s3listoperator"> |
| <span id="id12"></span><h4>S3ListOperator<a class="headerlink" href="#s3listoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.s3_list_operator.S3ListOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_list_operator.</code><code class="descname">S3ListOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>aws_conn_id='aws_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_list_operator.html#S3ListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>List all objects from the bucket with the given string prefix in name.</p> |
| <p>This operator returns a python list with the name of objects which can be |
| used by <cite>xcom</cite> in the downstream task.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The S3 bucket where to find the objects. (templated)</li> |
| <li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters the objects whose name begins with |
| such prefix. (templated)</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – the delimiter marks key hierarchy. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – The connection ID to use when connecting to S3 storage.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following operator would list all the files |
| (excluding subfolders) from the S3 |
| <code class="docutils literal notranslate"><span class="pre">customers/2018/04/</span></code> key in the <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">s3_file</span> <span class="o">=</span> <span class="n">S3ListOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'list_s3_files'</span><span class="p">,</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">prefix</span><span class="o">=</span><span class="s1">'customers/2018/04/'</span><span class="p">,</span> |
| <span class="n">delimiter</span><span class="o">=</span><span class="s1">'/'</span><span class="p">,</span> |
| <span class="n">aws_conn_id</span><span class="o">=</span><span class="s1">'aws_customers_conn'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.s3_list_operator.S3ListOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_list_operator.html#S3ListOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="s3togooglecloudstorageoperator"> |
| <span id="id13"></span><h4>S3ToGoogleCloudStorageOperator<a class="headerlink" href="#s3togooglecloudstorageoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_to_gcs_operator.</code><code class="descname">S3ToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>aws_conn_id='aws_default'</em>, <em>dest_gcs_conn_id=None</em>, <em>dest_gcs=None</em>, <em>delegate_to=None</em>, <em>replace=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_to_gcs_operator.html#S3ToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator" title="airflow.contrib.operators.s3_list_operator.S3ListOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.s3_list_operator.S3ListOperator</span></code></a></p> |
| <p>Synchronizes an S3 key, possibly a prefix, with a Google Cloud Storage |
| destination path.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The S3 bucket where to find the objects. (templated)</li> |
| <li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose name begin with |
| such prefix. (templated)</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – the delimiter marks key hierarchy. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – The source S3 connection</li> |
| <li><strong>dest_gcs_conn_id</strong> (<em>string</em>) – The destination connection ID to use |
| when connecting to Google Cloud Storage.</li> |
| <li><strong>dest_gcs</strong> (<em>string</em>) – The destination Google Cloud Storage bucket and prefix |
| where you want to store the files. (templated)</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>replace</strong> (<em>bool</em>) – Whether you want to replace existing destination files |
| or not.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">s3_to_gcs_op</span> <span class="o">=</span> <span class="n">S3ToGoogleCloudStorageOperator</span><span class="p">(</span> |
|     <span class="n">task_id</span><span class="o">=</span><span class="s1">'s3_to_gcs_example'</span><span class="p">,</span> |
|     <span class="n">bucket</span><span class="o">=</span><span class="s1">'my-s3-bucket'</span><span class="p">,</span> |
|     <span class="n">prefix</span><span class="o">=</span><span class="s1">'data/customers-201804'</span><span class="p">,</span> |
|     <span class="n">dest_gcs_conn_id</span><span class="o">=</span><span class="s1">'google_cloud_default'</span><span class="p">,</span> |
|     <span class="n">dest_gcs</span><span class="o">=</span><span class="s1">'gs://my.gcs.bucket/some/customers/'</span><span class="p">,</span> |
|     <span class="n">replace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> |
|     <span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>Note that <code class="docutils literal notranslate"><span class="pre">bucket</span></code>, <code class="docutils literal notranslate"><span class="pre">prefix</span></code>, <code class="docutils literal notranslate"><span class="pre">delimiter</span></code> and <code class="docutils literal notranslate"><span class="pre">dest_gcs</span></code> are |
| templated, so you can use variables in them if you wish.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_to_gcs_operator.html#S3ToGoogleCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="s3tohivetransfer"> |
| <span id="id14"></span><h4>S3ToHiveTransfer<a class="headerlink" href="#s3tohivetransfer" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.operators.s3_to_hive_operator.S3ToHiveTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.s3_to_hive_operator.</code><code class="descname">S3ToHiveTransfer</code><span class="sig-paren">(</span><em>s3_key</em>, <em>field_dict</em>, <em>hive_table</em>, <em>delimiter='</em>, <em>'</em>, <em>create=True</em>, <em>recreate=False</em>, <em>partition=None</em>, <em>headers=False</em>, <em>check_headers=False</em>, <em>wildcard_match=False</em>, <em>aws_conn_id='aws_default'</em>, <em>hive_cli_conn_id='hive_cli_default'</em>, <em>input_compressed=False</em>, <em>tblproperties=None</em>, <em>select_expression=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_hive_operator.html#S3ToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_hive_operator.S3ToHiveTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from S3 to Hive. The operator downloads a file from S3, |
| stores the file locally before loading it into a Hive table. |
| If the <code class="docutils literal notranslate"><span class="pre">create</span></code> or <code class="docutils literal notranslate"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal notranslate"><span class="pre">True</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal notranslate"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated. |
| Hive data types are inferred from the cursor’s metadata.</p> |
| <p>Note that the table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code> |
| which isn’t the most efficient serialization format. If a |
| large amount of data is loaded and/or if the table gets |
| queried considerably, you may want to use this operator only to |
| stage the data into a temporary table before loading it into its |
| final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>s3_key</strong> (<em>str</em>) – The key to be retrieved from S3. (templated)</li> |
| <li><strong>field_dict</strong> (<em>dict</em>) – A dictionary of the fields name in the file |
| as keys and their Hive types as values</li> |
| <li><strong>hive_table</strong> (<em>str</em>) – target Hive table, use dot notation to target a |
| specific database. (templated)</li> |
| <li><strong>create</strong> (<em>bool</em>) – whether to create the table if it doesn’t exist</li> |
| <li><strong>recreate</strong> (<em>bool</em>) – whether to drop and recreate the table at every |
| execution</li> |
| <li><strong>partition</strong> (<em>dict</em>) – target partition as a dict of partition columns |
| and values. (templated)</li> |
| <li><strong>headers</strong> (<em>bool</em>) – whether the file contains column names on the first |
| line</li> |
| <li><strong>check_headers</strong> (<em>bool</em>) – whether the column names on the first line should be |
| checked against the keys of field_dict</li> |
| <li><strong>wildcard_match</strong> (<em>bool</em>) – whether the s3_key should be interpreted as a Unix |
| wildcard pattern</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – field delimiter in the file</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – source s3 connection</li> |
| <li><strong>hive_cli_conn_id</strong> (<em>str</em>) – destination hive connection</li> |
| <li><strong>input_compressed</strong> (<em>bool</em>) – Boolean to determine if file decompression is |
| required to process headers</li> |
| <li><strong>tblproperties</strong> (<em>dict</em>) – TBLPROPERTIES of the hive table being created</li> |
| <li><strong>select_expression</strong> (<em>str</em>) – S3 Select expression</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.operators.s3_to_hive_operator.S3ToHiveTransfer.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_hive_operator.html#S3ToHiveTransfer.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_hive_operator.S3ToHiveTransfer.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="aws-ec2-container-service"> |
| <h3>AWS EC2 Container Service<a class="headerlink" href="#aws-ec2-container-service" title="Permalink to this headline">¶</a></h3> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#ecsoperator"><span class="std std-ref">ECSOperator</span></a> : Execute a task on AWS EC2 Container Service.</li> |
| </ul> |
| <div class="section" id="ecsoperator"> |
| <span id="id15"></span><h4>ECSOperator<a class="headerlink" href="#ecsoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.ecs_operator.ECSOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.ecs_operator.</code><code class="descname">ECSOperator</code><span class="sig-paren">(</span><em>task_definition</em>, <em>cluster</em>, <em>overrides</em>, <em>aws_conn_id=None</em>, <em>region_name=None</em>, <em>launch_type='EC2'</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/ecs_operator.html#ECSOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.ecs_operator.ECSOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute a task on AWS EC2 Container Service</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>task_definition</strong> (<em>str</em>) – the task definition name on EC2 Container Service</li> |
| <li><strong>cluster</strong> (<em>str</em>) – the cluster name on EC2 Container Service</li> |
| <li><strong>overrides</strong> (<em>dict</em>) – the same parameter that boto3 will receive (templated): |
| <a class="reference external" href="http://boto3.readthedocs.org/en/latest/reference/services/ecs.html#ECS.Client.run_task">http://boto3.readthedocs.org/en/latest/reference/services/ecs.html#ECS.Client.run_task</a></li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – connection id of AWS credentials / region name. If None, |
| credential boto3 strategy will be used |
| (<a class="reference external" href="http://boto3.readthedocs.io/en/latest/guide/configuration.html">http://boto3.readthedocs.io/en/latest/guide/configuration.html</a>).</li> |
| <li><strong>region_name</strong> (<em>str</em>) – region name to use in AWS Hook. |
| Override the region_name in connection (if provided)</li> |
| <li><strong>launch_type</strong> (<em>str</em>) – the launch type on which to run your task (‘EC2’ or ‘FARGATE’)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.ecs_operator.ECSOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/ecs_operator.html#ECSOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.ecs_operator.ECSOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.ecs_operator.ECSOperator.on_kill"> |
| <code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/ecs_operator.html#ECSOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.ecs_operator.ECSOperator.on_kill" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Override this method to cleanup subprocesses when a task instance |
| gets killed. Any use of the threading, subprocess or multiprocessing |
| module within an operator needs to be cleaned up or it will leave |
| ghost processes behind.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="aws-batch-service"> |
| <h3>AWS Batch Service<a class="headerlink" href="#aws-batch-service" title="Permalink to this headline">¶</a></h3> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#awsbatchoperator"><span class="std std-ref">AWSBatchOperator</span></a> : Execute a task on AWS Batch Service.</li> |
| </ul> |
| <div class="section" id="awsbatchoperator"> |
| <span id="id16"></span><h4>AWSBatchOperator<a class="headerlink" href="#awsbatchoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.awsbatch_operator.AWSBatchOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.awsbatch_operator.</code><code class="descname">AWSBatchOperator</code><span class="sig-paren">(</span><em>job_name</em>, <em>job_definition</em>, <em>job_queue</em>, <em>overrides</em>, <em>max_retries=4200</em>, <em>aws_conn_id=None</em>, <em>region_name=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/awsbatch_operator.html#AWSBatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.awsbatch_operator.AWSBatchOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute a job on AWS Batch Service</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_name</strong> (<em>str</em>) – the name for the job that will run on AWS Batch</li> |
| <li><strong>job_definition</strong> (<em>str</em>) – the job definition name on AWS Batch</li> |
| <li><strong>job_queue</strong> (<em>str</em>) – the queue name on AWS Batch</li> |
| <li><strong>overrides</strong> (<em>dict</em>) – the same parameter that boto3 will receive on |
| containerOverrides (templated). |
| <a class="reference external" href="http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job">http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job</a></li> |
| <li><strong>max_retries</strong> (<em>int</em>) – exponential backoff retries while waiter is not merged, |
| 4200 = 48 hours</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – connection id of AWS credentials / region name. If None, |
| credential boto3 strategy will be used |
| (<a class="reference external" href="http://boto3.readthedocs.io/en/latest/guide/configuration.html">http://boto3.readthedocs.io/en/latest/guide/configuration.html</a>).</li> |
| <li><strong>region_name</strong> (<em>str</em>) – region name to use in AWS Hook. |
| Override the region_name in connection (if provided)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.awsbatch_operator.AWSBatchOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/awsbatch_operator.html#AWSBatchOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.awsbatch_operator.AWSBatchOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.awsbatch_operator.AWSBatchOperator.on_kill"> |
| <code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/awsbatch_operator.html#AWSBatchOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.awsbatch_operator.AWSBatchOperator.on_kill" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Override this method to cleanup subprocesses when a task instance |
| gets killed. Any use of the threading, subprocess or multiprocessing |
| module within an operator needs to be cleaned up or it will leave |
| ghost processes behind.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="aws-redshift"> |
| <h3>AWS RedShift<a class="headerlink" href="#aws-redshift" title="Permalink to this headline">¶</a></h3> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#awsredshiftclustersensor"><span class="std std-ref">AwsRedshiftClusterSensor</span></a> : Waits for a Redshift cluster to reach a specific status.</li> |
| <li><a class="reference internal" href="#redshifthook"><span class="std std-ref">RedshiftHook</span></a> : Interact with AWS Redshift, using the boto3 library.</li> |
| <li><a class="reference internal" href="#redshifttos3transfer"><span class="std std-ref">RedshiftToS3Transfer</span></a> : Executes an unload command to S3 as CSV with or without headers.</li> |
| <li><a class="reference internal" href="#s3toredshifttransfer"><span class="std std-ref">S3ToRedshiftTransfer</span></a> : Executes a copy command from S3 as CSV with or without headers.</li> |
| </ul> |
| <div class="section" id="awsredshiftclustersensor"> |
| <span id="id17"></span><h4>AwsRedshiftClusterSensor<a class="headerlink" href="#awsredshiftclustersensor" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.aws_redshift_cluster_sensor.</code><code class="descname">AwsRedshiftClusterSensor</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>target_status='available'</em>, <em>aws_conn_id='aws_default'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_redshift_cluster_sensor.html#AwsRedshiftClusterSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a Redshift cluster to reach a specific status.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – The identifier for the cluster being pinged.</li> |
| <li><strong>target_status</strong> (<em>str</em>) – The cluster status desired.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_redshift_cluster_sensor.html#AwsRedshiftClusterSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="redshifthook"> |
| <span id="id18"></span><h4>RedshiftHook<a class="headerlink" href="#redshifthook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.redshift_hook.</code><code class="descname">RedshiftHook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS Redshift, using the boto3 library</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.cluster_status"> |
| <code class="descname">cluster_status</code><span class="sig-paren">(</span><em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.cluster_status"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.cluster_status" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return status of a cluster</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.create_cluster_snapshot"> |
| <code class="descname">create_cluster_snapshot</code><span class="sig-paren">(</span><em>snapshot_identifier</em>, <em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.create_cluster_snapshot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.create_cluster_snapshot" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a snapshot of a cluster</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>snapshot_identifier</strong> (<em>str</em>) – unique identifier for a snapshot of a cluster</li> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.delete_cluster"> |
| <code class="descname">delete_cluster</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>skip_final_cluster_snapshot=True</em>, <em>final_cluster_snapshot_identifier=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.delete_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.delete_cluster" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete a cluster and optionally create a snapshot</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li> |
| <li><strong>skip_final_cluster_snapshot</strong> (<em>bool</em>) – determines cluster snapshot creation</li> |
| <li><strong>final_cluster_snapshot_identifier</strong> (<em>str</em>) – name of final cluster snapshot</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.describe_cluster_snapshots"> |
| <code class="descname">describe_cluster_snapshots</code><span class="sig-paren">(</span><em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.describe_cluster_snapshots"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.describe_cluster_snapshots" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets a list of snapshots for a cluster</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.restore_from_cluster_snapshot"> |
| <code class="descname">restore_from_cluster_snapshot</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>snapshot_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.restore_from_cluster_snapshot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.restore_from_cluster_snapshot" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Restores a cluster from its snapshot</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li> |
| <li><strong>snapshot_identifier</strong> (<em>str</em>) – unique identifier for a snapshot of a cluster</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="redshifttos3transfer"> |
| <span id="id19"></span><h4>RedshiftToS3Transfer<a class="headerlink" href="#redshifttos3transfer" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.redshift_to_s3_operator.</code><code class="descname">RedshiftToS3Transfer</code><span class="sig-paren">(</span><em>schema</em>, <em>table</em>, <em>s3_bucket</em>, <em>s3_key</em>, <em>redshift_conn_id='redshift_default'</em>, <em>aws_conn_id='aws_default'</em>, <em>unload_options=()</em>, <em>autocommit=False</em>, <em>parameters=None</em>, <em>include_header=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/redshift_to_s3_operator.html#RedshiftToS3Transfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes an UNLOAD command to s3 as a CSV with headers</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>schema</strong> (<em>string</em>) – reference to a specific schema in redshift database</li> |
| <li><strong>table</strong> (<em>string</em>) – reference to a specific table in redshift database</li> |
| <li><strong>s3_bucket</strong> (<em>string</em>) – reference to a specific S3 bucket</li> |
| <li><strong>s3_key</strong> (<em>string</em>) – reference to a specific S3 key</li> |
| <li><strong>redshift_conn_id</strong> (<em>string</em>) – reference to a specific redshift database</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – reference to a specific S3 connection</li> |
| <li><strong>unload_options</strong> (<em>list</em>) – reference to a list of UNLOAD options</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/redshift_to_s3_operator.html#RedshiftToS3Transfer.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="s3toredshifttransfer"> |
| <span id="id20"></span><h4>S3ToRedshiftTransfer<a class="headerlink" href="#s3toredshifttransfer" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.s3_to_redshift_operator.</code><code class="descname">S3ToRedshiftTransfer</code><span class="sig-paren">(</span><em>schema</em>, <em>table</em>, <em>s3_bucket</em>, <em>s3_key</em>, <em>redshift_conn_id='redshift_default'</em>, <em>aws_conn_id='aws_default'</em>, <em>copy_options=()</em>, <em>autocommit=False</em>, <em>parameters=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_redshift_operator.html#S3ToRedshiftTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes a COPY command to load files from s3 to Redshift</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>schema</strong> (<em>string</em>) – reference to a specific schema in redshift database</li> |
| <li><strong>table</strong> (<em>string</em>) – reference to a specific table in redshift database</li> |
| <li><strong>s3_bucket</strong> (<em>string</em>) – reference to a specific S3 bucket</li> |
| <li><strong>s3_key</strong> (<em>string</em>) – reference to a specific S3 key</li> |
| <li><strong>redshift_conn_id</strong> (<em>string</em>) – reference to a specific redshift database</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – reference to a specific S3 connection</li> |
| <li><strong>copy_options</strong> (<em>list</em>) – reference to a list of COPY options</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_redshift_operator.html#S3ToRedshiftTransfer.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| <div class="section" id="databricks"> |
| <span id="id21"></span><h2>Databricks<a class="headerlink" href="#databricks" title="Permalink to this headline">¶</a></h2> |
| <p><a class="reference external" href="https://databricks.com/">Databricks</a> has contributed an Airflow operator which enables |
| submitting runs to the Databricks platform. Internally the operator talks to the |
| <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> <a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">endpoint</a>.</p> |
| <div class="section" id="databrickssubmitrunoperator"> |
| <h3>DatabricksSubmitRunOperator<a class="headerlink" href="#databrickssubmitrunoperator" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.databricks_operator.</code><code class="descname">DatabricksSubmitRunOperator</code><span class="sig-paren">(</span><em>json=None</em>, <em>spark_jar_task=None</em>, <em>notebook_task=None</em>, <em>new_cluster=None</em>, <em>existing_cluster_id=None</em>, <em>libraries=None</em>, <em>run_name=None</em>, <em>timeout_seconds=None</em>, <em>databricks_conn_id='databricks_default'</em>, <em>polling_period_seconds=30</em>, <em>databricks_retry_limit=3</em>, <em>databricks_retry_delay=1</em>, <em>do_xcom_push=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/databricks_operator.html#DatabricksSubmitRunOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Submits an Spark job run to Databricks using the |
| <a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">api/2.0/jobs/runs/submit</a> |
| API endpoint.</p> |
| <p>There are two ways to instantiate this operator.</p> |
| <p>In the first way, you can take the JSON payload that you typically use |
| to call the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint and pass it directly |
| to our <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> through the <code class="docutils literal notranslate"><span class="pre">json</span></code> parameter. |
| For example</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">json</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'new_cluster'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'spark_version'</span><span class="p">:</span> <span class="s1">'2.1.0-db3-scala2.11'</span><span class="p">,</span> |
| <span class="s1">'num_workers'</span><span class="p">:</span> <span class="mi">2</span> |
| <span class="p">},</span> |
| <span class="s1">'notebook_task'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'notebook_path'</span><span class="p">:</span> <span class="s1">'/Users/airflow@example.com/PrepareData'</span><span class="p">,</span> |
| <span class="p">},</span> |
| <span class="p">}</span> |
| <span class="n">notebook_run</span> <span class="o">=</span> <span class="n">DatabricksSubmitRunOperator</span><span class="p">(</span><span class="n">task_id</span><span class="o">=</span><span class="s1">'notebook_run'</span><span class="p">,</span> <span class="n">json</span><span class="o">=</span><span class="n">json</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>Another way to accomplish the same thing is to use the named parameters |
| of the <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> directly. Note that there is exactly |
| one named parameter for each top level parameter in the <code class="docutils literal notranslate"><span class="pre">runs/submit</span></code> |
| endpoint. In this method, your code would look like this:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">new_cluster</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'spark_version'</span><span class="p">:</span> <span class="s1">'2.1.0-db3-scala2.11'</span><span class="p">,</span> |
| <span class="s1">'num_workers'</span><span class="p">:</span> <span class="mi">2</span> |
| <span class="p">}</span> |
| <span class="n">notebook_task</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'notebook_path'</span><span class="p">:</span> <span class="s1">'/Users/airflow@example.com/PrepareData'</span><span class="p">,</span> |
| <span class="p">}</span> |
| <span class="n">notebook_run</span> <span class="o">=</span> <span class="n">DatabricksSubmitRunOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'notebook_run'</span><span class="p">,</span> |
| <span class="n">new_cluster</span><span class="o">=</span><span class="n">new_cluster</span><span class="p">,</span> |
| <span class="n">notebook_task</span><span class="o">=</span><span class="n">notebook_task</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>In the case where both the json parameter <strong>AND</strong> the named parameters |
| are provided, they will be merged together. If there are conflicts during the merge, |
| the named parameters will take precedence and override the top level <code class="docutils literal notranslate"><span class="pre">json</span></code> keys.</p> |
| <dl class="docutils"> |
| <dt>Currently the named parameters that <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> supports are</dt> |
| <dd><ul class="first last simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">notebook_task</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">new_cluster</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">libraries</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">run_name</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">timeout_seconds</span></code></li> |
| </ul> |
| </dd> |
| </dl> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>json</strong> (<em>dict</em>) – <p>A JSON object containing API parameters which will be passed |
| directly to the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint. The other named parameters |
| (i.e. <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code>, <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code>..) to this operator will |
| be merged with this json dictionary if they are provided. |
| If there are conflicts during the merge, the named parameters will |
| take precedence and override the top level json keys. (templated)</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more information about templating see <a class="reference internal" href="concepts.html#jinja-templating"><span class="std std-ref">Jinja Templating</span></a>. |
| <a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">https://docs.databricks.com/api/latest/jobs.html#runs-submit</a></p> |
| </div> |
| </li> |
| <li><strong>spark_jar_task</strong> (<em>dict</em>) – <p>The main class and parameters for the JAR task. Note that |
| the actual JAR is specified in the <code class="docutils literal notranslate"><span class="pre">libraries</span></code>. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code> should be specified. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask">https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask</a></p> |
| </div> |
| </li> |
| <li><strong>notebook_task</strong> (<em>dict</em>) – <p>The notebook path and parameters for the notebook task. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code> should be specified. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask">https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask</a></p> |
| </div> |
| </li> |
| <li><strong>new_cluster</strong> (<em>dict</em>) – <p>Specs for a new cluster on which this task will be run. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">new_cluster</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code> should be specified. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster">https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster</a></p> |
| </div> |
| </li> |
| <li><strong>existing_cluster_id</strong> (<em>string</em>) – ID for existing cluster on which to run this task. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">new_cluster</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code> should be specified. |
| This field will be templated.</li> |
| <li><strong>libraries</strong> (<em>list of dicts</em>) – <p>Libraries which this run will use. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary">https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary</a></p> |
| </div> |
| </li> |
| <li><strong>run_name</strong> (<em>string</em>) – The run name used for this task. |
| By default this will be set to the Airflow <code class="docutils literal notranslate"><span class="pre">task_id</span></code>. This <code class="docutils literal notranslate"><span class="pre">task_id</span></code> is a |
| required parameter of the superclass <code class="docutils literal notranslate"><span class="pre">BaseOperator</span></code>. |
| This field will be templated.</li> |
| <li><strong>timeout_seconds</strong> (<em>int32</em>) – The timeout for this run. By default a value of 0 is used |
| which means to have no timeout. |
| This field will be templated.</li> |
| <li><strong>databricks_conn_id</strong> (<em>string</em>) – The name of the Airflow connection to use. |
| By default and in the common case this will be <code class="docutils literal notranslate"><span class="pre">databricks_default</span></code>. To use |
| token based authentication, provide the key <code class="docutils literal notranslate"><span class="pre">token</span></code> in the extra field for the |
| connection.</li> |
| <li><strong>polling_period_seconds</strong> (<em>int</em>) – Controls the rate which we poll for the result of |
| this run. By default the operator will poll every 30 seconds.</li> |
| <li><strong>databricks_retry_limit</strong> (<em>int</em>) – Number of times to retry if the Databricks backend is |
| unreachable. Its value must be greater than or equal to 1.</li> |
| <li><strong>databricks_retry_delay</strong> (<em>float</em>) – Number of seconds to wait between retries (it |
| might be a floating point number).</li> |
| <li><strong>do_xcom_push</strong> (<em>boolean</em>) – Whether we should push run_id and run_page_url to xcom.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/databricks_operator.html#DatabricksSubmitRunOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator.on_kill"> |
| <code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/databricks_operator.html#DatabricksSubmitRunOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator.on_kill" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Override this method to cleanup subprocesses when a task instance |
| gets killed. Any use of the threading, subprocess or multiprocessing |
| module within an operator needs to be cleaned up or it will leave |
| ghost processes behind.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="gcp-google-cloud-platform"> |
| <span id="gcp"></span><h2>GCP: Google Cloud Platform<a class="headerlink" href="#gcp-google-cloud-platform" title="Permalink to this headline">¶</a></h2> |
| <p>Airflow has extensive support for the Google Cloud Platform. But note that most Hooks and |
| Operators are in the contrib section. This means they have a <em>beta</em> status and |
| can have breaking changes between minor releases.</p> |
| <p>See the <a class="reference internal" href="howto/manage-connections.html#connection-type-gcp"><span class="std std-ref">GCP connection type</span></a> documentation to |
| configure connections to GCP.</p> |
| <div class="section" id="id22"> |
| <h3>Logging<a class="headerlink" href="#id22" title="Permalink to this headline">¶</a></h3> |
| <p>Airflow can be configured to read and write task logs in Google Cloud Storage. |
| See <a class="reference internal" href="howto/write-logs.html#write-logs-gcp"><span class="std std-ref">Writing Logs to Google Cloud Storage</span></a>.</p> |
| </div> |
| <div class="section" id="bigquery"> |
| <h3>BigQuery<a class="headerlink" href="#bigquery" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="bigquery-operators"> |
| <h4>BigQuery Operators<a class="headerlink" href="#bigquery-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#bigquerycheckoperator"><span class="std std-ref">BigQueryCheckOperator</span></a> : Performs checks against a SQL query that will return a single row with different values.</li> |
| <li><a class="reference internal" href="#bigqueryvaluecheckoperator"><span class="std std-ref">BigQueryValueCheckOperator</span></a> : Performs a simple value check using SQL code.</li> |
| <li><a class="reference internal" href="#bigqueryintervalcheckoperator"><span class="std std-ref">BigQueryIntervalCheckOperator</span></a> : Checks that the values of metrics given as SQL expressions are within a certain tolerance of the ones from days_back before.</li> |
| <li><a class="reference internal" href="#bigquerycreateemptytableoperator"><span class="std std-ref">BigQueryCreateEmptyTableOperator</span></a> : Creates a new, empty table in the specified BigQuery dataset optionally with schema.</li> |
| <li><a class="reference internal" href="#bigquerycreateexternaltableoperator"><span class="std std-ref">BigQueryCreateExternalTableOperator</span></a> : Creates a new, external table in the dataset with the data in Google Cloud Storage.</li> |
| <li><a class="reference internal" href="#bigquerydeletedatasetoperator"><span class="std std-ref">BigQueryDeleteDatasetOperator</span></a> : Deletes an existing BigQuery dataset.</li> |
| <li><a class="reference internal" href="#bigqueryoperator"><span class="std std-ref">BigQueryOperator</span></a> : Executes BigQuery SQL queries in a specific BigQuery database.</li> |
| <li><a class="reference internal" href="#bigquerytobigqueryoperator"><span class="std std-ref">BigQueryToBigQueryOperator</span></a> : Copy a BigQuery table to another BigQuery table.</li> |
| <li><a class="reference internal" href="#bigquerytocloudstorageoperator"><span class="std std-ref">BigQueryToCloudStorageOperator</span></a> : Transfers a BigQuery table to a Google Cloud Storage bucket</li> |
| </ul> |
| <div class="section" id="bigquerycheckoperator"> |
| <span id="id23"></span><h5>BigQueryCheckOperator<a class="headerlink" href="#bigquerycheckoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryCheckOperator</code><span class="sig-paren">(</span><em>sql</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>use_legacy_sql=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.CheckOperator" title="airflow.operators.check_operator.CheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.CheckOperator</span></code></a></p> |
| <p>Performs checks against BigQuery. The <code class="docutils literal notranslate"><span class="pre">BigQueryCheckOperator</span></code> expects |
| a sql query that will return a single row. Each value on that |
| first row is evaluated using python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the |
| values return <code class="docutils literal notranslate"><span class="pre">False</span></code> the check is failed and errors out.</p> |
| <p>Note that Python bool casting evals the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p> |
| <ul class="simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">False</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">0</span></code></li> |
| <li>Empty string (<code class="docutils literal notranslate"><span class="pre">""</span></code>)</li> |
| <li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li> |
| <li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li> |
| </ul> |
| <p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if |
| the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft a much more complex query that could, |
| for instance, check that the table has the same number of rows as |
| the source table upstream, or that the count of today’s partition is |
| greater than yesterday’s partition, or that a set of metrics are less |
| than 3 standard deviation for the 7 day average.</p> |
| <p>This operator can be used as a data quality check in your pipeline, and |
| depending on where you put it in your DAG, you have the choice to |
| stop the critical path, preventing it from |
| publishing dubious data, or on the side and receive email alerts |
| without stopping the progress of the DAG.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to the BigQuery database</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) |
| or standard SQL (false).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigqueryvaluecheckoperator"> |
| <span id="id24"></span><h5>BigQueryValueCheckOperator<a class="headerlink" href="#bigqueryvaluecheckoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryValueCheckOperator</code><span class="sig-paren">(</span><em>sql</em>, <em>pass_value</em>, <em>tolerance=None</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>use_legacy_sql=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.ValueCheckOperator" title="airflow.operators.check_operator.ValueCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.ValueCheckOperator</span></code></a></p> |
| <p>Performs a simple value check using sql code.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) |
| or standard SQL (false).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigqueryintervalcheckoperator"> |
| <span id="id25"></span><h5>BigQueryIntervalCheckOperator<a class="headerlink" href="#bigqueryintervalcheckoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryIntervalCheckOperator</code><span class="sig-paren">(</span><em>table</em>, <em>metrics_thresholds</em>, <em>date_filter_column='ds'</em>, <em>days_back=-7</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>use_legacy_sql=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryIntervalCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.IntervalCheckOperator" title="airflow.operators.check_operator.IntervalCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.IntervalCheckOperator</span></code></a></p> |
| <p>Checks that the values of metrics given as SQL expressions are within |
| a certain tolerance of the ones from days_back before.</p> |
| <p>This method constructs a query like so</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">SELECT</span> <span class="p">{</span><span class="n">metrics_threshold_dict_key</span><span class="p">}</span> <span class="n">FROM</span> <span class="p">{</span><span class="n">table</span><span class="p">}</span> |
| <span class="n">WHERE</span> <span class="p">{</span><span class="n">date_filter_column</span><span class="p">}</span><span class="o">=&lt;</span><span class="n">date</span><span class="o">&gt;</span> |
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – the table name</li> |
| <li><strong>days_back</strong> (<em>int</em>) – number of days between ds and the ds we want to check |
| against. Defaults to 7 days</li> |
| <li><strong>metrics_threshold</strong> (<em>dict</em>) – a dictionary of ratios indexed by metrics, for |
| example ‘COUNT(*)’: 1.5 would require a 50 percent or less difference |
| between the current day, and the prior days_back.</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) |
| or standard SQL (false).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerygetdataoperator"> |
| <span id="id26"></span><h5>BigQueryGetDataOperator<a class="headerlink" href="#bigquerygetdataoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_get_data.</code><code class="descname">BigQueryGetDataOperator</code><span class="sig-paren">(</span><em>dataset_id</em>, <em>table_id</em>, <em>max_results='100'</em>, <em>selected_fields=None</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_get_data.html#BigQueryGetDataOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Fetches the data from a BigQuery table (alternatively fetch data for selected columns) |
| and returns data in a python list. The number of elements in the returned list will |
| be equal to the number of rows fetched. Each element in the list will again be a list |
| where each element represents the column values for that row.</p> |
| <p><strong>Example Result</strong>: <code class="docutils literal notranslate"><span class="pre">[['Tony',</span> <span class="pre">'10'],</span> <span class="pre">['Mike',</span> <span class="pre">'20'],</span> <span class="pre">['Steve',</span> <span class="pre">'15']]</span></code></p> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">If you pass fields to <code class="docutils literal notranslate"><span class="pre">selected_fields</span></code> which are in different order than the |
| order of columns already in |
| BQ table, the data will still be in the order of BQ table. |
| For example if the BQ table has 3 columns as |
| <code class="docutils literal notranslate"><span class="pre">[A,B,C]</span></code> and you pass ‘B,A’ in the <code class="docutils literal notranslate"><span class="pre">selected_fields</span></code> |
| the data would still be of the form <code class="docutils literal notranslate"><span class="pre">'A,B'</span></code>.</p> |
| </div> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">get_data</span> <span class="o">=</span> <span class="n">BigQueryGetDataOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'get_data_from_bq'</span><span class="p">,</span> |
| <span class="n">dataset_id</span><span class="o">=</span><span class="s1">'test_dataset'</span><span class="p">,</span> |
| <span class="n">table_id</span><span class="o">=</span><span class="s1">'Transaction_partitions'</span><span class="p">,</span> |
| <span class="n">max_results</span><span class="o">=</span><span class="s1">'100'</span><span class="p">,</span> |
| <span class="n">selected_fields</span><span class="o">=</span><span class="s1">'DATE'</span><span class="p">,</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>dataset_id</strong> – The dataset ID of the requested table. (templated)</li> |
| <li><strong>table_id</strong> (<em>string</em>) – The table ID of the requested table. (templated)</li> |
| <li><strong>max_results</strong> (<em>string</em>) – The maximum number of records (rows) to be fetched |
| from the table. (templated)</li> |
| <li><strong>selected_fields</strong> (<em>string</em>) – List of fields to return (comma-separated). If |
| unspecified, all fields are returned.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_get_data.html#BigQueryGetDataOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerycreateemptytableoperator"> |
| <span id="id27"></span><h5>BigQueryCreateEmptyTableOperator<a class="headerlink" href="#bigquerycreateemptytableoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateEmptyTableOperator</code><span class="sig-paren">(</span><em>dataset_id</em>, <em>table_id</em>, <em>project_id=None</em>, <em>schema_fields=None</em>, <em>gcs_schema_object=None</em>, <em>time_partitioning={}</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>labels=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateEmptyTableOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new, empty table in the specified BigQuery dataset, |
| optionally with schema.</p> |
| <p>The schema to be used for the BigQuery table may be specified in one of |
| two ways. You may either directly pass the schema fields in, or you may |
| point the operator to a Google cloud storage object name. The object in |
| Google cloud storage must be a JSON file with the schema fields in it. |
| You can also create a table without schema.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The project to create the table into. (templated)</li> |
| <li><strong>dataset_id</strong> (<em>string</em>) – The dataset to create the table into. (templated)</li> |
| <li><strong>table_id</strong> (<em>string</em>) – The Name of the table to be created. (templated)</li> |
| <li><strong>schema_fields</strong> (<em>list</em>) – <p>If set, the schema field list as defined here: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</a></p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">},</span> |
| <span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">}]</span> |
| </pre></div> |
| </div> |
| </li> |
| <li><strong>gcs_schema_object</strong> (<em>string</em>) – Full path to the JSON file containing |
| schema (templated). For |
| example: <code class="docutils literal notranslate"><span class="pre">gs://test-bucket/dir1/dir2/employee_schema.json</span></code></li> |
| <li><strong>time_partitioning</strong> (<em>dict</em>) – <p>configure optional time partitioning fields i.e. |
| partition by field, type and expiration as per API specifications.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning">https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning</a></p> |
| </div> |
| </li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google |
| cloud storage hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to |
| work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – <p>a dictionary containing labels for the table, passed to BigQuery</p> |
| <p><strong>Example (with schema JSON in GCS)</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateTable</span> <span class="o">=</span> <span class="n">BigQueryCreateEmptyTableOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'BigQueryCreateEmptyTableOperator_task'</span><span class="p">,</span> |
| <span class="n">dataset_id</span><span class="o">=</span><span class="s1">'ODS'</span><span class="p">,</span> |
| <span class="n">table_id</span><span class="o">=</span><span class="s1">'Employees'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'internal-gcp-project'</span><span class="p">,</span> |
| <span class="n">gcs_schema_object</span><span class="o">=</span><span class="s1">'gs://schema-bucket/employee_schema.json'</span><span class="p">,</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p><strong>Corresponding Schema file</strong> (<code class="docutils literal notranslate"><span class="pre">employee_schema.json</span></code>):</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[</span> |
| <span class="p">{</span> |
| <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">,</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> |
| <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span> |
| <span class="p">},</span> |
| <span class="p">{</span> |
| <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">,</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> |
| <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span> |
| <span class="p">}</span> |
| <span class="p">]</span> |
| </pre></div> |
| </div> |
| <p><strong>Example (with schema in the DAG)</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateTable</span> <span class="o">=</span> <span class="n">BigQueryCreateEmptyTableOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'BigQueryCreateEmptyTableOperator_task'</span><span class="p">,</span> |
| <span class="n">dataset_id</span><span class="o">=</span><span class="s1">'ODS'</span><span class="p">,</span> |
| <span class="n">table_id</span><span class="o">=</span><span class="s1">'Employees'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'internal-gcp-project'</span><span class="p">,</span> |
| <span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">},</span> |
| <span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">}],</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateEmptyTableOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerycreateexternaltableoperator"> |
| <span id="id28"></span><h5>BigQueryCreateExternalTableOperator<a class="headerlink" href="#bigquerycreateexternaltableoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateExternalTableOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>source_objects</em>, <em>destination_project_dataset_table</em>, <em>schema_fields=None</em>, <em>schema_object=None</em>, <em>source_format='CSV'</em>, <em>compression='NONE'</em>, <em>skip_leading_rows=0</em>, <em>field_delimiter='</em>, <em>'</em>, <em>max_bad_records=0</em>, <em>quote_character=None</em>, <em>allow_quoted_newlines=False</em>, <em>allow_jagged_rows=False</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>src_fmt_configs={}</em>, <em>labels=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateExternalTableOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new external table in the dataset with the data in Google Cloud |
| Storage.</p> |
| <p>The schema to be used for the BigQuery table may be specified in one of |
| two ways. You may either directly pass the schema fields in, or you may |
| point the operator to a Google cloud storage object name. The object in |
| Google cloud storage must be a JSON file with the schema fields in it.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to point the external table to. (templated)</li> |
| <li><strong>source_objects</strong> – List of Google cloud storage URIs to point |
| table to. (templated) |
| If source_format is ‘DATASTORE_BACKUP’, the list must only contain a single URI.</li> |
| <li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The dotted (&lt;project&gt;.)&lt;dataset&gt;.&lt;table&gt;
| BigQuery table to load data into (templated). If &lt;project&gt; is not included,
| project will be the project defined in the connection json.</li>
| <li><strong>schema_fields</strong> (<em>list</em>) – <p>If set, the schema field list as defined here: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</a></p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">},</span> |
| <span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">}]</span> |
| </pre></div> |
| </div> |
| <p>Should not be set when source_format is ‘DATASTORE_BACKUP’.</p> |
| </li> |
| <li><strong>schema_object</strong> (<em>string</em>) – If set, a GCS object path pointing to a .json file that
| contains the schema for the table. (templated)</li>
| <li><strong>source_format</strong> (<em>string</em>) – File format of the data.</li> |
| <li><strong>compression</strong> (<em>string</em>) – [Optional] The compression type of the data source. |
| Possible values include GZIP and NONE. |
| The default value is NONE. |
| This setting is ignored for Google Cloud Bigtable, |
| Google Cloud Datastore backups and Avro formats.</li> |
| <li><strong>skip_leading_rows</strong> (<em>int</em>) – Number of rows to skip when loading from a CSV.</li> |
| <li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use for the CSV.</li> |
| <li><strong>max_bad_records</strong> (<em>int</em>) – The maximum number of bad records that BigQuery can |
| ignore when running the job.</li> |
| <li><strong>quote_character</strong> (<em>string</em>) – The value that is used to quote data sections in a CSV file.</li> |
| <li><strong>allow_quoted_newlines</strong> (<em>boolean</em>) – Whether to allow quoted newlines (true) or not (false).</li> |
| <li><strong>allow_jagged_rows</strong> (<em>bool</em>) – Accept rows that are missing trailing optional columns. |
| The missing values are treated as nulls. If false, records with missing trailing |
| columns are treated as bad records, and if there are too many bad records, an |
| invalid error is returned in the job result. Only applicable to CSV, ignored |
| for other formats.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google |
| cloud storage hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to |
| work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>src_fmt_configs</strong> (<em>dict</em>) – configure optional fields specific to the source format</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the table, passed to BigQuery</p>
| <dl class="method"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateExternalTableOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerydeletedatasetoperator"> |
| <span id="id29"></span><h5>BigQueryDeleteDatasetOperator<a class="headerlink" href="#bigquerydeletedatasetoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryDeleteDatasetOperator</code><span class="sig-paren">(</span><em>dataset_id</em>, <em>project_id=None</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryDeleteDatasetOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>This operator deletes an existing dataset from your project in BigQuery.
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete</a></p>
| <ul class="simple">
| <li><strong>project_id</strong> (<em>string</em>) – The project id of the dataset.</li>
| <li><strong>dataset_id</strong> (<em>string</em>) – The dataset to be deleted.</li>
| </ul>
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">delete_temp_data</span> <span class="o">=</span> <span class="n">BigQueryDeleteDatasetOperator</span><span class="p">(</span> |
| <span class="n">dataset_id</span> <span class="o">=</span> <span class="s1">'temp-dataset'</span><span class="p">,</span> |
| <span class="n">project_id</span> <span class="o">=</span> <span class="s1">'temp-project'</span><span class="p">,</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'_my_gcp_conn_'</span><span class="p">,</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'Deletetemp'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryDeleteDatasetOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigqueryoperator"> |
| <span id="id30"></span><h5>BigQueryOperator<a class="headerlink" href="#bigqueryoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryOperator</code><span class="sig-paren">(</span><em>bql=None</em>, <em>sql=None</em>, <em>destination_dataset_table=False</em>, <em>write_disposition='WRITE_EMPTY'</em>, <em>allow_large_results=False</em>, <em>flatten_results=None</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>udf_config=False</em>, <em>use_legacy_sql=True</em>, <em>maximum_billing_tier=None</em>, <em>maximum_bytes_billed=None</em>, <em>create_disposition='CREATE_IF_NEEDED'</em>, <em>schema_update_options=()</em>, <em>query_params=None</em>, <em>labels=None</em>, <em>priority='INTERACTIVE'</em>, <em>time_partitioning={}</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes BigQuery SQL queries in a specific BigQuery database</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bql</strong> (<em>Can receive a str representing a sql statement</em><em>, |
| </em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file. |
| Template reference are recognized by str ending in '.sql'.</em>) – (Deprecated. Use <cite>sql</cite> parameter instead) the sql code to be |
| executed (templated)</li> |
| <li><strong>sql</strong> (<em>Can receive a str representing a sql statement</em><em>, |
| </em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file. |
| Template reference are recognized by str ending in '.sql'.</em>) – the sql code to be executed (templated)</li> |
| <li><strong>destination_dataset_table</strong> (<em>string</em>) – A dotted
| (&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; that, if set, will store the results
| of the query. (templated)</li>
| <li><strong>write_disposition</strong> (<em>string</em>) – Specifies the action that occurs if the destination table |
| already exists. (default: ‘WRITE_EMPTY’)</li> |
| <li><strong>create_disposition</strong> (<em>string</em>) – Specifies whether the job is allowed to create new tables. |
| (default: ‘CREATE_IF_NEEDED’)</li> |
| <li><strong>allow_large_results</strong> (<em>boolean</em>) – Whether to allow large results.</li> |
| <li><strong>flatten_results</strong> (<em>boolean</em>) – If true and query uses legacy SQL dialect, flattens |
| all nested and repeated fields in the query results. <code class="docutils literal notranslate"><span class="pre">allow_large_results</span></code> |
| must be <code class="docutils literal notranslate"><span class="pre">true</span></code> if this is set to <code class="docutils literal notranslate"><span class="pre">false</span></code>. For standard SQL queries, this |
| flag is ignored and results are never flattened.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>udf_config</strong> (<em>list</em>) – The User Defined Function configuration for the query. |
| See <a class="reference external" href="https://cloud.google.com/bigquery/user-defined-functions">https://cloud.google.com/bigquery/user-defined-functions</a> for details.</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) or standard SQL (false).</li> |
| <li><strong>maximum_billing_tier</strong> (<em>integer</em>) – Positive integer that serves as a multiplier |
| of the basic price. |
| Defaults to None, in which case it uses the value set in the project.</li> |
| <li><strong>maximum_bytes_billed</strong> (<em>float</em>) – Limits the bytes billed for this job. |
| Queries that will have bytes billed beyond this limit will fail |
| (without incurring a charge). If unspecified, this will be |
| set to your project default.</li> |
| <li><strong>schema_update_options</strong> (<em>tuple</em>) – Allows the schema of the destination |
| table to be updated as a side effect of the load job.</li> |
| <li><strong>query_params</strong> (<em>dict</em>) – a dictionary containing query parameter types and |
| values, passed to BigQuery.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query, |
| passed to BigQuery</li> |
| <li><strong>priority</strong> (<em>string</em>) – Specifies a priority for the query. |
| Possible values include INTERACTIVE and BATCH. |
| The default value is INTERACTIVE.</li> |
| <li><strong>time_partitioning</strong> (<em>dict</em>) – configure optional time partitioning fields i.e. |
| partition by field, type and expiration as per API specifications.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryOperator.on_kill"> |
| <code class="descname">on_kill</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryOperator.on_kill" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Override this method to cleanup subprocesses when a task instance |
| gets killed. Any use of the threading, subprocess or multiprocessing |
| module within an operator needs to be cleaned up or it will leave |
| ghost processes behind.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerytabledeleteoperator"> |
| <span id="id31"></span><h5>BigQueryTableDeleteOperator<a class="headerlink" href="#bigquerytabledeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_table_delete_operator.</code><code class="descname">BigQueryTableDeleteOperator</code><span class="sig-paren">(</span><em>deletion_dataset_table</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>ignore_if_missing=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_table_delete_operator.html#BigQueryTableDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Deletes BigQuery tables</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>deletion_dataset_table</strong> (<em>string</em>) – A dotted
| (&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; that indicates which table
| will be deleted. (templated)</li>
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>ignore_if_missing</strong> (<em>boolean</em>) – if True, then return success even if the |
| requested table does not exist.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_table_delete_operator.html#BigQueryTableDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerytobigqueryoperator"> |
| <span id="id32"></span><h5>BigQueryToBigQueryOperator<a class="headerlink" href="#bigquerytobigqueryoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_to_bigquery.</code><code class="descname">BigQueryToBigQueryOperator</code><span class="sig-paren">(</span><em>source_project_dataset_tables</em>, <em>destination_project_dataset_table</em>, <em>write_disposition='WRITE_EMPTY'</em>, <em>create_disposition='CREATE_IF_NEEDED'</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>labels=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_bigquery.html#BigQueryToBigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies data from one BigQuery table to another.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more details about these parameters: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy">https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_project_dataset_tables</strong> (<em>list|string</em>) – One or more
| dotted (project:|project.)&lt;dataset&gt;.&lt;table&gt; BigQuery tables to use as the
| source data. If &lt;project&gt; is not included, project will be the
| project defined in the connection json. Use a list if there are multiple
| source tables. (templated)</li>
| <li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The destination BigQuery
| table. Format is: (project:|project.)&lt;dataset&gt;.&lt;table&gt; (templated)</li>
| <li><strong>write_disposition</strong> (<em>string</em>) – The write disposition if the table already exists.</li> |
| <li><strong>create_disposition</strong> (<em>string</em>) – The create disposition if the table doesn’t exist.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query, |
| passed to BigQuery</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_bigquery.html#BigQueryToBigQueryOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerytocloudstorageoperator"> |
| <span id="id37"></span><h5>BigQueryToCloudStorageOperator<a class="headerlink" href="#bigquerytocloudstorageoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_to_gcs.</code><code class="descname">BigQueryToCloudStorageOperator</code><span class="sig-paren">(</span><em>source_project_dataset_table</em>, <em>destination_cloud_storage_uris</em>, <em>compression='NONE'</em>, <em>export_format='CSV'</em>, <em>field_delimiter='</em>, <em>'</em>, <em>print_header=True</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>labels=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_gcs.html#BigQueryToCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Transfers a BigQuery table to a Google Cloud Storage bucket.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more details about these parameters: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs">https://cloud.google.com/bigquery/docs/reference/v2/jobs</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_project_dataset_table</strong> (<em>string</em>) – The dotted
| (&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; BigQuery table to use as the source
| data. If &lt;project&gt; is not included, project will be the project
| defined in the connection json. (templated)</li>
| <li><strong>destination_cloud_storage_uris</strong> (<em>list</em>) – The destination Google Cloud
| Storage URI (e.g. gs://some-bucket/some-file.txt). (templated) Follows
| convention defined here:
| <a class="reference external" href="https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple">https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple</a></li>
| <li><strong>compression</strong> (<em>string</em>) – Type of compression to use.</li> |
| <li><strong>export_format</strong> – File format to export.</li> |
| <li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use when extracting to a CSV.</li> |
| <li><strong>print_header</strong> (<em>boolean</em>) – Whether to print a header for a CSV file extract.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query, |
| passed to BigQuery</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_gcs.html#BigQueryToCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="bigqueryhook"> |
| <h4>BigQueryHook<a class="headerlink" href="#bigqueryhook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.bigquery_hook.</code><code class="descname">BigQueryHook</code><span class="sig-paren">(</span><em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>use_legacy_sql=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a>, <a class="reference internal" href="code.html#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Interact with BigQuery. This hook uses the Google Cloud Platform |
| connection.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a BigQuery PEP 249 connection object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df"> |
| <code class="descname">get_pandas_df</code><span class="sig-paren">(</span><em>sql</em>, <em>parameters=None</em>, <em>dialect=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Pandas DataFrame for the results produced by a BigQuery |
| query. The DbApiHook method must be overridden because Pandas |
| doesn’t support PEP 249 connections, except for SQLite. See:</p> |
| <p><a class="reference external" href="https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447">https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447</a> |
| <a class="reference external" href="https://github.com/pydata/pandas/issues/6900">https://github.com/pydata/pandas/issues/6900</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – The BigQuery SQL to execute.</li> |
| <li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with (not |
| used, leave to override superclass method)</li> |
| <li><strong>dialect</strong> (<em>string in {'legacy'</em><em>, </em><em>'standard'}</em>) – Dialect of BigQuery SQL – legacy SQL or standard SQL; |
| defaults to use <cite>self.use_legacy_sql</cite> if not specified</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service"> |
| <code class="descname">get_service</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_service"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a BigQuery service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows"> |
| <code class="descname">insert_rows</code><span class="sig-paren">(</span><em>table</em>, <em>rows</em>, <em>target_fields=None</em>, <em>commit_every=1000</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.insert_rows"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Insertion is currently unsupported. Theoretically, you could use |
| BigQuery’s streaming API to insert rows into a table, but this hasn’t |
| been implemented.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists"> |
| <code class="descname">table_exists</code><span class="sig-paren">(</span><em>project_id</em>, <em>dataset_id</em>, <em>table_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.table_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks for the existence of a table in Google BigQuery.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google cloud project in which to look for the |
| table. The connection supplied to the hook must provide access to |
| the specified project.</li> |
| <li><strong>dataset_id</strong> (<em>string</em>) – The name of the dataset in which to look for the |
| table.</li> |
| <li><strong>table_id</strong> (<em>string</em>) – The name of the table to check the existence of.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-sql"> |
| <h3>Cloud SQL<a class="headerlink" href="#cloud-sql" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="cloud-sql-operators"> |
| <h4>Cloud SQL Operators<a class="headerlink" href="#cloud-sql-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><span class="xref std std-ref">CloudSqlInstanceDatabaseDeleteOperator</span> : deletes a database from a Cloud SQL |
| instance.</li> |
| <li><span class="xref std std-ref">CloudSqlInstanceDatabaseCreateOperator</span> : creates a new database inside a Cloud |
| SQL instance.</li> |
| <li><span class="xref std std-ref">CloudSqlInstanceDatabasePatchOperator</span> : updates a database inside a Cloud |
| SQL instance.</li> |
| <li><span class="xref std std-ref">CloudSqlInstanceDeleteOperator</span> : deletes a Cloud SQL instance.</li> |
| <li><a class="reference internal" href="howto/operator.html#cloudsqlinstancecreateoperator"><span class="std std-ref">CloudSqlInstanceCreateOperator</span></a> : creates a new Cloud SQL instance.</li> |
| <li><a class="reference internal" href="howto/operator.html#cloudsqlinstancepatchoperator"><span class="std std-ref">CloudSqlInstancePatchOperator</span></a> : patches a Cloud SQL instance.</li> |
| </ul> |
| <div class="section" id="cloudsqlinstancedatabasedeleteoperator"> |
| <h5>CloudSqlInstanceDatabaseDeleteOperator<a class="headerlink" href="#cloudsqlinstancedatabasedeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabaseDeleteOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em>, <em>database</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Deletes a database from a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>database</strong> (<em>str</em>) – Name of the database to be deleted in the instance.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstancedatabasecreateoperator"> |
| <h5>CloudSqlInstanceDatabaseCreateOperator<a class="headerlink" href="#cloudsqlinstancedatabasecreateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabaseCreateOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em>, <em>body</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>validate_body=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Creates a new database inside a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a></li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseCreateOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstancedatabasepatchoperator"> |
| <h5>CloudSqlInstanceDatabasePatchOperator<a class="headerlink" href="#cloudsqlinstancedatabasepatchoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabasePatchOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em>, <em>database</em>, <em>body</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>validate_body=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabasePatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Updates a resource containing information about a database inside a Cloud SQL |
| instance using patch semantics. |
| See: <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>database</strong> (<em>str</em>) – Name of the database to be updated in the instance.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body</a></li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabasePatchOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstancedeleteoperator"> |
| <h5>CloudSqlInstanceDeleteOperator<a class="headerlink" href="#cloudsqlinstancedeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDeleteOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Deletes a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance to be deleted.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstancecreateoperator"> |
| <h5>CloudSqlInstanceCreateOperator<a class="headerlink" href="#cloudsqlinstancecreateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceCreateOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>body</em>, <em>instance</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>validate_body=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Creates a new Cloud SQL instance. |
| If an instance with the same name exists, no action will be taken and |
| the operator will succeed.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project to which the newly created Cloud SQL |
| instances should belong.</li> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL insert API, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body</a></li> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – True if body should be validated, False otherwise.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceCreateOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstancepatchoperator"> |
| <h5>CloudSqlInstancePatchOperator<a class="headerlink" href="#cloudsqlinstancepatchoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstancePatchOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>body</em>, <em>instance</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1beta4'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstancePatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Updates settings of a Cloud SQL instance.</p> |
| <p>Caution: This is a partial update, so only included values for the settings will be |
| updated.</p> |
| <p>In the request body, supply the relevant portions of an instance resource, according |
| to the rules of patch semantics. |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL patch API, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body</a></li> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstancePatchOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-sql-hook"> |
| <h4>Cloud SQL Hook<a class="headerlink" href="#cloud-sql-hook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_sql_hook.</code><code class="descname">CloudSqlHook</code><span class="sig-paren">(</span><em>api_version</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Hook for Google Cloud SQL APIs.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_database"> |
| <code class="descname">create_database</code><span class="sig-paren">(</span><em>project</em>, <em>instance</em>, <em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.create_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new database inside a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a></li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_instance"> |
| <code class="descname">create_instance</code><span class="sig-paren">(</span><em>project_id</em>, <em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.create_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project to which the newly created |
| Cloud SQL instances should belong.</li> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL insert API, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body</a></li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_database"> |
| <code class="descname">delete_database</code><span class="sig-paren">(</span><em>project</em>, <em>instance</em>, <em>database</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.delete_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes a database from a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>database</strong> (<em>str</em>) – Name of the database to be deleted in the instance.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_instance"> |
| <code class="descname">delete_instance</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.delete_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves connection to Cloud SQL.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Cloud SQL services object.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_database"> |
| <code class="descname">get_database</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em>, <em>database</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves a database resource from a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>database</strong> (<em>str</em>) – Name of the database in the instance.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A Cloud SQL database resource, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource</a></p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_instance"> |
| <code class="descname">get_instance</code><span class="sig-paren">(</span><em>project_id</em>, <em>instance</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves a resource containing information about a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A Cloud SQL instance resource.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_database"> |
| <code class="descname">patch_database</code><span class="sig-paren">(</span><em>project</em>, <em>instance</em>, <em>database</em>, <em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.patch_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Updates a database resource inside a Cloud SQL instance. |
| This method supports patch semantics. |
| See: <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>database</strong> (<em>str</em>) – Name of the database to be updated in the instance.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a></li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_instance"> |
| <code class="descname">patch_instance</code><span class="sig-paren">(</span><em>project_id</em>, <em>body</em>, <em>instance</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.patch_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Updates settings of a Cloud SQL instance.</p> |
| <p>Caution: This is not a partial update, so you must include values for |
| all the settings that you want to retain.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance.</li> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL patch API, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body</a></li> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the operation succeeded, raises an error otherwise</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="compute-engine"> |
| <h3>Compute Engine<a class="headerlink" href="#compute-engine" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="compute-engine-operators"> |
| <h4>Compute Engine Operators<a class="headerlink" href="#compute-engine-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#gceinstancestartoperator"><span class="std std-ref">GceInstanceStartOperator</span></a> : start an existing Google Compute Engine instance.</li> |
| <li><a class="reference internal" href="#gceinstancestopoperator"><span class="std std-ref">GceInstanceStopOperator</span></a> : stop an existing Google Compute Engine instance.</li> |
| <li><a class="reference internal" href="#gcesetmachinetypeoperator"><span class="std std-ref">GceSetMachineTypeOperator</span></a> : change the machine type for a stopped instance.</li> |
| </ul> |
| <div class="section" id="gceinstancestartoperator"> |
| <span id="id38"></span><h5>GceInstanceStartOperator<a class="headerlink" href="#gceinstancestartoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceStartOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>zone</em>, <em>resource_id</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStartOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></p> |
| <p>Start an instance in Google Compute Engine.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Google Cloud Platform project where the Compute Engine |
| instance exists.</li> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStartOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="gceinstancestopoperator"> |
| <span id="id39"></span><h5>GceInstanceStopOperator<a class="headerlink" href="#gceinstancestopoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceStopOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>zone</em>, <em>resource_id</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></p> |
| <p>Stop an instance in Google Compute Engine.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Google Cloud Platform project where the Compute Engine |
| instance exists.</li> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStopOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="gcesetmachinetypeoperator"> |
| <span id="id40"></span><h5>GceSetMachineTypeOperator<a class="headerlink" href="#gcesetmachinetypeoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceSetMachineTypeOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>zone</em>, <em>resource_id</em>, <em>body</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1'</em>, <em>validate_body=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceSetMachineTypeOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></p> |
| <p>Changes the machine type for a stopped instance to the machine type specified in |
| the request.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Google Cloud Platform project where the Compute Engine |
| instance exists.</li> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Compute Engine setMachineType API, as described in |
| <a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType#request-body">https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType#request-body</a></li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceSetMachineTypeOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| <div class="section" id="cloud-functions"> |
| <h3>Cloud Functions<a class="headerlink" href="#cloud-functions" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="cloud-functions-operators"> |
| <h4>Cloud Functions Operators<a class="headerlink" href="#cloud-functions-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#gcffunctiondeployoperator"><span class="std std-ref">GcfFunctionDeployOperator</span></a> : deploy Google Cloud Function to Google Cloud Platform</li> |
| <li><a class="reference internal" href="#gcffunctiondeleteoperator"><span class="std std-ref">GcfFunctionDeleteOperator</span></a> : delete Google Cloud Function in Google Cloud Platform</li> |
| </ul> |
| <div class="section" id="gcffunctiondeployoperator"> |
| <span id="id41"></span><h5>GcfFunctionDeployOperator<a class="headerlink" href="#gcffunctiondeployoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_function_operator.</code><code class="descname">GcfFunctionDeployOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>location</em>, <em>body</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1'</em>, <em>zip_path=None</em>, <em>validate_body=True</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeployOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a function in Google Cloud Functions.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Google Cloud Platform Project ID where the function should |
| be created.</li> |
| <li><strong>location</strong> (<em>str</em>) – Google Cloud Platform region where the function should be created.</li> |
| <li><strong>body</strong> (<em>dict</em><em> or </em><em>google.cloud.functions.v1.CloudFunction</em>) – Body of the Cloud Functions definition. The body must be a |
| Cloud Functions dictionary as described in: |
| <a class="reference external" href="https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions">https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions</a> |
| . Different API versions require different variants of the Cloud Functions |
| dictionary.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID to use to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (for example v1 or v1beta1).</li> |
| <li><strong>zip_path</strong> (<em>str</em>) – Path to zip file containing source code of the function. If the path |
| is set, the sourceUploadUrl should not be specified in the body or it should |
| be empty. Then the zip file will be uploaded using the upload URL generated |
| via generateUploadUrl from the Cloud Functions API.</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – If set to False, body validation is not performed.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeployOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="gcffunctiondeleteoperator"> |
| <span id="id42"></span><h5>GcfFunctionDeleteOperator<a class="headerlink" href="#gcffunctiondeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_function_operator.</code><code class="descname">GcfFunctionDeleteOperator</code><span class="sig-paren">(</span><em>name</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v1'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Deletes the specified function from Google Cloud Functions.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>name</strong> (<em>str</em>) – A fully-qualified function name, matching |
| the pattern: <cite>^projects/[^/]+/locations/[^/]+/functions/[^/]+$</cite></li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID to use to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (for example v1 or v1beta1).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-functions-hook"> |
| <h4>Cloud Functions Hook<a class="headerlink" href="#cloud-functions-hook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_function_hook.</code><code class="descname">GcfHook</code><span class="sig-paren">(</span><em>api_version</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Hook for the Google Cloud Functions APIs.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.create_new_function"> |
| <code class="descname">create_new_function</code><span class="sig-paren">(</span><em>full_location</em>, <em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.create_new_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.create_new_function" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new function in Cloud Functions in the location specified in the body.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>full_location</strong> (<em>str</em>) – full location including the project, in the form |
| /projects/&lt;PROJECT&gt;/location/&lt;LOCATION&gt;</li> |
| <li><strong>body</strong> (<em>dict</em>) – body required by the Cloud Functions insert API</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">response returned by the operation</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.delete_function"> |
| <code class="descname">delete_function</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.delete_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.delete_function" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes the specified Cloud Function.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – name of the function</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">response returned by the operation</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves the connection to Cloud Functions.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Cloud Function services object</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.get_function"> |
| <code class="descname">get_function</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.get_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.get_function" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the Cloud Function with the given name.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – name of the function</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">a CloudFunction object representing the function</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.list_functions"> |
| <code class="descname">list_functions</code><span class="sig-paren">(</span><em>full_location</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.list_functions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.list_functions" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lists all Cloud Functions created in the location.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>full_location</strong> (<em>str</em>) – full location including the project in the form
of /projects/&lt;PROJECT&gt;/location/&lt;LOCATION&gt;</td>
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">array of CloudFunction objects - representing functions in the location</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">[dict]</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.update_function"> |
| <code class="descname">update_function</code><span class="sig-paren">(</span><em>name</em>, <em>body</em>, <em>update_mask</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.update_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.update_function" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Updates Cloud Functions according to the specified update mask.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>name</strong> (<em>str</em>) – name of the function</li> |
| <li><strong>body</strong> (<em>str</em>) – body required by the cloud function patch API</li> |
| <li><strong>update_mask</strong> (<em>[</em><em>str</em><em>]</em>) – update mask - array of fields that should be patched</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">response returned by the operation</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.upload_function_zip"> |
| <code class="descname">upload_function_zip</code><span class="sig-paren">(</span><em>parent</em>, <em>zip_path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.upload_function_zip"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.upload_function_zip" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Uploads zip file with sources.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
<li><strong>parent</strong> (<em>str</em>) – Google Cloud Platform project id and region where zip file should
be uploaded in the form of /projects/&lt;PROJECT&gt;/location/&lt;LOCATION&gt;</li>
| <li><strong>zip_path</strong> (<em>str</em>) – path of the valid .zip file to upload</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">Upload URL that was returned by generateUploadUrl method</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-dataflow"> |
| <h3>Cloud DataFlow<a class="headerlink" href="#cloud-dataflow" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="dataflow-operators"> |
| <h4>DataFlow Operators<a class="headerlink" href="#dataflow-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#dataflowjavaoperator"><span class="std std-ref">DataFlowJavaOperator</span></a> : launching Cloud Dataflow jobs written in Java.</li> |
| <li><a class="reference internal" href="#dataflowtemplateoperator"><span class="std std-ref">DataflowTemplateOperator</span></a> : launching a templated Cloud DataFlow batch job.</li> |
| <li><a class="reference internal" href="#dataflowpythonoperator"><span class="std std-ref">DataFlowPythonOperator</span></a> : launching Cloud Dataflow jobs written in python.</li> |
| </ul> |
| <div class="section" id="dataflowjavaoperator"> |
| <span id="id43"></span><h5>DataFlowJavaOperator<a class="headerlink" href="#dataflowjavaoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataFlowJavaOperator</code><span class="sig-paren">(</span><em>jar</em>, <em>dataflow_default_options=None</em>, <em>options=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em>, <em>job_class=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowJavaOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Java Cloud DataFlow batch job. The parameters of the operation |
| will be passed to the job.</p> |
| <p>It’s a good practice to define dataflow_* parameters in the default_args of the dag |
| like the project, zone and staging location.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'dataflow_default_options'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'project'</span><span class="p">:</span> <span class="s1">'my-gcp-project'</span><span class="p">,</span> |
| <span class="s1">'zone'</span><span class="p">:</span> <span class="s1">'europe-west1-d'</span><span class="p">,</span> |
| <span class="s1">'stagingLocation'</span><span class="p">:</span> <span class="s1">'gs://my-staging-bucket/staging/'</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>You need to pass the path to your dataflow as a file reference with the <code class="docutils literal notranslate"><span class="pre">jar</span></code> |
| parameter, the jar needs to be a self executing jar (see documentation here: |
| <a class="reference external" href="https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar">https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar</a>). |
| Use <code class="docutils literal notranslate"><span class="pre">options</span></code> to pass on options to your job.</p> |
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataFlowJavaOperator</span><span class="p">(</span>
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'datapflow_example'</span><span class="p">,</span> |
| <span class="n">jar</span><span class="o">=</span><span class="s1">'{{var.value.gcp_dataflow_base}}pipeline/build/libs/pipeline-example-1.0.jar'</span><span class="p">,</span> |
| <span class="n">options</span><span class="o">=</span><span class="p">{</span> |
| <span class="s1">'autoscalingAlgorithm'</span><span class="p">:</span> <span class="s1">'BASIC'</span><span class="p">,</span> |
| <span class="s1">'maxNumWorkers'</span><span class="p">:</span> <span class="s1">'50'</span><span class="p">,</span> |
| <span class="s1">'start'</span><span class="p">:</span> <span class="s1">'{{ds}}'</span><span class="p">,</span> |
| <span class="s1">'partitionType'</span><span class="p">:</span> <span class="s1">'DAY'</span><span class="p">,</span> |
| <span class="s1">'labels'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'foo'</span> <span class="p">:</span> <span class="s1">'bar'</span><span class="p">}</span> |
| <span class="p">},</span> |
| <span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">'gcp-airflow-service-account'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my</span><span class="o">-</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>Both <code class="docutils literal notranslate"><span class="pre">jar</span></code> and <code class="docutils literal notranslate"><span class="pre">options</span></code> are templated so you can use variables in them.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowJavaOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'owner'</span><span class="p">:</span> <span class="s1">'airflow'</span><span class="p">,</span> |
| <span class="s1">'depends_on_past'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="s1">'start_date'</span><span class="p">:</span> |
| <span class="p">(</span><span class="mi">2016</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> |
| <span class="s1">'email'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'alex@vanboxel.be'</span><span class="p">],</span> |
| <span class="s1">'email_on_failure'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="s1">'email_on_retry'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="s1">'retries'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> |
| <span class="s1">'retry_delay'</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">minutes</span><span class="o">=</span><span class="mi">30</span><span class="p">),</span> |
| <span class="s1">'dataflow_default_options'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'project'</span><span class="p">:</span> <span class="s1">'my-gcp-project'</span><span class="p">,</span> |
| <span class="s1">'zone'</span><span class="p">:</span> <span class="s1">'us-central1-f'</span><span class="p">,</span> |
| <span class="s1">'stagingLocation'</span><span class="p">:</span> <span class="s1">'gs://bucket/tmp/dataflow/staging/'</span><span class="p">,</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| |
| <span class="n">dag</span> <span class="o">=</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'test-dag'</span><span class="p">,</span> <span class="n">default_args</span><span class="o">=</span><span class="n">default_args</span><span class="p">)</span> |
| |
| <span class="n">task</span> <span class="o">=</span> <span class="n">DataFlowJavaOperator</span><span class="p">(</span> |
| <span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">'gcp_default'</span><span class="p">,</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'normalize-cal'</span><span class="p">,</span> |
| <span class="n">jar</span><span class="o">=</span><span class="s1">'{{var.value.gcp_dataflow_base}}pipeline-ingress-cal-normalize-1.0.jar'</span><span class="p">,</span> |
| <span class="n">options</span><span class="o">=</span><span class="p">{</span> |
| <span class="s1">'autoscalingAlgorithm'</span><span class="p">:</span> <span class="s1">'BASIC'</span><span class="p">,</span> |
| <span class="s1">'maxNumWorkers'</span><span class="p">:</span> <span class="s1">'50'</span><span class="p">,</span> |
| <span class="s1">'start'</span><span class="p">:</span> <span class="s1">'{{ds}}'</span><span class="p">,</span> |
| <span class="s1">'partitionType'</span><span class="p">:</span> <span class="s1">'DAY'</span> |
| |
| <span class="p">},</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </div> |
| <div class="section" id="dataflowtemplateoperator"> |
| <span id="id44"></span><h5>DataflowTemplateOperator<a class="headerlink" href="#dataflowtemplateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataflowTemplateOperator</code><span class="sig-paren">(</span><em>template</em>, <em>dataflow_default_options=None</em>, <em>parameters=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataflowTemplateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Templated Cloud DataFlow batch job. The parameters of the operation |
| will be passed to the job. |
| It’s a good practice to define dataflow_* parameters in the default_args of the dag |
| like the project, zone and staging location.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters</a> |
| <a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment</a></p> |
| </div> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'dataflow_default_options'</span><span class="p">:</span> <span class="p">{</span> |
<span class="s1">'project'</span><span class="p">:</span> <span class="s1">'my-gcp-project'</span><span class="p">,</span>
| <span class="s1">'zone'</span><span class="p">:</span> <span class="s1">'europe-west1-d'</span><span class="p">,</span> |
| <span class="s1">'tempLocation'</span><span class="p">:</span> <span class="s1">'gs://my-staging-bucket/staging/'</span> |
| <span class="p">}</span> |
<span class="p">}</span>
| </pre></div> |
| </div> |
| <p>You need to pass the path to your dataflow template as a file reference with the |
| <code class="docutils literal notranslate"><span class="pre">template</span></code> parameter. Use <code class="docutils literal notranslate"><span class="pre">parameters</span></code> to pass on parameters to your job. |
| Use <code class="docutils literal notranslate"><span class="pre">environment</span></code> to pass on runtime environment variables to your job.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataflowTemplateOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'datapflow_example'</span><span class="p">,</span> |
| <span class="n">template</span><span class="o">=</span><span class="s1">'{{var.value.gcp_dataflow_base}}'</span><span class="p">,</span> |
| <span class="n">parameters</span><span class="o">=</span><span class="p">{</span> |
| <span class="s1">'inputFile'</span><span class="p">:</span> <span class="s2">"gs://bucket/input/my_input.txt"</span><span class="p">,</span> |
| <span class="s1">'outputFile'</span><span class="p">:</span> <span class="s2">"gs://bucket/output/my_output.txt"</span> |
| <span class="p">},</span> |
| <span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">'gcp-airflow-service-account'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my</span><span class="o">-</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p><code class="docutils literal notranslate"><span class="pre">template</span></code>, <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> and <code class="docutils literal notranslate"><span class="pre">parameters</span></code> are templated so you can |
| use variables in them.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataflowTemplateOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataflowpythonoperator"> |
| <span id="id45"></span><h5>DataFlowPythonOperator<a class="headerlink" href="#dataflowpythonoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataFlowPythonOperator</code><span class="sig-paren">(</span><em>py_file</em>, <em>py_options=None</em>, <em>dataflow_default_options=None</em>, <em>options=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowPythonOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Create a new DataFlowPythonOperator. Note that both |
| dataflow_default_options and options will be merged to specify pipeline |
| execution parameter, and dataflow_default_options is expected to save |
| high-level options, for instances, project and zone information, which |
| apply to all dataflow operators in the DAG.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more detail on job submission have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataflow/pipelines/specifying-exec-params">https://cloud.google.com/dataflow/pipelines/specifying-exec-params</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>py_file</strong> (<em>string</em>) – Reference to the python dataflow pipeline file.py, e.g.,
| /some/local/file/path/to/your/python/pipeline/file.</li> |
| <li><strong>py_options</strong> – Additional python options.</li> |
| <li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job options.</li> |
| <li><strong>options</strong> (<em>dict</em>) – Map of job specific options.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud |
| Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google |
| Cloud Platform for the dataflow job status while the job is in the |
| JOB_STATE_RUNNING state.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowPythonOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Execute the python dataflow job.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="dataflowhook"> |
| <h4>DataFlowHook<a class="headerlink" href="#dataflowhook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_dataflow_hook.</code><code class="descname">DataFlowHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataflow_hook.html#DataFlowHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataflow_hook.html#DataFlowHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google Cloud Dataflow service object.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-dataproc"> |
| <h3>Cloud DataProc<a class="headerlink" href="#cloud-dataproc" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="dataproc-operators"> |
| <h4>DataProc Operators<a class="headerlink" href="#dataproc-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#dataprocclustercreateoperator"><span class="std std-ref">DataprocClusterCreateOperator</span></a> : Create a new cluster on Google Cloud Dataproc.</li> |
| <li><a class="reference internal" href="#dataprocclusterdeleteoperator"><span class="std std-ref">DataprocClusterDeleteOperator</span></a> : Delete a cluster on Google Cloud Dataproc.</li> |
| <li><a class="reference internal" href="#dataprocclusterscaleoperator"><span class="std std-ref">DataprocClusterScaleOperator</span></a> : Scale up or down a cluster on Google Cloud Dataproc.</li> |
| <li><a class="reference internal" href="#dataprocpigoperator"><span class="std std-ref">DataProcPigOperator</span></a> : Start a Pig query Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprochiveoperator"><span class="std std-ref">DataProcHiveOperator</span></a> : Start a Hive query Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprocsparksqloperator"><span class="std std-ref">DataProcSparkSqlOperator</span></a> : Start a Spark SQL query Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprocsparkoperator"><span class="std std-ref">DataProcSparkOperator</span></a> : Start a Spark Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprochadoopoperator"><span class="std std-ref">DataProcHadoopOperator</span></a> : Start a Hadoop Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprocpysparkoperator"><span class="std std-ref">DataProcPySparkOperator</span></a> : Start a PySpark Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprocworkflowtemplateinstantiateoperator"><span class="std std-ref">DataprocWorkflowTemplateInstantiateOperator</span></a> : Instantiate a WorkflowTemplate on Google Cloud Dataproc.</li> |
| <li><a class="reference internal" href="#dataprocworkflowtemplateinstantiateinlineoperator"><span class="std std-ref">DataprocWorkflowTemplateInstantiateInlineOperator</span></a> : Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc.</li> |
| </ul> |
| <div class="section" id="dataprocclustercreateoperator"> |
| <span id="id46"></span><h5>DataprocClusterCreateOperator<a class="headerlink" href="#dataprocclustercreateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterCreateOperator</code><span class="sig-paren">(</span><em>cluster_name</em>, <em>project_id</em>, <em>num_workers</em>, <em>zone</em>, <em>network_uri=None</em>, <em>subnetwork_uri=None</em>, <em>internal_ip_only=None</em>, <em>tags=None</em>, <em>storage_bucket=None</em>, <em>init_actions_uris=None</em>, <em>init_action_timeout='10m'</em>, <em>metadata=None</em>, <em>custom_image=None</em>, <em>image_version=None</em>, <em>properties=None</em>, <em>master_machine_type='n1-standard-4'</em>, <em>master_disk_type='pd-standard'</em>, <em>master_disk_size=500</em>, <em>worker_machine_type='n1-standard-4'</em>, <em>worker_disk_type='pd-standard'</em>, <em>worker_disk_size=500</em>, <em>num_preemptible_workers=0</em>, <em>labels=None</em>, <em>region='global'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>service_account=None</em>, <em>service_account_scopes=None</em>, <em>idle_delete_ttl=None</em>, <em>auto_delete_time=None</em>, <em>auto_delete_ttl=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Create a new cluster on Google Cloud Dataproc. The operator will wait until the |
| creation is successful or an error occurs in the creation process.</p> |
| <p>The parameters allow you to configure the cluster. Please refer to</p> |
| <p><a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters">https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters</a></p> |
| <p>for a detailed explanation on the different parameters. Most of the configuration |
| parameters detailed in the link are available as a parameter to this operator.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster to create. (templated)</li> |
| <li><strong>project_id</strong> (<em>str</em>) – The ID of the google cloud project in which |
| to create the cluster. (templated)</li> |
| <li><strong>num_workers</strong> (<em>int</em>) – The # of workers to spin up. If set to zero will |
| spin up cluster in a single node mode</li> |
| <li><strong>storage_bucket</strong> (<em>string</em>) – The storage bucket to use, setting to None lets dataproc |
| generate a custom one for you</li> |
| <li><strong>init_actions_uris</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – List of GCS uri’s containing |
| dataproc initialization scripts</li> |
| <li><strong>init_action_timeout</strong> (<em>string</em>) – Amount of time executable scripts in |
| init_actions_uris has to complete</li> |
| <li><strong>metadata</strong> (<em>dict</em>) – dict of key-value google compute engine metadata entries |
| to add to all instances</li> |
| <li><strong>image_version</strong> (<em>string</em>) – the version of software inside the Dataproc cluster</li> |
| <li><strong>custom_image</strong> – custom Dataproc image for more info see |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/guides/dataproc-images">https://cloud.google.com/dataproc/docs/guides/dataproc-images</a></li> |
| <li><strong>properties</strong> (<em>dict</em>) – dict of properties to set on |
| config files (e.g. spark-defaults.conf), see |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/">https://cloud.google.com/dataproc/docs/reference/rest/v1/</a> projects.regions.clusters#SoftwareConfig</li> |
| <li><strong>master_machine_type</strong> (<em>string</em>) – Compute engine machine type to use for the master node</li> |
| <li><strong>master_disk_type</strong> (<em>string</em>) – Type of the boot disk for the master node |
| (default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>). |
| Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or |
| <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</li> |
| <li><strong>master_disk_size</strong> (<em>int</em>) – Disk size for the master node</li> |
| <li><strong>worker_machine_type</strong> (<em>string</em>) – Compute engine machine type to use for the worker nodes</li> |
| <li><strong>worker_disk_type</strong> (<em>string</em>) – Type of the boot disk for the worker node |
| (default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>). |
| Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or |
| <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</li> |
| <li><strong>worker_disk_size</strong> (<em>int</em>) – Disk size for the worker nodes</li> |
| <li><strong>num_preemptible_workers</strong> (<em>int</em>) – The # of preemptible worker nodes to spin up</li> |
| <li><strong>labels</strong> (<em>dict</em>) – dict of labels to add to the cluster</li> |
| <li><strong>zone</strong> (<em>string</em>) – The zone where the cluster will be located. (templated)</li> |
| <li><strong>network_uri</strong> (<em>string</em>) – The network uri to be used for machine communication, cannot be |
| specified with subnetwork_uri</li> |
| <li><strong>subnetwork_uri</strong> (<em>string</em>) – The subnetwork uri to be used for machine communication, |
| cannot be specified with network_uri</li> |
| <li><strong>internal_ip_only</strong> (<em>bool</em>) – If true, all instances in the cluster will only |
| have internal IP addresses. This can only be enabled for subnetwork |
| enabled networks</li> |
| <li><strong>tags</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – The GCE tags to add to all instances</li> |
| <li><strong>region</strong> – leave as ‘global’, might become relevant in the future. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>service_account</strong> (<em>string</em>) – The service account of the dataproc instances.</li> |
| <li><strong>service_account_scopes</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – The URIs of service account scopes to be included.</li> |
| <li><strong>idle_delete_ttl</strong> (<em>int</em>) – The longest duration that cluster would keep alive while |
| staying idle. Passing this threshold will cause cluster to be auto-deleted. |
| A duration in seconds.</li> |
| <li><strong>auto_delete_time</strong> (<em>datetime.datetime</em>) – The time when cluster will be auto-deleted.</li> |
| <li><strong>auto_delete_ttl</strong> (<em>int</em>) – The life duration of cluster, the cluster will be |
| auto-deleted at the end of this duration. |
| A duration in seconds. (If auto_delete_time is set this parameter will be ignored)</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Type:</th><td class="field-body"><p class="first last">custom_image: string</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocclusterscaleoperator"> |
| <span id="id47"></span><h5>DataprocClusterScaleOperator<a class="headerlink" href="#dataprocclusterscaleoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterScaleOperator</code><span class="sig-paren">(</span><em>cluster_name</em>, <em>project_id</em>, <em>region='global'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>num_workers=2</em>, <em>num_preemptible_workers=0</em>, <em>graceful_decommission_timeout=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Scale, up or down, a cluster on Google Cloud Dataproc. |
| The operator will wait until the cluster is re-scaled.</p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataprocClusterScaleOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataproc_scale'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">cluster_name</span><span class="o">=</span><span class="s1">'cluster-1'</span><span class="p">,</span> |
| <span class="n">num_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">num_preemptible_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">graceful_decommission_timeout</span><span class="o">=</span><span class="s1">'1h'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more detail about scaling clusters, have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters">https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the cluster to scale. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the cluster runs. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – The region for the dataproc cluster. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>num_workers</strong> (<em>int</em>) – The new number of workers</li> |
| <li><strong>num_preemptible_workers</strong> (<em>int</em>) – The new number of preemptible workers</li> |
| <li><strong>graceful_decommission_timeout</strong> (<em>string</em>) – Timeout for graceful YARN decommissioning. |
| Maximum value is 1d</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocclusterdeleteoperator"> |
| <span id="id48"></span><h5>DataprocClusterDeleteOperator<a class="headerlink" href="#dataprocclusterdeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterDeleteOperator</code><span class="sig-paren">(</span><em>cluster_name</em>, <em>project_id</em>, <em>region='global'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Delete a cluster on Google Cloud Dataproc. The operator will wait until the |
| cluster is destroyed.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the cluster to delete. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the cluster runs. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocpigoperator"> |
| <span id="id49"></span><h5>DataProcPigOperator<a class="headerlink" href="#dataprocpigoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcPigOperator</code><span class="sig-paren">(</span><em>query=None</em>, <em>query_uri=None</em>, <em>variables=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_pig_properties=None</em>, <em>dataproc_pig_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Pig query Job on a Cloud DataProc cluster. The parameters of the operation |
| will be passed to the cluster.</p> |
| <p>It’s a good practice to define dataproc_* parameters in the default_args of the dag |
| like the cluster name and UDFs.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'cluster_name'</span><span class="p">:</span> <span class="s1">'cluster-1'</span><span class="p">,</span> |
| <span class="s1">'dataproc_pig_jars'</span><span class="p">:</span> <span class="p">[</span> |
| <span class="s1">'gs://example/udf/jar/datafu/1.2.0/datafu.jar'</span><span class="p">,</span> |
| <span class="s1">'gs://example/udf/jar/gpig/1.2/gpig.jar'</span> |
| <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>You can pass a pig script as string or file reference. Use variables to pass on |
| variables for the pig script to be resolved on the cluster or use the parameters to |
| be resolved in the script as template parameters.</p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataProcPigOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataproc_pig'</span><span class="p">,</span> |
| <span class="n">query</span><span class="o">=</span><span class="s1">'a_pig_script.pig'</span><span class="p">,</span> |
| <span class="n">variables</span><span class="o">=</span><span class="p">{</span><span class="s1">'out'</span><span class="p">:</span> <span class="s1">'gs://example/output/{{ds}}'</span><span class="p">},</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more detail about job submission, have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs">https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>query</strong> (<em>string</em>) – The query or reference to the query |
| file (pg or pig extension). (templated)</li> |
| <li><strong>query_uri</strong> (<em>string</em>) – The uri of a pig script on Cloud Storage.</li> |
| <li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query. (templated)</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
| name by default is the task_id appended with the execution date, but can |
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
| <li><strong>dataproc_pig_properties</strong> (<em>dict</em>) – Map for the Pig properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_pig_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: for |
| UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>string</em>) – The specified region where the dataproc cluster is created.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprochiveoperator"> |
| <span id="id50"></span><h5>DataProcHiveOperator<a class="headerlink" href="#dataprochiveoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcHiveOperator</code><span class="sig-paren">(</span><em>query=None</em>, <em>query_uri=None</em>, <em>variables=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_hive_properties=None</em>, <em>dataproc_hive_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Hive query Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>query</strong> (<em>string</em>) – The query or reference to the query file (q extension).</li> |
| <li><strong>query_uri</strong> (<em>string</em>) – The uri of a hive script on Cloud Storage.</li> |
| <li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query.</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This name by default |
| is the task_id appended with the execution date, but can be templated. The |
| name will always be appended with a random number to avoid name clashes.</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster.</li> |
| <li><strong>dataproc_hive_properties</strong> (<em>dict</em>) – Map for the Hive properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_hive_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: for |
| UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>string</em>) – The specified region where the dataproc cluster is created.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocsparksqloperator"> |
| <span id="id51"></span><h5>DataProcSparkSqlOperator<a class="headerlink" href="#dataprocsparksqloperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcSparkSqlOperator</code><span class="sig-paren">(</span><em>query=None</em>, <em>query_uri=None</em>, <em>variables=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_spark_properties=None</em>, <em>dataproc_spark_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Spark SQL query Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>query</strong> (<em>string</em>) – The query or reference to the query file (q extension). (templated)</li> |
| <li><strong>query_uri</strong> (<em>string</em>) – The uri of a spark sql script on Cloud Storage.</li> |
| <li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query. (templated)</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
| name by default is the task_id appended with the execution date, but can |
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
| <li><strong>dataproc_spark_properties</strong> (<em>dict</em>) – Map for the Spark properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_spark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>string</em>) – The specified region where the dataproc cluster is created.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocsparkoperator"> |
| <span id="id52"></span><h5>DataProcSparkOperator<a class="headerlink" href="#dataprocsparkoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcSparkOperator</code><span class="sig-paren">(</span><em>main_jar=None</em>, <em>main_class=None</em>, <em>arguments=None</em>, <em>archives=None</em>, <em>files=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_spark_properties=None</em>, <em>dataproc_spark_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Spark Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>main_jar</strong> (<em>string</em>) – URI of the job jar provisioned on Cloud Storage. (use this or |
| the main_class, not both together).</li> |
| <li><strong>main_class</strong> (<em>string</em>) – Name of the job class. (use this or the main_jar, not both |
| together).</li> |
| <li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li> |
| <li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</li> |
| <li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
| name by default is the task_id appended with the execution date, but can |
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
| <li><strong>dataproc_spark_properties</strong> (<em>dict</em>) – Map for the Spark properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_spark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>string</em>) – The specified region where the dataproc cluster is created.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprochadoopoperator"> |
| <span id="id53"></span><h5>DataProcHadoopOperator<a class="headerlink" href="#dataprochadoopoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcHadoopOperator</code><span class="sig-paren">(</span><em>main_jar=None</em>, <em>main_class=None</em>, <em>arguments=None</em>, <em>archives=None</em>, <em>files=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_hadoop_properties=None</em>, <em>dataproc_hadoop_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Hadoop Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>main_jar</strong> (<em>string</em>) – URI of the job jar provisioned on Cloud Storage. (use this or |
| the main_class, not both together).</li> |
| <li><strong>main_class</strong> (<em>string</em>) – Name of the job class. (use this or the main_jar, not both |
| together).</li> |
| <li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li> |
| <li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</li> |
| <li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
| name by default is the task_id appended with the execution date, but can |
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
| <li><strong>dataproc_hadoop_properties</strong> (<em>dict</em>) – Map for the Hadoop properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_hadoop_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>string</em>) – The specified region where the dataproc cluster is created.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocpysparkoperator"> |
| <span id="id54"></span><h5>DataProcPySparkOperator<a class="headerlink" href="#dataprocpysparkoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcPySparkOperator</code><span class="sig-paren">(</span><em>main</em>, <em>arguments=None</em>, <em>archives=None</em>, <em>pyfiles=None</em>, <em>files=None</em>, <em>job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em>cluster_name='cluster-1'</em>, <em>dataproc_pyspark_properties=None</em>, <em>dataproc_pyspark_jars=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>region='global'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a PySpark Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>main</strong> (<em>string</em>) – [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main |
| Python file to use as the driver. Must be a .py file.</li> |
| <li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li> |
| <li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</li> |
| <li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li> |
| <li><strong>pyfiles</strong> (<em>list</em>) – List of Python files to pass to the PySpark framework. |
| Supported file types: .py, .egg, and .zip</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
| name by default is the task_id appended with the execution date, but can |
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster.</li> |
| <li><strong>dataproc_pyspark_properties</strong> (<em>dict</em>) – Map for the PySpark properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_pyspark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>region</strong> (<em>string</em>) – The specified region where the dataproc cluster is created.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocworkflowtemplateinstantiateoperator"> |
| <span id="id55"></span><h5>DataprocWorkflowTemplateInstantiateOperator<a class="headerlink" href="#dataprocworkflowtemplateinstantiateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateInstantiateOperator</code><span class="sig-paren">(</span><em>template_id</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator</span></code></a></p> |
| <p>Instantiate a WorkflowTemplate on Google Cloud Dataproc. The operator will wait |
| until the WorkflowTemplate is finished executing.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">Please refer to: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>template_id</strong> (<em>string</em>) – The id of the template. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the template runs</li> |
| <li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocworkflowtemplateinstantiateinlineoperator"> |
| <span id="id56"></span><h5>DataprocWorkflowTemplateInstantiateInlineOperator<a class="headerlink" href="#dataprocworkflowtemplateinstantiateinlineoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateInstantiateInlineOperator</code><span class="sig-paren">(</span><em>template</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateInlineOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator</span></code></a></p> |
| <p>Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc. The operator will |
| wait until the WorkflowTemplate is finished executing.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">Please refer to: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>template</strong> (<em>map</em>) – The template contents. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the template runs</li> |
| <li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| <div class="section" id="cloud-datastore"> |
| <h3>Cloud Datastore<a class="headerlink" href="#cloud-datastore" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="datastore-operators"> |
| <h4>Datastore Operators<a class="headerlink" href="#datastore-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#datastoreexportoperator"><span class="std std-ref">DatastoreExportOperator</span></a> : Export entities from Google Cloud Datastore to Cloud Storage.</li> |
| <li><a class="reference internal" href="#datastoreimportoperator"><span class="std std-ref">DatastoreImportOperator</span></a> : Import entities from Cloud Storage to Google Cloud Datastore.</li> |
| </ul> |
| <div class="section" id="datastoreexportoperator"> |
| <span id="id57"></span><h5>DatastoreExportOperator<a class="headerlink" href="#datastoreexportoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.datastore_export_operator.</code><code class="descname">DatastoreExportOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>namespace=None</em>, <em>datastore_conn_id='google_cloud_default'</em>, <em>cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>entity_filter=None</em>, <em>labels=None</em>, <em>polling_interval_in_seconds=10</em>, <em>overwrite_existing=False</em>, <em>xcom_push=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_export_operator.html#DatastoreExportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Export entities from Google Cloud Datastore to Cloud Storage</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – name of the cloud storage bucket to backup data</li> |
| <li><strong>namespace</strong> (<em>str</em>) – optional namespace path in the specified Cloud Storage bucket |
| to backup data. If this namespace does not exist in GCS, it will be created.</li> |
| <li><strong>datastore_conn_id</strong> (<em>string</em>) – the name of the Datastore connection id to use</li> |
| <li><strong>cloud_storage_conn_id</strong> (<em>string</em>) – the name of the cloud storage connection id to |
| force-write backup</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>entity_filter</strong> (<em>dict</em>) – description of what data from the project is included in the |
| export, refer to |
| <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter">https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter</a></li> |
| <li><strong>labels</strong> (<em>dict</em>) – client-assigned labels for cloud storage</li> |
| <li><strong>polling_interval_in_seconds</strong> (<em>int</em>) – number of seconds to wait before polling for |
| execution status again</li> |
| <li><strong>overwrite_existing</strong> (<em>bool</em>) – if the storage bucket + namespace is not empty, it will be |
| emptied prior to exports. This enables overwriting existing backups.</li> |
| <li><strong>xcom_push</strong> (<em>bool</em>) – push operation name to xcom for reference</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_export_operator.html#DatastoreExportOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="datastoreimportoperator"> |
| <span id="id58"></span><h5>DatastoreImportOperator<a class="headerlink" href="#datastoreimportoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.datastore_import_operator.</code><code class="descname">DatastoreImportOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>file</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em>, <em>datastore_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>polling_interval_in_seconds=10</em>, <em>xcom_push=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_import_operator.html#DatastoreImportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Import entities from Cloud Storage to Google Cloud Datastore</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – container in Cloud Storage to store data</li> |
| <li><strong>file</strong> (<em>string</em>) – path of the backup metadata file in the specified Cloud Storage bucket. |
| It should have the extension .overall_export_metadata</li> |
| <li><strong>namespace</strong> (<em>str</em>) – optional namespace of the backup metadata file in |
| the specified Cloud Storage bucket.</li> |
| <li><strong>entity_filter</strong> (<em>dict</em>) – description of what data from the project is included in |
| the export, refer to |
| <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter">https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter</a></li> |
| <li><strong>labels</strong> (<em>dict</em>) – client-assigned labels for cloud storage</li> |
| <li><strong>datastore_conn_id</strong> (<em>string</em>) – the name of the connection id to use</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>polling_interval_in_seconds</strong> (<em>int</em>) – number of seconds to wait before polling for |
| execution status again</li> |
| <li><strong>xcom_push</strong> (<em>bool</em>) – push operation name to xcom for reference</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_import_operator.html#DatastoreImportOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="datastorehook"> |
| <h4>DatastoreHook<a class="headerlink" href="#datastorehook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.datastore_hook.</code><code class="descname">DatastoreHook</code><span class="sig-paren">(</span><em>datastore_conn_id='google_cloud_datastore_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Interact with Google Cloud Datastore. This hook uses the Google Cloud Platform |
| connection.</p> |
| <p>This object is not thread safe. If you want to make multiple requests |
| simultaneously, you will need to create a hook per thread.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.allocate_ids"> |
| <code class="descname">allocate_ids</code><span class="sig-paren">(</span><em>partialKeys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.allocate_ids"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.allocate_ids" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Allocate IDs for incomplete keys. |
| see <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>partialKeys</strong> – a list of partial keys</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">a list of full keys.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.begin_transaction"> |
| <code class="descname">begin_transaction</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.begin_transaction"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.begin_transaction" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a new transaction handle</p> |
| <blockquote> |
| <div><div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction</a></p> |
| </div> |
| </div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">a transaction handle</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.commit"> |
| <code class="descname">commit</code><span class="sig-paren">(</span><em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.commit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.commit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Commit a transaction, optionally creating, deleting or modifying some entities.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>body</strong> – the body of the commit request</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the response body of the commit request</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.delete_operation"> |
| <code class="descname">delete_operation</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.delete_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.delete_operation" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes the long-running operation</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> – the name of the operation resource</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.export_to_storage_bucket"> |
| <code class="descname">export_to_storage_bucket</code><span class="sig-paren">(</span><em>bucket</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.export_to_storage_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.export_to_storage_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Export entities from Cloud Datastore to Cloud Storage for backup</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><em>version='v1'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google Cloud Datastore service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.get_operation"> |
| <code class="descname">get_operation</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.get_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.get_operation" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the latest state of a long-running operation</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> – the name of the operation resource</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.import_from_storage_bucket"> |
| <code class="descname">import_from_storage_bucket</code><span class="sig-paren">(</span><em>bucket</em>, <em>file</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.import_from_storage_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.import_from_storage_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Import a backup from Cloud Storage to Cloud Datastore</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.lookup"> |
| <code class="descname">lookup</code><span class="sig-paren">(</span><em>keys</em>, <em>read_consistency=None</em>, <em>transaction=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.lookup"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.lookup" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lookup some entities by key</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>keys</strong> – the keys to lookup</li> |
| <li><strong>read_consistency</strong> – the read consistency to use. default, strong or eventual. |
| Cannot be used with a transaction.</li> |
| <li><strong>transaction</strong> – the transaction to use, if any.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">the response body of the lookup request.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.poll_operation_until_done"> |
| <code class="descname">poll_operation_until_done</code><span class="sig-paren">(</span><em>name</em>, <em>polling_interval_in_seconds</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.poll_operation_until_done"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.poll_operation_until_done" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Poll backup operation state until it’s completed</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.rollback"> |
| <code class="descname">rollback</code><span class="sig-paren">(</span><em>transaction</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.rollback"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.rollback" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Roll back a transaction</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>transaction</strong> – the transaction to roll back</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.run_query"> |
| <code class="descname">run_query</code><span class="sig-paren">(</span><em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.run_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.run_query" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Run a query for entities.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>body</strong> – the body of the query request</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the batch of query results.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-ml-engine"> |
| <h3>Cloud ML Engine<a class="headerlink" href="#cloud-ml-engine" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="cloud-ml-engine-operators"> |
| <h4>Cloud ML Engine Operators<a class="headerlink" href="#cloud-ml-engine-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#mlenginebatchpredictionoperator"><span class="std std-ref">MLEngineBatchPredictionOperator</span></a> : Start a Cloud ML Engine batch prediction job.</li> |
| <li><a class="reference internal" href="#mlenginemodeloperator"><span class="std std-ref">MLEngineModelOperator</span></a> : Manages a Cloud ML Engine model.</li> |
| <li><a class="reference internal" href="#mlenginetrainingoperator"><span class="std std-ref">MLEngineTrainingOperator</span></a> : Start a Cloud ML Engine training job.</li> |
| <li><a class="reference internal" href="#mlengineversionoperator"><span class="std std-ref">MLEngineVersionOperator</span></a> : Manages a Cloud ML Engine model version.</li> |
| </ul> |
| <div class="section" id="mlenginebatchpredictionoperator"> |
| <span id="id59"></span><h5>MLEngineBatchPredictionOperator<a class="headerlink" href="#mlenginebatchpredictionoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineBatchPredictionOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>job_id</em>, <em>region</em>, <em>data_format</em>, <em>input_paths</em>, <em>output_path</em>, <em>model_name=None</em>, <em>version_name=None</em>, <em>uri=None</em>, <em>max_worker_count=None</em>, <em>runtime_version=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineBatchPredictionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Google Cloud ML Engine prediction job.</p> |
| <p>NOTE: For model origin, users should consider exactly one from the |
| three options below: |
| 1. Populate ‘uri’ field only, which should be a GCS location that |
| points to a tensorflow savedModel directory. |
| 2. Populate ‘model_name’ field only, which refers to an existing |
| model, and the default version of the model will be used. |
| 3. Populate both ‘model_name’ and ‘version_name’ fields, which |
| refers to a specific version of a specific model.</p> |
| <p>In options 2 and 3, both model and version name should contain the |
| minimal identifier. For instance, call</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">MLEngineBatchPredictionOperator</span><span class="p">(</span> |
| <span class="o">...</span><span class="p">,</span> |
| <span class="n">model_name</span><span class="o">=</span><span class="s1">'my_model'</span><span class="p">,</span> |
| <span class="n">version_name</span><span class="o">=</span><span class="s1">'my_version'</span><span class="p">,</span> |
| <span class="o">...</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>if the desired model version is |
| “projects/my_project/models/my_model/versions/my_version”.</p> |
| <p>See <a class="reference external" href="https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs">https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs</a> |
| for further documentation on the parameters.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name where the |
| prediction job is submitted. (templated)</li> |
| <li><strong>job_id</strong> (<em>string</em>) – A unique id for the prediction job on Google Cloud |
| ML Engine. (templated)</li> |
| <li><strong>data_format</strong> (<em>string</em>) – The format of the input data. |
| It will default to ‘DATA_FORMAT_UNSPECIFIED’ if is not provided |
| or is not one of [“TEXT”, “TF_RECORD”, “TF_RECORD_GZIP”].</li> |
| <li><strong>input_paths</strong> (<em>list of string</em>) – A list of GCS paths of input data for batch |
| prediction. Accepting wildcard operator *, but only at the end. (templated)</li> |
| <li><strong>output_path</strong> (<em>string</em>) – The GCS path where the prediction results are |
| written to. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – The Google Compute Engine region to run the |
| prediction job in. (templated)</li> |
| <li><strong>model_name</strong> (<em>string</em>) – The Google Cloud ML Engine model to use for prediction. |
| If version_name is not provided, the default version of this |
| model will be used. |
| Should not be None if version_name is provided. |
| Should be None if uri is provided. (templated)</li> |
| <li><strong>version_name</strong> (<em>string</em>) – The Google Cloud ML Engine model version to use for |
| prediction. |
| Should be None if uri is provided. (templated)</li> |
| <li><strong>uri</strong> (<em>string</em>) – The GCS path of the saved model to use for prediction. |
| Should be None if model_name is provided. |
| It should be a GCS path pointing to a tensorflow SavedModel. (templated)</li> |
| <li><strong>max_worker_count</strong> (<em>int</em>) – The maximum number of workers to be used |
| for parallel processing. Defaults to 10 if not specified.</li> |
| <li><strong>runtime_version</strong> (<em>string</em>) – The Google Cloud ML Engine runtime version to use |
| for batch prediction.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID used for connection to Google |
| Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must |
| have domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Raises:</dt> |
| <dd><code class="docutils literal notranslate"><span class="pre">ValueError</span></code>: if a unique model/version origin cannot be determined.</dd> |
| </dl> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineBatchPredictionOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="mlenginemodeloperator"> |
| <span id="id62"></span><h5>MLEngineModelOperator<a class="headerlink" href="#mlenginemodeloperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineModelOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineModelOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>model</em>, <em>operation='create'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineModelOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineModelOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator for managing a Google Cloud ML Engine model.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name to which MLEngine |
| model belongs. (templated)</li> |
| <li><strong>model</strong> (<em>dict</em>) – <p>A dictionary containing the information about the model. |
| If the <cite>operation</cite> is <cite>create</cite>, then the <cite>model</cite> parameter should |
| contain all the information about this model such as <cite>name</cite>.</p> |
| <p>If the <cite>operation</cite> is <cite>get</cite>, the <cite>model</cite> parameter |
| should contain the <cite>name</cite> of the model.</p> |
| </li> |
| <li><strong>operation</strong> – <p>The operation to perform. Available operations are:</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">create</span></code>: Creates a new model as provided by the <cite>model</cite> parameter.</li> |
| <li><code class="docutils literal notranslate"><span class="pre">get</span></code>: Gets a particular model where the name is specified in <cite>model</cite>.</li> |
| </ul> |
| </li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineModelOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineModelOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineModelOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="mlenginetrainingoperator"> |
| <span id="id63"></span><h5>MLEngineTrainingOperator<a class="headerlink" href="#mlenginetrainingoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineTrainingOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>job_id</em>, <em>package_uris</em>, <em>training_python_module</em>, <em>training_args</em>, <em>region</em>, <em>scale_tier=None</em>, <em>runtime_version=None</em>, <em>python_version=None</em>, <em>job_dir=None</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>mode='PRODUCTION'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineTrainingOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator for launching a MLEngine training job.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name within which MLEngine |
| training job should run (templated).</li> |
| <li><strong>job_id</strong> (<em>string</em>) – A unique templated id for the submitted Google MLEngine |
| training job. (templated)</li> |
| <li><strong>package_uris</strong> (<em>string</em>) – A list of package locations for MLEngine training job, |
| which should include the main training program + any additional |
| dependencies. (templated)</li> |
| <li><strong>training_python_module</strong> (<em>string</em>) – The Python module name to run within MLEngine |
| training job after installing ‘package_uris’ packages. (templated)</li> |
| <li><strong>training_args</strong> (<em>string</em>) – A list of templated command line arguments to pass to |
| the MLEngine training program. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – The Google Compute Engine region to run the MLEngine training |
| job in (templated).</li> |
| <li><strong>scale_tier</strong> (<em>string</em>) – Resource tier for MLEngine training job. (templated)</li> |
| <li><strong>runtime_version</strong> (<em>string</em>) – The Google Cloud ML runtime version to use for |
| training. (templated)</li> |
| <li><strong>python_version</strong> (<em>string</em>) – The version of Python used in training. (templated)</li> |
| <li><strong>job_dir</strong> (<em>string</em>) – A Google Cloud Storage path in which to store training |
| outputs and other data needed for training. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>mode</strong> (<em>string</em>) – Can be one of ‘DRY_RUN’/’CLOUD’. In ‘DRY_RUN’ mode, no real |
| training job will be launched, but the MLEngine training job request |
| will be printed out. In ‘CLOUD’ mode, a real MLEngine training job |
| creation request will be issued.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineTrainingOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="mlengineversionoperator"> |
| <span id="id64"></span><h5>MLEngineVersionOperator<a class="headerlink" href="#mlengineversionoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineVersionOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name=None</em>, <em>version=None</em>, <em>operation='create'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineVersionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator for managing a Google Cloud ML Engine version.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name to which MLEngine |
| model belongs.</li> |
| <li><strong>model_name</strong> (<em>string</em>) – The name of the Google Cloud ML Engine model that the version |
| belongs to. (templated)</li> |
| <li><strong>version_name</strong> (<em>string</em>) – A name to use for the version being operated upon. |
| If not None and the <cite>version</cite> argument is None or does not have a value for |
| the <cite>name</cite> key, then this will be populated in the payload for the |
| <cite>name</cite> key. (templated)</li> |
| <li><strong>version</strong> (<em>dict</em>) – A dictionary containing the information about the version. |
| If the <cite>operation</cite> is <cite>create</cite>, <cite>version</cite> should contain all the |
| information about this version such as name, and deploymentUrl. |
| If the <cite>operation</cite> is <cite>get</cite> or <cite>delete</cite>, the <cite>version</cite> parameter |
| should contain the <cite>name</cite> of the version. |
| If it is None, the only <cite>operation</cite> possible would be <cite>list</cite>. (templated)</li> |
| <li><strong>operation</strong> (<em>string</em>) – <p>The operation to perform. Available operations are:</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">create</span></code>: Creates a new version in the model specified by <cite>model_name</cite>, |
| in which case the <cite>version</cite> parameter should contain all the |
| information to create that version |
| (e.g. <cite>name</cite>, <cite>deploymentUrl</cite>).</li> |
| <li><code class="docutils literal notranslate"><span class="pre">get</span></code>: Gets full information of a particular version in the model |
| specified by <cite>model_name</cite>. |
| The name of the version should be specified in the <cite>version</cite> |
| parameter.</li> |
| <li><code class="docutils literal notranslate"><span class="pre">list</span></code>: Lists all available versions of the model specified |
| by <cite>model_name</cite>.</li> |
| <li><code class="docutils literal notranslate"><span class="pre">delete</span></code>: Deletes the version specified in <cite>version</cite> parameter from the |
| model specified by <cite>model_name</cite>. |
| The name of the version should be specified in the <cite>version</cite> |
| parameter.</li> |
| </ul> |
| </li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineVersionOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-ml-engine-hook"> |
| <h4>Cloud ML Engine Hook<a class="headerlink" href="#cloud-ml-engine-hook" title="Permalink to this headline">¶</a></h4> |
| <div class="section" id="mlenginehook"> |
| <span id="id65"></span><h5>MLEngineHook<a class="headerlink" href="#mlenginehook" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_mlengine_hook.</code><code class="descname">MLEngineHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_job"> |
| <code class="descname">create_job</code><span class="sig-paren">(</span><em>project_id</em>, <em>job</em>, <em>use_existing_job_fn=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Launches an MLEngine job and waits for it to reach a terminal state.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project id within which MLEngine |
| job will be launched.</li> |
| <li><strong>job</strong> (<em>dict</em>) – <p>MLEngine Job object that should be provided to the MLEngine |
| API, such as:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s1">'jobId'</span><span class="p">:</span> <span class="s1">'my_job_id'</span><span class="p">,</span> |
| <span class="s1">'trainingInput'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'scaleTier'</span><span class="p">:</span> <span class="s1">'STANDARD_1'</span><span class="p">,</span> |
| <span class="o">...</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| </li> |
| <li><strong>use_existing_job_fn</strong> (<em>function</em>) – In case an MLEngine job with the same |
| job_id already exists, this method (if provided) will decide whether |
| we should use this existing job, continue waiting for it to finish |
| and return the job object. It should accept an MLEngine job |
| object, and return a boolean value indicating whether it is OK to |
| reuse the existing job. If ‘use_existing_job_fn’ is not provided, |
| we by default reuse the existing MLEngine job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The MLEngine job object if the job successfully reaches a |
| terminal state (which might be FAILED or CANCELLED state).</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_model"> |
| <code class="descname">create_model</code><span class="sig-paren">(</span><em>project_id</em>, <em>model</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_model" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a Model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_version"> |
| <code class="descname">create_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_spec</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_version" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates the Version on Google Cloud ML Engine.</p> |
| <p>Returns the operation if the version was created successfully and |
| raises an error otherwise.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.delete_version"> |
| <code class="descname">delete_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.delete_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.delete_version" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes the given version of a model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google MLEngine service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_model"> |
| <code class="descname">get_model</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.get_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_model" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets a Model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.list_versions"> |
| <code class="descname">list_versions</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.list_versions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.list_versions" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lists all available versions of a model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.set_default_version"> |
| <code class="descname">set_default_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.set_default_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.set_default_version" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets a version to be the default. Blocks until finished.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| <div class="section" id="cloud-storage"> |
| <h3>Cloud Storage<a class="headerlink" href="#cloud-storage" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="storage-operators"> |
| <h4>Storage Operators<a class="headerlink" href="#storage-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#filetogooglecloudstorageoperator"><span class="std std-ref">FileToGoogleCloudStorageOperator</span></a> : Uploads a file to Google Cloud Storage.</li> |
| <li><a class="reference internal" href="#googlecloudstoragecreatebucketoperator"><span class="std std-ref">GoogleCloudStorageCreateBucketOperator</span></a> : Creates a new cloud storage bucket.</li> |
| <li><a class="reference internal" href="#googlecloudstoragelistoperator"><span class="std std-ref">GoogleCloudStorageListOperator</span></a> : List all objects from the bucket with the given string prefix and delimiter in name.</li> |
| <li><a class="reference internal" href="#googlecloudstoragedownloadoperator"><span class="std std-ref">GoogleCloudStorageDownloadOperator</span></a> : Downloads a file from Google Cloud Storage.</li> |
| <li><a class="reference internal" href="#googlecloudstoragetobigqueryoperator"><span class="std std-ref">GoogleCloudStorageToBigQueryOperator</span></a> : Loads files from Google cloud storage into BigQuery.</li> |
| <li><a class="reference internal" href="#googlecloudstoragetogooglecloudstorageoperator"><span class="std std-ref">GoogleCloudStorageToGoogleCloudStorageOperator</span></a> : Copies objects from a bucket to another, with renaming if requested.</li> |
| </ul> |
| <div class="section" id="filetogooglecloudstorageoperator"> |
| <span id="id66"></span><h5>FileToGoogleCloudStorageOperator<a class="headerlink" href="#filetogooglecloudstorageoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.file_to_gcs.</code><code class="descname">FileToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>src</em>, <em>dst</em>, <em>bucket</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>mime_type='application/octet-stream'</em>, <em>delegate_to=None</em>, <em>gzip=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_gcs.html#FileToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Uploads a file to Google Cloud Storage. |
| Optionally can compress the file for upload.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>src</strong> (<em>string</em>) – Path to the local file. (templated)</li> |
| <li><strong>dst</strong> (<em>string</em>) – Destination path within the specified bucket. (templated)</li> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to upload to. (templated)</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The Airflow connection ID to upload with</li> |
| <li><strong>mime_type</strong> (<em>string</em>) – The mime-type string</li> |
| <li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any</li> |
| <li><strong>gzip</strong> (<em>bool</em>) – Allows for file to be compressed and uploaded as gzip</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_gcs.html#FileToGoogleCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Uploads the file to Google cloud storage</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstoragecreatebucketoperator"> |
| <span id="id67"></span><h5>GoogleCloudStorageCreateBucketOperator<a class="headerlink" href="#googlecloudstoragecreatebucketoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_operator.</code><code class="descname">GoogleCloudStorageCreateBucketOperator</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>storage_class='MULTI_REGIONAL'</em>, <em>location='US'</em>, <em>project_id=None</em>, <em>labels=None</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_operator.html#GoogleCloudStorageCreateBucketOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new bucket. Google Cloud Storage uses a flat namespace, |
| so you can’t create a bucket with a name that is already in use.</p> |
| <blockquote> |
| <div><div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more information, see Bucket Naming Guidelines: |
| <a class="reference external" href="https://cloud.google.com/storage/docs/bucketnaming.html#requirements">https://cloud.google.com/storage/docs/bucketnaming.html#requirements</a></p> |
| </div> |
| </div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>string</em>) – The name of the bucket. (templated)</li> |
| <li><strong>storage_class</strong> (<em>string</em>) – <p>This defines how objects in the bucket are stored |
| and determines the SLA and the cost of storage (templated). Values include</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">STANDARD</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">NEARLINE</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">COLDLINE</span></code>.</li> |
| </ul> |
| <p>If this value is not specified when the bucket is |
| created, it will default to STANDARD.</p> |
| </li> |
| <li><strong>location</strong> (<em>string</em>) – <p>The location of the bucket. (templated) |
| Object data for objects in the bucket resides in physical storage |
| within this region. Defaults to US.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://developers.google.com/storage/docs/bucket-locations">https://developers.google.com/storage/docs/bucket-locations</a></p> |
| </div> |
| </li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the GCP Project. (templated)</li> |
| <li><strong>labels</strong> (<em>dict</em>) – User-provided labels, in key/value pairs.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must |
| have domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following Operator would create a new bucket <code class="docutils literal notranslate"><span class="pre">test-bucket</span></code> |
| with <code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code> storage class in <code class="docutils literal notranslate"><span class="pre">EU</span></code> region</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateBucket</span> <span class="o">=</span> <span class="n">GoogleCloudStorageCreateBucketOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'CreateNewBucket'</span><span class="p">,</span> |
| <span class="n">bucket_name</span><span class="o">=</span><span class="s1">'test-bucket'</span><span class="p">,</span> |
| <span class="n">storage_class</span><span class="o">=</span><span class="s1">'MULTI_REGIONAL'</span><span class="p">,</span> |
| <span class="n">location</span><span class="o">=</span><span class="s1">'EU'</span><span class="p">,</span> |
| <span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">'env'</span><span class="p">:</span> <span class="s1">'dev'</span><span class="p">,</span> <span class="s1">'team'</span><span class="p">:</span> <span class="s1">'airflow'</span><span class="p">},</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_operator.html#GoogleCloudStorageCreateBucketOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstoragedownloadoperator"> |
| <span id="id68"></span><h5>GoogleCloudStorageDownloadOperator<a class="headerlink" href="#googlecloudstoragedownloadoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_download_operator.</code><code class="descname">GoogleCloudStorageDownloadOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename=None</em>, <em>store_to_xcom_key=None</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_download_operator.html#GoogleCloudStorageDownloadOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Downloads a file from Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is. (templated)</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to download in the Google cloud |
| storage bucket. (templated)</li> |
| <li><strong>filename</strong> (<em>string</em>) – The file path on the local file system (where the |
| operator is being executed) that the file should be downloaded to. (templated) |
| If no filename passed, the downloaded data will not be stored on the local file |
| system.</li> |
| <li><strong>store_to_xcom_key</strong> (<em>string</em>) – If this param is set, the operator will push |
| the contents of the downloaded file to XCom with the key set in this |
| parameter. If not set, the downloaded data will not be pushed to XCom. (templated)</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_download_operator.html#GoogleCloudStorageDownloadOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstoragelistoperator"> |
| <span id="id69"></span><h5>GoogleCloudStorageListOperator<a class="headerlink" href="#googlecloudstoragelistoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_list_operator.</code><code class="descname">GoogleCloudStorageListOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>prefix=None</em>, <em>delimiter=None</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_list_operator.html#GoogleCloudStorageListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>List all objects from the bucket with the given string prefix and delimiter in name.</p> |
| <dl class="docutils"> |
| <dt>This operator returns a Python list with the names of objects which can be used by</dt> |
| <dd><cite>xcom</cite> in the downstream task.</dd> |
| </dl> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket to find the objects. (templated)</li> |
| <li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose name begin with |
| this prefix. (templated)</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – The delimiter by which you want to filter the objects. (templated) |
| For example, to list the CSV files in a directory in GCS you would use |
| delimiter=’.csv’.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following Operator would list all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code> |
| folder in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">GCS_Files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageListOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'GCS_Files'</span><span class="p">,</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">prefix</span><span class="o">=</span><span class="s1">'sales/sales-2017/'</span><span class="p">,</span> |
| <span class="n">delimiter</span><span class="o">=</span><span class="s1">'.avro'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_list_operator.html#GoogleCloudStorageListOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstoragetobigqueryoperator"> |
| <span id="id70"></span><h5>GoogleCloudStorageToBigQueryOperator<a class="headerlink" href="#googlecloudstoragetobigqueryoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_bq.</code><code class="descname">GoogleCloudStorageToBigQueryOperator</code><span class="sig-paren">(</span><em>bucket</em>, <em>source_objects</em>, <em>destination_project_dataset_table</em>, <em>schema_fields=None</em>, <em>schema_object=None</em>, <em>source_format='CSV'</em>, <em>compression='NONE'</em>, <em>create_disposition='CREATE_IF_NEEDED'</em>, <em>skip_leading_rows=0</em>, <em>write_disposition='WRITE_EMPTY'</em>, <em>field_delimiter='</em>, <em>'</em>, <em>max_bad_records=0</em>, <em>quote_character=None</em>, <em>ignore_unknown_values=False</em>, <em>allow_quoted_newlines=False</em>, <em>allow_jagged_rows=False</em>, <em>max_id_key=None</em>, <em>bigquery_conn_id='bigquery_default'</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>schema_update_options=()</em>, <em>src_fmt_configs={}</em>, <em>external_table=False</em>, <em>time_partitioning={}</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_bq.html#GoogleCloudStorageToBigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Loads files from Google cloud storage into BigQuery.</p> |
| <p>The schema to be used for the BigQuery table may be specified in one of |
| two ways. You may either directly pass the schema fields in, or you may |
| point the operator to a Google cloud storage object name. The object in |
| Google cloud storage must be a JSON file with the schema fields in it.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to load from. (templated)</li> |
| <li><strong>source_objects</strong> – List of Google cloud storage URIs to load from. (templated) |
| If source_format is ‘DATASTORE_BACKUP’, the list must only contain a single URI.</li> |
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The dotted (&lt;project&gt;.)&lt;dataset&gt;.&lt;table&gt;
BigQuery table to load data into. If &lt;project&gt; is not included,
project will be the project defined in the connection json. (templated)</li>
| <li><strong>schema_fields</strong> (<em>list</em>) – If set, the schema field list as defined here: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load">https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load</a> |
| Should not be set when source_format is ‘DATASTORE_BACKUP’.</li> |
<li><strong>schema_object</strong> (<em>string</em>) – If set, a GCS object path pointing to a .json file that
contains the schema for the table. (templated)</li>
| <li><strong>source_format</strong> (<em>string</em>) – File format to export.</li> |
| <li><strong>compression</strong> (<em>string</em>) – [Optional] The compression type of the data source. |
| Possible values include GZIP and NONE. |
| The default value is NONE. |
| This setting is ignored for Google Cloud Bigtable, |
| Google Cloud Datastore backups and Avro formats.</li> |
| <li><strong>create_disposition</strong> (<em>string</em>) – The create disposition if the table doesn’t exist.</li> |
| <li><strong>skip_leading_rows</strong> (<em>int</em>) – Number of rows to skip when loading from a CSV.</li> |
| <li><strong>write_disposition</strong> (<em>string</em>) – The write disposition if the table already exists.</li> |
| <li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use when loading from a CSV.</li> |
| <li><strong>max_bad_records</strong> (<em>int</em>) – The maximum number of bad records that BigQuery can |
| ignore when running the job.</li> |
| <li><strong>quote_character</strong> (<em>string</em>) – The value that is used to quote data sections in a CSV file.</li> |
| <li><strong>ignore_unknown_values</strong> (<em>bool</em>) – [Optional] Indicates if BigQuery should allow |
| extra values that are not represented in the table schema. |
| If true, the extra values are ignored. If false, records with extra columns |
| are treated as bad records, and if there are too many bad records, an |
| invalid error is returned in the job result.</li> |
| <li><strong>allow_quoted_newlines</strong> (<em>boolean</em>) – Whether to allow quoted newlines (true) or not (false).</li> |
| <li><strong>allow_jagged_rows</strong> (<em>bool</em>) – Accept rows that are missing trailing optional columns. |
| The missing values are treated as nulls. If false, records with missing trailing |
| columns are treated as bad records, and if there are too many bad records, an |
| invalid error is returned in the job result. Only applicable to CSV, ignored |
| for other formats.</li> |
| <li><strong>max_id_key</strong> (<em>string</em>) – If set, the name of a column in the BigQuery table |
that’s to be loaded. This will be used to select the MAX value from
| BigQuery after the load occurs. The results will be returned by the |
| execute() command, which in turn gets stored in XCom for future |
| operators to use. This can be helpful with incremental loads–during |
| future executions, you can pick up from the max ID.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google |
| cloud storage hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to |
| work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>schema_update_options</strong> (<em>list</em>) – Allows the schema of the destination |
| table to be updated as a side effect of the load job.</li> |
| <li><strong>src_fmt_configs</strong> (<em>dict</em>) – configure optional fields specific to the source format</li> |
| <li><strong>external_table</strong> (<em>bool</em>) – Flag to specify if the destination table should be |
| a BigQuery external table. Default Value is False.</li> |
| <li><strong>time_partitioning</strong> (<em>dict</em>) – configure optional time partitioning fields i.e. |
| partition by field, type and expiration as per API specifications. |
| Note that ‘field’ is not available in concurrency with |
| dataset.table$partition.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_bq.html#GoogleCloudStorageToBigQueryOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstoragetogooglecloudstorageoperator"> |
| <span id="id71"></span><h5>GoogleCloudStorageToGoogleCloudStorageOperator<a class="headerlink" href="#googlecloudstoragetogooglecloudstorageoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_gcs.</code><code class="descname">GoogleCloudStorageToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket=None</em>, <em>destination_object=None</em>, <em>move_object=False</em>, <em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_gcs.html#GoogleCloudStorageToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies objects from a bucket to another, with renaming if requested.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>string</em>) – The source Google cloud storage bucket where the |
| object is. (templated)</li> |
| <li><strong>source_object</strong> (<em>string</em>) – <p>The source name of the object to copy in the Google cloud |
| storage bucket. (templated) |
| If wildcards are used in this argument:</p> |
| <blockquote> |
| <div>You can use only one wildcard for objects (filenames) within your |
| bucket. The wildcard can appear inside the object name or at the |
| end of the object name. Appending a wildcard to the bucket name is |
| unsupported.</div></blockquote> |
| </li> |
<li><strong>destination_bucket</strong> (<em>string</em>) – The destination Google cloud storage bucket
where the object should be. (templated)</li>
<li><strong>destination_object</strong> (<em>string</em>) – The destination name of the object in the
destination Google cloud storage bucket. (templated)
If a wildcard is supplied in the source_object argument, this is the
prefix that will be prepended to the final destination objects’ paths.
Note that the source path’s part before the wildcard will be removed;
if it needs to be retained it should be appended to destination_object.
For example, with prefix <code class="docutils literal notranslate"><span class="pre">foo/*</span></code> and destination_object <code class="docutils literal notranslate"><span class="pre">blah/</span></code>, the
file <code class="docutils literal notranslate"><span class="pre">foo/baz</span></code> will be copied to <code class="docutils literal notranslate"><span class="pre">blah/baz</span></code>; to retain the prefix write
the destination_object as e.g. <code class="docutils literal notranslate"><span class="pre">blah/foo</span></code>, in which case the copied file
will be named <code class="docutils literal notranslate"><span class="pre">blah/foo/baz</span></code>.</li>
<li><strong>move_object</strong> (<em>bool</em>) – When move object is True, the object is moved instead
of copied to the new location. This is the equivalent of a mv command
as opposed to a cp command.</li>
<li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when
connecting to Google cloud storage.</li>
<li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.</li>
</ul>
</td>
</tr>
</tbody>
</table>
| <dl class="docutils"> |
| <dt><strong>Examples</strong>:</dt> |
| <dd><p class="first">The following Operator would copy a single file named |
| <code class="docutils literal notranslate"><span class="pre">sales/sales-2017/january.avro</span></code> in the <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the file named |
<code class="docutils literal notranslate"><span class="pre">copied_sales/2017/january-backup.avro</span></code> in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_single_file</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'copy_single_file'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">source_object</span><span class="o">=</span><span class="s1">'sales/sales-2017/january.avro'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'data_backup'</span><span class="p">,</span> |
| <span class="n">destination_object</span><span class="o">=</span><span class="s1">'copied_sales/2017/january-backup.avro'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The following Operator would copy all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code> |
| folder (i.e. with names starting with that prefix) in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the |
| <code class="docutils literal notranslate"><span class="pre">copied_sales/2017</span></code> folder in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'copy_files'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">source_object</span><span class="o">=</span><span class="s1">'sales/sales-2017/*.avro'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'data_backup'</span><span class="p">,</span> |
| <span class="n">destination_object</span><span class="o">=</span><span class="s1">'copied_sales/2017/'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The following Operator would move all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code> |
| folder (i.e. with names starting with that prefix) in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the |
| same folder in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket, deleting the original files in the |
| process.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">move_files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'move_files'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">source_object</span><span class="o">=</span><span class="s1">'sales/sales-2017/*.avro'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'data_backup'</span><span class="p">,</span> |
| <span class="n">move_object</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_gcs.html#GoogleCloudStorageToGoogleCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="googlecloudstoragehook"> |
| <h4>GoogleCloudStorageHook<a class="headerlink" href="#googlecloudstoragehook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcs_hook.</code><code class="descname">GoogleCloudStorageHook</code><span class="sig-paren">(</span><em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Interact with Google Cloud Storage. This hook uses the Google Cloud Platform |
| connection.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy"> |
| <code class="descname">copy</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket=None</em>, <em>destination_object=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.copy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Copies an object from a bucket to another, with renaming if requested.</p> |
| <p>destination_bucket or destination_object can be omitted, in which case |
| source bucket/object is used, but not both.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>string</em>) – The bucket of the object to copy from.</li> |
| <li><strong>source_object</strong> (<em>string</em>) – The object to copy.</li> |
<li><strong>destination_bucket</strong> (<em>string</em>) – The destination of the object to be copied to.
Can be omitted; then the same bucket is used.</li>
| <li><strong>destination_object</strong> – The (renamed) path of the object if given. |
| Can be omitted; then the same name is used.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket"> |
| <code class="descname">create_bucket</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>storage_class='MULTI_REGIONAL'</em>, <em>location='US'</em>, <em>project_id=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.create_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new bucket. Google Cloud Storage uses a flat namespace, so |
| you can’t create a bucket with a name that is already in use.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more information, see Bucket Naming Guidelines: |
| <a class="reference external" href="https://cloud.google.com/storage/docs/bucketnaming.html#requirements">https://cloud.google.com/storage/docs/bucketnaming.html#requirements</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket_name</strong> (<em>string</em>) – The name of the bucket.</li> |
| <li><strong>storage_class</strong> (<em>string</em>) – <p>This defines how objects in the bucket are stored |
| and determines the SLA and the cost of storage. Values include</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">STANDARD</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">NEARLINE</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">COLDLINE</span></code>.</li> |
| </ul> |
| <p>If this value is not specified when the bucket is |
| created, it will default to STANDARD.</p> |
| </li> |
| <li><strong>location</strong> (<em>string</em>) – <p>The location of the bucket. |
| Object data for objects in the bucket resides in physical storage |
| within this region. Defaults to US.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://developers.google.com/storage/docs/bucket-locations">https://developers.google.com/storage/docs/bucket-locations</a></p> |
| </div> |
| </li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the GCP Project.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – User-provided labels, in key/value pairs.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">If successful, it returns the <code class="docutils literal notranslate"><span class="pre">id</span></code> of the bucket.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete"> |
| <code class="descname">delete</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>generation=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.delete"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete an object if versioning is not enabled for the bucket, or if generation |
| parameter is used.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – name of the bucket, where the object resides</li> |
| <li><strong>object</strong> (<em>string</em>) – name of the object to delete</li> |
| <li><strong>generation</strong> (<em>string</em>) – if present, permanently delete the object of this generation</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if succeeded</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download"> |
| <code class="descname">download</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.download"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a file from Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to fetch from.</li> |
| <li><strong>object</strong> (<em>string</em>) – The object to fetch.</li> |
| <li><strong>filename</strong> (<em>string</em>) – If set, a local file path where the file should be written to.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists"> |
| <code class="descname">exists</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks for the existence of a file in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google Cloud Storage service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c"> |
| <code class="descname">get_crc32c</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_crc32c"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the CRC32c checksum of an object in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash"> |
| <code class="descname">get_md5hash</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_md5hash"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the MD5 hash of an object in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size"> |
| <code class="descname">get_size</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_size"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the size of a file in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after"> |
| <code class="descname">is_updated_after</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>ts</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.is_updated_after"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if an object is updated in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| <li><strong>ts</strong> (<em>datetime</em>) – The timestamp to check against.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list"> |
| <code class="descname">list</code><span class="sig-paren">(</span><em>bucket</em>, <em>versions=None</em>, <em>maxResults=None</em>, <em>prefix=None</em>, <em>delimiter=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.list"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list" title="Permalink to this definition">¶</a></dt> |
<dd><p>List all objects from the bucket with the given string prefix in name</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – bucket name</li> |
| <li><strong>versions</strong> (<em>boolean</em>) – if true, list all versions of the objects</li> |
| <li><strong>maxResults</strong> (<em>integer</em>) – max count of items to return in a single page of responses</li> |
| <li><strong>prefix</strong> (<em>string</em>) – prefix string which filters objects whose name begin with |
| this prefix</li> |
<li><strong>delimiter</strong> (<em>string</em>) – filters objects based on the delimiter (e.g. ‘.csv’)</li>
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">a stream of object names matching the filtering criteria</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite"> |
| <code class="descname">rewrite</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket</em>, <em>destination_object=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.rewrite"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite" title="Permalink to this definition">¶</a></dt> |
<dd><p>Has the same functionality as copy, except that it will work on files
| over 5 TB, as well as when copying between locations and/or storage |
| classes.</p> |
| <p>destination_object can be omitted, in which case source_object is used.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>string</em>) – The bucket of the object to copy from.</li> |
| <li><strong>source_object</strong> (<em>string</em>) – The object to copy.</li> |
<li><strong>destination_bucket</strong> (<em>string</em>) – The destination bucket the object is to be copied to.</li>
| <li><strong>destination_object</strong> – The (renamed) path of the object if given. |
| Can be omitted; then the same name is used.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload"> |
| <code class="descname">upload</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename</em>, <em>mime_type='application/octet-stream'</em>, <em>gzip=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.upload"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Uploads a local file to Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to upload to.</li> |
| <li><strong>object</strong> (<em>string</em>) – The object name to set when uploading the local file.</li> |
| <li><strong>filename</strong> (<em>string</em>) – The local file path to the file to be uploaded.</li> |
| <li><strong>mime_type</strong> (<em>str</em>) – The MIME type to set when uploading the file.</li> |
| <li><strong>gzip</strong> (<em>bool</em>) – Option to compress file for upload</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="google-kubernetes-engine"> |
| <h3>Google Kubernetes Engine<a class="headerlink" href="#google-kubernetes-engine" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="google-kubernetes-engine-cluster-operators"> |
| <h4>Google Kubernetes Engine Cluster Operators<a class="headerlink" href="#google-kubernetes-engine-cluster-operators" title="Permalink to this headline">¶</a></h4> |
<ul class="simple">
<li><a class="reference internal" href="#id72"><span class="std std-ref">GKEClusterDeleteOperator</span></a> : Deletes a Kubernetes Cluster in Google Cloud Platform</li>
<li><a class="reference internal" href="#id73"><span class="std std-ref">Google Kubernetes Engine Hook</span></a> : Hook for managing Kubernetes Clusters in Google Cloud Platform</li>
</ul>
| <div class="section" id="gkeclustercreateoperator"> |
| <h5>GKEClusterCreateOperator<a class="headerlink" href="#gkeclustercreateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_container_operator.GKEClusterCreateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_container_operator.</code><code class="descname">GKEClusterCreateOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>location</em>, <em>body={}</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v2'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_container_operator.html#GKEClusterCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_container_operator.GKEClusterCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_container_operator.GKEClusterCreateOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_container_operator.html#GKEClusterCreateOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_container_operator.GKEClusterCreateOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="gkeclusterdeleteoperator"> |
| <span id="id72"></span><h5>GKEClusterDeleteOperator<a class="headerlink" href="#gkeclusterdeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_container_operator.GKEClusterDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_container_operator.</code><code class="descname">GKEClusterDeleteOperator</code><span class="sig-paren">(</span><em>project_id</em>, <em>name</em>, <em>location</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>api_version='v2'</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_container_operator.html#GKEClusterDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_container_operator.GKEClusterDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.gcp_container_operator.GKEClusterDeleteOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_container_operator.html#GKEClusterDeleteOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_container_operator.GKEClusterDeleteOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>This is the main method to derive when creating an operator. |
| Context is the same dictionary used as when rendering jinja templates.</p> |
| <p>Refer to get_template_context for more context.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="google-kubernetes-engine-hook"> |
| <span id="id73"></span><h4>Google Kubernetes Engine Hook<a class="headerlink" href="#google-kubernetes-engine-hook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_container_hook.</code><code class="descname">GKEClusterHook</code><span class="sig-paren">(</span><em>project_id</em>, <em>location</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.create_cluster"> |
| <code class="descname">create_cluster</code><span class="sig-paren">(</span><em>cluster</em>, <em>retry=<object object></em>, <em>timeout=<object object></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook.create_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.create_cluster" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a cluster, consisting of the specified number and type of Google Compute |
| Engine instances.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>cluster</strong> (<em>dict</em><em> or </em><em>google.cloud.container_v1.types.Cluster</em>) – A Cluster protobuf or dict. If dict is provided, it must be of |
| the same form as the protobuf message google.cloud.container_v1.types.Cluster</li> |
| <li><strong>retry</strong> (<em>google.api_core.retry.Retry</em>) – A retry object (google.api_core.retry.Retry) used to retry requests. |
| If None is specified, requests will not be retried.</li> |
| <li><strong>timeout</strong> (<em>float</em>) – The amount of time, in seconds, to wait for the request to |
| complete. Note that if retry is specified, the timeout applies to each |
| individual attempt.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The full url to the new, or existing, cluster</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
<dl class="docutils">
<dt>Raises:</dt>
<dd>ParseError: On JSON parsing problems when trying to convert dict
AirflowException: If cluster is neither a dict nor a Cluster proto type</dd>
</dl>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.delete_cluster"> |
| <code class="descname">delete_cluster</code><span class="sig-paren">(</span><em>name</em>, <em>retry=<object object></em>, <em>timeout=<object object></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook.delete_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.delete_cluster" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes the cluster, including the Kubernetes endpoint and all |
| worker nodes. Firewalls and routes that were configured during |
| cluster creation are also deleted. Other Google Compute Engine |
| resources that might be in use by the cluster (e.g. load balancer |
| resources) will not be deleted if they weren’t present at the |
| initial create time.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>name</strong> (<em>str</em>) – The name of the cluster to delete</li> |
| <li><strong>retry</strong> (<em>google.api_core.retry.Retry</em>) – Retry object used to determine when/if to retry requests. |
| If None is specified, requests will not be retried.</li> |
| <li><strong>timeout</strong> (<em>float</em>) – The amount of time, in seconds, to wait for the request to |
| complete. Note that if retry is specified, the timeout applies to each |
| individual attempt.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The full url to the delete operation if successful, else None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.get_cluster"> |
| <code class="descname">get_cluster</code><span class="sig-paren">(</span><em>name</em>, <em>retry=<object object></em>, <em>timeout=<object object></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook.get_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.get_cluster" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets details of specified cluster</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>name</strong> (<em>str</em>) – The name of the cluster to retrieve</li> |
| <li><strong>retry</strong> (<em>google.api_core.retry.Retry</em>) – A retry object used to retry requests. If None is specified, |
| requests will not be retried.</li> |
| <li><strong>timeout</strong> (<em>float</em>) – The amount of time, in seconds, to wait for the request to |
| complete. Note that if retry is specified, the timeout applies to each |
| individual attempt.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A google.cloud.container_v1.types.Cluster instance</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.get_operation"> |
| <code class="descname">get_operation</code><span class="sig-paren">(</span><em>operation_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook.get_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.get_operation" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Fetches the operation from Google Cloud</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>operation_name</strong> (<em>str</em>) – Name of operation to fetch</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">The new, updated operation from Google Cloud</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.wait_for_operation"> |
| <code class="descname">wait_for_operation</code><span class="sig-paren">(</span><em>operation</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_container_hook.html#GKEClusterHook.wait_for_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_container_hook.GKEClusterHook.wait_for_operation" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Given an operation, continuously fetches the status from Google Cloud until either |
| completion or an error occurring</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>operation</strong> (<em>A google.cloud.container_v1.gapic.enums.Operation</em>) – The Operation to wait for</td>
</tr>
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A new, updated operation fetched from Google Cloud</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| </div> |
| |
| |
| </div> |
| |
| </div> |
| <footer> |
| |
| <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| |
| <a href="lineage.html" class="btn btn-neutral float-right" title="Lineage" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a> |
| |
| |
| <a href="api.html" class="btn btn-neutral" title="Experimental Rest API" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a> |
| |
| </div> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p> |
| |
| </p> |
| </div> |
| Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| |
| <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script> |
| <script type="text/javascript" src="_static/jquery.js"></script> |
| <script type="text/javascript" src="_static/underscore.js"></script> |
| <script type="text/javascript" src="_static/doctools.js"></script> |
| |
| |
| |
| |
| <script type="text/javascript" src="_static/js/theme.js"></script> |
| |
| <script type="text/javascript"> |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| </body> |
| </html> |