| |
| |
| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <title>Integration — Airflow Documentation</title> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> |
| <link rel="stylesheet" href="_static/pygments.css" type="text/css" /> |
| <link rel="index" title="Index" href="genindex.html" /> |
| <link rel="search" title="Search" href="search.html" /> |
| <link rel="next" title="Metrics" href="metrics.html" /> |
| <link rel="prev" title="Experimental Rest API" href="api.html" /> |
| |
| |
| <script src="_static/js/modernizr.min.js"></script> |
| <!-- Matomo --> |
| <script> |
| var _paq = window._paq = window._paq || []; |
| _paq.push(['disableCookies']); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| (function() { |
| var u="https://analytics.apache.org/"; |
| _paq.push(['setTrackerUrl', u+'matomo.php']); |
| _paq.push(['setSiteId', '13']); |
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; |
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); |
| })(); |
| </script> |
| <!-- End Matomo --> |
| </head> |
| |
| |
| <body class="wy-body-for-nav"> |
| |
| |
| <div class="wy-grid-for-nav"> |
| |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search"> |
| |
| |
| |
| <a href="index.html" class="icon icon-home"> Airflow |
| |
| |
| |
| </a> |
| |
| |
| |
| |
| <div class="version"> |
| 1.10.2 |
| </div> |
| |
| |
| |
| |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" /> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| |
| |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| |
| |
| |
| |
| |
| |
| <ul class="current"> |
| <li class="toctree-l1"><a class="reference internal" href="project.html">Project</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="license.html">License</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="start.html">Quick Start</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="tutorial.html">Tutorial</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="howto/index.html">How-to Guides</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="ui.html">UI / Screenshots</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="concepts.html">Concepts</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="profiling.html">Data Profiling</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="cli.html">Command Line Interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="scheduler.html">Scheduling & Triggers</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="plugins.html">Plugins</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="security.html">Security</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="timezone.html">Time zones</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="api.html">Experimental Rest API</a></li> |
| <li class="toctree-l1 current"><a class="current reference internal" href="#">Integration</a><ul> |
| <li class="toctree-l2"><a class="reference internal" href="#reverse-proxy">Reverse Proxy</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="#azure-microsoft-azure">Azure: Microsoft Azure</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#azure-blob-storage">Azure Blob Storage</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#wasbblobsensor">WasbBlobSensor</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#wasbprefixsensor">WasbPrefixSensor</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#filetowasboperator">FileToWasbOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#wasbhook">WasbHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#azure-file-share">Azure File Share</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#azurefilesharehook">AzureFileShareHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#logging">Logging</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#azure-cosmosdb">Azure CosmosDB</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#azurecosmosdbhook">AzureCosmosDBHook</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#azurecosmosinsertdocumentoperator">AzureCosmosInsertDocumentOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#azurecosmosdocumentsensor">AzureCosmosDocumentSensor</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#azure-data-lake">Azure Data Lake</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#azuredatalakehook">AzureDataLakeHook</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#azuredatalakestoragelistoperator">AzureDataLakeStorageListOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#adlstogooglecloudstorageoperator">AdlsToGoogleCloudStorageOperator</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="#aws-amazon-web-services">AWS: Amazon Web Services</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#aws-emr">AWS EMR</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#emraddstepsoperator">EmrAddStepsOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#emrcreatejobflowoperator">EmrCreateJobFlowOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#emrterminatejobflowoperator">EmrTerminateJobFlowOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#emrhook">EmrHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#aws-s3">AWS S3</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#s3hook">S3Hook</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3filetransformoperator">S3FileTransformOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3listoperator">S3ListOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3togooglecloudstorageoperator">S3ToGoogleCloudStorageOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3togooglecloudstoragetransferoperator">S3ToGoogleCloudStorageTransferOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3tohivetransfer">S3ToHiveTransfer</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#aws-ec2-container-service">AWS EC2 Container Service</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#ecsoperator">ECSOperator</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#aws-batch-service">AWS Batch Service</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#awsbatchoperator">AWSBatchOperator</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#aws-redshift">AWS RedShift</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#awsredshiftclustersensor">AwsRedshiftClusterSensor</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#redshifthook">RedshiftHook</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#redshifttos3transfer">RedshiftToS3Transfer</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#s3toredshifttransfer">S3ToRedshiftTransfer</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#amazon-sagemaker">Amazon SageMaker</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#sagemakerhook">SageMakerHook</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#sagemakertrainingoperator">SageMakerTrainingOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#sagemakertuningoperator">SageMakerTuningOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#sagemakermodeloperator">SageMakerModelOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#sagemakertransformoperator">SageMakerTransformOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#sagemakerendpointconfigoperator">SageMakerEndpointConfigOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#sagemakerendpointoperator">SageMakerEndpointOperator</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#id32">Amazon SageMaker</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#id34">SageMakerHook</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#id36">SageMakerTrainingOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#id38">SageMakerTuningOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#id40">SageMakerModelOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#id42">SageMakerTransformOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#id44">SageMakerEndpointConfigOperator</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#id46">SageMakerEndpointOperator</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="#databricks">Databricks</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#databrickssubmitrunoperator">DatabricksSubmitRunOperator</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="#gcp-google-cloud-platform">GCP: Google Cloud Platform</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#id49">Logging</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#googlecloudbasehook">GoogleCloudBaseHook</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#bigquery">BigQuery</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#bigquery-operators">BigQuery Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#bigqueryhook">BigQueryHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-spanner">Cloud Spanner</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-spanner-operators">Cloud Spanner Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#cloudspannerhook">CloudSpannerHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-sql">Cloud SQL</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-sql-operators">Cloud SQL Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-sql-hooks">Cloud SQL Hooks</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-bigtable">Cloud Bigtable</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-bigtable-operators">Cloud Bigtable Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-bigtable-hook">Cloud Bigtable Hook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#compute-engine">Compute Engine</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#compute-engine-operators">Compute Engine Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#compute-engine-hook">Compute Engine Hook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-functions">Cloud Functions</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-functions-operators">Cloud Functions Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-functions-hook">Cloud Functions Hook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-dataflow">Cloud DataFlow</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#dataflow-operators">DataFlow Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#dataflowhook">DataFlowHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-dataproc">Cloud DataProc</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#dataproc-operators">DataProc Operators</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-datastore">Cloud Datastore</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#datastore-operators">Datastore Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datastorehook">DatastoreHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-ml-engine">Cloud ML Engine</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-ml-engine-operators">Cloud ML Engine Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#cloud-ml-engine-hook">Cloud ML Engine Hook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#cloud-storage">Cloud Storage</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#storage-operators">Storage Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#googlecloudstoragehook">GoogleCloudStorageHook</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#gcptransferservicehook">GCPTransferServiceHook</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#google-kubernetes-engine">Google Kubernetes Engine</a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#google-kubernetes-engine-cluster-operators">Google Kubernetes Engine Cluster Operators</a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#google-kubernetes-engine-hook">Google Kubernetes Engine Hook</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="#qubole">Qubole</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#quboleoperator">QuboleOperator</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#qubolepartitionsensor">QubolePartitionSensor</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#qubolefilesensor">QuboleFileSensor</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#qubolecheckoperator">QuboleCheckOperator</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="#qubolevaluecheckoperator">QuboleValueCheckOperator</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l1"><a class="reference internal" href="metrics.html">Metrics</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="kubernetes.html">Kubernetes</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="lineage.html">Lineage</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="changelog.html">Changelog</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="faq.html">FAQ</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="code.html">API Reference</a></li> |
| </ul> |
| |
| |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" aria-label="top navigation"> |
| |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="index.html">Airflow</a> |
| |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| |
| <div class="rst-content"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| |
| <ul class="wy-breadcrumbs"> |
| |
| <li><a href="index.html">Docs</a> »</li> |
| |
| <li>Integration</li> |
| |
| |
| <li class="wy-breadcrumbs-aside"> |
| |
| |
| <a href="_sources/integration.rst.txt" rel="nofollow"> View page source</a> |
| |
| |
| </li> |
| |
| </ul> |
| |
| |
| <hr/> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <div class="section" id="integration"> |
| <h1>Integration<a class="headerlink" href="#integration" title="Permalink to this headline">¶</a></h1> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#reverseproxy"><span class="std std-ref">Reverse Proxy</span></a></li> |
| <li><a class="reference internal" href="#azure"><span class="std std-ref">Azure: Microsoft Azure</span></a></li> |
| <li><a class="reference internal" href="#aws"><span class="std std-ref">AWS: Amazon Web Services</span></a></li> |
| <li><a class="reference internal" href="#databricks"><span class="std std-ref">Databricks</span></a></li> |
| <li><a class="reference internal" href="#gcp"><span class="std std-ref">GCP: Google Cloud Platform</span></a></li> |
| <li><a class="reference internal" href="#qubole"><span class="std std-ref">Qubole</span></a></li> |
| </ul> |
| <div class="section" id="reverse-proxy"> |
| <span id="reverseproxy"></span><h2>Reverse Proxy<a class="headerlink" href="#reverse-proxy" title="Permalink to this headline">¶</a></h2> |
| <p>Airflow can be set up behind a reverse proxy, with the ability to set its endpoint with great |
| flexibility.</p> |
| <p>For example, you can configure your reverse proxy to get:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">lab</span><span class="o">.</span><span class="n">mycompany</span><span class="o">.</span><span class="n">com</span><span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">airflow</span><span class="o">/</span> |
| </pre></div> |
| </div> |
| <p>To do so, you need to set the following setting in your <cite>airflow.cfg</cite>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">base_url</span> <span class="o">=</span> <span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="n">my_host</span><span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">airflow</span> |
| </pre></div> |
| </div> |
| <p>Additionally if you use Celery Executor, you can get Flower in <cite>/myorg/flower</cite> with:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">flower_url_prefix</span> <span class="o">=</span> <span class="o">/</span><span class="n">myorg</span><span class="o">/</span><span class="n">flower</span> |
| </pre></div> |
| </div> |
<p>Your reverse proxy (e.g. nginx) should be configured as follows:</p>
| <ul> |
<li><p class="first">pass the URL and HTTP headers as is to the Airflow webserver, without any rewrite, for example:</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>server { |
| listen 80; |
| server_name lab.mycompany.com; |
| |
| location /myorg/airflow/ { |
| proxy_pass http://localhost:8080; |
| proxy_set_header Host $host; |
| proxy_redirect off; |
| proxy_http_version 1.1; |
| proxy_set_header Upgrade $http_upgrade; |
| proxy_set_header Connection "upgrade"; |
| } |
| } |
| </pre></div> |
| </div> |
| </li> |
| <li><p class="first">rewrite the url for the flower endpoint:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>server { |
| listen 80; |
| server_name lab.mycompany.com; |
| |
| location /myorg/flower/ { |
| rewrite ^/myorg/flower/(.*)$ /$1 break; # remove prefix from http header |
| proxy_pass http://localhost:5555; |
| proxy_set_header Host $host; |
| proxy_redirect off; |
| proxy_http_version 1.1; |
| proxy_set_header Upgrade $http_upgrade; |
| proxy_set_header Connection "upgrade"; |
| } |
| } |
| </pre></div> |
| </div> |
| </li> |
| </ul> |
| <p>To ensure that Airflow generates URLs with the correct scheme when |
| running behind a TLS-terminating proxy, you should configure the proxy |
| to set the <cite>X-Forwarded-Proto</cite> header, and enable the <cite>ProxyFix</cite> |
| middleware in your <cite>airflow.cfg</cite>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">enable_proxy_fix</span> <span class="o">=</span> <span class="kc">True</span> |
| </pre></div> |
| </div> |
| <p>Note: you should only enable the <cite>ProxyFix</cite> middleware when running |
| Airflow behind a trusted proxy (AWS ELB, nginx, etc.).</p> |
| </div> |
| <div class="section" id="azure-microsoft-azure"> |
| <span id="azure"></span><h2>Azure: Microsoft Azure<a class="headerlink" href="#azure-microsoft-azure" title="Permalink to this headline">¶</a></h2> |
| <p>Airflow has limited support for Microsoft Azure: interfaces exist only for Azure Blob |
| Storage and Azure Data Lake. Hook, Sensor and Operator for Blob Storage and |
| Azure Data Lake Hook are in contrib section.</p> |
| <div class="section" id="azure-blob-storage"> |
| <h3>Azure Blob Storage<a class="headerlink" href="#azure-blob-storage" title="Permalink to this headline">¶</a></h3> |
<p>All classes communicate via the Windows Azure Storage Blob protocol. Make sure that an
Airflow connection of type <cite>wasb</cite> exists. Authorization can be done by supplying a
| login (=Storage account name) and password (=KEY), or login and SAS token in the extra |
| field (see connection <cite>wasb_default</cite> for an example).</p> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#wasbblobsensor"><span class="std std-ref">WasbBlobSensor</span></a>: Checks if a blob is present on Azure Blob storage.</li> |
| <li><a class="reference internal" href="#wasbprefixsensor"><span class="std std-ref">WasbPrefixSensor</span></a>: Checks if blobs matching a prefix are present on Azure Blob storage.</li> |
| <li><a class="reference internal" href="#filetowasboperator"><span class="std std-ref">FileToWasbOperator</span></a>: Uploads a local file to a container as a blob.</li> |
| <li><a class="reference internal" href="#wasbhook"><span class="std std-ref">WasbHook</span></a>: Interface with Azure Blob Storage.</li> |
| </ul> |
| <div class="section" id="wasbblobsensor"> |
| <span id="id1"></span><h4>WasbBlobSensor<a class="headerlink" href="#wasbblobsensor" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.wasb_sensor.WasbBlobSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.wasb_sensor.</code><code class="descname">WasbBlobSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbBlobSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbBlobSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a blob to arrive on Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li> |
| <li><strong>check_options</strong> (<em>dict</em>) – Optional keyword arguments that |
| <cite>WasbHook.check_for_blob()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.wasb_sensor.WasbBlobSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbBlobSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbBlobSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="wasbprefixsensor"> |
| <span id="id2"></span><h4>WasbPrefixSensor<a class="headerlink" href="#wasbprefixsensor" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.wasb_sensor.</code><code class="descname">WasbPrefixSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbPrefixSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for blobs matching a prefix to arrive on Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>prefix</strong> (<em>str</em>) – Prefix of the blob.</li> |
| <li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li> |
| <li><strong>check_options</strong> (<em>dict</em>) – Optional keyword arguments that |
| <cite>WasbHook.check_for_prefix()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/wasb_sensor.html#WasbPrefixSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.wasb_sensor.WasbPrefixSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="filetowasboperator"> |
| <span id="id3"></span><h4>FileToWasbOperator<a class="headerlink" href="#filetowasboperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.file_to_wasb.FileToWasbOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.file_to_wasb.</code><code class="descname">FileToWasbOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_wasb.html#FileToWasbOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_wasb.FileToWasbOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Uploads a file to Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to load. (templated)</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container. (templated)</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob. (templated)</li> |
| <li><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</li> |
| <li><strong>load_options</strong> (<em>dict</em>) – Optional keyword arguments that |
| <cite>WasbHook.load_file()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.file_to_wasb.FileToWasbOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_wasb.html#FileToWasbOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_wasb.FileToWasbOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure Blob Storage.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="wasbhook"> |
| <span id="id4"></span><h4>WasbHook<a class="headerlink" href="#wasbhook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.wasb_hook.</code><code class="descname">WasbHook</code><span class="sig-paren">(</span><em>wasb_conn_id='wasb_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure Blob Storage through the wasb:// protocol.</p> |
| <p>Additional options passed in the ‘extra’ field of the connection will be |
| passed to the <cite>BlockBlobService()</cite> constructor. For example, authenticate |
| using a SAS token by adding {“sas_token”: “YOUR_TOKEN”}.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.check_for_blob"> |
| <code class="descname">check_for_blob</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.check_for_blob"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.check_for_blob" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a blob exists on Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.exists()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the blob exists, False otherwise.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.check_for_prefix"> |
| <code class="descname">check_for_prefix</code><span class="sig-paren">(</span><em>container_name</em>, <em>prefix</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.check_for_prefix"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.check_for_prefix" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a prefix exists on Azure Blob storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>prefix</strong> (<em>str</em>) – Prefix of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.list_blobs()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if blobs matching the prefix exist, False otherwise.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.delete_file"> |
| <code class="descname">delete_file</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>is_prefix=False</em>, <em>ignore_if_missing=False</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.delete_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.delete_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete a file from Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>is_prefix</strong> (<em>bool</em>) – If blob_name is a prefix, delete all matching files</li> |
| <li><strong>ignore_if_missing</strong> (<em>bool</em>) – if True, then return success even if the |
| blob does not exist.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.delete_blob()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the BlockBlobService object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.get_file"> |
| <code class="descname">get_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.get_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.get_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Download a file from Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to download.</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.get_blob_to_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.load_file"> |
| <code class="descname">load_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to load.</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.create_blob_from_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.load_string"> |
| <code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.load_string" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a string to Azure Blob Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>string_data</strong> (<em>str</em>) – String to load.</li> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.create_blob_from_text()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.wasb_hook.WasbHook.read_file"> |
| <code class="descname">read_file</code><span class="sig-paren">(</span><em>container_name</em>, <em>blob_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/wasb_hook.html#WasbHook.read_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.wasb_hook.WasbHook.read_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Read a file from Azure Blob Storage and return as a string.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>container_name</strong> (<em>str</em>) – Name of the container.</li> |
| <li><strong>blob_name</strong> (<em>str</em>) – Name of the blob.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>BlockBlobService.get_blob_to_text()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="azure-file-share"> |
| <h3>Azure File Share<a class="headerlink" href="#azure-file-share" title="Permalink to this headline">¶</a></h3> |
| <p>Cloud variant of an SMB file share. Make sure that an Airflow connection of |
| type <cite>wasb</cite> exists. Authorization can be done by supplying a login (=Storage account name) |
| and password (=Storage account key), or login and SAS token in the extra field |
| (see connection <cite>wasb_default</cite> for an example).</p> |
| <div class="section" id="azurefilesharehook"> |
| <h4>AzureFileShareHook<a class="headerlink" href="#azurefilesharehook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_fileshare_hook.</code><code class="descname">AzureFileShareHook</code><span class="sig-paren">(</span><em>wasb_conn_id='wasb_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure FileShare Storage.</p> |
| <p>Additional options passed in the ‘extra’ field of the connection will be |
| passed to the <cite>FileService()</cite> constructor.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>wasb_conn_id</strong> (<em>str</em>) – Reference to the wasb connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_directory"> |
| <code class="descname">check_for_directory</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.check_for_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a directory exists on Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.exists()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the directory exists, False otherwise.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_file"> |
| <code class="descname">check_for_file</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.check_for_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.check_for_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a file exists on Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.exists()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the file exists, False otherwise.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.create_directory"> |
| <code class="descname">create_directory</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.create_directory"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.create_directory" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a new directory on an Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_directory()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">True if the directory was created, False otherwise.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bool</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the FileService object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file"> |
| <code class="descname">get_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Download a file from Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Where to store the file.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.get_file_to_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file_to_stream"> |
| <code class="descname">get_file_to_stream</code><span class="sig-paren">(</span><em>stream</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.get_file_to_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.get_file_to_stream" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Download a file from Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>stream</strong> (<em>file-like object</em>) – A filehandle to store the file to.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.get_file_to_stream()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.list_directories_and_files"> |
| <code class="descname">list_directories_and_files</code><span class="sig-paren">(</span><em>share_name</em>, <em>directory_name=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.list_directories_and_files"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.list_directories_and_files" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the list of directories and files stored on an Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.list_directories_and_files()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A list of files and directories</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">list</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_file"> |
| <code class="descname">load_file</code><span class="sig-paren">(</span><em>file_path</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_path</strong> (<em>str</em>) – Path to the file to load.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_file_from_path()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_stream"> |
| <code class="descname">load_stream</code><span class="sig-paren">(</span><em>stream</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>count</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_stream" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a stream to Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>stream</strong> (<em>file-like</em>) – Opened file/stream to upload as the file content.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>count</strong> (<em>int</em>) – Size of the stream in bytes</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_file_from_stream()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_string"> |
| <code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>share_name</em>, <em>directory_name</em>, <em>file_name</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_fileshare_hook.html#AzureFileShareHook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_fileshare_hook.AzureFileShareHook.load_string" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a string to Azure File Share.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>string_data</strong> (<em>str</em>) – String to load.</li> |
| <li><strong>share_name</strong> (<em>str</em>) – Name of the share.</li> |
| <li><strong>directory_name</strong> (<em>str</em>) – Name of the directory.</li> |
| <li><strong>file_name</strong> (<em>str</em>) – Name of the file.</li> |
| <li><strong>kwargs</strong> (<em>object</em>) – Optional keyword arguments that |
| <cite>FileService.create_file_from_text()</cite> takes.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="logging"> |
| <h3>Logging<a class="headerlink" href="#logging" title="Permalink to this headline">¶</a></h3> |
| <p>Airflow can be configured to read and write task logs in Azure Blob Storage. |
| See <a class="reference internal" href="howto/write-logs.html#write-logs-azure"><span class="std std-ref">Writing Logs to Azure Blob Storage</span></a>.</p> |
| </div> |
| <div class="section" id="azure-cosmosdb"> |
| <h3>Azure CosmosDB<a class="headerlink" href="#azure-cosmosdb" title="Permalink to this headline">¶</a></h3> |
| <p>AzureCosmosDBHook communicates via the Azure Cosmos library. Make sure that an |
| Airflow connection of type <cite>azure_cosmos</cite> exists. Authorization can be done by supplying a |
| login (=Endpoint uri), password (=secret key) and extra fields database_name and collection_name to specify the |
| default database and collection to use (see connection <cite>azure_cosmos_default</cite> for an example).</p> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#azurecosmosdbhook"><span class="std std-ref">AzureCosmosDBHook</span></a>: Interface with Azure CosmosDB.</li> |
| <li><span class="xref std std-ref">AzureCosmosInsertDocumentOperator</span>: Simple operator to insert document into CosmosDB.</li> |
| <li><span class="xref std std-ref">AzureCosmosDocumentSensor</span>: Simple sensor to detect document existence in CosmosDB.</li> |
| </ul> |
| <div class="section" id="azurecosmosdbhook"> |
| <span id="id5"></span><h4>AzureCosmosDBHook<a class="headerlink" href="#azurecosmosdbhook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_cosmos_hook.</code><code class="descname">AzureCosmosDBHook</code><span class="sig-paren">(</span><em>azure_cosmos_conn_id='azure_cosmos_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure CosmosDB.</p> |
| <p>login should be the endpoint uri, password should be the master key |
| optionally, you can use the following extras to default these values |
| {“database_name”: “<DATABASE_NAME>”, “collection_name”: “<COLLECTION_NAME>”}.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>azure_cosmos_conn_id</strong> (<em>str</em>) – Reference to the Azure CosmosDB connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_collection"> |
| <code class="descname">create_collection</code><span class="sig-paren">(</span><em>collection_name</em>, <em>database_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.create_collection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_collection" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_database"> |
| <code class="descname">create_database</code><span class="sig-paren">(</span><em>database_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.create_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.create_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new database in CosmosDB.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_collection"> |
| <code class="descname">delete_collection</code><span class="sig-paren">(</span><em>collection_name</em>, <em>database_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.delete_collection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_collection" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes an existing collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_database"> |
| <code class="descname">delete_database</code><span class="sig-paren">(</span><em>database_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.delete_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes an existing database in CosmosDB.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_document"> |
| <code class="descname">delete_document</code><span class="sig-paren">(</span><em>document_id</em>, <em>database_name=None</em>, <em>collection_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.delete_document"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.delete_document" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete an existing document out of a collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_collection_exist"> |
| <code class="descname">does_collection_exist</code><span class="sig-paren">(</span><em>collection_name</em>, <em>database_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.does_collection_exist"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_collection_exist" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if a collection exists in CosmosDB.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_database_exist"> |
| <code class="descname">does_database_exist</code><span class="sig-paren">(</span><em>database_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.does_database_exist"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.does_database_exist" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if a database exists in CosmosDB.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return a cosmos db client.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_document"> |
| <code class="descname">get_document</code><span class="sig-paren">(</span><em>document_id</em>, <em>database_name=None</em>, <em>collection_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.get_document"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_document" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a document from an existing collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_documents"> |
| <code class="descname">get_documents</code><span class="sig-paren">(</span><em>sql_string</em>, <em>database_name=None</em>, <em>collection_name=None</em>, <em>partition_key=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.get_documents"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.get_documents" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a list of documents from an existing collection in the CosmosDB database via SQL query.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.insert_documents"> |
| <code class="descname">insert_documents</code><span class="sig-paren">(</span><em>documents</em>, <em>database_name=None</em>, <em>collection_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.insert_documents"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.insert_documents" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Insert a list of new documents into an existing collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.upsert_document"> |
| <code class="descname">upsert_document</code><span class="sig-paren">(</span><em>document</em>, <em>database_name=None</em>, <em>collection_name=None</em>, <em>document_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_cosmos_hook.html#AzureCosmosDBHook.upsert_document"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_cosmos_hook.AzureCosmosDBHook.upsert_document" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Inserts a new document (or updates an existing one) into an existing |
| collection in the CosmosDB database.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="azurecosmosinsertdocumentoperator"> |
| <h4>AzureCosmosInsertDocumentOperator<a class="headerlink" href="#azurecosmosinsertdocumentoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.azure_cosmos_operator.AzureCosmosInsertDocumentOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.azure_cosmos_operator.</code><code class="descname">AzureCosmosInsertDocumentOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/azure_cosmos_operator.html#AzureCosmosInsertDocumentOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.azure_cosmos_operator.AzureCosmosInsertDocumentOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Inserts a new document into the specified Cosmos database and collection. |
| It will create both the database and collection if they do not already exist.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>database_name</strong> (<em>str</em>) – The name of the database. (templated)</li> |
| <li><strong>collection_name</strong> (<em>str</em>) – The name of the collection. (templated)</li> |
| <li><strong>document</strong> (<em>dict</em>) – The document to insert</li> |
| <li><strong>azure_cosmos_conn_id</strong> (<em>str</em>) – reference to a CosmosDB connection.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="azurecosmosdocumentsensor"> |
| <h4>AzureCosmosDocumentSensor<a class="headerlink" href="#azurecosmosdocumentsensor" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.azure_cosmos_sensor.</code><code class="descname">AzureCosmosDocumentSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/azure_cosmos_sensor.html#AzureCosmosDocumentSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Checks for the existence of a document which |
| matches the given query in CosmosDB. Example:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">azure_cosmos_sensor</span> <span class="o">=</span> <span class="n">AzureCosmosDocumentSensor</span><span class="p">(</span><span class="n">database_name</span><span class="o">=</span><span class="s2">"somedatabase_name"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">collection_name</span><span class="o">=</span><span class="s2">"somecollection_name"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">document_id</span><span class="o">=</span><span class="s2">"unique-doc-id"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">azure_cosmos_conn_id</span><span class="o">=</span><span class="s2">"azure_cosmos_default"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">task_id</span><span class="o">=</span><span class="s2">"azure_cosmos_sensor"</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/azure_cosmos_sensor.html#AzureCosmosDocumentSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.azure_cosmos_sensor.AzureCosmosDocumentSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that the sensors defined while deriving this class should |
| override.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="azure-data-lake"> |
| <h3>Azure Data Lake<a class="headerlink" href="#azure-data-lake" title="Permalink to this headline">¶</a></h3> |
| <p>AzureDataLakeHook communicates via a REST API compatible with WebHDFS. Make sure that an |
| Airflow connection of type <cite>azure_data_lake</cite> exists. Authorization can be done by supplying a |
| login (=Client ID), password (=Client Secret) and extra fields tenant (Tenant) and account_name (Account Name)</p> |
| <blockquote> |
| <div>(see connection <cite>azure_data_lake_default</cite> for an example).</div></blockquote> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#azuredatalakehook"><span class="std std-ref">AzureDataLakeHook</span></a>: Interface with Azure Data Lake.</li> |
| <li><a class="reference internal" href="#azuredatalakestoragelistoperator"><span class="std std-ref">AzureDataLakeStorageListOperator</span></a>: Lists the files located in a specified Azure Data Lake path.</li> |
| <li><a class="reference internal" href="#adlstogooglecloudstorageoperator"><span class="std std-ref">AdlsToGoogleCloudStorageOperator</span></a>: Copies files from an Azure Data Lake path to a Google Cloud Storage bucket.</li> |
| </ul> |
| <div class="section" id="azuredatalakehook"> |
| <span id="id6"></span><h4>AzureDataLakeHook<a class="headerlink" href="#azuredatalakehook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.azure_data_lake_hook.</code><code class="descname">AzureDataLakeHook</code><span class="sig-paren">(</span><em>azure_data_lake_conn_id='azure_data_lake_default'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Interacts with Azure Data Lake.</p> |
| <p>Client ID and client secret should be in user and password parameters. |
| Tenant and account name should be set in the extra field as |
| {“tenant”: “&lt;TENANT&gt;”, “account_name”: “&lt;ACCOUNT_NAME&gt;”}.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – Reference to the Azure Data Lake connection.</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.check_for_file"> |
| <code class="descname">check_for_file</code><span class="sig-paren">(</span><em>file_path</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.check_for_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.check_for_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a file exists on Azure Data Lake.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>file_path</strong> (<em>str</em>) – Path and name of the file.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">True if the file exists, False otherwise.</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">bool</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.download_file"> |
| <code class="descname">download_file</code><span class="sig-paren">(</span><em>local_path</em>, <em>remote_path</em>, <em>nthreads=64</em>, <em>overwrite=True</em>, <em>buffersize=4194304</em>, <em>blocksize=4194304</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.download_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.download_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Download a file from Azure Data Lake.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>local_path</strong> (<em>str</em>) – local path. If downloading a single file, will write to this |
| specific file, unless it is an existing directory, in which case a file is |
| created within it. If downloading multiple files, this is the root |
| directory to write within. Will create directories as required.</li> |
| <li><strong>remote_path</strong> (<em>str</em>) – remote path/globstring to use to find remote files. |
| Recursive glob patterns using <cite>**</cite> are not supported.</li> |
| <li><strong>nthreads</strong> (<em>int</em>) – Number of threads to use. If None, uses the number of cores.</li> |
| <li><strong>overwrite</strong> (<em>bool</em>) – Whether to forcibly overwrite existing files/directories. |
| If False and remote path is a directory, will quit regardless if any files |
| would be overwritten or not. If True, only matching filenames are actually |
| overwritten.</li> |
| <li><strong>buffersize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for internal buffer. This block cannot be bigger than |
| a chunk and cannot be smaller than a block.</li> |
| <li><strong>blocksize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for a block. Within each chunk, we write a smaller |
| block for each API call. This block cannot be bigger than a chunk.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return a AzureDLFileSystem object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.upload_file"> |
| <code class="descname">upload_file</code><span class="sig-paren">(</span><em>local_path</em>, <em>remote_path</em>, <em>nthreads=64</em>, <em>overwrite=True</em>, <em>buffersize=4194304</em>, <em>blocksize=4194304</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/azure_data_lake_hook.html#AzureDataLakeHook.upload_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.azure_data_lake_hook.AzureDataLakeHook.upload_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a file to Azure Data Lake.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>local_path</strong> (<em>str</em>) – local path. Can be single file, directory (in which case, |
| upload recursively) or glob pattern. Recursive glob patterns using <cite>**</cite> |
| are not supported.</li> |
| <li><strong>remote_path</strong> (<em>str</em>) – Remote path to upload to; if multiple files, this is the |
| directory root to write within.</li> |
| <li><strong>nthreads</strong> (<em>int</em>) – Number of threads to use. If None, uses the number of cores.</li> |
| <li><strong>overwrite</strong> (<em>bool</em>) – Whether to forcibly overwrite existing files/directories. |
| If False and remote path is a directory, will quit regardless if any files |
| would be overwritten or not. If True, only matching filenames are actually |
| overwritten.</li> |
| <li><strong>buffersize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for internal buffer. This block cannot be bigger than |
| a chunk and cannot be smaller than a block.</li> |
| <li><strong>blocksize</strong> (<em>int</em>) – int [2**22] |
| Number of bytes for a block. Within each chunk, we write a smaller |
| block for each API call. This block cannot be bigger than a chunk.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="azuredatalakestoragelistoperator"> |
| <span id="id7"></span><h4>AzureDataLakeStorageListOperator<a class="headerlink" href="#azuredatalakestoragelistoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.adls_list_operator.</code><code class="descname">AzureDataLakeStorageListOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/adls_list_operator.html#AzureDataLakeStorageListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>List all files from the specified path</p> |
| <dl class="docutils"> |
| <dt>This operator returns a python list with the names of files which can be used by</dt> |
| <dd><cite>xcom</cite> in the downstream tasks.</dd> |
| </dl> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>path</strong> (<em>str</em>) – The Azure Data Lake path to find the objects. Supports glob |
| strings (templated)</li> |
| <li><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – The connection ID to use when |
| connecting to Azure Data Lake Storage.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following Operator would list all the Parquet files from <code class="docutils literal notranslate"><span class="pre">folder/output/</span></code> |
| folder in the specified ADLS account</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">adls_files</span> <span class="o">=</span> <span class="n">AzureDataLakeStorageListOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'adls_files'</span><span class="p">,</span> |
| <span class="n">path</span><span class="o">=</span><span class="s1">'folder/output/*.parquet'</span><span class="p">,</span> |
| <span class="n">azure_data_lake_conn_id</span><span class="o">=</span><span class="s1">'azure_data_lake_default'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="adlstogooglecloudstorageoperator"> |
| <span id="id8"></span><h4>AdlsToGoogleCloudStorageOperator<a class="headerlink" href="#adlstogooglecloudstorageoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.adls_to_gcs.AdlsToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.adls_to_gcs.</code><code class="descname">AdlsToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/adls_to_gcs.html#AdlsToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.adls_to_gcs.AdlsToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator" title="airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.adls_list_operator.AzureDataLakeStorageListOperator</span></code></a></p> |
| <p>Synchronizes an Azure Data Lake Storage path with a GCS bucket</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>src_adls</strong> (<em>str</em>) – The Azure Data Lake path to find the objects (templated)</li> |
| <li><strong>dest_gcs</strong> (<em>str</em>) – The Google Cloud Storage bucket and prefix to |
| store the objects. (templated)</li> |
| <li><strong>replace</strong> (<em>bool</em>) – If true, replaces same-named files in GCS</li> |
| <li><strong>azure_data_lake_conn_id</strong> (<em>str</em>) – The connection ID to use when |
| connecting to Azure Data Lake Storage.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>str</em>) – The connection ID to use when |
| connecting to Google Cloud Storage.</li> |
| <li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Examples</strong>:</dt> |
| <dd><p class="first">The following Operator would copy a single file named |
| <code class="docutils literal notranslate"><span class="pre">hello/world.avro</span></code> from ADLS to the GCS bucket <code class="docutils literal notranslate"><span class="pre">mybucket</span></code>. Its full |
| resulting gcs path will be <code class="docutils literal notranslate"><span class="pre">gs://mybucket/hello/world.avro</span></code></p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_single_file</span> <span class="o">=</span> <span class="n">AdlsToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'copy_single_file'</span><span class="p">,</span> |
| <span class="n">src_adls</span><span class="o">=</span><span class="s1">'hello/world.avro'</span><span class="p">,</span> |
| <span class="n">dest_gcs</span><span class="o">=</span><span class="s1">'gs://mybucket'</span><span class="p">,</span> |
| <span class="n">replace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> |
| <span class="n">azure_data_lake_conn_id</span><span class="o">=</span><span class="s1">'azure_data_lake_default'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'google_cloud_default'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The following Operator would copy all parquet files from ADLS |
| to the GCS bucket <code class="docutils literal notranslate"><span class="pre">mybucket</span></code>.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span> copy_all_files = AdlsToGoogleCloudStorageOperator( |
| task_id='copy_all_files', |
| src_adls='*.parquet', |
| dest_gcs='gs://mybucket', |
| replace=False, |
| azure_data_lake_conn_id='azure_data_lake_default', |
| google_cloud_storage_conn_id='google_cloud_default' |
| ) |
| |
| The following Operator would copy all parquet files from ADLS |
| path ``/hello/world`` to the GCS bucket ``mybucket``. :: |
| copy_world_files = AdlsToGoogleCloudStorageOperator( |
| task_id='copy_world_files', |
| src_adls='hello/world/*.parquet', |
| dest_gcs='gs://mybucket', |
| replace=False, |
| azure_data_lake_conn_id='azure_data_lake_default', |
| google_cloud_storage_conn_id='google_cloud_default' |
| ) |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| <div class="section" id="aws-amazon-web-services"> |
| <span id="aws"></span><h2>AWS: Amazon Web Services<a class="headerlink" href="#aws-amazon-web-services" title="Permalink to this headline">¶</a></h2> |
| <p>Airflow has extensive support for Amazon Web Services. But note that the Hooks, Sensors and |
| Operators are in the contrib section.</p> |
| <div class="section" id="aws-emr"> |
| <h3>AWS EMR<a class="headerlink" href="#aws-emr" title="Permalink to this headline">¶</a></h3> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#emraddstepsoperator"><span class="std std-ref">EmrAddStepsOperator</span></a> : Adds steps to an existing EMR JobFlow.</li> |
| <li><a class="reference internal" href="#emrcreatejobflowoperator"><span class="std std-ref">EmrCreateJobFlowOperator</span></a> : Creates an EMR JobFlow, reading the config from the EMR connection.</li> |
| <li><a class="reference internal" href="#emrterminatejobflowoperator"><span class="std std-ref">EmrTerminateJobFlowOperator</span></a> : Terminates an EMR JobFlow.</li> |
| <li><a class="reference internal" href="#emrhook"><span class="std std-ref">EmrHook</span></a> : Interact with AWS EMR.</li> |
| </ul> |
| <div class="section" id="emraddstepsoperator"> |
| <span id="id9"></span><h4>EmrAddStepsOperator<a class="headerlink" href="#emraddstepsoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_add_steps_operator.</code><code class="descname">EmrAddStepsOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_add_steps_operator.html#EmrAddStepsOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_add_steps_operator.EmrAddStepsOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>An operator that adds steps to an existing EMR job_flow.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_flow_id</strong> (<em>str</em>) – id of the JobFlow to add steps to. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li> |
| <li><strong>steps</strong> (<em>list</em>) – boto3 style steps to be added to the jobflow. (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="emrcreatejobflowoperator"> |
| <span id="id10"></span><h4>EmrCreateJobFlowOperator<a class="headerlink" href="#emrcreatejobflowoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_create_job_flow_operator.</code><code class="descname">EmrCreateJobFlowOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_create_job_flow_operator.html#EmrCreateJobFlowOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_create_job_flow_operator.EmrCreateJobFlowOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates an EMR JobFlow, reading the config from the EMR connection. |
| A dictionary of JobFlow overrides can be passed that override |
| the config from the connection.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li> |
| <li><strong>emr_conn_id</strong> (<em>str</em>) – emr connection to use</li> |
| <li><strong>job_flow_overrides</strong> (<em>dict</em>) – boto3 style arguments to override |
| emr_connection extra. (templated)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="emrterminatejobflowoperator"> |
| <span id="id11"></span><h4>EmrTerminateJobFlowOperator<a class="headerlink" href="#emrterminatejobflowoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.emr_terminate_job_flow_operator.</code><code class="descname">EmrTerminateJobFlowOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/emr_terminate_job_flow_operator.html#EmrTerminateJobFlowOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.emr_terminate_job_flow_operator.EmrTerminateJobFlowOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator to terminate EMR JobFlows.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_flow_id</strong> (<em>str</em>) – id of the JobFlow to terminate. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – aws connection to use</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="emrhook"> |
| <span id="id12"></span><h4>EmrHook<a class="headerlink" href="#emrhook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.emr_hook.EmrHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.emr_hook.</code><code class="descname">EmrHook</code><span class="sig-paren">(</span><em>emr_conn_id=None</em>, <em>region_name=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/emr_hook.html#EmrHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.emr_hook.EmrHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS EMR. emr_conn_id is only necessary for using the |
| create_job_flow method.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.emr_hook.EmrHook.create_job_flow"> |
| <code class="descname">create_job_flow</code><span class="sig-paren">(</span><em>job_flow_overrides</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/emr_hook.html#EmrHook.create_job_flow"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.emr_hook.EmrHook.create_job_flow" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a job flow using the config from the EMR connection. |
| Keys of the json extra hash may have the arguments of the boto3 |
| run_job_flow method. |
| Overrides for this config may be passed as the job_flow_overrides.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="aws-s3"> |
| <h3>AWS S3<a class="headerlink" href="#aws-s3" title="Permalink to this headline">¶</a></h3> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#s3hook"><span class="std std-ref">S3Hook</span></a> : Interact with AWS S3.</li> |
| <li><a class="reference internal" href="#s3filetransformoperator"><span class="std std-ref">S3FileTransformOperator</span></a> : Copies data from a source S3 location to a temporary location on the local filesystem.</li> |
| <li><a class="reference internal" href="#s3listoperator"><span class="std std-ref">S3ListOperator</span></a> : Lists the files matching a key prefix from a S3 location.</li> |
| <li><a class="reference internal" href="#s3togooglecloudstorageoperator"><span class="std std-ref">S3ToGoogleCloudStorageOperator</span></a> : Syncs an S3 location with a Google Cloud Storage bucket.</li> |
| <li><a class="reference internal" href="#s3togooglecloudstoragetransferoperator"><span class="std std-ref">S3ToGoogleCloudStorageTransferOperator</span></a> : Syncs an S3 bucket with a Google Cloud Storage bucket using the GCP Storage Transfer Service.</li> |
| <li><a class="reference internal" href="#s3tohivetransfer"><span class="std std-ref">S3ToHiveTransfer</span></a> : Moves data from S3 to Hive. The operator downloads a file from S3, stores the file locally before loading it into a Hive table.</li> |
| </ul> |
| <div class="section" id="s3hook"> |
| <span id="id13"></span><h4>S3Hook<a class="headerlink" href="#s3hook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.hooks.S3_hook.S3Hook"> |
| <em class="property">class </em><code class="descclassname">airflow.hooks.S3_hook.</code><code class="descname">S3Hook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em>, <em>verify=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS S3, using the boto3 library.</p> |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_bucket"> |
| <code class="descname">check_for_bucket</code><span class="sig-paren">(</span><em>bucket_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if bucket_name exists.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_key"> |
| <code class="descname">check_for_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if a key exists in a bucket</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_prefix"> |
| <code class="descname">check_for_prefix</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix</em>, <em>delimiter</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_prefix"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_prefix" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks that a prefix exists in a bucket</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>prefix</strong> (<em>str</em>) – a key prefix</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.check_for_wildcard_key"> |
| <code class="descname">check_for_wildcard_key</code><span class="sig-paren">(</span><em>wildcard_key</em>, <em>bucket_name=None</em>, <em>delimiter=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.check_for_wildcard_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.check_for_wildcard_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks that a key matching a wildcard expression exists in a bucket</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>wildcard_key</strong> (<em>str</em>) – the path to the key</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.copy_object"> |
| <code class="descname">copy_object</code><span class="sig-paren">(</span><em>source_bucket_key</em>, <em>dest_bucket_key</em>, <em>source_bucket_name=None</em>, <em>dest_bucket_name=None</em>, <em>source_version_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.copy_object"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.copy_object" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a copy of an object that is already stored in S3.</p> |
| <p>Note: the S3 connection used here needs to have access to both |
| source and destination bucket/key.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket_key</strong> (<em>str</em>) – <p>The key of the source object.</p> |
| <p>It can be either full s3:// style url or relative path from root level.</p> |
| <p>When it’s specified as a full s3:// url, please omit source_bucket_name.</p> |
| </li> |
| <li><strong>dest_bucket_key</strong> (<em>str</em>) – <p>The key of the object to copy to.</p> |
| <p>The convention to specify <cite>dest_bucket_key</cite> is the same |
| as <cite>source_bucket_key</cite>.</p> |
| </li> |
| <li><strong>source_bucket_name</strong> (<em>str</em>) – <p>Name of the S3 bucket where the source object is in.</p> |
| <p>It should be omitted when <cite>source_bucket_key</cite> is provided as a full s3:// url.</p> |
| </li> |
| <li><strong>dest_bucket_name</strong> (<em>str</em>) – <p>Name of the S3 bucket to where the object is copied.</p> |
| <p>It should be omitted when <cite>dest_bucket_key</cite> is provided as a full s3:// url.</p> |
| </li> |
| <li><strong>source_version_id</strong> (<em>str</em>) – Version ID of the source object (OPTIONAL)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.create_bucket"> |
| <code class="descname">create_bucket</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>region_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.create_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.create_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates an Amazon S3 bucket.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – The name of the bucket</li> |
| <li><strong>region_name</strong> (<em>str</em>) – The name of the aws region in which to create the bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.delete_objects"> |
| <code class="descname">delete_objects</code><span class="sig-paren">(</span><em>bucket</em>, <em>keys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.delete_objects"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.delete_objects" title="Permalink to this definition">¶</a></dt> |
| <dd><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>str</em>) – Name of the bucket in which you are going to delete object(s)</li> |
| <li><strong>keys</strong> (<em>str</em><em> or </em><em>list</em>) – <p>The key(s) to delete from S3 bucket.</p> |
| <p>When <code class="docutils literal notranslate"><span class="pre">keys</span></code> is a string, it’s supposed to be the key name of |
| the single object to delete.</p> |
| <p>When <code class="docutils literal notranslate"><span class="pre">keys</span></code> is a list, it’s supposed to be the list of the |
| keys to delete.</p> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.get_bucket"> |
| <code class="descname">get_bucket</code><span class="sig-paren">(</span><em>bucket_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boto3.S3.Bucket object</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.get_key"> |
| <code class="descname">get_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boto3.s3.Object</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>str</em>) – the path to the key</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.get_wildcard_key"> |
| <code class="descname">get_wildcard_key</code><span class="sig-paren">(</span><em>wildcard_key</em>, <em>bucket_name=None</em>, <em>delimiter=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.get_wildcard_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.get_wildcard_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a boto3.s3.Object object matching the wildcard expression</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>wildcard_key</strong> (<em>str</em>) – the path to the key</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.list_keys"> |
| <code class="descname">list_keys</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.list_keys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.list_keys" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lists keys in a bucket under prefix and not containing delimiter</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>prefix</strong> (<em>str</em>) – a key prefix</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li> |
| <li><strong>page_size</strong> (<em>int</em>) – pagination size</li> |
| <li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.list_prefixes"> |
| <code class="descname">list_prefixes</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>prefix=''</em>, <em>delimiter=''</em>, <em>page_size=None</em>, <em>max_items=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.list_prefixes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.list_prefixes" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lists prefixes in a bucket under prefix</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>str</em>) – the name of the bucket</li> |
| <li><strong>prefix</strong> (<em>str</em>) – a key prefix</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – the delimiter marks key hierarchy.</li> |
| <li><strong>page_size</strong> (<em>int</em>) – pagination size</li> |
| <li><strong>max_items</strong> (<em>int</em>) – maximum items to return</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_bytes"> |
| <code class="descname">load_bytes</code><span class="sig-paren">(</span><em>bytes_data</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_bytes"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_bytes" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads bytes to S3</p> |
| <p>This is provided as a convenience to drop a string in S3. It uses the |
| boto infrastructure to ship a file to s3.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bytes_data</strong> (<em>bytes</em>) – bytes to set as content for the key.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key |
| if it already exists</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side |
| by S3 and will be stored in an encrypted form while at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_file"> |
| <code class="descname">load_file</code><span class="sig-paren">(</span><em>filename</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_file"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_file" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a local file to S3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>filename</strong> (<em>str</em>) – name of the file to load.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key |
| if it already exists. If replace is False and the key exists, an |
| error will be raised.</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side |
| by S3 and will be stored in an encrypted form while at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_file_obj"> |
| <code class="descname">load_file_obj</code><span class="sig-paren">(</span><em>file_obj</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_file_obj"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_file_obj" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a file object to S3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>file_obj</strong> (<em>file-like object</em>) – The file-like object to set as the content for the S3 key.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag that indicates whether to overwrite the key |
| if it already exists.</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, S3 encrypts the file on the server, |
| and the file is stored in encrypted form at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.load_string"> |
| <code class="descname">load_string</code><span class="sig-paren">(</span><em>string_data</em>, <em>key</em>, <em>bucket_name=None</em>, <em>replace=False</em>, <em>encrypt=False</em>, <em>encoding='utf-8'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.load_string"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.load_string" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Loads a string to S3</p> |
| <p>This is provided as a convenience to drop a string in S3. It uses the |
| boto infrastructure to ship a file to s3.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>string_data</strong> (<em>str</em>) – string to set as content for the key.</li> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which to store the file</li> |
| <li><strong>replace</strong> (<em>bool</em>) – A flag to decide whether or not to overwrite the key |
| if it already exists</li> |
| <li><strong>encrypt</strong> (<em>bool</em>) – If True, the file will be encrypted on the server-side |
| by S3 and will be stored in an encrypted form while at rest in S3.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.read_key"> |
| <code class="descname">read_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.read_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.read_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Reads a key from S3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.hooks.S3_hook.S3Hook.select_key"> |
| <code class="descname">select_key</code><span class="sig-paren">(</span><em>key</em>, <em>bucket_name=None</em>, <em>expression='SELECT * FROM S3Object'</em>, <em>expression_type='SQL'</em>, <em>input_serialization=None</em>, <em>output_serialization=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/hooks/S3_hook.html#S3Hook.select_key"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.hooks.S3_hook.S3Hook.select_key" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Reads a key with S3 Select.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>key</strong> (<em>str</em>) – S3 key that will point to the file</li> |
| <li><strong>bucket_name</strong> (<em>str</em>) – Name of the bucket in which the file is stored</li> |
| <li><strong>expression</strong> (<em>str</em>) – S3 Select expression</li> |
| <li><strong>expression_type</strong> (<em>str</em>) – S3 Select expression type</li> |
| <li><strong>input_serialization</strong> (<em>dict</em>) – S3 Select input data serialization format</li> |
| <li><strong>output_serialization</strong> (<em>dict</em>) – S3 Select output data serialization format</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">retrieved subset of original data by S3 Select</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">str</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more details about S3 Select parameters: |
| <a class="reference external" href="http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.select_object_content">http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.select_object_content</a></p> |
| </div> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="s3filetransformoperator"> |
| <span id="id14"></span><h4>S3FileTransformOperator<a class="headerlink" href="#s3filetransformoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.operators.s3_file_transform_operator.S3FileTransformOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.s3_file_transform_operator.</code><code class="descname">S3FileTransformOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_file_transform_operator.html#S3FileTransformOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_file_transform_operator.S3FileTransformOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies data from a source S3 location to a temporary location on the |
| local filesystem. Runs a transformation on this file as specified by |
| the transformation script and uploads the output to a destination S3 |
| location.</p> |
| <p>The locations of the source and the destination files in the local |
| filesystem are provided as the first and second arguments to the |
| transformation script. The transformation script is expected to read the |
| data from source, transform it and write the output to the local |
| destination file. The operator then takes over control and uploads the |
| local destination file to S3.</p> |
| <p>S3 Select is also available to filter the source contents. Users can |
| omit the transformation script if S3 Select expression is specified.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_s3_key</strong> (<em>str</em>) – The key to be retrieved from S3. (templated)</li> |
| <li><strong>source_aws_conn_id</strong> (<em>str</em>) – source s3 connection</li> |
| <li><strong>source_verify</strong> (<em>bool</em><em> or </em><em>str</em>) – <p>Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values:</p> |
| <ul> |
| <li><dl class="first docutils"> |
| <dt><code class="docutils literal notranslate"><span class="pre">False</span></code>: do not validate SSL certificates. SSL will still be used</dt> |
| <dd>(unless use_ssl is False), but SSL certificates will not be |
| verified.</dd> |
| </dl> |
| </li> |
| <li><dl class="first docutils"> |
| <dt><code class="docutils literal notranslate"><span class="pre">path/to/cert/bundle.pem</span></code>: A filename of the CA cert bundle to use.</dt> |
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| <p>This is also applicable to <code class="docutils literal notranslate"><span class="pre">dest_verify</span></code>.</p> |
| </li> |
| <li><strong>dest_s3_key</strong> (<em>str</em>) – The key to be written to S3. (templated)</li> |
| <li><strong>dest_aws_conn_id</strong> (<em>str</em>) – destination s3 connection</li> |
| <li><strong>replace</strong> (<em>bool</em>) – Replace dest S3 key if it already exists</li> |
| <li><strong>transform_script</strong> (<em>str</em>) – location of the executable transformation script</li> |
| <li><strong>select_expression</strong> (<em>str</em>) – S3 Select expression</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="s3listoperator"> |
| <span id="id15"></span><h4>S3ListOperator<a class="headerlink" href="#s3listoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.s3_list_operator.S3ListOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_list_operator.</code><code class="descname">S3ListOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_list_operator.html#S3ListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>List all objects from the bucket with the given string prefix in name.</p> |
| <p>This operator returns a python list with the name of objects which can be |
| used by <cite>xcom</cite> in the downstream task.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The S3 bucket where to find the objects. (templated)</li> |
| <li><strong>prefix</strong> (<em>string</em>) – Prefix string to filter the objects whose names begin with |
| such prefix. (templated)</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – the delimiter marks key hierarchy. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – The connection ID to use when connecting to S3 storage.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Parameter verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt> |
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following operator would list all the files |
| (excluding subfolders) from the S3 |
| <code class="docutils literal notranslate"><span class="pre">customers/2018/04/</span></code> key in the <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">s3_file</span> <span class="o">=</span> <span class="n">S3ListOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'list_s3_files'</span><span class="p">,</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">prefix</span><span class="o">=</span><span class="s1">'customers/2018/04/'</span><span class="p">,</span> |
| <span class="n">delimiter</span><span class="o">=</span><span class="s1">'/'</span><span class="p">,</span> |
| <span class="n">aws_conn_id</span><span class="o">=</span><span class="s1">'aws_customers_conn'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="s3togooglecloudstorageoperator"> |
| <span id="id16"></span><h4>S3ToGoogleCloudStorageOperator<a class="headerlink" href="#s3togooglecloudstorageoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.s3_to_gcs_operator.</code><code class="descname">S3ToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/s3_to_gcs_operator.html#S3ToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.s3_to_gcs_operator.S3ToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.s3_list_operator.S3ListOperator" title="airflow.contrib.operators.s3_list_operator.S3ListOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.s3_list_operator.S3ListOperator</span></code></a></p> |
| <p>Synchronizes an S3 key, possibly a prefix, with a Google Cloud Storage |
| destination path.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The S3 bucket where to find the objects. (templated)</li> |
| <li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose names begin with |
| such prefix. (templated)</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – the delimiter marks key hierarchy. (templated)</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – The source S3 connection</li> |
| <li><strong>dest_gcs_conn_id</strong> (<em>string</em>) – The destination connection ID to use |
| when connecting to Google Cloud Storage.</li> |
| <li><strong>dest_gcs</strong> (<em>string</em>) – The destination Google Cloud Storage bucket and prefix |
| where you want to store the files. (templated)</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>replace</strong> (<em>bool</em>) – Whether you want to replace existing destination files |
| or not.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Parameter verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt> |
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">s3_to_gcs_op</span> <span class="o">=</span> <span class="n">S3ToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'s3_to_gcs_example'</span><span class="p">,</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="s1">'my-s3-bucket'</span><span class="p">,</span> |
| <span class="n">prefix</span><span class="o">=</span><span class="s1">'data/customers-201804'</span><span class="p">,</span> |
| <span class="n">dest_gcs_conn_id</span><span class="o">=</span><span class="s1">'google_cloud_default'</span><span class="p">,</span> |
| <span class="n">dest_gcs</span><span class="o">=</span><span class="s1">'gs://my.gcs.bucket/some/customers/'</span><span class="p">,</span> |
| <span class="n">replace</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>Note that <code class="docutils literal notranslate"><span class="pre">bucket</span></code>, <code class="docutils literal notranslate"><span class="pre">prefix</span></code>, <code class="docutils literal notranslate"><span class="pre">delimiter</span></code> and <code class="docutils literal notranslate"><span class="pre">dest_gcs</span></code> are |
| templated, so you can use variables in them if you wish.</p> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="s3togooglecloudstoragetransferoperator"> |
| <span id="id17"></span><h4>S3ToGoogleCloudStorageTransferOperator<a class="headerlink" href="#s3togooglecloudstoragetransferoperator" title="Permalink to this headline">¶</a></h4> |
| </div> |
| <div class="section" id="s3tohivetransfer"> |
| <span id="id18"></span><h4>S3ToHiveTransfer<a class="headerlink" href="#s3tohivetransfer" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.operators.s3_to_hive_operator.S3ToHiveTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.s3_to_hive_operator.</code><code class="descname">S3ToHiveTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_hive_operator.html#S3ToHiveTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_hive_operator.S3ToHiveTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Moves data from S3 to Hive. The operator downloads a file from S3, |
| stores the file locally before loading it into a Hive table. |
| If the <code class="docutils literal notranslate"><span class="pre">create</span></code> or <code class="docutils literal notranslate"><span class="pre">recreate</span></code> arguments are set to <code class="docutils literal notranslate"><span class="pre">True</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">TABLE</span></code> and <code class="docutils literal notranslate"><span class="pre">DROP</span> <span class="pre">TABLE</span></code> statements are generated. |
| Hive data types are inferred from the cursor’s metadata.</p> |
| <p>Note that the table generated in Hive uses <code class="docutils literal notranslate"><span class="pre">STORED</span> <span class="pre">AS</span> <span class="pre">textfile</span></code> |
| which isn’t the most efficient serialization format. If a |
| large amount of data is loaded and/or if the table gets |
| queried considerably, you may want to use this operator only to |
| stage the data into a temporary table before loading it into its |
| final destination using a <code class="docutils literal notranslate"><span class="pre">HiveOperator</span></code>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>s3_key</strong> (<em>str</em>) – The key to be retrieved from S3. (templated)</li> |
| <li><strong>field_dict</strong> (<em>dict</em>) – A dictionary of the fields name in the file |
| as keys and their Hive types as values</li> |
| <li><strong>hive_table</strong> (<em>str</em>) – target Hive table, use dot notation to target a |
| specific database. (templated)</li> |
| <li><strong>create</strong> (<em>bool</em>) – whether to create the table if it doesn’t exist</li> |
| <li><strong>recreate</strong> (<em>bool</em>) – whether to drop and recreate the table at every |
| execution</li> |
| <li><strong>partition</strong> (<em>dict</em>) – target partition as a dict of partition columns |
| and values. (templated)</li> |
| <li><strong>headers</strong> (<em>bool</em>) – whether the file contains column names on the first |
| line</li> |
| <li><strong>check_headers</strong> (<em>bool</em>) – whether the column names on the first line should be |
| checked against the keys of field_dict</li> |
| <li><strong>wildcard_match</strong> (<em>bool</em>) – whether the s3_key should be interpreted as a Unix |
| wildcard pattern</li> |
| <li><strong>delimiter</strong> (<em>str</em>) – field delimiter in the file</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – source s3 connection</li> |
| <li><strong>hive_cli_conn_id</strong> (<em>str</em>) – destination hive connection</li> |
| <li><strong>input_compressed</strong> (<em>bool</em>) – Boolean to determine if file decompression is |
| required to process headers</li> |
| <li><strong>tblproperties</strong> (<em>dict</em>) – TBLPROPERTIES of the hive table being created</li> |
| <li><strong>select_expression</strong> (<em>str</em>) – S3 Select expression</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Parameter verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt> |
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="aws-ec2-container-service"> |
| <h3>AWS EC2 Container Service<a class="headerlink" href="#aws-ec2-container-service" title="Permalink to this headline">¶</a></h3> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#ecsoperator"><span class="std std-ref">ECSOperator</span></a> : Execute a task on AWS EC2 Container Service.</li> |
| </ul> |
| <div class="section" id="ecsoperator"> |
| <span id="id19"></span><h4>ECSOperator<a class="headerlink" href="#ecsoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.ecs_operator.ECSOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.ecs_operator.</code><code class="descname">ECSOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/ecs_operator.html#ECSOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.ecs_operator.ECSOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute a task on AWS EC2 Container Service</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>task_definition</strong> (<em>str</em>) – the task definition name on EC2 Container Service</li> |
| <li><strong>cluster</strong> (<em>str</em>) – the cluster name on EC2 Container Service</li> |
| <li><strong>overrides</strong> (<em>dict</em>) – the same parameter that boto3 will receive (templated): |
| <a class="reference external" href="http://boto3.readthedocs.org/en/latest/reference/services/ecs.html#ECS.Client.run_task">http://boto3.readthedocs.org/en/latest/reference/services/ecs.html#ECS.Client.run_task</a></li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – connection id of AWS credentials / region name. If None, |
| credential boto3 strategy will be used |
| (<a class="reference external" href="http://boto3.readthedocs.io/en/latest/guide/configuration.html">http://boto3.readthedocs.io/en/latest/guide/configuration.html</a>).</li> |
| <li><strong>region_name</strong> (<em>str</em>) – region name to use in AWS Hook. |
| Override the region_name in connection (if provided)</li> |
| <li><strong>launch_type</strong> (<em>str</em>) – the launch type on which to run your task (‘EC2’ or ‘FARGATE’)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="aws-batch-service"> |
| <h3>AWS Batch Service<a class="headerlink" href="#aws-batch-service" title="Permalink to this headline">¶</a></h3> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#awsbatchoperator"><span class="std std-ref">AWSBatchOperator</span></a> : Execute a task on AWS Batch Service.</li> |
| </ul> |
| <div class="section" id="awsbatchoperator"> |
| <span id="id20"></span><h4>AWSBatchOperator<a class="headerlink" href="#awsbatchoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.awsbatch_operator.AWSBatchOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.awsbatch_operator.</code><code class="descname">AWSBatchOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/awsbatch_operator.html#AWSBatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.awsbatch_operator.AWSBatchOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute a job on AWS Batch Service</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>job_name</strong> (<em>str</em>) – the name for the job that will run on AWS Batch</li> |
| <li><strong>job_definition</strong> (<em>str</em>) – the job definition name on AWS Batch</li> |
| <li><strong>job_queue</strong> (<em>str</em>) – the queue name on AWS Batch</li> |
| <li><strong>overrides</strong> (<em>dict</em>) – the same parameter that boto3 will receive on |
| containerOverrides (templated): |
| <a class="reference external" href="http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job">http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job</a></li> |
| <li><strong>max_retries</strong> (<em>int</em>) – exponential backoff retries while waiter is not |
| merged, 4200 = 48 hours</li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – connection id of AWS credentials / region name. If None, |
| credential boto3 strategy will be used |
| (<a class="reference external" href="http://boto3.readthedocs.io/en/latest/guide/configuration.html">http://boto3.readthedocs.io/en/latest/guide/configuration.html</a>).</li> |
| <li><strong>region_name</strong> (<em>str</em>) – region name to use in AWS Hook. |
| Override the region_name in connection (if provided)</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="aws-redshift"> |
| <h3>AWS RedShift<a class="headerlink" href="#aws-redshift" title="Permalink to this headline">¶</a></h3> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#awsredshiftclustersensor"><span class="std std-ref">AwsRedshiftClusterSensor</span></a> : Waits for a Redshift cluster to reach a specific status.</li> |
| <li><a class="reference internal" href="#redshifthook"><span class="std std-ref">RedshiftHook</span></a> : Interact with AWS Redshift, using the boto3 library.</li> |
| <li><a class="reference internal" href="#redshifttos3transfer"><span class="std std-ref">RedshiftToS3Transfer</span></a> : Executes an unload command to S3 as CSV with or without headers.</li> |
| <li><a class="reference internal" href="#s3toredshifttransfer"><span class="std std-ref">S3ToRedshiftTransfer</span></a> : Executes a copy command from S3 as CSV with or without headers.</li> |
| </ul> |
| <div class="section" id="awsredshiftclustersensor"> |
| <span id="id21"></span><h4>AwsRedshiftClusterSensor<a class="headerlink" href="#awsredshiftclustersensor" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.aws_redshift_cluster_sensor.</code><code class="descname">AwsRedshiftClusterSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_redshift_cluster_sensor.html#AwsRedshiftClusterSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.sensors.base_sensor_operator.BaseSensorOperator" title="airflow.sensors.base_sensor_operator.BaseSensorOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.sensors.base_sensor_operator.BaseSensorOperator</span></code></a></p> |
| <p>Waits for a Redshift cluster to reach a specific status.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – The identifier for the cluster being pinged.</li> |
| <li><strong>target_status</strong> (<em>str</em>) – The cluster status desired.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor.poke"> |
| <code class="descname">poke</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/aws_redshift_cluster_sensor.html#AwsRedshiftClusterSensor.poke"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.aws_redshift_cluster_sensor.AwsRedshiftClusterSensor.poke" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Function that sensors deriving this class should
| override.</p>
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="redshifthook"> |
| <span id="id22"></span><h4>RedshiftHook<a class="headerlink" href="#redshifthook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.redshift_hook.</code><code class="descname">RedshiftHook</code><span class="sig-paren">(</span><em>aws_conn_id='aws_default'</em>, <em>verify=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with AWS Redshift, using the boto3 library</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.cluster_status"> |
| <code class="descname">cluster_status</code><span class="sig-paren">(</span><em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.cluster_status"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.cluster_status" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return status of a cluster</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.create_cluster_snapshot"> |
| <code class="descname">create_cluster_snapshot</code><span class="sig-paren">(</span><em>snapshot_identifier</em>, <em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.create_cluster_snapshot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.create_cluster_snapshot" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a snapshot of a cluster</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>snapshot_identifier</strong> (<em>str</em>) – unique identifier for a snapshot of a cluster</li> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.delete_cluster"> |
| <code class="descname">delete_cluster</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>skip_final_cluster_snapshot=True</em>, <em>final_cluster_snapshot_identifier=''</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.delete_cluster"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.delete_cluster" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete a cluster and optionally create a snapshot</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li> |
| <li><strong>skip_final_cluster_snapshot</strong> (<em>bool</em>) – determines cluster snapshot creation</li> |
| <li><strong>final_cluster_snapshot_identifier</strong> (<em>str</em>) – name of final cluster snapshot</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.describe_cluster_snapshots"> |
| <code class="descname">describe_cluster_snapshots</code><span class="sig-paren">(</span><em>cluster_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.describe_cluster_snapshots"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.describe_cluster_snapshots" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets a list of snapshots for a cluster</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.redshift_hook.RedshiftHook.restore_from_cluster_snapshot"> |
| <code class="descname">restore_from_cluster_snapshot</code><span class="sig-paren">(</span><em>cluster_identifier</em>, <em>snapshot_identifier</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/redshift_hook.html#RedshiftHook.restore_from_cluster_snapshot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.redshift_hook.RedshiftHook.restore_from_cluster_snapshot" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Restores a cluster from its snapshot</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_identifier</strong> (<em>str</em>) – unique identifier of a cluster</li> |
| <li><strong>snapshot_identifier</strong> (<em>str</em>) – unique identifier for a snapshot of a cluster</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="redshifttos3transfer"> |
| <span id="id23"></span><h4>RedshiftToS3Transfer<a class="headerlink" href="#redshifttos3transfer" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.redshift_to_s3_operator.</code><code class="descname">RedshiftToS3Transfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/redshift_to_s3_operator.html#RedshiftToS3Transfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes an UNLOAD command to S3 as a CSV with headers</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>schema</strong> (<em>string</em>) – reference to a specific schema in redshift database</li> |
| <li><strong>table</strong> (<em>string</em>) – reference to a specific table in redshift database</li> |
| <li><strong>s3_bucket</strong> (<em>string</em>) – reference to a specific S3 bucket</li> |
| <li><strong>s3_key</strong> (<em>string</em>) – reference to a specific S3 key</li> |
| <li><strong>redshift_conn_id</strong> (<em>string</em>) – reference to a specific redshift database</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – reference to a specific S3 connection</li> |
| <li><strong>unload_options</strong> (<em>list</em>) – reference to a list of UNLOAD options</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Parame verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt>
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="s3toredshifttransfer"> |
| <span id="id24"></span><h4>S3ToRedshiftTransfer<a class="headerlink" href="#s3toredshifttransfer" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer"> |
| <em class="property">class </em><code class="descclassname">airflow.operators.s3_to_redshift_operator.</code><code class="descname">S3ToRedshiftTransfer</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/operators/s3_to_redshift_operator.html#S3ToRedshiftTransfer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.operators.s3_to_redshift_operator.S3ToRedshiftTransfer" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes a COPY command to load files from S3 to Redshift</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>schema</strong> (<em>string</em>) – reference to a specific schema in redshift database</li> |
| <li><strong>table</strong> (<em>string</em>) – reference to a specific table in redshift database</li> |
| <li><strong>s3_bucket</strong> (<em>string</em>) – reference to a specific S3 bucket</li> |
| <li><strong>s3_key</strong> (<em>string</em>) – reference to a specific S3 key</li> |
| <li><strong>redshift_conn_id</strong> (<em>string</em>) – reference to a specific redshift database</li> |
| <li><strong>aws_conn_id</strong> (<em>string</em>) – reference to a specific S3 connection</li> |
| <li><strong>copy_options</strong> (<em>list</em>) – reference to a list of COPY options</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Parame verify:</th><td class="field-body"><p class="first">Whether or not to verify SSL certificates for S3 connection. |
| By default SSL certificates are verified. |
| You can provide the following values: |
| - False: do not validate SSL certificates. SSL will still be used</p> |
| <blockquote> |
| <div><p>(unless use_ssl is False), but SSL certificates will not be |
| verified.</p> |
| </div></blockquote> |
| <ul class="last simple"> |
| <li><dl class="first docutils"> |
| <dt>path/to/cert/bundle.pem: A filename of the CA cert bundle to use.</dt>
| <dd>You can specify this argument if you want to use a different |
| CA cert bundle than the one used by botocore.</dd> |
| </dl> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="amazon-sagemaker"> |
| <h3>Amazon SageMaker<a class="headerlink" href="#amazon-sagemaker" title="Permalink to this headline">¶</a></h3> |
| <p>For more instructions on using Amazon SageMaker in Airflow, please see <a class="reference external" href="https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/workflow/README.rst">the SageMaker Python SDK README</a>.</p> |
| <ul class="simple"> |
| <li><span class="xref std std-ref">SageMakerHook</span> : Interact with Amazon SageMaker.</li> |
| <li><span class="xref std std-ref">SageMakerTrainingOperator</span> : Create a SageMaker training job.</li> |
| <li><span class="xref std std-ref">SageMakerTuningOperator</span> : Create a SageMaker tuning job.</li> |
| <li><span class="xref std std-ref">SageMakerModelOperator</span> : Create a SageMaker model.</li> |
| <li><span class="xref std std-ref">SageMakerTransformOperator</span> : Create a SageMaker transform job.</li> |
| <li><span class="xref std std-ref">SageMakerEndpointConfigOperator</span> : Create a SageMaker endpoint config.</li> |
| <li><span class="xref std std-ref">SageMakerEndpointOperator</span> : Create a SageMaker endpoint.</li> |
| </ul> |
| <div class="section" id="sagemakerhook"> |
| <span id="id25"></span><h4>SageMakerHook<a class="headerlink" href="#sagemakerhook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.sagemaker_hook.</code><code class="descname">SageMakerHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with Amazon SageMaker.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_s3_url"> |
| <code class="descname">check_s3_url</code><span class="sig-paren">(</span><em>s3url</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_s3_url"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_s3_url" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if an S3 URL exists</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>s3url</strong> (<em>str</em>) – S3 url</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">bool</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_status"> |
| <code class="descname">check_status</code><span class="sig-paren">(</span><em>job_name</em>, <em>key</em>, <em>describe_function</em>, <em>check_interval</em>, <em>max_ingestion_time</em>, <em>non_terminal_states=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_status"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_status" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check status of a SageMaker job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>job_name</strong> (<em>str</em>) – name of the job to check status</li> |
| <li><strong>key</strong> (<em>str</em>) – the key of the response dict |
| that points to the state</li> |
| <li><strong>describe_function</strong> (<em>python callable</em>) – the function used to retrieve the status</li> |
| <li><strong>args</strong> – the arguments for the function</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| <li><strong>non_terminal_states</strong> (<em>set</em>) – the set of nonterminal states</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">response of describe call after job is done</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_config"> |
| <code class="descname">check_training_config</code><span class="sig-paren">(</span><em>training_config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_training_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_config" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a training configuration is valid</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>training_config</strong> (<em>dict</em>) – training_config</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_status_with_log"> |
| <code class="descname">check_training_status_with_log</code><span class="sig-paren">(</span><em>job_name</em>, <em>non_terminal_states</em>, <em>failed_states</em>, <em>wait_for_completion</em>, <em>check_interval</em>, <em>max_ingestion_time</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_training_status_with_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_training_status_with_log" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Display the logs for a given training job, optionally tailing them until the |
| job is complete.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>job_name</strong> (<em>str</em>) – name of the training job to check status and display logs for</li> |
| <li><strong>non_terminal_states</strong> (<em>set</em>) – the set of non-terminal states</li>
| <li><strong>failed_states</strong> (<em>set</em>) – the set of failed states</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether to keep looking for new log entries |
| until the job completes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – The interval in seconds between polling for new log entries and job completion</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_tuning_config"> |
| <code class="descname">check_tuning_config</code><span class="sig-paren">(</span><em>tuning_config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_tuning_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.check_tuning_config" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Check if a tuning configuration is valid</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>tuning_config</strong> (<em>dict</em>) – tuning_config</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.configure_s3_resources"> |
| <code class="descname">configure_s3_resources</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.configure_s3_resources"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.configure_s3_resources" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Extract the S3 operations from the configuration and execute them.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – config of SageMaker operation</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint"> |
| <code class="descname">create_endpoint</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_endpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create an endpoint</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for endpoint</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to endpoint creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint_config"> |
| <code class="descname">create_endpoint_config</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_endpoint_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_endpoint_config" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create an endpoint config</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – the config for endpoint-config</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A response to endpoint config creation</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_model"> |
| <code class="descname">create_model</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_model" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a model job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – the config for model</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A response to model creation</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_training_job"> |
| <code class="descname">create_training_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>print_log=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_training_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_training_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a training job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for training</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to training job creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_transform_job"> |
| <code class="descname">create_transform_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_transform_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_transform_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a transform job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for transform job</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to transform job creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_tuning_job"> |
| <code class="descname">create_tuning_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_tuning_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.create_tuning_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a tuning job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for tuning</li> |
<li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li>
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to tuning job creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint"> |
| <code class="descname">describe_endpoint</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_endpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint" title="Permalink to this definition">¶</a></dt> |
| <dd><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the endpoint</td>
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the endpoint info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint_config"> |
| <code class="descname">describe_endpoint_config</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_endpoint_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_endpoint_config" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the endpoint config info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the endpoint config</td>
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the endpoint config info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_model"> |
| <code class="descname">describe_model</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_model" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the SageMaker model info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the SageMaker model</td>
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the model info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job"> |
| <code class="descname">describe_training_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_training_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the training job info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the training job</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the training job info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job_with_log"> |
| <code class="descname">describe_training_job_with_log</code><span class="sig-paren">(</span><em>job_name</em>, <em>positions</em>, <em>stream_names</em>, <em>instance_count</em>, <em>state</em>, <em>last_description</em>, <em>last_describe_job_call</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_training_job_with_log"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_training_job_with_log" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the training job info associated with job_name and print CloudWatch logs</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_transform_job"> |
| <code class="descname">describe_transform_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_transform_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_transform_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the transform job info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the transform job</td>
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the transform job info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_tuning_job"> |
| <code class="descname">describe_tuning_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_tuning_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.describe_tuning_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Return the tuning job info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the tuning job</td>
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict contains all the tuning job info</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Establish an AWS connection for SageMaker</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-class docutils literal notranslate"><span class="pre">SageMaker.Client</span></code></a></td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_log_conn"> |
| <code class="descname">get_log_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.get_log_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.get_log_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Establish an AWS connection for retrieving logs during training</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><code class="xref py py-class docutils literal notranslate"><span class="pre">CloudWatchLog.Client</span></code></td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.log_stream"> |
| <code class="descname">log_stream</code><span class="sig-paren">(</span><em>log_group</em>, <em>stream_name</em>, <em>start_time=0</em>, <em>skip=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.log_stream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.log_stream" title="Permalink to this definition">¶</a></dt> |
| <dd><p>A generator for log items in a single stream. This will yield all the |
| items that are available at the current moment.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>log_group</strong> (<em>str</em>) – The name of the log group.</li> |
| <li><strong>stream_name</strong> (<em>str</em>) – The name of the specific stream.</li> |
| <li><strong>start_time</strong> (<em>int</em>) – The time stamp value to start reading the logs from (default: 0).</li> |
| <li><strong>skip</strong> (<em>int</em>) – The number of log entries to skip at the start (default: 0). |
| This is for when there are multiple entries at the same timestamp.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><p class="first">dict</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last"><div class="line-block"> |
| <div class="line">A CloudWatch log event with the following key-value pairs:</div> |
| <div class="line-block"> |
<div class="line">'timestamp' (int): The time in milliseconds of the event.</div>
<div class="line">'message' (str): The log event data.</div>
<div class="line">'ingestionTime' (int): The time in milliseconds the event was ingested.</div>
| </div> |
| </div> |
| </p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.multi_stream_iter"> |
| <code class="descname">multi_stream_iter</code><span class="sig-paren">(</span><em>log_group</em>, <em>streams</em>, <em>positions=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.multi_stream_iter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.multi_stream_iter" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Iterate over the available events coming from a set of log streams in a single log group |
| interleaving the events from each stream so they’re yielded in timestamp order.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>log_group</strong> (<em>str</em>) – The name of the log group.</li> |
| <li><strong>streams</strong> (<em>list</em>) – A list of the log stream names. The position of the stream in this list is |
| the stream number.</li> |
| <li><strong>positions</strong> (<em>list</em>) – A list of pairs of (timestamp, skip) which represents the last record |
| read from each stream.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A tuple of (stream number, cloudwatch log event).</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.tar_and_s3_upload"> |
| <code class="descname">tar_and_s3_upload</code><span class="sig-paren">(</span><em>path</em>, <em>key</em>, <em>bucket</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.tar_and_s3_upload"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.tar_and_s3_upload" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Tar the local file or directory and upload to s3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>path</strong> (<em>str</em>) – local file or directory</li> |
| <li><strong>key</strong> (<em>str</em>) – s3 key</li> |
| <li><strong>bucket</strong> (<em>str</em>) – s3 bucket</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.sagemaker_hook.SageMakerHook.update_endpoint"> |
| <code class="descname">update_endpoint</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.update_endpoint"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.sagemaker_hook.SageMakerHook.update_endpoint" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Update an endpoint</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for endpoint</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to endpoint update</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="sagemakertrainingoperator"> |
| <span id="id26"></span><h4>SageMakerTrainingOperator<a class="headerlink" href="#sagemakertrainingoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_training_operator.SageMakerTrainingOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_training_operator.</code><code class="descname">SageMakerTrainingOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_training_operator.html#SageMakerTrainingOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_training_operator.SageMakerTrainingOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Initiate a SageMaker training job.</p> |
<p>This operator returns the ARN of the training job created in Amazon SageMaker.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a training job (templated).</p> |
| <p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_training_job()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – If wait is set to True, the time interval, in seconds, |
| that the operation waits to check the status of the training job.</li> |
| <li><strong>print_log</strong> (<em>bool</em>) – if the operator should print the cloudwatch log during training</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – if wait is set to be true, this is the time interval |
| in seconds which the operator will check the status of the training job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails if the training job |
| doesn’t finish within max_ingestion_time seconds. If you set this parameter to None, |
| the operation does not timeout.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="sagemakertuningoperator"> |
| <span id="id27"></span><h4>SageMakerTuningOperator<a class="headerlink" href="#sagemakertuningoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_tuning_operator.SageMakerTuningOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_tuning_operator.</code><code class="descname">SageMakerTuningOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_tuning_operator.html#SageMakerTuningOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_tuning_operator.SageMakerTuningOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Initiate a SageMaker hyperparameter tuning job.</p> |
<p>This operator returns the ARN of the tuning job created in Amazon SageMaker.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a tuning job (templated).</p> |
| <p>For details of the configuration parameter see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_hyper_parameter_tuning_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_hyper_parameter_tuning_job()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the tuning job finishes.</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds, |
| that this operation waits to check the status of the tuning job.</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails |
| if the tuning job doesn’t finish within max_ingestion_time seconds. If you |
| set this parameter to None, the operation does not timeout.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="sagemakermodeloperator"> |
| <span id="id28"></span><h4>SageMakerModelOperator<a class="headerlink" href="#sagemakermodeloperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_model_operator.SageMakerModelOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_model_operator.</code><code class="descname">SageMakerModelOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_model_operator.html#SageMakerModelOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_model_operator.SageMakerModelOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Create a SageMaker model.</p> |
<p>This operator returns the ARN of the model created in Amazon SageMaker.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create a model.</p> |
| <p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="sagemakertransformoperator"> |
| <span id="id29"></span><h4>SageMakerTransformOperator<a class="headerlink" href="#sagemakertransformoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_transform_operator.SageMakerTransformOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_transform_operator.</code><code class="descname">SageMakerTransformOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_transform_operator.html#SageMakerTransformOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_transform_operator.SageMakerTransformOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Initiate a SageMaker transform job.</p> |
<p>This operator returns the ARN of the model created in Amazon SageMaker.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a transform job (templated).</p> |
<p>If you need to create a SageMaker transform job based on an existing SageMaker model:</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="n">transform_config</span> |
| </pre></div> |
| </div> |
| <p>If you need to create both SageMaker model and SageMaker Transform job:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'Model'</span><span class="p">:</span> <span class="n">model_config</span><span class="p">,</span> |
| <span class="s1">'Transform'</span><span class="p">:</span> <span class="n">transform_config</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>For details of the configuration parameter of transform_config see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_transform_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_transform_job()</span></code></a></p> |
<p>For details of the configuration parameter of model_config, see:
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p> |
| </li> |
<li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li>
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the transform job finishes.</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds, |
| that this operation waits to check the status of the transform job.</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails |
| if the transform job doesn’t finish within max_ingestion_time seconds. If you |
| set this parameter to None, the operation does not timeout.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="sagemakerendpointconfigoperator"> |
| <span id="id30"></span><h4>SageMakerEndpointConfigOperator<a class="headerlink" href="#sagemakerendpointconfigoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_endpoint_config_operator.SageMakerEndpointConfigOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_endpoint_config_operator.</code><code class="descname">SageMakerEndpointConfigOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_endpoint_config_operator.html#SageMakerEndpointConfigOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_endpoint_config_operator.SageMakerEndpointConfigOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Create a SageMaker endpoint config.</p> |
| <p>This operator returns the ARN of the endpoint config created in Amazon SageMaker.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create an endpoint config.</p> |
| <p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint_config()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="sagemakerendpointoperator"> |
| <span id="id31"></span><h4>SageMakerEndpointOperator<a class="headerlink" href="#sagemakerendpointoperator" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.sagemaker_endpoint_operator.SageMakerEndpointOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_endpoint_operator.</code><code class="descname">SageMakerEndpointOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_endpoint_operator.html#SageMakerEndpointOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.sagemaker_endpoint_operator.SageMakerEndpointOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Create a SageMaker endpoint.</p> |
| <p>This operator returns the ARN of the endpoint created in Amazon SageMaker.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create an endpoint.</p> |
| <p>If you need to create a SageMaker endpoint based on an existing
| SageMaker model and an existing SageMaker endpoint config:</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="n">endpoint_configuration</span><span class="p">;</span> |
| </pre></div> |
| </div> |
| <p>If you need to create all of SageMaker model, SageMaker endpoint-config and SageMaker endpoint:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'Model'</span><span class="p">:</span> <span class="n">model_configuration</span><span class="p">,</span> |
| <span class="s1">'EndpointConfig'</span><span class="p">:</span> <span class="n">endpoint_config_configuration</span><span class="p">,</span> |
| <span class="s1">'Endpoint'</span><span class="p">:</span> <span class="n">endpoint_configuration</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>For details of the configuration parameter of model_configuration see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p> |
| <p>For details of the configuration parameter of endpoint_config_configuration see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint_config()</span></code></a></p> |
| <p>For details of the configuration parameter of endpoint_configuration see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether the operator should wait until the endpoint creation finishes.</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, this is the time interval, in seconds, that this operation |
| waits before polling the status of the endpoint creation.</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, this operation fails if the endpoint creation doesn’t |
| finish within max_ingestion_time seconds. If you set this parameter to None it never times out.</li> |
| <li><strong>operation</strong> (<em>str</em>) – Whether to create an endpoint or update an endpoint. Must be either ‘create’ or ‘update’.</li>
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="id32"> |
| <h3>Amazon SageMaker<a class="headerlink" href="#id32" title="Permalink to this headline">¶</a></h3> |
| <p>For more instructions on using Amazon SageMaker in Airflow, please see <a class="reference external" href="https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/workflow/README.rst">the SageMaker Python SDK README</a>.</p> |
| <ul class="simple"> |
| <li><span class="xref std std-ref">SageMakerHook</span> : Interact with Amazon SageMaker.</li> |
| <li><span class="xref std std-ref">SageMakerTrainingOperator</span> : Create a SageMaker training job.</li> |
| <li><span class="xref std std-ref">SageMakerTuningOperator</span> : Create a SageMaker tuning job.</li> |
| <li><span class="xref std std-ref">SageMakerModelOperator</span> : Create a SageMaker model.</li> |
| <li><span class="xref std std-ref">SageMakerTransformOperator</span> : Create a SageMaker transform job.</li> |
| <li><span class="xref std std-ref">SageMakerEndpointConfigOperator</span> : Create a SageMaker endpoint config.</li> |
| <li><span class="xref std std-ref">SageMakerEndpointOperator</span> : Create a SageMaker endpoint.</li> |
| </ul> |
| <div class="section" id="id34"> |
| <span id="id35"></span><h4>SageMakerHook<a class="headerlink" href="#id34" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.sagemaker_hook.</code><code class="descname">SageMakerHook</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.hooks.aws_hook.AwsHook" title="airflow.contrib.hooks.aws_hook.AwsHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.aws_hook.AwsHook</span></code></a></p> |
| <p>Interact with Amazon SageMaker.</p> |
| <dl class="method"> |
| <dt> |
| <code class="descname">check_s3_url</code><span class="sig-paren">(</span><em>s3url</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_s3_url"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Check if an S3 URL exists</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>s3url</strong> (<em>str</em>) – S3 url</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">bool</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">check_status</code><span class="sig-paren">(</span><em>job_name</em>, <em>key</em>, <em>describe_function</em>, <em>check_interval</em>, <em>max_ingestion_time</em>, <em>non_terminal_states=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_status"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Check status of a SageMaker job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>job_name</strong> (<em>str</em>) – name of the job to check status</li> |
| <li><strong>key</strong> (<em>str</em>) – the key of the response dict |
| that points to the state</li> |
| <li><strong>describe_function</strong> (<em>python callable</em>) – the function used to retrieve the status</li> |
| <li><strong>args</strong> – the arguments for the function</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| <li><strong>non_terminal_states</strong> (<em>set</em>) – the set of nonterminal states</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">response of describe call after job is done</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">check_training_config</code><span class="sig-paren">(</span><em>training_config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_training_config"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Check if a training configuration is valid</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>training_config</strong> (<em>dict</em>) – training_config</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">check_training_status_with_log</code><span class="sig-paren">(</span><em>job_name</em>, <em>non_terminal_states</em>, <em>failed_states</em>, <em>wait_for_completion</em>, <em>check_interval</em>, <em>max_ingestion_time</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_training_status_with_log"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Display the logs for a given training job, optionally tailing them until the |
| job is complete.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>job_name</strong> (<em>str</em>) – name of the training job to check status and display logs for</li> |
| <li><strong>non_terminal_states</strong> (<em>set</em>) – the set of non_terminal states</li> |
| <li><strong>failed_states</strong> (<em>set</em>) – the set of failed states</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether to keep looking for new log entries |
| until the job completes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – The interval in seconds between polling for new log entries and job completion</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">check_tuning_config</code><span class="sig-paren">(</span><em>tuning_config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.check_tuning_config"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Check if a tuning configuration is valid</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>tuning_config</strong> (<em>dict</em>) – tuning_config</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">configure_s3_resources</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.configure_s3_resources"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Extract the S3 operations from the configuration and execute them.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – config of SageMaker operation</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">create_endpoint</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_endpoint"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Create an endpoint</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for endpoint</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to endpoint creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">create_endpoint_config</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_endpoint_config"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Create an endpoint config</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – the config for endpoint-config</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A response to endpoint config creation</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">create_model</code><span class="sig-paren">(</span><em>config</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_model"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Create a model job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>config</strong> (<em>dict</em>) – the config for model</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A response to model creation</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">create_training_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>print_log=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_training_job"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Create a training job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for training</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to training job creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">create_transform_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_transform_job"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Create a transform job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for transform job</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to transform job creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">create_tuning_job</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.create_tuning_job"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Create a tuning job</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for tuning</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li>
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to tuning job creation</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">describe_endpoint</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_endpoint"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the endpoint</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the endpoint info</td>
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">describe_endpoint_config</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_endpoint_config"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Return the endpoint config info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the endpoint config</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the endpoint config info</td>
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">describe_model</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_model"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Return the SageMaker model info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the SageMaker model</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the model info</td>
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">describe_training_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_training_job"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Return the training job info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – the name of the training job</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the training job info</td>
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">describe_training_job_with_log</code><span class="sig-paren">(</span><em>job_name</em>, <em>positions</em>, <em>stream_names</em>, <em>instance_count</em>, <em>state</em>, <em>last_description</em>, <em>last_describe_job_call</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_training_job_with_log"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Return the training job info associated with job_name and print CloudWatch logs</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">describe_transform_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_transform_job"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Return the transform job info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the transform job</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the transform job info</td>
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">describe_tuning_job</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.describe_tuning_job"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Return the tuning job info associated with the name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>string</em>) – the name of the tuning job</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A dict containing all the tuning job info</td>
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.get_conn"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Establish an AWS connection for SageMaker</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-class docutils literal notranslate"><span class="pre">SageMaker.Client</span></code></a></td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">get_log_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.get_log_conn"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Establish an AWS connection for retrieving logs during training</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><code class="xref py py-class docutils literal notranslate"><span class="pre">CloudWatchLog.Client</span></code></td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">log_stream</code><span class="sig-paren">(</span><em>log_group</em>, <em>stream_name</em>, <em>start_time=0</em>, <em>skip=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.log_stream"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>A generator for log items in a single stream. This will yield all the |
| items that are available at the current moment.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>log_group</strong> (<em>str</em>) – The name of the log group.</li> |
| <li><strong>stream_name</strong> (<em>str</em>) – The name of the specific stream.</li> |
| <li><strong>start_time</strong> (<em>int</em>) – The time stamp value to start reading the logs from (default: 0).</li> |
| <li><strong>skip</strong> (<em>int</em>) – The number of log entries to skip at the start (default: 0). |
| This is for when there are multiple entries at the same timestamp.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><p class="first">dict</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last"><div class="line-block"> |
| <div class="line">A CloudWatch log event with the following key-value pairs:</div> |
| <div class="line-block"> |
| <div class="line">‘timestamp’ (int): The time in milliseconds of the event.</div> |
| <div class="line">‘message’ (str): The log event data.</div> |
| <div class="line">‘ingestionTime’ (int): The time in milliseconds the event was ingested.</div> |
| </div> |
| </div> |
| </p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">multi_stream_iter</code><span class="sig-paren">(</span><em>log_group</em>, <em>streams</em>, <em>positions=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.multi_stream_iter"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Iterate over the available events coming from a set of log streams in a single log group |
| interleaving the events from each stream so they’re yielded in timestamp order.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>log_group</strong> (<em>str</em>) – The name of the log group.</li> |
| <li><strong>streams</strong> (<em>list</em>) – A list of the log stream names. The position of the stream in this list is |
| the stream number.</li> |
| <li><strong>positions</strong> (<em>list</em>) – A list of pairs of (timestamp, skip) which represents the last record |
| read from each stream.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A tuple of (stream number, cloudwatch log event).</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">tar_and_s3_upload</code><span class="sig-paren">(</span><em>path</em>, <em>key</em>, <em>bucket</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.tar_and_s3_upload"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Tar the local file or directory and upload to s3</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>path</strong> (<em>str</em>) – local file or directory</li> |
| <li><strong>key</strong> (<em>str</em>) – s3 key</li> |
| <li><strong>bucket</strong> (<em>str</em>) – s3 bucket</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt> |
| <code class="descname">update_endpoint</code><span class="sig-paren">(</span><em>config</em>, <em>wait_for_completion=True</em>, <em>check_interval=30</em>, <em>max_ingestion_time=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/sagemaker_hook.html#SageMakerHook.update_endpoint"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Update an endpoint</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>config</strong> (<em>dict</em>) – the config for endpoint</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – if the program should keep running until job finishes</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – the time interval in seconds which the operator |
| will check the status of any SageMaker job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – the maximum ingestion time in seconds. Any |
| SageMaker jobs that run longer than this will fail. Setting this to |
| None implies no timeout for any SageMaker job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">A response to endpoint update</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="id36"> |
| <span id="id37"></span><h4>SageMakerTrainingOperator<a class="headerlink" href="#id36" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_training_operator.</code><code class="descname">SageMakerTrainingOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_training_operator.html#SageMakerTrainingOperator"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Initiate a SageMaker training job.</p> |
| <p>This operator returns The ARN of the training job created in Amazon SageMaker.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a training job (templated).</p> |
| <p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_training_job()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the training job finishes.</li> |
| <li><strong>print_log</strong> (<em>bool</em>) – if the operator should print the cloudwatch log during training</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – if wait is set to be true, this is the time interval |
| in seconds which the operator will check the status of the training job</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails if the training job |
| doesn’t finish within max_ingestion_time seconds. If you set this parameter to None, |
| the operation does not timeout.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="id38"> |
| <span id="id39"></span><h4>SageMakerTuningOperator<a class="headerlink" href="#id38" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_tuning_operator.</code><code class="descname">SageMakerTuningOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_tuning_operator.html#SageMakerTuningOperator"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Initiate a SageMaker hyperparameter tuning job.</p> |
| <p>This operator returns The ARN of the tuning job created in Amazon SageMaker.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a tuning job (templated).</p> |
| <p>For details of the configuration parameter see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_hyper_parameter_tuning_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_hyper_parameter_tuning_job()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the tuning job finishes.</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds, |
| that this operation waits to check the status of the tuning job.</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails |
| if the tuning job doesn’t finish within max_ingestion_time seconds. If you |
| set this parameter to None, the operation does not timeout.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="id40"> |
| <span id="id41"></span><h4>SageMakerModelOperator<a class="headerlink" href="#id40" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_model_operator.</code><code class="descname">SageMakerModelOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_model_operator.html#SageMakerModelOperator"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Create a SageMaker model.</p> |
| <p>This operator returns The ARN of the model created in Amazon SageMaker</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create a model.</p> |
| <p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="id42"> |
| <span id="id43"></span><h4>SageMakerTransformOperator<a class="headerlink" href="#id42" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_transform_operator.</code><code class="descname">SageMakerTransformOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_transform_operator.html#SageMakerTransformOperator"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Initiate a SageMaker transform job.</p> |
| <p>This operator returns The ARN of the model created in Amazon SageMaker.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to start a transform job (templated).</p> |
| <p>If you need to create a SageMaker transform job based on an existing SageMaker model:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="n">transform_config</span> |
| </pre></div> |
| </div> |
| <p>If you need to create both SageMaker model and SageMaker Transform job:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'Model'</span><span class="p">:</span> <span class="n">model_config</span><span class="p">,</span> |
| <span class="s1">'Transform'</span><span class="p">:</span> <span class="n">transform_config</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>For details of the configuration parameter of transform_config see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_transform_job" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_transform_job()</span></code></a></p> |
| <p>For details of the configuration parameter of model_config, see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Set to True to wait until the transform job finishes.</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, the time interval, in seconds, |
| that this operation waits to check the status of the transform job.</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, the operation fails |
| if the transform job doesn’t finish within max_ingestion_time seconds. If you |
| set this parameter to None, the operation does not timeout.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="id44"> |
| <span id="id45"></span><h4>SageMakerEndpointConfigOperator<a class="headerlink" href="#id44" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_endpoint_config_operator.</code><code class="descname">SageMakerEndpointConfigOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_endpoint_config_operator.html#SageMakerEndpointConfigOperator"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Create a SageMaker endpoint config.</p> |
| <p>This operator returns The ARN of the endpoint config created in Amazon SageMaker</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create an endpoint config.</p> |
| <p>For details of the configuration parameter see <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint_config()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="id46"> |
| <span id="id47"></span><h4>SageMakerEndpointOperator<a class="headerlink" href="#id46" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.sagemaker_endpoint_operator.</code><code class="descname">SageMakerEndpointOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/sagemaker_endpoint_operator.html#SageMakerEndpointOperator"><span class="viewcode-link">[source]</span></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator" title="airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.sagemaker_base_operator.SageMakerBaseOperator</span></code></a></p> |
| <p>Create a SageMaker endpoint.</p> |
| <p>This operator returns The ARN of the endpoint created in Amazon SageMaker</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>config</strong> (<em>dict</em>) – <p>The configuration necessary to create an endpoint.</p> |
| <p>If you need to create a SageMaker endpoint based on an existing |
| SageMaker model and an existing SageMaker endpoint config:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="n">endpoint_configuration</span><span class="p">;</span> |
| </pre></div> |
| </div> |
| <p>If you need to create all of SageMaker model, SageMaker endpoint-config and SageMaker endpoint:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">config</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'Model'</span><span class="p">:</span> <span class="n">model_configuration</span><span class="p">,</span> |
| <span class="s1">'EndpointConfig'</span><span class="p">:</span> <span class="n">endpoint_config_configuration</span><span class="p">,</span> |
| <span class="s1">'Endpoint'</span><span class="p">:</span> <span class="n">endpoint_configuration</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>For details of the configuration parameter of model_configuration see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_model()</span></code></a></p> |
| <p>For details of the configuration parameter of endpoint_config_configuration see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint_config()</span></code></a></p> |
| <p>For details of the configuration parameter of endpoint_configuration see |
| <a class="reference external" href="https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint" title="(in Boto 3 Docs v1.9.83)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SageMaker.Client.create_endpoint()</span></code></a></p> |
| </li> |
| <li><strong>aws_conn_id</strong> (<em>str</em>) – The AWS connection ID to use.</li> |
| <li><strong>wait_for_completion</strong> (<em>bool</em>) – Whether the operator should wait until the endpoint creation finishes.</li> |
| <li><strong>check_interval</strong> (<em>int</em>) – If wait is set to True, this is the time interval, in seconds, that this operation |
| waits before polling the status of the endpoint creation.</li> |
| <li><strong>max_ingestion_time</strong> (<em>int</em>) – If wait is set to True, this operation fails if the endpoint creation doesn’t |
| finish within max_ingestion_time seconds. If you set this parameter to None it never times out.</li> |
| <li><strong>operation</strong> (<em>str</em>) – Whether to create an endpoint or update an endpoint. Must be either ‘create’ or ‘update’.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| <div class="section" id="databricks"> |
| <span id="id48"></span><h2>Databricks<a class="headerlink" href="#databricks" title="Permalink to this headline">¶</a></h2> |
| <p><a class="reference external" href="https://databricks.com/">Databricks</a> has contributed an Airflow operator which enables |
| submitting runs to the Databricks platform. Internally the operator talks to the |
| <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> <a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">endpoint</a>.</p> |
| <div class="section" id="databrickssubmitrunoperator"> |
| <h3>DatabricksSubmitRunOperator<a class="headerlink" href="#databrickssubmitrunoperator" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.databricks_operator.</code><code class="descname">DatabricksSubmitRunOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/databricks_operator.html#DatabricksSubmitRunOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.databricks_operator.DatabricksSubmitRunOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Submits a Spark job run to Databricks using the |
| <a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">api/2.0/jobs/runs/submit</a> |
| API endpoint.</p> |
| <p>There are two ways to instantiate this operator.</p> |
| <p>In the first way, you can take the JSON payload that you typically use |
| to call the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint and pass it directly |
| to our <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> through the <code class="docutils literal notranslate"><span class="pre">json</span></code> parameter. |
| For example</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">json</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'new_cluster'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'spark_version'</span><span class="p">:</span> <span class="s1">'2.1.0-db3-scala2.11'</span><span class="p">,</span> |
| <span class="s1">'num_workers'</span><span class="p">:</span> <span class="mi">2</span> |
| <span class="p">},</span> |
| <span class="s1">'notebook_task'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'notebook_path'</span><span class="p">:</span> <span class="s1">'/Users/airflow@example.com/PrepareData'</span><span class="p">,</span> |
| <span class="p">},</span> |
| <span class="p">}</span> |
| <span class="n">notebook_run</span> <span class="o">=</span> <span class="n">DatabricksSubmitRunOperator</span><span class="p">(</span><span class="n">task_id</span><span class="o">=</span><span class="s1">'notebook_run'</span><span class="p">,</span> <span class="n">json</span><span class="o">=</span><span class="n">json</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>Another way to accomplish the same thing is to use the named parameters |
| of the <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> directly. Note that there is exactly |
| one named parameter for each top level parameter in the <code class="docutils literal notranslate"><span class="pre">runs/submit</span></code> |
| endpoint. In this method, your code would look like this:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">new_cluster</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'spark_version'</span><span class="p">:</span> <span class="s1">'2.1.0-db3-scala2.11'</span><span class="p">,</span> |
| <span class="s1">'num_workers'</span><span class="p">:</span> <span class="mi">2</span> |
| <span class="p">}</span> |
| <span class="n">notebook_task</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'notebook_path'</span><span class="p">:</span> <span class="s1">'/Users/airflow@example.com/PrepareData'</span><span class="p">,</span> |
| <span class="p">}</span> |
| <span class="n">notebook_run</span> <span class="o">=</span> <span class="n">DatabricksSubmitRunOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'notebook_run'</span><span class="p">,</span> |
| <span class="n">new_cluster</span><span class="o">=</span><span class="n">new_cluster</span><span class="p">,</span> |
| <span class="n">notebook_task</span><span class="o">=</span><span class="n">notebook_task</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>In the case where both the json parameter <strong>AND</strong> the named parameters |
| are provided, they will be merged together. If there are conflicts during the merge, |
| the named parameters will take precedence and override the top level <code class="docutils literal notranslate"><span class="pre">json</span></code> keys.</p> |
| <dl class="docutils"> |
| <dt>Currently the named parameters that <code class="docutils literal notranslate"><span class="pre">DatabricksSubmitRunOperator</span></code> supports are</dt> |
| <dd><ul class="first last simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">notebook_task</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">new_cluster</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">libraries</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">run_name</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">timeout_seconds</span></code></li> |
| </ul> |
| </dd> |
| </dl> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>json</strong> (<em>dict</em>) – <p>A JSON object containing API parameters which will be passed |
| directly to the <code class="docutils literal notranslate"><span class="pre">api/2.0/jobs/runs/submit</span></code> endpoint. The other named parameters |
| (i.e. <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code>, <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code>..) to this operator will |
| be merged with this json dictionary if they are provided. |
| If there are conflicts during the merge, the named parameters will |
| take precedence and override the top level json keys. (templated)</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more information about templating see <a class="reference internal" href="concepts.html#jinja-templating"><span class="std std-ref">Jinja Templating</span></a>. |
| <a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#runs-submit">https://docs.databricks.com/api/latest/jobs.html#runs-submit</a></p> |
| </div> |
| </li> |
| <li><strong>spark_jar_task</strong> (<em>dict</em>) – <p>The main class and parameters for the JAR task. Note that |
| the actual JAR is specified in the <code class="docutils literal notranslate"><span class="pre">libraries</span></code>. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code> should be specified. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask">https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask</a></p> |
| </div> |
| </li> |
| <li><strong>notebook_task</strong> (<em>dict</em>) – <p>The notebook path and parameters for the notebook task. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">spark_jar_task</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">notebook_task</span></code> should be specified. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask">https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask</a></p> |
| </div> |
| </li> |
| <li><strong>new_cluster</strong> (<em>dict</em>) – <p>Specs for a new cluster on which this task will be run. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">new_cluster</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code> should be specified. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster">https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster</a></p> |
| </div> |
| </li> |
| <li><strong>existing_cluster_id</strong> (<em>string</em>) – ID for existing cluster on which to run this task. |
| <em>EITHER</em> <code class="docutils literal notranslate"><span class="pre">new_cluster</span></code> <em>OR</em> <code class="docutils literal notranslate"><span class="pre">existing_cluster_id</span></code> should be specified. |
| This field will be templated.</li> |
| <li><strong>libraries</strong> (<em>list of dicts</em>) – <p>Libraries which this run will use. |
| This field will be templated.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary">https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary</a></p> |
| </div> |
| </li> |
| <li><strong>run_name</strong> (<em>string</em>) – The run name used for this task. |
| By default this will be set to the Airflow <code class="docutils literal notranslate"><span class="pre">task_id</span></code>. This <code class="docutils literal notranslate"><span class="pre">task_id</span></code> is a |
| required parameter of the superclass <code class="docutils literal notranslate"><span class="pre">BaseOperator</span></code>. |
| This field will be templated.</li> |
| <li><strong>timeout_seconds</strong> (<em>int32</em>) – The timeout for this run. By default a value of 0 is used |
| which means to have no timeout. |
| This field will be templated.</li> |
| <li><strong>databricks_conn_id</strong> (<em>string</em>) – The name of the Airflow connection to use. |
| By default and in the common case this will be <code class="docutils literal notranslate"><span class="pre">databricks_default</span></code>. To use |
| token based authentication, provide the key <code class="docutils literal notranslate"><span class="pre">token</span></code> in the extra field for the |
| connection.</li> |
| <li><strong>polling_period_seconds</strong> (<em>int</em>) – Controls the rate which we poll for the result of |
| this run. By default the operator will poll every 30 seconds.</li> |
| <li><strong>databricks_retry_limit</strong> (<em>int</em>) – Number of times to retry if the Databricks backend is |
| unreachable. Its value must be greater than or equal to 1.</li> |
| <li><strong>databricks_retry_delay</strong> (<em>float</em>) – Number of seconds to wait between retries (it |
| might be a floating point number).</li> |
| <li><strong>do_xcom_push</strong> (<em>boolean</em>) – Whether we should push run_id and run_page_url to xcom.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="gcp-google-cloud-platform"> |
| <span id="gcp"></span><h2>GCP: Google Cloud Platform<a class="headerlink" href="#gcp-google-cloud-platform" title="Permalink to this headline">¶</a></h2> |
| <p>Airflow has extensive support for the Google Cloud Platform. But note that most Hooks and |
| Operators are in the contrib section, meaning that they have a <em>beta</em> status and |
| can have breaking changes between minor releases.</p> |
| <p>See the <a class="reference internal" href="howto/manage-connections.html#connection-type-gcp"><span class="std std-ref">GCP connection type</span></a> documentation to |
| configure connections to GCP.</p> |
| <div class="section" id="id49"> |
| <h3>Logging<a class="headerlink" href="#id49" title="Permalink to this headline">¶</a></h3> |
| <p>Airflow can be configured to read and write task logs in Google Cloud Storage. |
| See <a class="reference internal" href="howto/write-logs.html#write-logs-gcp"><span class="std std-ref">Writing Logs to Google Cloud Storage</span></a>.</p> |
| </div> |
| <div class="section" id="googlecloudbasehook"> |
| <h3>GoogleCloudBaseHook<a class="headerlink" href="#googlecloudbasehook" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_api_base_hook.</code><code class="descname">GoogleCloudBaseHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_api_base_hook.html#GoogleCloudBaseHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>A base hook for Google cloud-related hooks. Google cloud has a shared REST |
| API client that is built in the same way no matter which service you use. |
| This class helps construct and authorize the credentials needed to then |
| call googleapiclient.discovery.build() to actually discover and build a client |
| for a Google cloud service.</p> |
| <p>The class also contains some miscellaneous helper functions.</p> |
| <p>All hooks derived from this base hook use the ‘Google Cloud Platform’ connection |
| type. Three ways of authentication are supported:</p> |
| <p>Default credentials: Only the ‘Project Id’ is required. You’ll need to |
| have set up default credentials, such as by the |
| <code class="docutils literal notranslate"><span class="pre">GOOGLE_APPLICATION_CREDENTIALS</span></code> environment variable or from the metadata |
| server on Google Compute Engine.</p> |
| <p>JSON key file: Specify ‘Project Id’, ‘Keyfile Path’ and ‘Scope’.</p> |
| <p>Legacy P12 key files are not supported.</p> |
| <p>JSON data provided in the UI: Specify ‘Keyfile JSON’.</p> |
| <dl class="staticmethod"> |
| <dt id="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.fallback_to_default_project_id"> |
| <em class="property">static </em><code class="descname">fallback_to_default_project_id</code><span class="sig-paren">(</span><em>func</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_api_base_hook.html#GoogleCloudBaseHook.fallback_to_default_project_id"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.fallback_to_default_project_id" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Decorator that provides fallback for Google Cloud Platform project id. If |
| the project is None it will be replaced with the project_id from the |
| service account the Hook is authenticated with. Project id can be specified |
| either via project_id kwarg or via first parameter in positional args.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>func</strong> – function to wrap</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">result of the function call</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquery"> |
| <span id="id50"></span><h3>BigQuery<a class="headerlink" href="#bigquery" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="bigquery-operators"> |
| <h4>BigQuery Operators<a class="headerlink" href="#bigquery-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#bigquerycheckoperator"><span class="std std-ref">BigQueryCheckOperator</span></a> : Performs checks against a SQL query that will return a single row with different values.</li> |
| <li><a class="reference internal" href="#bigqueryvaluecheckoperator"><span class="std std-ref">BigQueryValueCheckOperator</span></a> : Performs a simple value check using SQL code.</li> |
| <li><a class="reference internal" href="#bigqueryintervalcheckoperator"><span class="std std-ref">BigQueryIntervalCheckOperator</span></a> : Checks that the values of metrics given as SQL expressions are within a certain tolerance of the ones from days_back before.</li> |
| <li><a class="reference internal" href="#bigquerygetdataoperator"><span class="std std-ref">BigQueryGetDataOperator</span></a> : Fetches the data from a BigQuery table and returns data in a python list</li> |
| <li><a class="reference internal" href="#bigquerycreateemptydatasetoperator"><span class="std std-ref">BigQueryCreateEmptyDatasetOperator</span></a> : Creates an empty BigQuery dataset.</li> |
| <li><a class="reference internal" href="#bigquerycreateemptytableoperator"><span class="std std-ref">BigQueryCreateEmptyTableOperator</span></a> : Creates a new, empty table in the specified BigQuery dataset optionally with schema.</li> |
| <li><a class="reference internal" href="#bigquerycreateexternaltableoperator"><span class="std std-ref">BigQueryCreateExternalTableOperator</span></a> : Creates a new, external table in the dataset with the data in Google Cloud Storage.</li> |
| <li><a class="reference internal" href="#bigquerydeletedatasetoperator"><span class="std std-ref">BigQueryDeleteDatasetOperator</span></a> : Deletes an existing BigQuery dataset.</li> |
| <li><a class="reference internal" href="#bigquerytabledeleteoperator"><span class="std std-ref">BigQueryTableDeleteOperator</span></a> : Deletes an existing BigQuery table.</li> |
| <li><a class="reference internal" href="#bigqueryoperator"><span class="std std-ref">BigQueryOperator</span></a> : Executes BigQuery SQL queries in a specific BigQuery database.</li> |
| <li><a class="reference internal" href="#bigquerytobigqueryoperator"><span class="std std-ref">BigQueryToBigQueryOperator</span></a> : Copy a BigQuery table to another BigQuery table.</li> |
| <li><a class="reference internal" href="#bigquerytocloudstorageoperator"><span class="std std-ref">BigQueryToCloudStorageOperator</span></a> : Transfers a BigQuery table to a Google Cloud Storage bucket</li> |
| </ul> |
| <div class="section" id="bigquerycheckoperator"> |
| <span id="id51"></span><h5>BigQueryCheckOperator<a class="headerlink" href="#bigquerycheckoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.CheckOperator" title="airflow.operators.check_operator.CheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.CheckOperator</span></code></a></p> |
| <p>Performs checks against BigQuery. The <code class="docutils literal notranslate"><span class="pre">BigQueryCheckOperator</span></code> expects |
| a sql query that will return a single row. Each value on that |
| first row is evaluated using python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the |
| values return <code class="docutils literal notranslate"><span class="pre">False</span></code> the check is failed and errors out.</p> |
| <p>Note that Python bool casting evals the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p> |
| <ul class="simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">False</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">0</span></code></li> |
| <li>Empty string (<code class="docutils literal notranslate"><span class="pre">""</span></code>)</li> |
| <li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li> |
| <li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li> |
| </ul> |
| <p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if |
| the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft a much more complex query that could, |
| for instance, check that the table has the same number of rows as |
| the source table upstream, or that the count of today’s partition is |
| greater than yesterday’s partition, or that a set of metrics are less |
| than 3 standard deviation for the 7 day average.</p> |
| <p>This operator can be used as a data quality check in your pipeline, and |
| depending on where you put it in your DAG, you have the choice to |
| stop the critical path, preventing it from |
| publishing dubious data, or on the side and receive email alerts |
| without stopping the progress of the DAG.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to the BigQuery database</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) |
| or standard SQL (false).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigqueryvaluecheckoperator"> |
| <span id="id52"></span><h5>BigQueryValueCheckOperator<a class="headerlink" href="#bigqueryvaluecheckoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryValueCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryValueCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.ValueCheckOperator" title="airflow.operators.check_operator.ValueCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.ValueCheckOperator</span></code></a></p> |
| <p>Performs a simple value check using sql code.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – the sql to be executed</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) |
| or standard SQL (false).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigqueryintervalcheckoperator"> |
| <span id="id53"></span><h5>BigQueryIntervalCheckOperator<a class="headerlink" href="#bigqueryintervalcheckoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_check_operator.</code><code class="descname">BigQueryIntervalCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_check_operator.html#BigQueryIntervalCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_check_operator.BigQueryIntervalCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.IntervalCheckOperator" title="airflow.operators.check_operator.IntervalCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.IntervalCheckOperator</span></code></a></p> |
| <p>Checks that the values of metrics given as SQL expressions are within |
| a certain tolerance of the ones from days_back before.</p> |
| <p>This method constructs a query like so</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">SELECT</span> <span class="p">{</span><span class="n">metrics_threshold_dict_key</span><span class="p">}</span> <span class="n">FROM</span> <span class="p">{</span><span class="n">table</span><span class="p">}</span> |
| <span class="n">WHERE</span> <span class="p">{</span><span class="n">date_filter_column</span><span class="p">}</span><span class="o">=<</span><span class="n">date</span><span class="o">></span> |
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>table</strong> (<em>str</em>) – the table name</li> |
| <li><strong>days_back</strong> (<em>int</em>) – number of days between ds and the ds we want to check |
| against. Defaults to 7 days</li> |
| <li><strong>metrics_threshold</strong> (<em>dict</em>) – a dictionary of ratios indexed by metrics, for |
| example ‘COUNT(*)’: 1.5 would require a 50 percent or less difference |
| between the current day, and the prior days_back.</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) |
| or standard SQL (false).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerygetdataoperator"> |
| <span id="id54"></span><h5>BigQueryGetDataOperator<a class="headerlink" href="#bigquerygetdataoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_get_data.</code><code class="descname">BigQueryGetDataOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_get_data.html#BigQueryGetDataOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_get_data.BigQueryGetDataOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Fetches the data from a BigQuery table (alternatively fetch data for selected columns) |
| and returns data in a python list. The number of elements in the returned list will |
| be equal to the number of rows fetched. Each element in the list will again be a list |
| where each element represents the column values for that row.</p> |
| <p><strong>Example Result</strong>: <code class="docutils literal notranslate"><span class="pre">[['Tony',</span> <span class="pre">'10'],</span> <span class="pre">['Mike',</span> <span class="pre">'20'],</span> <span class="pre">['Steve',</span> <span class="pre">'15']]</span></code></p> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">If you pass fields to <code class="docutils literal notranslate"><span class="pre">selected_fields</span></code> which are in different order than the |
| order of columns already in |
| BQ table, the data will still be in the order of BQ table. |
| For example if the BQ table has 3 columns as |
| <code class="docutils literal notranslate"><span class="pre">[A,B,C]</span></code> and you pass ‘B,A’ in the <code class="docutils literal notranslate"><span class="pre">selected_fields</span></code> |
| the data would still be of the form <code class="docutils literal notranslate"><span class="pre">'A,B'</span></code>.</p> |
| </div> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">get_data</span> <span class="o">=</span> <span class="n">BigQueryGetDataOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'get_data_from_bq'</span><span class="p">,</span> |
| <span class="n">dataset_id</span><span class="o">=</span><span class="s1">'test_dataset'</span><span class="p">,</span> |
| <span class="n">table_id</span><span class="o">=</span><span class="s1">'Transaction_partitions'</span><span class="p">,</span> |
| <span class="n">max_results</span><span class="o">=</span><span class="s1">'100'</span><span class="p">,</span> |
| <span class="n">selected_fields</span><span class="o">=</span><span class="s1">'DATE'</span><span class="p">,</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>dataset_id</strong> (<em>string</em>) – The dataset ID of the requested table. (templated)</li> |
| <li><strong>table_id</strong> (<em>string</em>) – The table ID of the requested table. (templated)</li> |
| <li><strong>max_results</strong> (<em>string</em>) – The maximum number of records (rows) to be fetched |
| from the table. (templated)</li> |
| <li><strong>selected_fields</strong> (<em>string</em>) – List of fields to return (comma-separated). If |
| unspecified, all fields are returned.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerycreateemptytableoperator"> |
| <span id="id55"></span><h5>BigQueryCreateEmptyTableOperator<a class="headerlink" href="#bigquerycreateemptytableoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateEmptyTableOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateEmptyTableOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyTableOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new, empty table in the specified BigQuery dataset, |
| optionally with schema.</p> |
| <p>The schema to be used for the BigQuery table may be specified in one of |
| two ways. You may either directly pass the schema fields in, or you may |
| point the operator to a Google cloud storage object name. The object in |
| Google cloud storage must be a JSON file with the schema fields in it. |
| You can also create a table without schema.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The project to create the table into. (templated)</li> |
| <li><strong>dataset_id</strong> (<em>string</em>) – The dataset to create the table into. (templated)</li> |
| <li><strong>table_id</strong> (<em>string</em>) – The Name of the table to be created. (templated)</li> |
| <li><strong>schema_fields</strong> (<em>list</em>) – <p>If set, the schema field list as defined here: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</a></p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">},</span> |
| <span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">}]</span> |
| </pre></div> |
| </div> |
| </li> |
| <li><strong>gcs_schema_object</strong> (<em>string</em>) – Full path to the JSON file containing |
| schema (templated). For |
| example: <code class="docutils literal notranslate"><span class="pre">gs://test-bucket/dir1/dir2/employee_schema.json</span></code></li> |
| <li><strong>time_partitioning</strong> (<em>dict</em>) – <p>configure optional time partitioning fields i.e. |
| partition by field, type and expiration as per API specifications.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning">https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning</a></p> |
| </div> |
| </li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google |
| cloud storage hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to |
| work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – <p>a dictionary containing labels for the table, passed to BigQuery</p> |
| <p><strong>Example (with schema JSON in GCS)</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateTable</span> <span class="o">=</span> <span class="n">BigQueryCreateEmptyTableOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'BigQueryCreateEmptyTableOperator_task'</span><span class="p">,</span> |
| <span class="n">dataset_id</span><span class="o">=</span><span class="s1">'ODS'</span><span class="p">,</span> |
| <span class="n">table_id</span><span class="o">=</span><span class="s1">'Employees'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'internal-gcp-project'</span><span class="p">,</span> |
| <span class="n">gcs_schema_object</span><span class="o">=</span><span class="s1">'gs://schema-bucket/employee_schema.json'</span><span class="p">,</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p><strong>Corresponding Schema file</strong> (<code class="docutils literal notranslate"><span class="pre">employee_schema.json</span></code>):</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[</span> |
| <span class="p">{</span> |
| <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">,</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> |
| <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span> |
| <span class="p">},</span> |
| <span class="p">{</span> |
| <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">,</span> |
| <span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> |
| <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span> |
| <span class="p">}</span> |
| <span class="p">]</span> |
| </pre></div> |
| </div> |
| <p><strong>Example (with schema in the DAG)</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateTable</span> <span class="o">=</span> <span class="n">BigQueryCreateEmptyTableOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'BigQueryCreateEmptyTableOperator_task'</span><span class="p">,</span> |
| <span class="n">dataset_id</span><span class="o">=</span><span class="s1">'ODS'</span><span class="p">,</span> |
| <span class="n">table_id</span><span class="o">=</span><span class="s1">'Employees'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'internal-gcp-project'</span><span class="p">,</span> |
| <span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">},</span> |
| <span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">}],</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerycreateexternaltableoperator"> |
| <span id="id56"></span><h5>BigQueryCreateExternalTableOperator<a class="headerlink" href="#bigquerycreateexternaltableoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateExternalTableOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateExternalTableOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateExternalTableOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new external table in the dataset with the data in Google Cloud |
| Storage.</p> |
| <p>The schema to be used for the BigQuery table may be specified in one of |
| two ways. You may either directly pass the schema fields in, or you may |
| point the operator to a Google cloud storage object name. The object in |
| Google cloud storage must be a JSON file with the schema fields in it.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to point the external table to. (templated)</li> |
| <li><strong>source_objects</strong> (<em>list</em>) – List of Google cloud storage URIs to point |
| table to. (templated) |
| If source_format is ‘DATASTORE_BACKUP’, the list must only contain a single URI.</li> |
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The dotted (&lt;project&gt;.)&lt;dataset&gt;.&lt;table&gt;
BigQuery table to load data into (templated). If &lt;project&gt; is not included,
project will be the project defined in the connection json.</li>
| <li><strong>schema_fields</strong> (<em>list</em>) – <p>If set, the schema field list as defined here: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</a></p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema_fields</span><span class="o">=</span><span class="p">[{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"emp_name"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"STRING"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"REQUIRED"</span><span class="p">},</span> |
| <span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="s2">"salary"</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="s2">"INTEGER"</span><span class="p">,</span> <span class="s2">"mode"</span><span class="p">:</span> <span class="s2">"NULLABLE"</span><span class="p">}]</span> |
| </pre></div> |
| </div> |
| <p>Should not be set when source_format is ‘DATASTORE_BACKUP’.</p> |
| </li> |
| <li><strong>schema_object</strong> (<em>string</em>) – If set, a GCS object path pointing to a .json file that |
| contains the schema for the table. (templated)</li> |
| <li><strong>source_format</strong> (<em>string</em>) – File format of the data.</li> |
| <li><strong>compression</strong> (<em>string</em>) – [Optional] The compression type of the data source. |
| Possible values include GZIP and NONE. |
| The default value is NONE. |
| This setting is ignored for Google Cloud Bigtable, |
| Google Cloud Datastore backups and Avro formats.</li> |
| <li><strong>skip_leading_rows</strong> (<em>int</em>) – Number of rows to skip when loading from a CSV.</li> |
| <li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use for the CSV.</li> |
| <li><strong>max_bad_records</strong> (<em>int</em>) – The maximum number of bad records that BigQuery can |
| ignore when running the job.</li> |
| <li><strong>quote_character</strong> (<em>string</em>) – The value that is used to quote data sections in a CSV file.</li> |
| <li><strong>allow_quoted_newlines</strong> (<em>boolean</em>) – Whether to allow quoted newlines (true) or not (false).</li> |
| <li><strong>allow_jagged_rows</strong> (<em>bool</em>) – Accept rows that are missing trailing optional columns. |
| The missing values are treated as nulls. If false, records with missing trailing |
| columns are treated as bad records, and if there are too many bad records, an |
| invalid error is returned in the job result. Only applicable to CSV, ignored |
| for other formats.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google |
| cloud storage hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to |
| work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>src_fmt_configs</strong> (<em>dict</em>) – configure optional fields specific to the source format</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
<p><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the table, passed to BigQuery</p>
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerycreateemptydatasetoperator"> |
| <span id="id57"></span><h5>BigQueryCreateEmptyDatasetOperator<a class="headerlink" href="#bigquerycreateemptydatasetoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyDatasetOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryCreateEmptyDatasetOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryCreateEmptyDatasetOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryCreateEmptyDatasetOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
<p>This operator is used to create a new dataset for your project in BigQuery.
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>project_id</strong> (<em>str</em>) – The name of the project where we want to create the dataset.
Not needed if projectId is specified in dataset_reference.</li>
<li><strong>dataset_id</strong> (<em>str</em>) – The id of the dataset. Not needed
if datasetId is specified in dataset_reference.</li>
| <li><strong>dataset_reference</strong> – Dataset reference that could be provided with request body. |
| More info: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource</a></li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerydeletedatasetoperator"> |
| <span id="id58"></span><h5>BigQueryDeleteDatasetOperator<a class="headerlink" href="#bigquerydeletedatasetoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryDeleteDatasetOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryDeleteDatasetOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryDeleteDatasetOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
<p>This operator deletes an existing dataset from your project in BigQuery.
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete">https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete</a></p>
<ul class="simple">
<li><strong>project_id</strong> (<em>string</em>) – The project id of the dataset.</li>
<li><strong>dataset_id</strong> (<em>string</em>) – The dataset to be deleted.</li>
</ul>
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">delete_temp_data</span> <span class="o">=</span> <span class="n">BigQueryDeleteDatasetOperator</span><span class="p">(</span><span class="n">dataset_id</span> <span class="o">=</span> <span class="s1">'temp-dataset'</span><span class="p">,</span> |
| <span class="n">project_id</span> <span class="o">=</span> <span class="s1">'temp-project'</span><span class="p">,</span> |
| <span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">'_my_gcp_conn_'</span><span class="p">,</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'Deletetemp'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerytabledeleteoperator"> |
| <span id="id59"></span><h5>BigQueryTableDeleteOperator<a class="headerlink" href="#bigquerytabledeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_table_delete_operator.</code><code class="descname">BigQueryTableDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_table_delete_operator.html#BigQueryTableDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_table_delete_operator.BigQueryTableDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Deletes BigQuery tables</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>deletion_dataset_table</strong> (<em>string</em>) – A dotted
(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; that indicates which table
will be deleted. (templated)</li>
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>ignore_if_missing</strong> (<em>boolean</em>) – if True, then return success even if the |
| requested table does not exist.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigqueryoperator"> |
| <span id="id60"></span><h5>BigQueryOperator<a class="headerlink" href="#bigqueryoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_operator.BigQueryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_operator.</code><code class="descname">BigQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_operator.html#BigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_operator.BigQueryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Executes BigQuery SQL queries in a specific BigQuery database</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bql</strong> (<em>Can receive a str representing a sql statement</em><em>, |
| </em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file. |
| Template reference are recognized by str ending in '.sql'.</em>) – (Deprecated. Use <cite>sql</cite> parameter instead) the sql code to be |
| executed (templated)</li> |
| <li><strong>sql</strong> (<em>Can receive a str representing a sql statement</em><em>, |
| </em><em>a list of str</em><em> (</em><em>sql statements</em><em>)</em><em>, or </em><em>reference to a template file. |
| Template reference are recognized by str ending in '.sql'.</em>) – the sql code to be executed (templated)</li> |
<li><strong>destination_dataset_table</strong> (<em>string</em>) – A dotted
(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt; that, if set, will store the results
of the query. (templated)</li>
| <li><strong>write_disposition</strong> (<em>string</em>) – Specifies the action that occurs if the destination table |
| already exists. (default: ‘WRITE_EMPTY’)</li> |
| <li><strong>create_disposition</strong> (<em>string</em>) – Specifies whether the job is allowed to create new tables. |
| (default: ‘CREATE_IF_NEEDED’)</li> |
| <li><strong>allow_large_results</strong> (<em>boolean</em>) – Whether to allow large results.</li> |
| <li><strong>flatten_results</strong> (<em>boolean</em>) – If true and query uses legacy SQL dialect, flattens |
| all nested and repeated fields in the query results. <code class="docutils literal notranslate"><span class="pre">allow_large_results</span></code> |
| must be <code class="docutils literal notranslate"><span class="pre">true</span></code> if this is set to <code class="docutils literal notranslate"><span class="pre">false</span></code>. For standard SQL queries, this |
| flag is ignored and results are never flattened.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>udf_config</strong> (<em>list</em>) – The User Defined Function configuration for the query. |
| See <a class="reference external" href="https://cloud.google.com/bigquery/user-defined-functions">https://cloud.google.com/bigquery/user-defined-functions</a> for details.</li> |
| <li><strong>use_legacy_sql</strong> (<em>boolean</em>) – Whether to use legacy SQL (true) or standard SQL (false).</li> |
| <li><strong>maximum_billing_tier</strong> (<em>integer</em>) – Positive integer that serves as a multiplier |
| of the basic price. |
| Defaults to None, in which case it uses the value set in the project.</li> |
| <li><strong>maximum_bytes_billed</strong> (<em>float</em>) – Limits the bytes billed for this job. |
| Queries that will have bytes billed beyond this limit will fail |
| (without incurring a charge). If unspecified, this will be |
| set to your project default.</li> |
| <li><strong>api_resource_configs</strong> (<em>dict</em>) – a dictionary that contain params |
| ‘configuration’ applied for Google BigQuery Jobs API: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs">https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs</a> |
| for example, {‘query’: {‘useQueryCache’: False}}. You could use it |
| if you need to provide some params that are not supported by BigQueryOperator |
| like args.</li> |
| <li><strong>schema_update_options</strong> (<em>tuple</em>) – Allows the schema of the destination |
| table to be updated as a side effect of the load job.</li> |
| <li><strong>query_params</strong> (<em>dict</em>) – a dictionary containing query parameter types and |
| values, passed to BigQuery.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query, |
| passed to BigQuery</li> |
| <li><strong>priority</strong> (<em>string</em>) – Specifies a priority for the query. |
| Possible values include INTERACTIVE and BATCH. |
| The default value is INTERACTIVE.</li> |
| <li><strong>time_partitioning</strong> (<em>dict</em>) – configure optional time partitioning fields i.e. |
| partition by field, type and expiration as per API specifications.</li> |
| <li><strong>cluster_fields</strong> (<em>list of str</em>) – Request that the result of this query be stored sorted |
| by one or more columns. This is only available in conjunction with |
| time_partitioning. The order of columns given determines the sort order.</li> |
| <li><strong>location</strong> (<em>str</em>) – The geographic location of the job. Required except for |
| US and EU. See details at |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/locations#specifying_your_location">https://cloud.google.com/bigquery/docs/locations#specifying_your_location</a></li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerytobigqueryoperator"> |
| <span id="id61"></span><h5>BigQueryToBigQueryOperator<a class="headerlink" href="#bigquerytobigqueryoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_to_bigquery.</code><code class="descname">BigQueryToBigQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_bigquery.html#BigQueryToBigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_bigquery.BigQueryToBigQueryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies data from one BigQuery table to another.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more details about these parameters: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy">https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>source_project_dataset_tables</strong> (<em>list|string</em>) – One or more
dotted (project:|project.)&lt;dataset&gt;.&lt;table&gt; BigQuery tables to use as the
source data. If &lt;project&gt; is not included, project will be the
project defined in the connection json. Use a list if there are multiple
source tables. (templated)</li>
<li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The destination BigQuery
table. Format is: (project:|project.)&lt;dataset&gt;.&lt;table&gt; (templated)</li>
| <li><strong>write_disposition</strong> (<em>string</em>) – The write disposition if the table already exists.</li> |
| <li><strong>create_disposition</strong> (<em>string</em>) – The create disposition if the table doesn’t exist.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query, |
| passed to BigQuery</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="bigquerytocloudstorageoperator"> |
| <span id="id66"></span><h5>BigQueryToCloudStorageOperator<a class="headerlink" href="#bigquerytocloudstorageoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.bigquery_to_gcs.</code><code class="descname">BigQueryToCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/bigquery_to_gcs.html#BigQueryToCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.bigquery_to_gcs.BigQueryToCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Transfers a BigQuery table to a Google Cloud Storage bucket.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more details about these parameters: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs">https://cloud.google.com/bigquery/docs/reference/v2/jobs</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>source_project_dataset_table</strong> (<em>string</em>) – The dotted
<code class="docutils literal notranslate"><span class="pre">(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt;</span></code> BigQuery table to use as the
source data. If &lt;project&gt; is not included, project will be the project
defined in the connection json. (templated)</li>
<li><strong>destination_cloud_storage_uris</strong> (<em>list</em>) – The destination Google Cloud
Storage URI (e.g. gs://some-bucket/some-file.txt). (templated) Follows
convention defined here:
<a class="reference external" href="https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple">https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple</a></li>
| <li><strong>compression</strong> (<em>string</em>) – Type of compression to use.</li> |
| <li><strong>export_format</strong> (<em>string</em>) – File format to export.</li> |
| <li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use when extracting to a CSV.</li> |
| <li><strong>print_header</strong> (<em>boolean</em>) – Whether to print a header for a CSV file extract.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – reference to a specific BigQuery hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – a dictionary containing labels for the job/query, |
| passed to BigQuery</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="bigqueryhook"> |
| <h4>BigQueryHook<a class="headerlink" href="#bigqueryhook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.bigquery_hook.</code><code class="descname">BigQueryHook</code><span class="sig-paren">(</span><em>bigquery_conn_id='bigquery_default'</em>, <em>delegate_to=None</em>, <em>use_legacy_sql=True</em>, <em>location=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a>, <a class="reference internal" href="code.html#airflow.hooks.dbapi_hook.DbApiHook" title="airflow.hooks.dbapi_hook.DbApiHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.dbapi_hook.DbApiHook</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Interact with BigQuery. This hook uses the Google Cloud Platform |
| connection.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a BigQuery PEP 249 connection object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df"> |
| <code class="descname">get_pandas_df</code><span class="sig-paren">(</span><em>sql</em>, <em>parameters=None</em>, <em>dialect=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_pandas_df"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Pandas DataFrame for the results produced by a BigQuery |
| query. The DbApiHook method must be overridden because Pandas |
| doesn’t support PEP 249 connections, except for SQLite. See:</p> |
| <p><a class="reference external" href="https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447">https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447</a> |
| <a class="reference external" href="https://github.com/pydata/pandas/issues/6900">https://github.com/pydata/pandas/issues/6900</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>string</em>) – The BigQuery SQL to execute.</li> |
| <li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – The parameters to render the SQL query with (not |
| used, leave to override superclass method)</li> |
| <li><strong>dialect</strong> (<em>string in {'legacy'</em><em>, </em><em>'standard'}</em>) – Dialect of BigQuery SQL – legacy SQL or standard SQL |
| defaults to use <cite>self.use_legacy_sql</cite> if not specified</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service"> |
| <code class="descname">get_service</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.get_service"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a BigQuery service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows"> |
| <code class="descname">insert_rows</code><span class="sig-paren">(</span><em>table</em>, <em>rows</em>, <em>target_fields=None</em>, <em>commit_every=1000</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.insert_rows"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Insertion is currently unsupported. Theoretically, you could use |
| BigQuery’s streaming API to insert rows into a table, but this hasn’t |
| been implemented.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists"> |
| <code class="descname">table_exists</code><span class="sig-paren">(</span><em>project_id</em>, <em>dataset_id</em>, <em>table_id</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/bigquery_hook.html#BigQueryHook.table_exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks for the existence of a table in Google BigQuery.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google cloud project in which to look for the |
| table. The connection supplied to the hook must provide access to |
| the specified project.</li> |
| <li><strong>dataset_id</strong> (<em>string</em>) – The name of the dataset in which to look for the |
| table.</li> |
| <li><strong>table_id</strong> (<em>string</em>) – The name of the table to check the existence of.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-spanner"> |
| <h3>Cloud Spanner<a class="headerlink" href="#cloud-spanner" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="cloud-spanner-operators"> |
| <h4>Cloud Spanner Operators<a class="headerlink" href="#cloud-spanner-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#cloudspannerinstancedatabasedeleteoperator"><span class="std std-ref">CloudSpannerInstanceDatabaseDeleteOperator</span></a> : deletes an existing database from |
| a Google Cloud Spanner instance or returns success if the database is missing.</li> |
| <li><a class="reference internal" href="#cloudspannerinstancedatabasedeployoperator"><span class="std std-ref">CloudSpannerInstanceDatabaseDeployOperator</span></a> : creates a new database in a Google |
| Cloud Spanner instance or returns success if the database already exists.</li> |
| <li><a class="reference internal" href="#cloudspannerinstancedatabaseupdateoperator"><span class="std std-ref">CloudSpannerInstanceDatabaseUpdateOperator</span></a> : updates the structure of a |
| Google Cloud Spanner database.</li> |
| <li><a class="reference internal" href="#cloudspannerinstancedatabasequeryoperator"><span class="std std-ref">CloudSpannerInstanceDatabaseQueryOperator</span></a> : executes an arbitrary DML query |
| (INSERT, UPDATE, DELETE).</li> |
| <li><a class="reference internal" href="#cloudspannerinstancedeployoperator"><span class="std std-ref">CloudSpannerInstanceDeployOperator</span></a> : creates a new Google Cloud Spanner instance, |
| or if an instance with the same name exists, updates the instance.</li> |
| <li><a class="reference internal" href="#cloudspannerinstancedeleteoperator"><span class="std std-ref">CloudSpannerInstanceDeleteOperator</span></a> : deletes a Google Cloud Spanner instance.</li> |
| </ul> |
| <div class="section" id="cloudspannerinstancedatabasedeleteoperator"> |
| <span id="id67"></span><h5>CloudSpannerInstanceDatabaseDeleteOperator<a class="headerlink" href="#cloudspannerinstancedatabasedeleteoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="cloudspannerinstancedatabasedeployoperator"> |
| <span id="id68"></span><h5>CloudSpannerInstanceDatabaseDeployOperator<a class="headerlink" href="#cloudspannerinstancedatabasedeployoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="cloudspannerinstancedatabaseupdateoperator"> |
| <span id="id69"></span><h5>CloudSpannerInstanceDatabaseUpdateOperator<a class="headerlink" href="#cloudspannerinstancedatabaseupdateoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="cloudspannerinstancedatabasequeryoperator"> |
| <span id="id70"></span><h5>CloudSpannerInstanceDatabaseQueryOperator<a class="headerlink" href="#cloudspannerinstancedatabasequeryoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="cloudspannerinstancedeployoperator"> |
| <span id="id71"></span><h5>CloudSpannerInstanceDeployOperator<a class="headerlink" href="#cloudspannerinstancedeployoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="cloudspannerinstancedeleteoperator"> |
| <span id="id72"></span><h5>CloudSpannerInstanceDeleteOperator<a class="headerlink" href="#cloudspannerinstancedeleteoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| </div> |
| <div class="section" id="cloudspannerhook"> |
| <h4>CloudSpannerHook<a class="headerlink" href="#cloudspannerhook" title="Permalink to this headline">¶</a></h4> |
| </div> |
| </div> |
| <div class="section" id="cloud-sql"> |
| <h3>Cloud SQL<a class="headerlink" href="#cloud-sql" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="cloud-sql-operators"> |
| <h4>Cloud SQL Operators<a class="headerlink" href="#cloud-sql-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#cloudsqlinstancedatabasedeleteoperator"><span class="std std-ref">CloudSqlInstanceDatabaseDeleteOperator</span></a> : deletes a database from a Cloud SQL |
| instance.</li> |
| <li><a class="reference internal" href="#cloudsqlinstancedatabasecreateoperator"><span class="std std-ref">CloudSqlInstanceDatabaseCreateOperator</span></a> : creates a new database inside a Cloud |
| SQL instance.</li> |
| <li><a class="reference internal" href="#cloudsqlinstancedatabasepatchoperator"><span class="std std-ref">CloudSqlInstanceDatabasePatchOperator</span></a> : updates a database inside a Cloud |
| SQL instance.</li> |
| <li><a class="reference internal" href="#cloudsqlinstancedeleteoperator"><span class="std std-ref">CloudSqlInstanceDeleteOperator</span></a> : deletes a Cloud SQL instance.</li> |
| <li><a class="reference internal" href="#cloudsqlinstanceexportoperator"><span class="std std-ref">CloudSqlInstanceExportOperator</span></a> : exports data from a Cloud SQL instance.</li> |
| <li><a class="reference internal" href="#cloudsqlinstanceimportoperator"><span class="std std-ref">CloudSqlInstanceImportOperator</span></a> : imports data into a Cloud SQL instance.</li> |
| <li><a class="reference internal" href="#cloudsqlinstancecreateoperator"><span class="std std-ref">CloudSqlInstanceCreateOperator</span></a> : creates a new Cloud SQL instance.</li> |
| <li><a class="reference internal" href="#cloudsqlinstancepatchoperator"><span class="std std-ref">CloudSqlInstancePatchOperator</span></a> : patches a Cloud SQL instance.</li> |
| <li><a class="reference internal" href="#cloudsqlqueryoperator"><span class="std std-ref">CloudSqlQueryOperator</span></a> : runs a query in a Cloud SQL instance.</li> |
| </ul> |
| <div class="section" id="cloudsqlinstancedatabasedeleteoperator"> |
| <span id="id73"></span><h5>CloudSqlInstanceDatabaseDeleteOperator<a class="headerlink" href="#cloudsqlinstancedatabasedeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabaseDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Deletes a database from a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>database</strong> (<em>str</em>) – Name of the database to be deleted in the instance.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstancedatabasecreateoperator"> |
| <span id="id74"></span><h5>CloudSqlInstanceDatabaseCreateOperator<a class="headerlink" href="#cloudsqlinstancedatabasecreateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabaseCreateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabaseCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabaseCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Creates a new database inside a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a></li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstancedatabasepatchoperator"> |
| <span id="id75"></span><h5>CloudSqlInstanceDatabasePatchOperator<a class="headerlink" href="#cloudsqlinstancedatabasepatchoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDatabasePatchOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDatabasePatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDatabasePatchOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Updates a resource containing information about a database inside a Cloud SQL |
| instance using patch semantics. |
| See: <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>database</strong> (<em>str</em>) – Name of the database to be updated in the instance.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body</a></li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstancedeleteoperator"> |
| <span id="id76"></span><h5>CloudSqlInstanceDeleteOperator<a class="headerlink" href="#cloudsqlinstancedeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Deletes a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstanceexportoperator"> |
| <span id="id77"></span><h5>CloudSqlInstanceExportOperator<a class="headerlink" href="#cloudsqlinstanceexportoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceExportOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceExportOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceExportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceExportOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Exports data from a Cloud SQL instance to a Cloud Storage bucket as a SQL dump |
| or CSV file.</p> |
| <p>Note: This operator is idempotent. If executed multiple times with the same |
| export file URI, the export file in GCS will simply be overridden.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body</a></li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstanceimportoperator"> |
| <span id="id78"></span><h5>CloudSqlInstanceImportOperator<a class="headerlink" href="#cloudsqlinstanceimportoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceImportOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceImportOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceImportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceImportOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Imports data into a Cloud SQL instance from a SQL dump or CSV file in Cloud Storage.</p> |
| <p>CSV IMPORT:</p> |
| <p>This operator is NOT idempotent for a CSV import. If the same file is imported |
| multiple times, the imported data will be duplicated in the database. |
| Moreover, if there are any unique constraints the duplicate import may result in an |
| error.</p> |
| <p>SQL IMPORT:</p> |
| <p>This operator is idempotent for a SQL import if it was also exported by Cloud SQL. |
| The exported SQL contains ‘DROP TABLE IF EXISTS’ statements for all tables |
| to be imported.</p> |
| <p>If the import file was generated in a different way, idempotence is not guaranteed. |
| It has to be ensured on the SQL file level.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/import#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/import#request-body</a></li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – Whether the body should be validated. Defaults to True.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstancecreateoperator"> |
| <span id="id79"></span><h5>CloudSqlInstanceCreateOperator<a class="headerlink" href="#cloudsqlinstancecreateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstanceCreateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstanceCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstanceCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Creates a new Cloud SQL instance. |
| If an instance with the same name exists, no action will be taken and |
| the operator will succeed.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL insert API, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body</a></li> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – True if body should be validated, False otherwise.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlinstancepatchoperator"> |
| <span id="id80"></span><h5>CloudSqlInstancePatchOperator<a class="headerlink" href="#cloudsqlinstancepatchoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlInstancePatchOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlInstancePatchOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlInstancePatchOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_sql_operator.CloudSqlBaseOperator</span></code></p> |
| <p>Updates settings of a Cloud SQL instance.</p> |
| <p>Caution: This is a partial update, so only included values for the settings will be |
| updated.</p> |
| <p>In the request body, supply the relevant portions of an instance resource, according |
| to the rules of patch semantics. |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL patch API, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body</a></li> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (e.g. v1beta4).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="cloudsqlqueryoperator"> |
| <span id="id81"></span><h5>CloudSqlQueryOperator<a class="headerlink" href="#cloudsqlqueryoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_sql_operator.CloudSqlQueryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_sql_operator.</code><code class="descname">CloudSqlQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_sql_operator.html#CloudSqlQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_sql_operator.CloudSqlQueryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Performs DML or DDL query on an existing Cloud Sql instance. It optionally uses |
| cloud-sql-proxy to establish secure connection with the database.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>sql</strong> (<em>str</em><em> or </em><em>[</em><em>str</em><em>]</em>) – SQL query or list of queries to run (should be a DML or DDL query - |
| this operator does not return any data from the database, |
| so it is useless to pass it DQL queries). Note that it is the responsibility of the |
| author of the queries to make sure that the queries are idempotent. For example, |
| you can use CREATE TABLE IF NOT EXISTS to create a table.</li> |
| <li><strong>parameters</strong> (<em>mapping</em><em> or </em><em>iterable</em>) – (optional) the parameters to render the SQL query with.</li> |
| <li><strong>autocommit</strong> (<em>bool</em>) – if True, each command is automatically committed. |
| (default value: False)</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud Platform for |
| cloud-sql-proxy authentication.</li> |
| <li><strong>gcp_cloudsql_conn_id</strong> (<em>str</em>) – The connection ID used to connect to Google Cloud SQL; |
| its schema should be gcpcloudsql://. |
| See <code class="xref py py-class docutils literal notranslate"><span class="pre">CloudSqlDatabaseHook</span></code> for |
| details on how to define gcpcloudsql:// connection.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-sql-hooks"> |
| <h4>Cloud SQL Hooks<a class="headerlink" href="#cloud-sql-hooks" title="Permalink to this headline">¶</a></h4> |
| <span class="target" id="cloudsqlhook"></span><dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_sql_hook.</code><code class="descname">CloudSqlHook</code><span class="sig-paren">(</span><em>api_version</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Hook for Google Cloud SQL APIs.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_database"> |
| <code class="descname">create_database</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.create_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new database inside a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body</a>.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set |
| to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_instance"> |
| <code class="descname">create_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.create_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.create_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL insert API, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body</a>.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set |
| to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_database"> |
| <code class="descname">delete_database</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.delete_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes a database from a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>database</strong> (<em>str</em>) – Name of the database to be deleted in the instance.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set |
| to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_instance"> |
| <code class="descname">delete_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.delete_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.delete_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set |
| to None or missing, the default project_id from the GCP connection is used.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.export_instance"> |
| <code class="descname">export_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.export_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.export_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Exports data from a Cloud SQL instance to a Cloud Storage bucket as a SQL dump |
| or CSV file.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID of the Cloud SQL instance. This does not include the |
| project ID.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body</a></li> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set |
| to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves connection to Cloud SQL.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Cloud SQL services object.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_database"> |
| <code class="descname">get_database</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves a database resource from a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>database</strong> (<em>str</em>) – Name of the database in the instance.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set |
| to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A Cloud SQL database resource, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource</a>.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_instance"> |
| <code class="descname">get_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.get_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.get_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves a resource containing information about a Cloud SQL instance.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set |
| to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A Cloud SQL instance resource.</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.import_instance"> |
| <code class="descname">import_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.import_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.import_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Imports data into a Cloud SQL instance from a SQL dump or CSV file in |
| Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the |
| project ID.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/import#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/import#request-body</a></li> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set |
| to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_database"> |
| <code class="descname">patch_database</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.patch_database"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_database" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Updates a database resource inside a Cloud SQL instance.</p> |
| <p>This method supports patch semantics. |
| See <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch">https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch</a>.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>instance</strong> (<em>str</em>) – Database instance ID. This does not include the project ID.</li> |
| <li><strong>database</strong> (<em>str</em>) – Name of the database to be updated in the instance.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The request body, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body</a>.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set |
| to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_instance"> |
| <code class="descname">patch_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlHook.patch_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook.patch_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Updates settings of a Cloud SQL instance.</p> |
| <p>Caution: This is a partial update, so only values for the settings included |
| in the body will be updated.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Cloud SQL patch API, as described in |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body">https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body</a>.</li> |
| <li><strong>instance</strong> (<em>str</em>) – Cloud SQL instance ID. This does not include the project ID.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Project ID of the project that contains the instance. If set |
| to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <span class="target" id="cloudsqldatabasehook"></span><dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_sql_hook.</code><code class="descname">CloudSqlDatabaseHook</code><span class="sig-paren">(</span><em>gcp_cloudsql_conn_id='google_cloud_sql_default'</em>, <em>default_gcp_project_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.hooks.base_hook.BaseHook</span></code></p> |
| <p>Serves DB connection configuration for Google Cloud SQL (Connections |
| of <em>gcpcloudsql://</em> type).</p> |
| <p>The hook is a “meta” one. It does not perform an actual connection. |
| It is there to retrieve all the parameters configured in the gcpcloudsql:// connection, |
| start/stop the Cloud SQL Proxy if needed, dynamically generate a Postgres or MySQL |
| connection in the database, and return an actual Postgres or MySQL hook. |
| The returned Postgres/MySQL hooks are using direct connection or Cloud SQL |
| Proxy socket/TCP as configured.</p> |
| <p>Main parameters of the hook are retrieved from the standard URI components:</p> |
| <ul class="simple"> |
| <li><strong>user</strong> - User name to authenticate to the database (from login of the URI).</li> |
| <li><strong>password</strong> - Password to authenticate to the database (from password of the URI).</li> |
| <li><strong>public_ip</strong> - IP to connect to for public connection (from host of the URI).</li> |
| <li><strong>public_port</strong> - Port to connect to for public connection (from port of the URI).</li> |
| <li><strong>database</strong> - Database to connect to (from schema of the URI).</li> |
| </ul> |
| <p>Remaining parameters are retrieved from the extras (URI query parameters):</p> |
| <ul class="simple"> |
| <li><dl class="first docutils"> |
| <dt><strong>project_id</strong> - Optional, Google Cloud Platform project where the Cloud SQL</dt> |
| <dd>instance exists. If missing, the default project id passed to the hook is used.</dd> |
| </dl> |
| </li> |
| <li><strong>instance</strong> - Name of the instance of the Cloud SQL database instance.</li> |
| <li><strong>location</strong> - The location of the Cloud SQL instance (for example europe-west1).</li> |
| <li><strong>database_type</strong> - The type of the database instance (MySQL or Postgres).</li> |
| <li><strong>use_proxy</strong> - (default False) Whether SQL proxy should be used to connect to Cloud |
| SQL DB.</li> |
| <li><strong>use_ssl</strong> - (default False) Whether SSL should be used to connect to Cloud SQL DB. |
| You cannot use proxy and SSL together.</li> |
| <li><strong>sql_proxy_use_tcp</strong> - (default False) If set to true, TCP is used to connect via |
| proxy, otherwise UNIX sockets are used.</li> |
| <li><strong>sql_proxy_binary_path</strong> - Optional path to Cloud SQL Proxy binary. If the binary |
| is not specified or the binary is not present, it is automatically downloaded.</li> |
| <li><strong>sql_proxy_version</strong> - Specific version of the proxy to download (for example |
| v1.13). If not specified, the latest version is downloaded.</li> |
| <li><strong>sslcert</strong> - Path to client certificate to authenticate when SSL is used.</li> |
| <li><strong>sslkey</strong> - Path to client private key to authenticate when SSL is used.</li> |
| <li><strong>sslrootcert</strong> - Path to server’s certificate to authenticate when SSL is used.</li> |
| </ul> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>gcp_cloudsql_conn_id</strong> (<em>str</em>) – ID of the gcpcloudsql:// connection</li> |
| <li><strong>default_gcp_project_id</strong> (<em>str</em>) – Default project id used if project_id not specified |
| in the connection URL</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.cleanup_database_hook"> |
| <code class="descname">cleanup_database_hook</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.cleanup_database_hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.cleanup_database_hook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Clean up database hook after it was used.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.create_connection"> |
| <code class="descname">create_connection</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.create_connection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.create_connection" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a connection in the Connection table, according to whether it uses |
| proxy, TCP, UNIX sockets or SSL. The connection ID will be randomly generated.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>session</strong> – Session of the SQL Alchemy ORM (automatically generated with |
| decorator).</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.delete_connection"> |
| <code class="descname">delete_connection</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.delete_connection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.delete_connection" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete the dynamically created connection from the Connection table.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>session</strong> – Session of the SQL Alchemy ORM (automatically generated with |
| decorator).</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.free_reserved_port"> |
| <code class="descname">free_reserved_port</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.free_reserved_port"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.free_reserved_port" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Free TCP port. Makes it immediately ready to be used by Cloud SQL Proxy.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_database_hook"> |
| <code class="descname">get_database_hook</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.get_database_hook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_database_hook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieve database hook. This is the actual Postgres or MySQL database hook |
| that uses proxy or connects directly to the Google Cloud SQL database.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_sqlproxy_runner"> |
| <code class="descname">get_sqlproxy_runner</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.get_sqlproxy_runner"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_sqlproxy_runner" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieve Cloud SQL Proxy runner. It is used to manage the proxy |
| lifecycle per task.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">The Cloud SQL Proxy runner.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner" title="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner">CloudSqlProxyRunner</a></td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.reserve_free_tcp_port"> |
| <code class="descname">reserve_free_tcp_port</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.reserve_free_tcp_port"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.reserve_free_tcp_port" title="Permalink to this definition">¶</a></dt> |
<dd><p>Reserve free TCP port to be used by Cloud SQL Proxy.</p>
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.retrieve_connection"> |
| <code class="descname">retrieve_connection</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlDatabaseHook.retrieve_connection"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.retrieve_connection" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves the dynamically created connection from the Connection table.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>session</strong> – Session of the SQL Alchemy ORM (automatically generated with |
| decorator).</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <span class="target" id="cloudsqlproxyrunner"></span><dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_sql_hook.</code><code class="descname">CloudSqlProxyRunner</code><span class="sig-paren">(</span><em>path_prefix</em>, <em>instance_specification</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>project_id=None</em>, <em>sql_proxy_version=None</em>, <em>sql_proxy_binary_path=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlProxyRunner"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.utils.log.logging_mixin.LoggingMixin</span></code></p> |
| <p>Downloads and runs cloud-sql-proxy as subprocess of the Python process.</p> |
| <p>The cloud-sql-proxy needs to be downloaded and started before we can connect |
| to the Google Cloud SQL instance via database connection. It establishes |
| secure tunnel connection to the database. It authorizes using the |
| GCP credentials that are passed by the configuration.</p> |
| <p>More details about the proxy can be found here: |
| <a class="reference external" href="https://cloud.google.com/sql/docs/mysql/sql-proxy">https://cloud.google.com/sql/docs/mysql/sql-proxy</a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.get_proxy_version"> |
| <code class="descname">get_proxy_version</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlProxyRunner.get_proxy_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.get_proxy_version" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns version of the Cloud SQL Proxy.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.get_socket_path"> |
| <code class="descname">get_socket_path</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlProxyRunner.get_socket_path"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.get_socket_path" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves UNIX socket path used by Cloud SQL Proxy.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">The dynamically generated path for the socket created by the proxy.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">str</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.start_proxy"> |
| <code class="descname">start_proxy</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlProxyRunner.start_proxy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.start_proxy" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Starts Cloud SQL Proxy.</p> |
| <p>You have to remember to stop the proxy if you started it!</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.stop_proxy"> |
| <code class="descname">stop_proxy</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_sql_hook.html#CloudSqlProxyRunner.stop_proxy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_sql_hook.CloudSqlProxyRunner.stop_proxy" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Stops running proxy.</p> |
| <p>You should stop the proxy after you stop using it.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-bigtable"> |
| <h3>Cloud Bigtable<a class="headerlink" href="#cloud-bigtable" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="cloud-bigtable-operators"> |
| <h4>Cloud Bigtable Operators<a class="headerlink" href="#cloud-bigtable-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
<li><a class="reference internal" href="#bigtableinstancecreateoperator"><span class="std std-ref">BigtableInstanceCreateOperator</span></a> : creates a Google Cloud Bigtable instance.</li>
| <li><a class="reference internal" href="#bigtableinstancedeleteoperator"><span class="std std-ref">BigtableInstanceDeleteOperator</span></a> : deletes a Google Cloud Bigtable instance.</li> |
| <li><a class="reference internal" href="#bigtableclusterupdateoperator"><span class="std std-ref">BigtableClusterUpdateOperator</span></a> : updates the number of nodes in a Google Cloud Bigtable cluster.</li> |
| <li><a class="reference internal" href="#bigtabletablecreateoperator"><span class="std std-ref">BigtableTableCreateOperator</span></a> : creates a table in a Google Cloud Bigtable instance.</li> |
| <li><a class="reference internal" href="#bigtabletabledeleteoperator"><span class="std std-ref">BigtableTableDeleteOperator</span></a> : deletes a table in a Google Cloud Bigtable instance.</li> |
| <li><a class="reference internal" href="#bigtabletablewaitforreplicationsensor"><span class="std std-ref">BigtableTableWaitForReplicationSensor</span></a> : (sensor) waits for a table to be fully replicated.</li> |
| </ul> |
| <div class="section" id="bigtableinstancecreateoperator"> |
| <span id="id82"></span><h5>BigtableInstanceCreateOperator<a class="headerlink" href="#bigtableinstancecreateoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="bigtableinstancedeleteoperator"> |
| <span id="id83"></span><h5>BigtableInstanceDeleteOperator<a class="headerlink" href="#bigtableinstancedeleteoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="bigtableclusterupdateoperator"> |
| <span id="id84"></span><h5>BigtableClusterUpdateOperator<a class="headerlink" href="#bigtableclusterupdateoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="bigtabletablecreateoperator"> |
| <span id="id85"></span><h5>BigtableTableCreateOperator<a class="headerlink" href="#bigtabletablecreateoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="bigtabletabledeleteoperator"> |
| <span id="id86"></span><h5>BigtableTableDeleteOperator<a class="headerlink" href="#bigtabletabledeleteoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="bigtabletablewaitforreplicationsensor"> |
| <span id="id87"></span><h5>BigtableTableWaitForReplicationSensor<a class="headerlink" href="#bigtabletablewaitforreplicationsensor" title="Permalink to this headline">¶</a></h5> |
| </div> |
| </div> |
| <div class="section" id="cloud-bigtable-hook"> |
| <span id="bigtablehook"></span><h4>Cloud Bigtable Hook<a class="headerlink" href="#cloud-bigtable-hook" title="Permalink to this headline">¶</a></h4> |
| </div> |
| </div> |
| <div class="section" id="compute-engine"> |
| <h3>Compute Engine<a class="headerlink" href="#compute-engine" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="compute-engine-operators"> |
| <h4>Compute Engine Operators<a class="headerlink" href="#compute-engine-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#gceinstancestartoperator"><span class="std std-ref">GceInstanceStartOperator</span></a> : start an existing Google Compute Engine instance.</li> |
| <li><a class="reference internal" href="#gceinstancestopoperator"><span class="std std-ref">GceInstanceStopOperator</span></a> : stop an existing Google Compute Engine instance.</li> |
| <li><a class="reference internal" href="#gcesetmachinetypeoperator"><span class="std std-ref">GceSetMachineTypeOperator</span></a> : change the machine type for a stopped instance.</li> |
| <li><a class="reference internal" href="#gceinstancetemplatecopyoperator"><span class="std std-ref">GceInstanceTemplateCopyOperator</span></a> : copy the Instance Template, applying |
| specified changes.</li> |
| <li><a class="reference internal" href="#gceinstancegroupmanagerupdatetemplateoperator"><span class="std std-ref">GceInstanceGroupManagerUpdateTemplateOperator</span></a> : patch the Instance Group Manager, |
| replacing source Instance Template URL with the destination one.</li> |
| </ul> |
| <p>The operators have the common base operator:</p> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceBaseOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceBaseOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Abstract base operator for Google Compute Engine operators to inherit from.</p> |
| </dd></dl> |
| |
| <p>They also use <a class="reference internal" href="#gcehook"><span class="std std-ref">Compute Engine Hook</span></a> to communicate with Google Cloud Platform.</p> |
| <div class="section" id="gceinstancestartoperator"> |
| <span id="id88"></span><h5>GceInstanceStartOperator<a class="headerlink" href="#gceinstancestartoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceStartOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStartOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStartOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></a></p> |
| <p>Starts an instance in Google Compute Engine.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID where the Compute |
| Engine Instance exists. If set to None or missing, the default project_id from the GCP connection is |
| used.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional, The connection ID used to connect to Google Cloud |
| Platform. Defaults to ‘google_cloud_default’.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – Optional, API version used (for example v1 - or beta). Defaults |
| to v1.</li> |
| <li><strong>validate_body</strong> – Optional, If set to False, body validation is not performed. |
| Defaults to False.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="gceinstancestopoperator"> |
| <span id="id89"></span><h5>GceInstanceStopOperator<a class="headerlink" href="#gceinstancestopoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceStopOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceStopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceStopOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></a></p> |
| <p>Stops an instance in Google Compute Engine.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID where the Compute |
| Engine Instance exists. If set to None or missing, the default project_id from the GCP connection is |
| used.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional, The connection ID used to connect to Google Cloud |
| Platform. Defaults to ‘google_cloud_default’.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – Optional, API version used (for example v1 - or beta). Defaults |
| to v1.</li> |
| <li><strong>validate_body</strong> – Optional, If set to False, body validation is not performed. |
| Defaults to False.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="gcesetmachinetypeoperator"> |
| <span id="id90"></span><h5>GceSetMachineTypeOperator<a class="headerlink" href="#gcesetmachinetypeoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceSetMachineTypeOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceSetMachineTypeOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceSetMachineTypeOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></a></p> |
<p>Changes the machine type for a stopped instance to the machine type
specified in the request.</p>
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource.</li> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Compute Engine setMachineType API, as described in |
| <a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType#request-body">https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType#request-body</a></li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID where the Compute |
| Engine Instance exists. If set to None or missing, the default project_id from the GCP connection |
| is used.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional, The connection ID used to connect to Google Cloud |
| Platform. Defaults to ‘google_cloud_default’.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – Optional, API version used (for example v1 - or beta). Defaults |
| to v1.</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – Optional, If set to False, body validation is not performed. |
| Defaults to False.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="gceinstancetemplatecopyoperator"> |
| <span id="id91"></span><h5>GceInstanceTemplateCopyOperator<a class="headerlink" href="#gceinstancetemplatecopyoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceTemplateCopyOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceTemplateCopyOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceTemplateCopyOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceTemplateCopyOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></a></p> |
| <p>Copies the instance template, applying specified changes.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Instance Template</li> |
| <li><strong>body_patch</strong> (<em>dict</em>) – Patch to the body of instanceTemplates object following rfc7386 |
| PATCH semantics. The body_patch content follows |
| <a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates">https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates</a> |
| Name field is required as we need to rename the template, |
| all the other fields are optional. It is important to follow PATCH semantics |
| - arrays are replaced fully, so if you need to update an array you should |
| provide the whole target array as patch element.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID where the Compute |
| Engine Instance exists. If set to None or missing, the default project_id from the GCP connection |
| is used.</li> |
| <li><strong>request_id</strong> (<em>str</em>) – Optional, unique request_id that you might add to achieve |
| full idempotence (for example when client call times out repeating the request |
| with the same request id will not create a new instance template again). |
| It should be in UUID format as defined in RFC 4122.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional, The connection ID used to connect to Google Cloud |
| Platform. Defaults to ‘google_cloud_default’.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – Optional, API version used (for example v1 - or beta). Defaults |
| to v1.</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – Optional, If set to False, body validation is not performed. |
| Defaults to False.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="gceinstancegroupmanagerupdatetemplateoperator"> |
| <span id="id92"></span><h5>GceInstanceGroupManagerUpdateTemplateOperator<a class="headerlink" href="#gceinstancegroupmanagerupdatetemplateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_compute_operator.GceInstanceGroupManagerUpdateTemplateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_compute_operator.</code><code class="descname">GceInstanceGroupManagerUpdateTemplateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_compute_operator.html#GceInstanceGroupManagerUpdateTemplateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_compute_operator.GceInstanceGroupManagerUpdateTemplateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.gcp_compute_operator.GceBaseOperator" title="airflow.contrib.operators.gcp_compute_operator.GceBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.gcp_compute_operator.GceBaseOperator</span></code></a></p> |
| <p>Patches the Instance Group Manager, replacing source template URL with the |
| destination one. API V1 does not have update/patch operations for Instance |
| Group Manager, so you must use beta or newer API version. Beta is the default.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Instance Group Manager</li> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the Instance Group Manager exists.</li> |
| <li><strong>source_template</strong> (<em>str</em>) – URL of the template to replace.</li> |
| <li><strong>destination_template</strong> (<em>str</em>) – URL of the target template.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform Project ID where the Compute |
| Engine Instance exists. If set to None or missing, the default project_id from the GCP connection is |
| used.</li> |
| <li><strong>request_id</strong> (<em>str</em>) – Optional, unique request_id that you might add to achieve |
| full idempotence (for example when client call times out repeating the request |
| with the same request id will not create a new instance template again). |
| It should be in UUID format as defined in RFC 4122.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional, The connection ID used to connect to Google Cloud |
| Platform. Defaults to ‘google_cloud_default’.</li> |
<li><strong>api_version</strong> (<em>str</em>) – Optional, API version used (for example beta). Defaults
to beta, as API v1 does not support update/patch operations for the Instance Group Manager.</li>
| <li><strong>validate_body</strong> (<em>bool</em>) – Optional, If set to False, body validation is not performed. |
| Defaults to False.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="compute-engine-hook"> |
| <span id="gcehook"></span><h4>Compute Engine Hook<a class="headerlink" href="#compute-engine-hook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_compute_hook.</code><code class="descname">GceHook</code><span class="sig-paren">(</span><em>api_version='v1'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Hook for Google Compute Engine APIs.</p> |
| <p>All the methods in the hook where project_id is used must be called with |
| keyword arguments rather than positional.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves connection to Google Compute Engine.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Compute Engine services object</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.get_instance_group_manager"> |
| <code class="descname">get_instance_group_manager</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.get_instance_group_manager"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.get_instance_group_manager" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves Instance Group Manager by project_id, zone and resource_id. |
| Must be called with keyword arguments rather than positional.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the Instance Group Manager exists</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Instance Group Manager</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the |
| Compute Engine Instance exists. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Instance group manager representation as object according to |
| <a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/beta/instanceGroupManagers">https://cloud.google.com/compute/docs/reference/rest/beta/instanceGroupManagers</a></p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.get_instance_template"> |
| <code class="descname">get_instance_template</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.get_instance_template"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.get_instance_template" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves instance template by project_id and resource_id. |
| Must be called with keyword arguments rather than positional.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the instance template</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the |
| Compute Engine Instance exists. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Instance template representation as object according to |
| <a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates">https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates</a></p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.insert_instance_template"> |
| <code class="descname">insert_instance_template</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.insert_instance_template"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.insert_instance_template" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Inserts instance template using the body specified. |
| Must be called with keyword arguments rather than positional.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>body</strong> (<em>dict</em>) – Instance template representation as object according to |
| <a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates">https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates</a></li> |
| <li><strong>request_id</strong> (<em>str</em>) – Optional, unique request_id that you might add to achieve |
| full idempotence (for example when client call times out repeating the request |
| with the same request id will not create a new instance template again) |
| It should be in UUID format as defined in RFC 4122</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the |
| Compute Engine Instance exists. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.patch_instance_group_manager"> |
| <code class="descname">patch_instance_group_manager</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.patch_instance_group_manager"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.patch_instance_group_manager" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Patches Instance Group Manager with the specified body. |
| Must be called with keyword arguments rather than positional.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the Instance Group Manager exists</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Instance Group Manager</li> |
| <li><strong>body</strong> (<em>dict</em>) – Instance Group Manager representation as json-merge-patch object |
| according to |
| <a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/beta/instanceGroupManagers/patch">https://cloud.google.com/compute/docs/reference/rest/beta/instanceGroupManagers/patch</a></li> |
| <li><strong>request_id</strong> (<em>str</em>) – Optional, unique request_id that you might add to achieve |
| full idempotence (for example when client call times out repeating the request |
| with the same request id will not patch the Instance Group Manager again). |
| It should be in UUID format as defined in RFC 4122</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the |
| Compute Engine Instance exists. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>Returns: None</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.set_machine_type"> |
| <code class="descname">set_machine_type</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.set_machine_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.set_machine_type" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets machine type of an instance defined by project_id, zone and resource_id. |
| Must be called with keyword arguments rather than positional.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists.</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource</li> |
| <li><strong>body</strong> (<em>dict</em>) – Body required by the Compute Engine setMachineType API, |
| as described in |
| <a class="reference external" href="https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType">https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType</a></li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the |
| Compute Engine Instance exists. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.start_instance"> |
| <code class="descname">start_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.start_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.start_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Starts an existing instance defined by project_id, zone and resource_id. |
| Must be called with keyword arguments rather than positional.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the |
| Compute Engine Instance exists. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_compute_hook.GceHook.stop_instance"> |
| <code class="descname">stop_instance</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_compute_hook.html#GceHook.stop_instance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_compute_hook.GceHook.stop_instance" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Stops an instance defined by project_id, zone and resource_id. |
| Must be called with keyword arguments rather than positional.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>zone</strong> (<em>str</em>) – Google Cloud Platform zone where the instance exists</li> |
| <li><strong>resource_id</strong> (<em>str</em>) – Name of the Compute Engine instance resource</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Platform project ID where the |
| Compute Engine Instance exists. If set to None or missing, |
| the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">members:</th><td class="field-body"></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| <div class="section" id="cloud-functions"> |
| <h3>Cloud Functions<a class="headerlink" href="#cloud-functions" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="cloud-functions-operators"> |
| <h4>Cloud Functions Operators<a class="headerlink" href="#cloud-functions-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#gcffunctiondeployoperator"><span class="std std-ref">GcfFunctionDeployOperator</span></a> : deploy Google Cloud Function to Google Cloud Platform</li> |
| <li><a class="reference internal" href="#gcffunctiondeleteoperator"><span class="std std-ref">GcfFunctionDeleteOperator</span></a> : delete Google Cloud Function in Google Cloud Platform</li> |
| </ul> |
| <p>They also use <a class="reference internal" href="#gcfhook"><span class="std std-ref">Cloud Functions Hook</span></a> to communicate with Google Cloud Platform.</p> |
| <div class="section" id="gcffunctiondeployoperator"> |
| <span id="id93"></span><h5>GcfFunctionDeployOperator<a class="headerlink" href="#gcffunctiondeployoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_function_operator.</code><code class="descname">GcfFunctionDeployOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeployOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeployOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a function in Google Cloud Functions. |
| If a function with this name already exists, it will be updated.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>location</strong> (<em>str</em>) – Google Cloud Platform region where the function should be created.</li> |
| <li><strong>body</strong> (<em>dict</em><em> or </em><em>google.cloud.functions.v1.CloudFunction</em>) – Body of the Cloud Functions definition. The body must be a |
| Cloud Functions dictionary as described in: |
| <a class="reference external" href="https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions">https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions</a> |
| . Different API versions require different variants of the Cloud Functions |
| dictionary.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – (Optional) Google Cloud Platform project ID where the function |
| should be created.</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – (Optional) The connection ID used to connect to Google Cloud |
| Platform - default ‘google_cloud_default’.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – (Optional) API version used (for example v1 - default - or |
| v1beta1).</li> |
| <li><strong>zip_path</strong> (<em>str</em>) – Path to zip file containing source code of the function. If the path |
| is set, the sourceUploadUrl should not be specified in the body or it should |
| be empty. Then the zip file will be uploaded using the upload URL generated |
| via generateUploadUrl from the Cloud Functions API.</li> |
| <li><strong>validate_body</strong> (<em>bool</em>) – If set to False, body validation is not performed.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="gcffunctiondeleteoperator"> |
| <span id="id94"></span><h5>GcfFunctionDeleteOperator<a class="headerlink" href="#gcffunctiondeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcp_function_operator.</code><code class="descname">GcfFunctionDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcp_function_operator.html#GcfFunctionDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcp_function_operator.GcfFunctionDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Deletes the specified function from Google Cloud Functions.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>name</strong> (<em>str</em>) – A fully-qualified function name, matching |
| the pattern: <cite>^projects/[^/]+/locations/[^/]+/functions/[^/]+$</cite></li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – The connection ID to use to connect to Google Cloud Platform.</li> |
| <li><strong>api_version</strong> (<em>str</em>) – API version used (for example v1 or v1beta1).</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-functions-hook"> |
| <span id="gcfhook"></span><h4>Cloud Functions Hook<a class="headerlink" href="#cloud-functions-hook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_function_hook.</code><code class="descname">GcfHook</code><span class="sig-paren">(</span><em>api_version</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Hook for the Google Cloud Functions APIs.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.create_new_function"> |
| <code class="descname">create_new_function</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.create_new_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.create_new_function" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new function in Cloud Function in the location specified in the body.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>location</strong> (<em>str</em>) – The location of the function.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The body required by the Cloud Functions insert API.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Project project_id where the function belongs. |
| If set to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.delete_function"> |
| <code class="descname">delete_function</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.delete_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.delete_function" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes the specified Cloud Function.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – The name of the function.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">None</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves the connection to Cloud Functions.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Cloud Function services object.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.get_function"> |
| <code class="descname">get_function</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.get_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.get_function" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the Cloud Function with the given name.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<em>str</em>) – Name of the function.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A Cloud Functions object representing the function.</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.update_function"> |
| <code class="descname">update_function</code><span class="sig-paren">(</span><em>name</em>, <em>body</em>, <em>update_mask</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.update_function"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.update_function" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Updates Cloud Functions according to the specified update mask.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>name</strong> (<em>str</em>) – The name of the function.</li> |
| <li><strong>body</strong> (<em>dict</em>) – The body required by the cloud function patch API.</li> |
| <li><strong>update_mask</strong> (<em>[</em><em>str</em><em>]</em>) – The update mask - array of fields that should be patched.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">None</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_function_hook.GcfHook.upload_function_zip"> |
| <code class="descname">upload_function_zip</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_function_hook.html#GcfHook.upload_function_zip"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_function_hook.GcfHook.upload_function_zip" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Uploads zip file with sources.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>location</strong> (<em>str</em>) – The location where the function is created.</li> |
| <li><strong>zip_path</strong> (<em>str</em>) – The path of the valid .zip file to upload.</li> |
| <li><strong>project_id</strong> (<em>str</em>) – Optional, Google Cloud Project project_id where the function belongs. |
| If set to None or missing, the default project_id from the GCP connection is used.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The upload URL that was returned by generateUploadUrl method.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-dataflow"> |
| <h3>Cloud DataFlow<a class="headerlink" href="#cloud-dataflow" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="dataflow-operators"> |
| <h4>DataFlow Operators<a class="headerlink" href="#dataflow-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#dataflowjavaoperator"><span class="std std-ref">DataFlowJavaOperator</span></a> : launching Cloud Dataflow jobs written in Java.</li> |
| <li><a class="reference internal" href="#dataflowtemplateoperator"><span class="std std-ref">DataflowTemplateOperator</span></a> : launching a templated Cloud DataFlow batch job.</li> |
| <li><a class="reference internal" href="#dataflowpythonoperator"><span class="std std-ref">DataFlowPythonOperator</span></a> : launching Cloud Dataflow jobs written in python.</li> |
| </ul> |
| <div class="section" id="dataflowjavaoperator"> |
| <span id="id95"></span><h5>DataFlowJavaOperator<a class="headerlink" href="#dataflowjavaoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataFlowJavaOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowJavaOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowJavaOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Java Cloud DataFlow batch job. The parameters of the operation |
| will be passed to the job.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more detail on job submission have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataflow/pipelines/specifying-exec-params">https://cloud.google.com/dataflow/pipelines/specifying-exec-params</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>jar</strong> (<em>string</em>) – The reference to a self executing DataFlow jar.</li> |
| <li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job options.</li> |
| <li><strong>options</strong> (<em>dict</em>) – Map of job specific options.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud |
| Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google |
| Cloud Platform for the dataflow job status while the job is in the |
| JOB_STATE_RUNNING state.</li> |
| <li><strong>job_class</strong> (<em>string</em>) – The name of the dataflow job class to be executed, it |
| is often not the main class configured in the dataflow jar file.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>Both <code class="docutils literal notranslate"><span class="pre">jar</span></code> and <code class="docutils literal notranslate"><span class="pre">options</span></code> are templated so you can use variables in them.</p> |
| <p>Note that both |
| <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> and <code class="docutils literal notranslate"><span class="pre">options</span></code> will be merged to specify pipeline |
| execution parameter, and <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> is expected to save |
| high-level options, for instance, project and zone information, which |
| apply to all dataflow operators in the DAG.</p> |
| <p>It’s a good practice to define dataflow_* parameters in the default_args of the dag |
| like the project, zone and staging location.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'dataflow_default_options'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'project'</span><span class="p">:</span> <span class="s1">'my-gcp-project'</span><span class="p">,</span> |
| <span class="s1">'zone'</span><span class="p">:</span> <span class="s1">'europe-west1-d'</span><span class="p">,</span> |
| <span class="s1">'stagingLocation'</span><span class="p">:</span> <span class="s1">'gs://my-staging-bucket/staging/'</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>You need to pass the path to your dataflow as a file reference with the <code class="docutils literal notranslate"><span class="pre">jar</span></code> |
| parameter, the jar needs to be a self executing jar (see documentation here: |
| <a class="reference external" href="https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar">https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar</a>). |
| Use <code class="docutils literal notranslate"><span class="pre">options</span></code> to pass on options to your job.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataFlowJavaOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataflow_example'</span><span class="p">,</span> |
| <span class="n">jar</span><span class="o">=</span><span class="s1">'{{var.value.gcp_dataflow_base}}pipeline/build/libs/pipeline-example-1.0.jar'</span><span class="p">,</span> |
| <span class="n">options</span><span class="o">=</span><span class="p">{</span> |
| <span class="s1">'autoscalingAlgorithm'</span><span class="p">:</span> <span class="s1">'BASIC'</span><span class="p">,</span> |
| <span class="s1">'maxNumWorkers'</span><span class="p">:</span> <span class="s1">'50'</span><span class="p">,</span> |
| <span class="s1">'start'</span><span class="p">:</span> <span class="s1">'{{ds}}'</span><span class="p">,</span> |
| <span class="s1">'partitionType'</span><span class="p">:</span> <span class="s1">'DAY'</span><span class="p">,</span> |
| <span class="s1">'labels'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'foo'</span> <span class="p">:</span> <span class="s1">'bar'</span><span class="p">}</span> |
| <span class="p">},</span> |
| <span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">'gcp-airflow-service-account'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| <div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'owner'</span><span class="p">:</span> <span class="s1">'airflow'</span><span class="p">,</span> |
| <span class="s1">'depends_on_past'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="s1">'start_date'</span><span class="p">:</span> |
| <span class="n">datetime</span><span class="p">(</span><span class="mi">2016</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> |
| <span class="s1">'email'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'alex@vanboxel.be'</span><span class="p">],</span> |
| <span class="s1">'email_on_failure'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="s1">'email_on_retry'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="s1">'retries'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> |
| <span class="s1">'retry_delay'</span><span class="p">:</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">minutes</span><span class="o">=</span><span class="mi">30</span><span class="p">),</span> |
| <span class="s1">'dataflow_default_options'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'project'</span><span class="p">:</span> <span class="s1">'my-gcp-project'</span><span class="p">,</span> |
| <span class="s1">'zone'</span><span class="p">:</span> <span class="s1">'us-central1-f'</span><span class="p">,</span> |
| <span class="s1">'stagingLocation'</span><span class="p">:</span> <span class="s1">'gs://bucket/tmp/dataflow/staging/'</span><span class="p">,</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| |
| <span class="n">dag</span> <span class="o">=</span> <span class="n">DAG</span><span class="p">(</span><span class="s1">'test-dag'</span><span class="p">,</span> <span class="n">default_args</span><span class="o">=</span><span class="n">default_args</span><span class="p">)</span> |
| |
| <span class="n">task</span> <span class="o">=</span> <span class="n">DataFlowJavaOperator</span><span class="p">(</span> |
| <span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">'gcp_default'</span><span class="p">,</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'normalize-cal'</span><span class="p">,</span> |
| <span class="n">jar</span><span class="o">=</span><span class="s1">'{{var.value.gcp_dataflow_base}}pipeline-ingress-cal-normalize-1.0.jar'</span><span class="p">,</span> |
| <span class="n">options</span><span class="o">=</span><span class="p">{</span> |
| <span class="s1">'autoscalingAlgorithm'</span><span class="p">:</span> <span class="s1">'BASIC'</span><span class="p">,</span> |
| <span class="s1">'maxNumWorkers'</span><span class="p">:</span> <span class="s1">'50'</span><span class="p">,</span> |
| <span class="s1">'start'</span><span class="p">:</span> <span class="s1">'{{ds}}'</span><span class="p">,</span> |
| <span class="s1">'partitionType'</span><span class="p">:</span> <span class="s1">'DAY'</span> |
| |
| <span class="p">},</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </div> |
| <div class="section" id="dataflowtemplateoperator"> |
| <span id="id96"></span><h5>DataflowTemplateOperator<a class="headerlink" href="#dataflowtemplateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataflowTemplateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataflowTemplateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataflowTemplateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Templated Cloud DataFlow batch job. The parameters of the operation |
| will be passed to the job.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>template</strong> (<em>string</em>) – The reference to the DataFlow template.</li> |
| <li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job environment options.</li> |
| <li><strong>parameters</strong> (<em>dict</em>) – Map of job specific parameters for the template.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud |
| Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google |
| Cloud Platform for the dataflow job status while the job is in the |
| JOB_STATE_RUNNING state.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>It’s a good practice to define dataflow_* parameters in the default_args of the dag |
| like the project, zone and staging location.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters</a> |
| <a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment</a></p> |
| </div> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'dataflow_default_options'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'project'</span><span class="p">:</span> <span class="s1">'my-gcp-project'</span><span class="p">,</span> |
| <span class="s1">'zone'</span><span class="p">:</span> <span class="s1">'europe-west1-d'</span><span class="p">,</span> |
| <span class="s1">'tempLocation'</span><span class="p">:</span> <span class="s1">'gs://my-staging-bucket/staging/'</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>You need to pass the path to your dataflow template as a file reference with the |
| <code class="docutils literal notranslate"><span class="pre">template</span></code> parameter. Use <code class="docutils literal notranslate"><span class="pre">parameters</span></code> to pass on parameters to your job. |
| Use <code class="docutils literal notranslate"><span class="pre">environment</span></code> to pass on runtime environment variables to your job.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataflowTemplateOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataflow_example'</span><span class="p">,</span> |
| <span class="n">template</span><span class="o">=</span><span class="s1">'{{var.value.gcp_dataflow_base}}'</span><span class="p">,</span> |
| <span class="n">parameters</span><span class="o">=</span><span class="p">{</span> |
| <span class="s1">'inputFile'</span><span class="p">:</span> <span class="s2">"gs://bucket/input/my_input.txt"</span><span class="p">,</span> |
| <span class="s1">'outputFile'</span><span class="p">:</span> <span class="s2">"gs://bucket/output/my_output.txt"</span> |
| <span class="p">},</span> |
| <span class="n">gcp_conn_id</span><span class="o">=</span><span class="s1">'gcp-airflow-service-account'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p><code class="docutils literal notranslate"><span class="pre">template</span></code>, <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> and <code class="docutils literal notranslate"><span class="pre">parameters</span></code> are templated so you can |
| use variables in them.</p> |
| <p>Note that <code class="docutils literal notranslate"><span class="pre">dataflow_default_options</span></code> is expected to save high-level options |
| for project information, which apply to all dataflow operators in the DAG.</p> |
| <blockquote> |
| <div><div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters</a> |
| <a class="reference external" href="https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment">https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment</a> |
| For more detail on job template execution have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataflow/docs/templates/executing-templates">https://cloud.google.com/dataflow/docs/templates/executing-templates</a></p> |
| </div> |
| </div></blockquote> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataflowpythonoperator"> |
| <span id="id97"></span><h5>DataFlowPythonOperator<a class="headerlink" href="#dataflowpythonoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataflow_operator.</code><code class="descname">DataFlowPythonOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowPythonOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Launching Cloud Dataflow jobs written in python. Note that both |
| dataflow_default_options and options will be merged to specify pipeline |
| execution parameter, and dataflow_default_options is expected to save |
| high-level options, for instance, project and zone information, which |
| apply to all dataflow operators in the DAG.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more detail on job submission have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataflow/pipelines/specifying-exec-params">https://cloud.google.com/dataflow/pipelines/specifying-exec-params</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>py_file</strong> (<em>string</em>) – Reference to the python dataflow pipeline file.py, e.g., |
| /some/local/file/path/to/your/python/pipeline/file.</li> |
| <li><strong>py_options</strong> – Additional python options.</li> |
| <li><strong>dataflow_default_options</strong> (<em>dict</em>) – Map of default job options.</li> |
| <li><strong>options</strong> (<em>dict</em>) – Map of job specific options.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud |
| Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>poll_sleep</strong> (<em>int</em>) – The time in seconds to sleep between polling Google |
| Cloud Platform for the dataflow job status while the job is in the |
| JOB_STATE_RUNNING state.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataflow_operator.html#DataFlowPythonOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataflow_operator.DataFlowPythonOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Execute the python dataflow job.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="dataflowhook"> |
| <h4>DataFlowHook<a class="headerlink" href="#dataflowhook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_dataflow_hook.</code><code class="descname">DataFlowHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em>, <em>poll_sleep=10</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataflow_hook.html#DataFlowHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_dataflow_hook.html#DataFlowHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google Cloud Dataflow service object.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-dataproc"> |
| <h3>Cloud DataProc<a class="headerlink" href="#cloud-dataproc" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="dataproc-operators"> |
| <h4>DataProc Operators<a class="headerlink" href="#dataproc-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#dataprocclustercreateoperator"><span class="std std-ref">DataprocClusterCreateOperator</span></a> : Create a new cluster on Google Cloud Dataproc.</li> |
| <li><a class="reference internal" href="#dataprocclusterdeleteoperator"><span class="std std-ref">DataprocClusterDeleteOperator</span></a> : Delete a cluster on Google Cloud Dataproc.</li> |
| <li><a class="reference internal" href="#dataprocclusterscaleoperator"><span class="std std-ref">DataprocClusterScaleOperator</span></a> : Scale up or down a cluster on Google Cloud Dataproc.</li> |
| <li><a class="reference internal" href="#dataprocpigoperator"><span class="std std-ref">DataProcPigOperator</span></a> : Start a Pig query Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprochiveoperator"><span class="std std-ref">DataProcHiveOperator</span></a> : Start a Hive query Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprocsparksqloperator"><span class="std std-ref">DataProcSparkSqlOperator</span></a> : Start a Spark SQL query Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprocsparkoperator"><span class="std std-ref">DataProcSparkOperator</span></a> : Start a Spark Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprochadoopoperator"><span class="std std-ref">DataProcHadoopOperator</span></a> : Start a Hadoop Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprocpysparkoperator"><span class="std std-ref">DataProcPySparkOperator</span></a> : Start a PySpark Job on a Cloud DataProc cluster.</li> |
| <li><a class="reference internal" href="#dataprocworkflowtemplateinstantiateoperator"><span class="std std-ref">DataprocWorkflowTemplateInstantiateOperator</span></a> : Instantiate a WorkflowTemplate on Google Cloud Dataproc.</li> |
| <li><a class="reference internal" href="#dataprocworkflowtemplateinstantiateinlineoperator"><span class="std std-ref">DataprocWorkflowTemplateInstantiateInlineOperator</span></a> : Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc.</li> |
| </ul> |
| <div class="section" id="dataprocclustercreateoperator"> |
| <span id="id98"></span><h5>DataprocClusterCreateOperator<a class="headerlink" href="#dataprocclustercreateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterCreateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Create a new cluster on Google Cloud Dataproc. The operator will wait until the |
| creation is successful or an error occurs in the creation process.</p> |
| <p>The parameters allow to configure the cluster. Please refer to</p> |
| <p><a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters">https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters</a></p> |
| <p>for a detailed explanation on the different parameters. Most of the configuration |
| parameters detailed in the link are available as a parameter to this operator.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster to create. (templated)</li> |
| <li><strong>project_id</strong> (<em>str</em>) – The ID of the google cloud project in which |
| to create the cluster. (templated)</li> |
| <li><strong>num_workers</strong> (<em>int</em>) – The # of workers to spin up. If set to zero will |
| spin up cluster in a single node mode</li> |
| <li><strong>storage_bucket</strong> (<em>string</em>) – The storage bucket to use, setting to None lets dataproc |
| generate a custom one for you</li> |
| <li><strong>init_actions_uris</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – List of GCS uri’s containing |
| dataproc initialization scripts</li> |
| <li><strong>init_action_timeout</strong> (<em>string</em>) – Amount of time executable scripts in |
| init_actions_uris has to complete</li> |
| <li><strong>metadata</strong> (<em>dict</em>) – dict of key-value google compute engine metadata entries |
| to add to all instances</li> |
| <li><strong>image_version</strong> (<em>string</em>) – the version of software inside the Dataproc cluster</li> |
| <li><strong>custom_image</strong> – custom Dataproc image for more info see |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/guides/dataproc-images">https://cloud.google.com/dataproc/docs/guides/dataproc-images</a></li> |
| <li><strong>properties</strong> (<em>dict</em>) – dict of properties to set on |
| config files (e.g. spark-defaults.conf), see |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig">https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig</a></li> |
| <li><strong>master_machine_type</strong> (<em>string</em>) – Compute engine machine type to use for the master node</li> |
| <li><strong>master_disk_type</strong> (<em>string</em>) – Type of the boot disk for the master node |
| (default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>). |
| Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or |
| <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</li> |
| <li><strong>master_disk_size</strong> (<em>int</em>) – Disk size for the master node</li> |
| <li><strong>worker_machine_type</strong> (<em>string</em>) – Compute engine machine type to use for the worker nodes</li> |
| <li><strong>worker_disk_type</strong> (<em>string</em>) – Type of the boot disk for the worker node |
| (default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>). |
| Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or |
| <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</li> |
| <li><strong>worker_disk_size</strong> (<em>int</em>) – Disk size for the worker nodes</li> |
| <li><strong>num_preemptible_workers</strong> (<em>int</em>) – The # of preemptible worker nodes to spin up</li> |
| <li><strong>labels</strong> (<em>dict</em>) – dict of labels to add to the cluster</li> |
| <li><strong>zone</strong> (<em>string</em>) – The zone where the cluster will be located. (templated)</li> |
| <li><strong>network_uri</strong> (<em>string</em>) – The network uri to be used for machine communication, cannot be |
| specified with subnetwork_uri</li> |
| <li><strong>subnetwork_uri</strong> (<em>string</em>) – The subnetwork uri to be used for machine communication, |
| cannot be specified with network_uri</li> |
| <li><strong>internal_ip_only</strong> (<em>bool</em>) – If true, all instances in the cluster will only |
| have internal IP addresses. This can only be enabled for subnetwork |
| enabled networks</li> |
| <li><strong>tags</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – The GCE tags to add to all instances</li> |
| <li><strong>region</strong> – leave as ‘global’, might become relevant in the future. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>service_account</strong> (<em>string</em>) – The service account of the dataproc instances.</li> |
| <li><strong>service_account_scopes</strong> (<em>list</em><em>[</em><em>string</em><em>]</em>) – The URIs of service account scopes to be included.</li> |
| <li><strong>idle_delete_ttl</strong> (<em>int</em>) – The longest duration that cluster would keep alive while |
| staying idle. Passing this threshold will cause cluster to be auto-deleted. |
| A duration in seconds.</li> |
| <li><strong>auto_delete_time</strong> (<em>datetime.datetime</em>) – The time when cluster will be auto-deleted.</li> |
| <li><strong>auto_delete_ttl</strong> (<em>int</em>) – The life duration of cluster, the cluster will be |
| auto-deleted at the end of this duration. |
| A duration in seconds. (If auto_delete_time is set this parameter will be ignored)</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Type:</th><td class="field-body"><p class="first last">custom_image: string</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocclusterscaleoperator"> |
| <span id="id99"></span><h5>DataprocClusterScaleOperator<a class="headerlink" href="#dataprocclusterscaleoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterScaleOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Scale, up or down, a cluster on Google Cloud Dataproc. |
| The operator will wait until the cluster is re-scaled.</p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataprocClusterScaleOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataproc_scale'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">cluster_name</span><span class="o">=</span><span class="s1">'cluster-1'</span><span class="p">,</span> |
| <span class="n">num_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">num_preemptible_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">graceful_decommission_timeout</span><span class="o">=</span><span class="s1">'1h'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
<p class="last">For more detail about scaling clusters have a look at the reference:
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters">https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the cluster to scale. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the cluster runs. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – The region for the dataproc cluster. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>num_workers</strong> (<em>int</em>) – The new number of workers</li> |
| <li><strong>num_preemptible_workers</strong> (<em>int</em>) – The new number of preemptible workers</li> |
<li><strong>graceful_decommission_timeout</strong> (<em>string</em>) – Timeout for graceful YARN decommissioning.
| Maximum value is 1d</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocclusterdeleteoperator"> |
| <span id="id100"></span><h5>DataprocClusterDeleteOperator<a class="headerlink" href="#dataprocclusterdeleteoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocClusterDeleteOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Delete a cluster on Google Cloud Dataproc. The operator will wait until the |
| cluster is destroyed.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
<li><strong>cluster_name</strong> (<em>string</em>) – The name of the cluster to delete. (templated)</li>
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the cluster runs. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocpigoperator"> |
| <span id="id101"></span><h5>DataProcPigOperator<a class="headerlink" href="#dataprocpigoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcPigOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Pig query Job on a Cloud DataProc cluster. The parameters of the operation |
| will be passed to the cluster.</p> |
| <p>It’s a good practice to define dataproc_* parameters in the default_args of the dag |
| like the cluster name and UDFs.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'cluster_name'</span><span class="p">:</span> <span class="s1">'cluster-1'</span><span class="p">,</span> |
| <span class="s1">'dataproc_pig_jars'</span><span class="p">:</span> <span class="p">[</span> |
| <span class="s1">'gs://example/udf/jar/datafu/1.2.0/datafu.jar'</span><span class="p">,</span> |
| <span class="s1">'gs://example/udf/jar/gpig/1.2/gpig.jar'</span> |
| <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>You can pass a pig script as string or file reference. Use variables to pass on |
| variables for the pig script to be resolved on the cluster or use the parameters to |
| be resolved in the script as template parameters.</p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataProcPigOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataproc_pig'</span><span class="p">,</span> |
| <span class="n">query</span><span class="o">=</span><span class="s1">'a_pig_script.pig'</span><span class="p">,</span> |
| <span class="n">variables</span><span class="o">=</span><span class="p">{</span><span class="s1">'out'</span><span class="p">:</span> <span class="s1">'gs://example/output/{{ds}}'</span><span class="p">},</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
<p class="last">For more detail about job submission have a look at the reference:
| <a class="reference external" href="https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs">https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>query</strong> (<em>string</em>) – The query or reference to the query |
| file (pg or pig extension). (templated)</li> |
| <li><strong>query_uri</strong> (<em>string</em>) – The uri of a pig script on Cloud Storage.</li> |
| <li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query. (templated)</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
name by default is the task_id appended with the execution date, but can
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
| <li><strong>dataproc_pig_properties</strong> (<em>dict</em>) – Map for the Pig properties. Ideal to put in |
| default arguments</li> |
| <li><strong>dataproc_pig_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: for |
| UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprochiveoperator"> |
| <span id="id102"></span><h5>DataProcHiveOperator<a class="headerlink" href="#dataprochiveoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcHiveOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Hive query Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>query</strong> (<em>string</em>) – The query or reference to the query file (q extension).</li> |
| <li><strong>query_uri</strong> (<em>string</em>) – The uri of a hive script on Cloud Storage.</li> |
| <li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query.</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This name by default |
is the task_id appended with the execution date, but can be templated. The
| name will always be appended with a random number to avoid name clashes.</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster.</li> |
<li><strong>dataproc_hive_properties</strong> (<em>dict</em>) – Map for the Hive properties. Ideal to put in
| default arguments</li> |
| <li><strong>dataproc_hive_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: for |
| UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocsparksqloperator"> |
| <span id="id103"></span><h5>DataProcSparkSqlOperator<a class="headerlink" href="#dataprocsparksqloperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcSparkSqlOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Spark SQL query Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>query</strong> (<em>string</em>) – The query or reference to the query file (q extension). (templated)</li> |
| <li><strong>query_uri</strong> (<em>string</em>) – The uri of a spark sql script on Cloud Storage.</li> |
| <li><strong>variables</strong> (<em>dict</em>) – Map of named parameters for the query. (templated)</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
name by default is the task_id appended with the execution date, but can
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
<li><strong>dataproc_spark_properties</strong> (<em>dict</em>) – Map for the Spark properties. Ideal to put in
| default arguments</li> |
| <li><strong>dataproc_spark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocsparkoperator"> |
| <span id="id104"></span><h5>DataProcSparkOperator<a class="headerlink" href="#dataprocsparkoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcSparkOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Spark Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>main_jar</strong> (<em>string</em>) – URI of the job jar provisioned on Cloud Storage. (use this or |
| the main_class, not both together).</li> |
| <li><strong>main_class</strong> (<em>string</em>) – Name of the job class. (use this or the main_jar, not both |
| together).</li> |
| <li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li> |
| <li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</li> |
| <li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
name by default is the task_id appended with the execution date, but can
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
<li><strong>dataproc_spark_properties</strong> (<em>dict</em>) – Map for the Spark properties. Ideal to put in
| default arguments</li> |
| <li><strong>dataproc_spark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprochadoopoperator"> |
| <span id="id105"></span><h5>DataProcHadoopOperator<a class="headerlink" href="#dataprochadoopoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcHadoopOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Hadoop Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>main_jar</strong> (<em>string</em>) – URI of the job jar provisioned on Cloud Storage. (use this or |
| the main_class, not both together).</li> |
| <li><strong>main_class</strong> (<em>string</em>) – Name of the job class. (use this or the main_jar, not both |
| together).</li> |
| <li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li> |
| <li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</li> |
| <li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
name by default is the task_id appended with the execution date, but can
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster. (templated)</li> |
<li><strong>dataproc_hadoop_properties</strong> (<em>dict</em>) – Map for the Hadoop properties. Ideal to put in
| default arguments</li> |
| <li><strong>dataproc_hadoop_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocpysparkoperator"> |
| <span id="id106"></span><h5>DataProcPySparkOperator<a class="headerlink" href="#dataprocpysparkoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataProcPySparkOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a PySpark Job on a Cloud DataProc cluster.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>main</strong> (<em>string</em>) – [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main |
| Python file to use as the driver. Must be a .py file.</li> |
| <li><strong>arguments</strong> (<em>list</em>) – Arguments for the job. (templated)</li> |
| <li><strong>archives</strong> (<em>list</em>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</li> |
| <li><strong>files</strong> (<em>list</em>) – List of files to be copied to the working directory</li> |
| <li><strong>pyfiles</strong> (<em>list</em>) – List of Python files to pass to the PySpark framework. |
| Supported file types: .py, .egg, and .zip</li> |
| <li><strong>job_name</strong> (<em>string</em>) – The job name used in the DataProc cluster. This |
name by default is the task_id appended with the execution date, but can
| be templated. The name will always be appended with a random number to |
| avoid name clashes. (templated)</li> |
| <li><strong>cluster_name</strong> (<em>string</em>) – The name of the DataProc cluster.</li> |
<li><strong>dataproc_pyspark_properties</strong> (<em>dict</em>) – Map for the PySpark properties. Ideal to put in
| default arguments</li> |
| <li><strong>dataproc_pyspark_jars</strong> (<em>list</em>) – URIs to jars provisioned in Cloud Storage (example: |
| for UDFs and libs) and are ideal to put in default arguments.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>region</strong> (<em>str</em>) – The specified region where the dataproc cluster is created.</li> |
| <li><strong>job_error_states</strong> (<em>list</em>) – Job states that should be considered error states. |
| Any states in this list will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">['ERROR',</span> <span class="pre">'CANCELLED']</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">['ERROR']</span></code>.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Variables:</th><td class="field-body"><p class="first last"><strong>dataproc_job_id</strong> (<em>string</em>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocworkflowtemplateinstantiateoperator"> |
| <span id="id107"></span><h5>DataprocWorkflowTemplateInstantiateOperator<a class="headerlink" href="#dataprocworkflowtemplateinstantiateoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateInstantiateOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator</span></code></a></p> |
| <p>Instantiate a WorkflowTemplate on Google Cloud Dataproc. The operator will wait |
| until the WorkflowTemplate is finished executing.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">Please refer to: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>template_id</strong> (<em>string</em>) – The id of the template. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the template runs</li> |
| <li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="dataprocworkflowtemplateinstantiateinlineoperator"> |
| <span id="id108"></span><h5>DataprocWorkflowTemplateInstantiateInlineOperator<a class="headerlink" href="#dataprocworkflowtemplateinstantiateinlineoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="descname">DataprocWorkflowTemplateInstantiateInlineOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateInlineOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateBaseOperator</span></code></a></p> |
| <p>Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc. The operator will |
| wait until the WorkflowTemplate is finished executing.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">Please refer to: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>template</strong> (<em>map</em>) – The template contents. (templated)</li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the google cloud project in which |
| the template runs</li> |
| <li><strong>region</strong> (<em>string</em>) – leave as ‘global’, might become relevant in the future</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use connecting to Google Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| <div class="section" id="cloud-datastore"> |
| <h3>Cloud Datastore<a class="headerlink" href="#cloud-datastore" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="datastore-operators"> |
| <h4>Datastore Operators<a class="headerlink" href="#datastore-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#datastoreexportoperator"><span class="std std-ref">DatastoreExportOperator</span></a> : Export entities from Google Cloud Datastore to Cloud Storage.</li> |
| <li><a class="reference internal" href="#datastoreimportoperator"><span class="std std-ref">DatastoreImportOperator</span></a> : Import entities from Cloud Storage to Google Cloud Datastore.</li> |
| </ul> |
| <div class="section" id="datastoreexportoperator"> |
| <span id="id109"></span><h5>DatastoreExportOperator<a class="headerlink" href="#datastoreexportoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.datastore_export_operator.</code><code class="descname">DatastoreExportOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_export_operator.html#DatastoreExportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_export_operator.DatastoreExportOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Export entities from Google Cloud Datastore to Cloud Storage</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – name of the cloud storage bucket to backup data</li> |
| <li><strong>namespace</strong> (<em>str</em>) – optional namespace path in the specified Cloud Storage bucket |
| to backup data. If this namespace does not exist in GCS, it will be created.</li> |
| <li><strong>datastore_conn_id</strong> (<em>string</em>) – the name of the Datastore connection id to use</li> |
| <li><strong>cloud_storage_conn_id</strong> (<em>string</em>) – the name of the cloud storage connection id to |
| force-write backup</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>entity_filter</strong> (<em>dict</em>) – description of what data from the project is included in the |
| export, refer to |
| <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter">https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter</a></li> |
| <li><strong>labels</strong> (<em>dict</em>) – client-assigned labels for cloud storage</li> |
| <li><strong>polling_interval_in_seconds</strong> (<em>int</em>) – number of seconds to wait before polling for |
| execution status again</li> |
| <li><strong>overwrite_existing</strong> (<em>bool</em>) – if the storage bucket + namespace is not empty, it will be |
| emptied prior to exports. This enables overwriting existing backups.</li> |
| <li><strong>xcom_push</strong> (<em>bool</em>) – push operation name to xcom for reference</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="datastoreimportoperator"> |
| <span id="id110"></span><h5>DatastoreImportOperator<a class="headerlink" href="#datastoreimportoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.datastore_import_operator.</code><code class="descname">DatastoreImportOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/datastore_import_operator.html#DatastoreImportOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.datastore_import_operator.DatastoreImportOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Import entities from Cloud Storage to Google Cloud Datastore</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – container in Cloud Storage to store data</li> |
| <li><strong>file</strong> (<em>string</em>) – path of the backup metadata file in the specified Cloud Storage bucket. |
| It should have the extension .overall_export_metadata</li> |
| <li><strong>namespace</strong> (<em>str</em>) – optional namespace of the backup metadata file in |
| the specified Cloud Storage bucket.</li> |
| <li><strong>entity_filter</strong> (<em>dict</em>) – description of what data from the project is included in |
| the export, refer to |
| <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter">https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter</a></li> |
| <li><strong>labels</strong> (<em>dict</em>) – client-assigned labels for cloud storage</li> |
| <li><strong>datastore_conn_id</strong> (<em>string</em>) – the name of the connection id to use</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>polling_interval_in_seconds</strong> (<em>int</em>) – number of seconds to wait before polling for |
| execution status again</li> |
| <li><strong>xcom_push</strong> (<em>bool</em>) – push operation name to xcom for reference</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="datastorehook"> |
| <h4>DatastoreHook<a class="headerlink" href="#datastorehook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.datastore_hook.</code><code class="descname">DatastoreHook</code><span class="sig-paren">(</span><em>datastore_conn_id='google_cloud_datastore_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Interact with Google Cloud Datastore. This hook uses the Google Cloud Platform |
| connection.</p> |
| <p>This object is not thread safe. If you want to make multiple requests |
| simultaneously, you will need to create a hook per thread.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.allocate_ids"> |
| <code class="descname">allocate_ids</code><span class="sig-paren">(</span><em>partialKeys</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.allocate_ids"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.allocate_ids" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Allocate IDs for incomplete keys. |
| see <a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>partialKeys</strong> – a list of partial keys</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">a list of full keys.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.begin_transaction"> |
| <code class="descname">begin_transaction</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.begin_transaction"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.begin_transaction" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a new transaction handle</p> |
| <blockquote> |
| <div><div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction</a></p> |
| </div> |
| </div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">a transaction handle</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.commit"> |
| <code class="descname">commit</code><span class="sig-paren">(</span><em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.commit"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.commit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Commit a transaction, optionally creating, deleting or modifying some entities.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>body</strong> – the body of the commit request</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the response body of the commit request</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.delete_operation"> |
| <code class="descname">delete_operation</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.delete_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.delete_operation" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes the long-running operation</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> – the name of the operation resource</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.export_to_storage_bucket"> |
| <code class="descname">export_to_storage_bucket</code><span class="sig-paren">(</span><em>bucket</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.export_to_storage_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.export_to_storage_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Export entities from Cloud Datastore to Cloud Storage for backup</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><em>version='v1'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google Cloud Datastore service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.get_operation"> |
| <code class="descname">get_operation</code><span class="sig-paren">(</span><em>name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.get_operation"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.get_operation" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the latest state of a long-running operation</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> – the name of the operation resource</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.import_from_storage_bucket"> |
| <code class="descname">import_from_storage_bucket</code><span class="sig-paren">(</span><em>bucket</em>, <em>file</em>, <em>namespace=None</em>, <em>entity_filter=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.import_from_storage_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.import_from_storage_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Import a backup from Cloud Storage to Cloud Datastore</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.lookup"> |
| <code class="descname">lookup</code><span class="sig-paren">(</span><em>keys</em>, <em>read_consistency=None</em>, <em>transaction=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.lookup"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.lookup" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lookup some entities by key</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>keys</strong> – the keys to lookup</li> |
| <li><strong>read_consistency</strong> – the read consistency to use. default, strong or eventual. |
| Cannot be used with a transaction.</li> |
| <li><strong>transaction</strong> – the transaction to use, if any.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">the response body of the lookup request.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.poll_operation_until_done"> |
| <code class="descname">poll_operation_until_done</code><span class="sig-paren">(</span><em>name</em>, <em>polling_interval_in_seconds</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.poll_operation_until_done"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.poll_operation_until_done" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Poll backup operation state until it’s completed</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.rollback"> |
| <code class="descname">rollback</code><span class="sig-paren">(</span><em>transaction</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.rollback"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.rollback" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Roll back a transaction</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>transaction</strong> – the transaction to roll back</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.datastore_hook.DatastoreHook.run_query"> |
| <code class="descname">run_query</code><span class="sig-paren">(</span><em>body</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/datastore_hook.html#DatastoreHook.run_query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.datastore_hook.DatastoreHook.run_query" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Run a query for entities.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery">https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>body</strong> – the body of the query request</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">the batch of query results.</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-ml-engine"> |
| <h3>Cloud ML Engine<a class="headerlink" href="#cloud-ml-engine" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="cloud-ml-engine-operators"> |
| <h4>Cloud ML Engine Operators<a class="headerlink" href="#cloud-ml-engine-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#mlenginebatchpredictionoperator"><span class="std std-ref">MLEngineBatchPredictionOperator</span></a> : Start a Cloud ML Engine batch prediction job.</li> |
| <li><a class="reference internal" href="#mlenginemodeloperator"><span class="std std-ref">MLEngineModelOperator</span></a> : Manages a Cloud ML Engine model.</li> |
| <li><a class="reference internal" href="#mlenginetrainingoperator"><span class="std std-ref">MLEngineTrainingOperator</span></a> : Start a Cloud ML Engine training job.</li> |
| <li><a class="reference internal" href="#mlengineversionoperator"><span class="std std-ref">MLEngineVersionOperator</span></a> : Manages a Cloud ML Engine model version.</li> |
| </ul> |
| <div class="section" id="mlenginebatchpredictionoperator"> |
| <span id="id111"></span><h5>MLEngineBatchPredictionOperator<a class="headerlink" href="#mlenginebatchpredictionoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineBatchPredictionOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineBatchPredictionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineBatchPredictionOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Start a Google Cloud ML Engine prediction job.</p> |
| <p>NOTE: For model origin, users should consider exactly one from the |
| three options below: |
| 1. Populate ‘uri’ field only, which should be a GCS location that |
| points to a tensorflow savedModel directory. |
| 2. Populate ‘model_name’ field only, which refers to an existing |
| model, and the default version of the model will be used. |
| 3. Populate both ‘model_name’ and ‘version_name’ fields, which |
| refers to a specific version of a specific model.</p> |
| <p>In options 2 and 3, both model and version name should contain the |
| minimal identifier. For instance, call</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">MLEngineBatchPredictionOperator</span><span class="p">(</span> |
| <span class="o">...</span><span class="p">,</span> |
| <span class="n">model_name</span><span class="o">=</span><span class="s1">'my_model'</span><span class="p">,</span> |
| <span class="n">version_name</span><span class="o">=</span><span class="s1">'my_version'</span><span class="p">,</span> |
| <span class="o">...</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>if the desired model version is |
| “projects/my_project/models/my_model/versions/my_version”.</p> |
| <p>See <a class="reference external" href="https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs">https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs</a> |
| for further documentation on the parameters.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name where the |
| prediction job is submitted. (templated)</li> |
| <li><strong>job_id</strong> (<em>string</em>) – A unique id for the prediction job on Google Cloud |
| ML Engine. (templated)</li> |
| <li><strong>data_format</strong> (<em>string</em>) – The format of the input data. |
| It will default to ‘DATA_FORMAT_UNSPECIFIED’ if is not provided |
| or is not one of [“TEXT”, “TF_RECORD”, “TF_RECORD_GZIP”].</li> |
| <li><strong>input_paths</strong> (<em>list of string</em>) – A list of GCS paths of input data for batch |
| prediction. Accepting wildcard operator <a href="#id112"><span class="problematic" id="id113">*</span></a>, but only at the end. (templated)</li> |
| <li><strong>output_path</strong> (<em>string</em>) – The GCS path where the prediction results are |
| written to. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – The Google Compute Engine region to run the |
| prediction job in. (templated)</li> |
| <li><strong>model_name</strong> (<em>string</em>) – The Google Cloud ML Engine model to use for prediction. |
| If version_name is not provided, the default version of this |
| model will be used. |
| Should not be None if version_name is provided. |
| Should be None if uri is provided. (templated)</li> |
| <li><strong>version_name</strong> (<em>string</em>) – The Google Cloud ML Engine model version to use for |
| prediction. |
| Should be None if uri is provided. (templated)</li> |
| <li><strong>uri</strong> (<em>string</em>) – The GCS path of the saved model to use for prediction. |
| Should be None if model_name is provided. |
| It should be a GCS path pointing to a tensorflow SavedModel. (templated)</li> |
| <li><strong>max_worker_count</strong> (<em>int</em>) – The maximum number of workers to be used |
| for parallel processing. Defaults to 10 if not specified.</li> |
| <li><strong>runtime_version</strong> (<em>string</em>) – The Google Cloud ML Engine runtime version to use |
| for batch prediction.</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID used for connection to Google |
| Cloud Platform.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must |
| have domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>Raises:</dt> |
| <dd><code class="docutils literal notranslate"><span class="pre">ValueError</span></code>: if a unique model/version origin cannot be determined.</dd> |
| </dl> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="mlenginemodeloperator"> |
| <span id="id114"></span><h5>MLEngineModelOperator<a class="headerlink" href="#mlenginemodeloperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineModelOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineModelOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineModelOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineModelOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator for managing a Google Cloud ML Engine model.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name to which MLEngine |
| model belongs. (templated)</li> |
| <li><strong>model</strong> (<em>dict</em>) – <p>A dictionary containing the information about the model. |
| If the <cite>operation</cite> is <cite>create</cite>, then the <cite>model</cite> parameter should |
| contain all the information about this model such as <cite>name</cite>.</p> |
| <p>If the <cite>operation</cite> is <cite>get</cite>, the <cite>model</cite> parameter |
| should contain the <cite>name</cite> of the model.</p> |
| </li> |
| <li><strong>operation</strong> (<em>string</em>) – <p>The operation to perform. Available operations are:</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">create</span></code>: Creates a new model as provided by the <cite>model</cite> parameter.</li> |
| <li><code class="docutils literal notranslate"><span class="pre">get</span></code>: Gets a particular model where the name is specified in <cite>model</cite>.</li> |
| </ul> |
| </li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="mlenginetrainingoperator"> |
| <span id="id115"></span><h5>MLEngineTrainingOperator<a class="headerlink" href="#mlenginetrainingoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineTrainingOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineTrainingOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineTrainingOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator for launching a MLEngine training job.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name within which MLEngine |
| training job should run (templated).</li> |
| <li><strong>job_id</strong> (<em>string</em>) – A unique templated id for the submitted Google MLEngine |
| training job. (templated)</li> |
| <li><strong>package_uris</strong> (<em>string</em>) – A list of package locations for MLEngine training job, |
| which should include the main training program + any additional |
| dependencies. (templated)</li> |
| <li><strong>training_python_module</strong> (<em>string</em>) – The Python module name to run within MLEngine |
| training job after installing ‘package_uris’ packages. (templated)</li> |
| <li><strong>training_args</strong> (<em>string</em>) – A list of templated command line arguments to pass to |
| the MLEngine training program. (templated)</li> |
| <li><strong>region</strong> (<em>string</em>) – The Google Compute Engine region to run the MLEngine training |
| job in (templated).</li> |
| <li><strong>scale_tier</strong> (<em>string</em>) – Resource tier for MLEngine training job. (templated)</li> |
| <li><strong>runtime_version</strong> (<em>string</em>) – The Google Cloud ML runtime version to use for |
| training. (templated)</li> |
| <li><strong>python_version</strong> (<em>string</em>) – The version of Python used in training. (templated)</li> |
| <li><strong>job_dir</strong> (<em>string</em>) – A Google Cloud Storage path in which to store training |
| outputs and other data needed for training. (templated)</li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>mode</strong> (<em>string</em>) – Can be one of ‘DRY_RUN’/’CLOUD’. In ‘DRY_RUN’ mode, no real |
| training job will be launched, but the MLEngine training job request |
| will be printed out. In ‘CLOUD’ mode, a real MLEngine training job |
| creation request will be issued.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="mlengineversionoperator"> |
| <span id="id116"></span><h5>MLEngineVersionOperator<a class="headerlink" href="#mlengineversionoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mlengine_operator.</code><code class="descname">MLEngineVersionOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mlengine_operator.html#MLEngineVersionOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mlengine_operator.MLEngineVersionOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Operator for managing a Google Cloud ML Engine version.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project name to which MLEngine |
| model belongs.</li> |
| <li><strong>model_name</strong> (<em>string</em>) – The name of the Google Cloud ML Engine model that the version |
| belongs to. (templated)</li> |
| <li><strong>version_name</strong> (<em>string</em>) – A name to use for the version being operated upon. |
| If not None and the <cite>version</cite> argument is None or does not have a value for |
| the <cite>name</cite> key, then this will be populated in the payload for the |
| <cite>name</cite> key. (templated)</li> |
| <li><strong>version</strong> (<em>dict</em>) – A dictionary containing the information about the version. |
| If the <cite>operation</cite> is <cite>create</cite>, <cite>version</cite> should contain all the |
| information about this version such as name, and deploymentUrl. |
| If the <cite>operation</cite> is <cite>get</cite> or <cite>delete</cite>, the <cite>version</cite> parameter |
| should contain the <cite>name</cite> of the version. |
| If it is None, the only <cite>operation</cite> possible would be <cite>list</cite>. (templated)</li> |
| <li><strong>operation</strong> (<em>string</em>) – <p>The operation to perform. Available operations are:</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">create</span></code>: Creates a new version in the model specified by <cite>model_name</cite>, |
| in which case the <cite>version</cite> parameter should contain all the |
| information to create that version |
| (e.g. <cite>name</cite>, <cite>deploymentUrl</cite>).</li> |
| <li><code class="docutils literal notranslate"><span class="pre">get</span></code>: Gets full information of a particular version in the model |
| specified by <cite>model_name</cite>. |
| The name of the version should be specified in the <cite>version</cite> |
| parameter.</li> |
| <li><code class="docutils literal notranslate"><span class="pre">list</span></code>: Lists all available versions of the model specified |
| by <cite>model_name</cite>.</li> |
| <li><code class="docutils literal notranslate"><span class="pre">delete</span></code>: Deletes the version specified in <cite>version</cite> parameter from the |
| model specified by <cite>model_name</cite>. |
| The name of the version should be specified in the <cite>version</cite> |
| parameter.</li> |
| </ul> |
| </li> |
| <li><strong>gcp_conn_id</strong> (<em>string</em>) – The connection ID to use when fetching connection info.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="cloud-ml-engine-hook"> |
| <h4>Cloud ML Engine Hook<a class="headerlink" href="#cloud-ml-engine-hook" title="Permalink to this headline">¶</a></h4> |
| <div class="section" id="mlenginehook"> |
| <span id="id117"></span><h5>MLEngineHook<a class="headerlink" href="#mlenginehook" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_mlengine_hook.</code><code class="descname">MLEngineHook</code><span class="sig-paren">(</span><em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_job"> |
| <code class="descname">create_job</code><span class="sig-paren">(</span><em>project_id</em>, <em>job</em>, <em>use_existing_job_fn=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_job"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_job" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Launches a MLEngine job and wait for it to reach a terminal state.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>project_id</strong> (<em>string</em>) – The Google Cloud project id within which MLEngine |
| job will be launched.</li> |
| <li><strong>job</strong> (<em>dict</em>) – <p>MLEngine Job object that should be provided to the MLEngine |
| API, such as:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span> |
| <span class="s1">'jobId'</span><span class="p">:</span> <span class="s1">'my_job_id'</span><span class="p">,</span> |
| <span class="s1">'trainingInput'</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s1">'scaleTier'</span><span class="p">:</span> <span class="s1">'STANDARD_1'</span><span class="p">,</span> |
| <span class="o">...</span> |
| <span class="p">}</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| </li> |
| <li><strong>use_existing_job_fn</strong> (<em>function</em>) – In case that a MLEngine job with the same |
| job_id already exists, this method (if provided) will decide whether |
| we should use this existing job, continue waiting for it to finish |
| and returning the job object. It should accept an MLEngine job |
| object, and return a boolean value indicating whether it is OK to |
| reuse the existing job. If ‘use_existing_job_fn’ is not provided, |
| we by default reuse the existing MLEngine job.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The MLEngine job object if the job successfully reaches a |
| terminal state (which might be FAILED or CANCELLED state).</p> |
| </td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">dict</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_model"> |
| <code class="descname">create_model</code><span class="sig-paren">(</span><em>project_id</em>, <em>model</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_model" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a Model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_version"> |
| <code class="descname">create_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_spec</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.create_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.create_version" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates the Version on Google Cloud ML Engine.</p> |
| <p>Returns the operation if the version was created successfully and |
| raises an error otherwise.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.delete_version"> |
| <code class="descname">delete_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.delete_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.delete_version" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Deletes the given version of a model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google MLEngine service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_model"> |
| <code class="descname">get_model</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.get_model"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.get_model" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets a Model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.list_versions"> |
| <code class="descname">list_versions</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.list_versions"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.list_versions" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Lists all available versions of a model. Blocks until finished.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.set_default_version"> |
| <code class="descname">set_default_version</code><span class="sig-paren">(</span><em>project_id</em>, <em>model_name</em>, <em>version_name</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_mlengine_hook.html#MLEngineHook.set_default_version"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook.set_default_version" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets a version to be the default. Blocks until finished.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| <div class="section" id="cloud-storage"> |
| <h3>Cloud Storage<a class="headerlink" href="#cloud-storage" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="storage-operators"> |
| <h4>Storage Operators<a class="headerlink" href="#storage-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
| <li><a class="reference internal" href="#filetogooglecloudstorageoperator"><span class="std std-ref">FileToGoogleCloudStorageOperator</span></a> : Uploads a file to Google Cloud Storage.</li> |
| <li><a class="reference internal" href="#id119"><span class="std std-ref">GoogleCloudStorageCreateBucketOperator</span></a> : Creates a new cloud storage bucket.</li> |
| <li><a class="reference internal" href="#googlecloudstoragecreatebucketoperator"><span class="std std-ref">GoogleCloudStorageBucketCreateAclEntryOperator</span></a> : Creates a new ACL entry on the specified bucket.</li> |
| <li><a class="reference internal" href="#googlecloudstoragedownloadoperator"><span class="std std-ref">GoogleCloudStorageDownloadOperator</span></a> : Downloads a file from Google Cloud Storage.</li> |
| <li><a class="reference internal" href="#googlecloudstoragelistoperator"><span class="std std-ref">GoogleCloudStorageListOperator</span></a> : List all objects from the bucket with the given string prefix and delimiter in name.</li> |
| <li><a class="reference internal" href="#id123"><span class="std std-ref">GoogleCloudStorageToBigQueryOperator</span></a> : Loads files from Google cloud storage into BigQuery.</li> |
| <li><a class="reference internal" href="#googlecloudstoragetobigqueryoperator"><span class="std std-ref">GoogleCloudStorageObjectCreateAclEntryOperator</span></a> : Creates a new ACL entry on the specified object.</li> |
| <li><a class="reference internal" href="#googlecloudstoragetogooglecloudstorageoperator"><span class="std std-ref">GoogleCloudStorageToGoogleCloudStorageOperator</span></a> : Copies objects from a bucket to another, with renaming if requested.</li> |
| <li><a class="reference internal" href="#googlecloudstoragetogooglecloudstoragetransferoperator"><span class="std std-ref">GoogleCloudStorageToGoogleCloudStorageTransferOperator</span></a> : Copies objects from a bucket to another using Google Transfer service.</li> |
| <li><a class="reference internal" href="#mysqltogooglecloudstorageoperator"><span class="std std-ref">MySqlToGoogleCloudStorageOperator</span></a>: Copy data from any MySQL Database to Google cloud storage in JSON format.</li> |
| </ul> |
| <div class="section" id="filetogooglecloudstorageoperator"> |
| <span id="id118"></span><h5>FileToGoogleCloudStorageOperator<a class="headerlink" href="#filetogooglecloudstorageoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.file_to_gcs.</code><code class="descname">FileToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_gcs.html#FileToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Uploads a file to Google Cloud Storage. |
| Optionally can compress the file for upload.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>src</strong> (<em>string</em>) – Path to the local file. (templated)</li> |
| <li><strong>dst</strong> (<em>string</em>) – Destination path within the specified bucket. (templated)</li> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to upload to. (templated)</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The Airflow connection ID to upload with</li> |
| <li><strong>mime_type</strong> (<em>string</em>) – The mime-type string</li> |
| <li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any</li> |
| <li><strong>gzip</strong> (<em>bool</em>) – Allows for file to be compressed and uploaded as gzip</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator.execute"> |
| <code class="descname">execute</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/file_to_gcs.html#FileToGoogleCloudStorageOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.file_to_gcs.FileToGoogleCloudStorageOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Uploads the file to Google cloud storage</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstoragebucketcreateaclentryoperator"> |
| <span id="googlecloudstoragecreatebucketoperator"></span><h5>GoogleCloudStorageBucketCreateAclEntryOperator<a class="headerlink" href="#googlecloudstoragebucketcreateaclentryoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_acl_operator.GoogleCloudStorageBucketCreateAclEntryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_acl_operator.</code><code class="descname">GoogleCloudStorageBucketCreateAclEntryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_acl_operator.html#GoogleCloudStorageBucketCreateAclEntryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_acl_operator.GoogleCloudStorageBucketCreateAclEntryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new ACL entry on the specified bucket.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>str</em>) – Name of a bucket.</li> |
| <li><strong>entity</strong> (<em>str</em>) – The entity holding the permission, in one of the following forms: |
| user-userId, user-email, group-groupId, group-email, domain-domain, |
| project-team-projectId, allUsers, allAuthenticatedUsers</li> |
| <li><strong>role</strong> (<em>str</em>) – The access permission for the entity. |
| Acceptable values are: “OWNER”, “READER”, “WRITER”.</li> |
| <li><strong>user_project</strong> (<em>str</em>) – (Optional) The project to be billed for this request. |
| Required for Requester Pays buckets.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>str</em>) – The connection ID to use when |
| connecting to Google Cloud Storage.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="id119"> |
| <span id="id120"></span><h5>GoogleCloudStorageCreateBucketOperator<a class="headerlink" href="#id119" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_operator.</code><code class="descname">GoogleCloudStorageCreateBucketOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_operator.html#GoogleCloudStorageCreateBucketOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_operator.GoogleCloudStorageCreateBucketOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new bucket. Google Cloud Storage uses a flat namespace, |
| so you can’t create a bucket with a name that is already in use.</p> |
| <blockquote> |
| <div><div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more information, see Bucket Naming Guidelines: |
| <a class="reference external" href="https://cloud.google.com/storage/docs/bucketnaming.html#requirements">https://cloud.google.com/storage/docs/bucketnaming.html#requirements</a></p> |
| </div> |
| </div></blockquote> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket_name</strong> (<em>string</em>) – The name of the bucket. (templated)</li> |
| <li><strong>storage_class</strong> (<em>string</em>) – <p>This defines how objects in the bucket are stored |
| and determines the SLA and the cost of storage (templated). Values include</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">STANDARD</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">NEARLINE</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">COLDLINE</span></code>.</li> |
| </ul> |
| <p>If this value is not specified when the bucket is |
| created, it will default to STANDARD.</p> |
| </li> |
| <li><strong>location</strong> (<em>string</em>) – <p>The location of the bucket. (templated) |
| Object data for objects in the bucket resides in physical storage |
| within this region. Defaults to US.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://developers.google.com/storage/docs/bucket-locations">https://developers.google.com/storage/docs/bucket-locations</a></p> |
| </div> |
| </li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the GCP Project. (templated)</li> |
| <li><strong>labels</strong> (<em>dict</em>) – User-provided labels, in key/value pairs.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must |
| have domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following Operator would create a new bucket <code class="docutils literal notranslate"><span class="pre">test-bucket</span></code> |
| with <code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code> storage class in <code class="docutils literal notranslate"><span class="pre">EU</span></code> region</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">CreateBucket</span> <span class="o">=</span> <span class="n">GoogleCloudStorageCreateBucketOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'CreateNewBucket'</span><span class="p">,</span> |
| <span class="n">bucket_name</span><span class="o">=</span><span class="s1">'test-bucket'</span><span class="p">,</span> |
| <span class="n">storage_class</span><span class="o">=</span><span class="s1">'MULTI_REGIONAL'</span><span class="p">,</span> |
| <span class="n">location</span><span class="o">=</span><span class="s1">'EU'</span><span class="p">,</span> |
| <span class="n">labels</span><span class="o">=</span><span class="p">{</span><span class="s1">'env'</span><span class="p">:</span> <span class="s1">'dev'</span><span class="p">,</span> <span class="s1">'team'</span><span class="p">:</span> <span class="s1">'airflow'</span><span class="p">},</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'airflow-service-account'</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstoragedownloadoperator"> |
| <span id="id121"></span><h5>GoogleCloudStorageDownloadOperator<a class="headerlink" href="#googlecloudstoragedownloadoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_download_operator.</code><code class="descname">GoogleCloudStorageDownloadOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_download_operator.html#GoogleCloudStorageDownloadOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_download_operator.GoogleCloudStorageDownloadOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Downloads a file from Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is. (templated)</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to download in the Google cloud |
| storage bucket. (templated)</li> |
| <li><strong>filename</strong> (<em>string</em>) – The file path on the local file system (where the |
| operator is being executed) that the file should be downloaded to. (templated) |
| If no filename is passed, the downloaded data will not be stored on the local file |
| system.</li> |
| <li><strong>store_to_xcom_key</strong> (<em>string</em>) – If this param is set, the operator will push |
| the contents of the downloaded file to XCom with the key set in this |
| parameter. If not set, the downloaded data will not be pushed to XCom. (templated)</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstoragelistoperator"> |
| <span id="id122"></span><h5>GoogleCloudStorageListOperator<a class="headerlink" href="#googlecloudstoragelistoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_list_operator.</code><code class="descname">GoogleCloudStorageListOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_list_operator.html#GoogleCloudStorageListOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_list_operator.GoogleCloudStorageListOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>List all objects from the bucket with the given string prefix and delimiter in the name.</p> |
| <dl class="docutils"> |
| <dt>This operator returns a python list with the names of objects which can be used by</dt> |
| <dd><cite>xcom</cite> in the downstream task.</dd> |
| </dl> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket to find the objects. (templated)</li> |
| <li><strong>prefix</strong> (<em>string</em>) – Prefix string which filters objects whose name begin with |
| this prefix. (templated)</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – The delimiter by which you want to filter the objects. (templated) |
| For example, to list the CSV files in a directory in GCS you would use |
| delimiter=’.csv’.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Example</strong>:</dt> |
| <dd><p class="first">The following Operator would list all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code> |
| folder in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">GCS_Files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageListOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'GCS_Files'</span><span class="p">,</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">prefix</span><span class="o">=</span><span class="s1">'sales/sales-2017/'</span><span class="p">,</span> |
| <span class="n">delimiter</span><span class="o">=</span><span class="s1">'.avro'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstorageobjectcreateaclentryoperator"> |
| <span id="googlecloudstoragetobigqueryoperator"></span><h5>GoogleCloudStorageObjectCreateAclEntryOperator<a class="headerlink" href="#googlecloudstorageobjectcreateaclentryoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_acl_operator.GoogleCloudStorageObjectCreateAclEntryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_acl_operator.</code><code class="descname">GoogleCloudStorageObjectCreateAclEntryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_acl_operator.html#GoogleCloudStorageObjectCreateAclEntryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_acl_operator.GoogleCloudStorageObjectCreateAclEntryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Creates a new ACL entry on the specified object.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>str</em>) – Name of a bucket.</li> |
| <li><strong>object_name</strong> (<em>str</em>) – Name of the object. For information about how to URL encode object |
| names to be path safe, see: |
| <a class="reference external" href="https://cloud.google.com/storage/docs/json_api/#encoding">https://cloud.google.com/storage/docs/json_api/#encoding</a></li> |
| <li><strong>entity</strong> (<em>str</em>) – The entity holding the permission, in one of the following forms: |
| user-userId, user-email, group-groupId, group-email, domain-domain, |
| project-team-projectId, allUsers, allAuthenticatedUsers</li> |
| <li><strong>role</strong> (<em>str</em>) – The access permission for the entity. |
| Acceptable values are: “OWNER”, “READER”.</li> |
| <li><strong>generation</strong> (<em>str</em>) – (Optional) If present, selects a specific revision of this object |
| (as opposed to the latest version, the default).</li> |
| <li><strong>user_project</strong> (<em>str</em>) – (Optional) The project to be billed for this request. |
| Required for Requester Pays buckets.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>str</em>) – The connection ID to use when |
| connecting to Google Cloud Storage.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="id123"> |
| <span id="id124"></span><h5>GoogleCloudStorageToBigQueryOperator<a class="headerlink" href="#id123" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_bq.</code><code class="descname">GoogleCloudStorageToBigQueryOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_bq.html#GoogleCloudStorageToBigQueryOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_bq.GoogleCloudStorageToBigQueryOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Loads files from Google cloud storage into BigQuery.</p> |
| <p>The schema to be used for the BigQuery table may be specified in one of |
| two ways. You may either directly pass the schema fields in, or you may |
| point the operator to a Google cloud storage object name. The object in |
| Google cloud storage must be a JSON file with the schema fields in it.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to load from. (templated)</li> |
| <li><strong>source_objects</strong> (<em>list of str</em>) – List of Google cloud storage URIs to load from. (templated) |
| If source_format is ‘DATASTORE_BACKUP’, the list must only contain a single URI.</li> |
| <li><strong>destination_project_dataset_table</strong> (<em>string</em>) – The dotted (<project>.)<dataset>.<table> |
| BigQuery table to load data into. If <project> is not included, |
| project will be the project defined in the connection json. (templated)</li> |
| <li><strong>schema_fields</strong> (<em>list</em>) – If set, the schema field list as defined here: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load">https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load</a> |
| Should not be set when source_format is ‘DATASTORE_BACKUP’.</li> |
| <li><strong>schema_object</strong> (<em>string</em>) – If set, a GCS object path pointing to a .json file that |
| contains the schema for the table. (templated)</li> |
| <li><strong>source_format</strong> (<em>string</em>) – File format to export.</li> |
| <li><strong>compression</strong> (<em>string</em>) – [Optional] The compression type of the data source. |
| Possible values include GZIP and NONE. |
| The default value is NONE. |
| This setting is ignored for Google Cloud Bigtable, |
| Google Cloud Datastore backups and Avro formats.</li> |
| <li><strong>create_disposition</strong> (<em>string</em>) – The create disposition if the table doesn’t exist.</li> |
| <li><strong>skip_leading_rows</strong> (<em>int</em>) – Number of rows to skip when loading from a CSV.</li> |
| <li><strong>write_disposition</strong> (<em>string</em>) – The write disposition if the table already exists.</li> |
| <li><strong>field_delimiter</strong> (<em>string</em>) – The delimiter to use when loading from a CSV.</li> |
| <li><strong>max_bad_records</strong> (<em>int</em>) – The maximum number of bad records that BigQuery can |
| ignore when running the job.</li> |
| <li><strong>quote_character</strong> (<em>string</em>) – The value that is used to quote data sections in a CSV file.</li> |
| <li><strong>ignore_unknown_values</strong> (<em>bool</em>) – [Optional] Indicates if BigQuery should allow |
| extra values that are not represented in the table schema. |
| If true, the extra values are ignored. If false, records with extra columns |
| are treated as bad records, and if there are too many bad records, an |
| invalid error is returned in the job result.</li> |
| <li><strong>allow_quoted_newlines</strong> (<em>bool</em>) – Whether to allow quoted newlines (true) or not (false).</li> |
| <li><strong>allow_jagged_rows</strong> (<em>bool</em>) – Accept rows that are missing trailing optional columns. |
| The missing values are treated as nulls. If false, records with missing trailing |
| columns are treated as bad records, and if there are too many bad records, an |
| invalid error is returned in the job result. Only applicable to CSV, ignored |
| for other formats.</li> |
| <li><strong>max_id_key</strong> (<em>string</em>) – If set, the name of a column in the BigQuery table |
| that’s to be loaded. This will be used to select the MAX value from |
| BigQuery after the load occurs. The results will be returned by the |
| execute() command, which in turn gets stored in XCom for future |
| operators to use. This can be helpful with incremental loads–during |
| future executions, you can pick up from the max ID.</li> |
| <li><strong>bigquery_conn_id</strong> (<em>string</em>) – Reference to a specific BigQuery hook.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – Reference to a specific Google |
| cloud storage hook.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. For this to |
| work, the service account making the request must have domain-wide |
| delegation enabled.</li> |
| <li><strong>schema_update_options</strong> (<em>list</em>) – Allows the schema of the destination |
| table to be updated as a side effect of the load job.</li> |
| <li><strong>src_fmt_configs</strong> (<em>dict</em>) – configure optional fields specific to the source format</li> |
| <li><strong>external_table</strong> (<em>bool</em>) – Flag to specify if the destination table should be |
| a BigQuery external table. Default Value is False.</li> |
| <li><strong>time_partitioning</strong> (<em>dict</em>) – configure optional time partitioning fields i.e. |
| partition by field, type and expiration as per API specifications. |
| Note that ‘field’ is not available in concurrency with |
| dataset.table$partition.</li> |
| <li><strong>cluster_fields</strong> (<em>list of str</em>) – Request that the result of this load be stored sorted |
| by one or more columns. This is only available in conjunction with |
| time_partitioning. The order of columns given determines the sort order. |
| Not applicable for external tables.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstoragetogooglecloudstorageoperator"> |
| <span id="id125"></span><h5>GoogleCloudStorageToGoogleCloudStorageOperator<a class="headerlink" href="#googlecloudstoragetogooglecloudstorageoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_gcs.</code><code class="descname">GoogleCloudStorageToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_gcs.html#GoogleCloudStorageToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_gcs.GoogleCloudStorageToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies objects from a bucket to another, with renaming if requested.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>string</em>) – The source Google cloud storage bucket where the |
| object is. (templated)</li> |
| <li><strong>source_object</strong> (<em>string</em>) – <p>The source name of the object to copy in the Google cloud |
| storage bucket. (templated) |
| If wildcards are used in this argument:</p> |
| <blockquote> |
| <div>You can use only one wildcard for objects (filenames) within your |
| bucket. The wildcard can appear inside the object name or at the |
| end of the object name. Appending a wildcard to the bucket name is |
| unsupported.</div></blockquote> |
| </li> |
| <li><strong>destination_bucket</strong> (<em>string</em>) – The destination Google cloud storage bucket |
| where the object should be. (templated)</li> |
| <li><strong>destination_object</strong> (<em>string</em>) – The destination name of the object in the |
| destination Google cloud storage bucket. (templated) |
| If a wildcard is supplied in the source_object argument, this is the |
| prefix that will be prepended to the final destination objects’ paths. |
| Note that the source path’s part before the wildcard will be removed; |
| if it needs to be retained it should be appended to destination_object. |
| For example, with prefix <code class="docutils literal notranslate"><span class="pre">foo/*</span></code> and destination_object <code class="docutils literal notranslate"><span class="pre">blah/</span></code>, the |
| file <code class="docutils literal notranslate"><span class="pre">foo/baz</span></code> will be copied to <code class="docutils literal notranslate"><span class="pre">blah/baz</span></code>; to retain the prefix write |
| the destination_object as e.g. <code class="docutils literal notranslate"><span class="pre">blah/foo</span></code>, in which case the copied file |
| will be named <code class="docutils literal notranslate"><span class="pre">blah/foo/baz</span></code>.</li> |
| <li><strong>move_object</strong> (<em>bool</em>) – When move object is True, the object is moved instead |
| of copied to the new location. This is the equivalent of a mv command |
| as opposed to a cp command.</li> |
| <li><strong>google_cloud_storage_conn_id</strong> (<em>string</em>) – The connection ID to use when |
| connecting to Google cloud storage.</li> |
| <li><strong>delegate_to</strong> (<em>string</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt><strong>Examples</strong>:</dt> |
| <dd><p class="first">The following Operator would copy a single file named |
| <code class="docutils literal notranslate"><span class="pre">sales/sales-2017/january.avro</span></code> in the <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the file named |
| <code class="docutils literal notranslate"><span class="pre">copied_sales/2017/january-backup.avro</span></code> in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_single_file</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'copy_single_file'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">source_object</span><span class="o">=</span><span class="s1">'sales/sales-2017/january.avro'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'data_backup'</span><span class="p">,</span> |
| <span class="n">destination_object</span><span class="o">=</span><span class="s1">'copied_sales/2017/january-backup.avro'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The following Operator would copy all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code> |
| folder (i.e. with names starting with that prefix) in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the |
| <code class="docutils literal notranslate"><span class="pre">copied_sales/2017</span></code> folder in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket.</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">copy_files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'copy_files'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">source_object</span><span class="o">=</span><span class="s1">'sales/sales-2017/*.avro'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'data_backup'</span><span class="p">,</span> |
| <span class="n">destination_object</span><span class="o">=</span><span class="s1">'copied_sales/2017/'</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| <p>The following Operator would move all the Avro files from <code class="docutils literal notranslate"><span class="pre">sales/sales-2017</span></code> |
| folder (i.e. with names starting with that prefix) in <code class="docutils literal notranslate"><span class="pre">data</span></code> bucket to the |
| same folder in the <code class="docutils literal notranslate"><span class="pre">data_backup</span></code> bucket, deleting the original files in the |
| process.</p> |
| <div class="last highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">move_files</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'move_files'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'data'</span><span class="p">,</span> |
| <span class="n">source_object</span><span class="o">=</span><span class="s1">'sales/sales-2017/*.avro'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'data_backup'</span><span class="p">,</span> |
| <span class="n">move_object</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="n">google_cloud_conn_id</span> |
| <span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="googlecloudstoragetogooglecloudstoragetransferoperator"> |
| <span id="id126"></span><h5>GoogleCloudStorageToGoogleCloudStorageTransferOperator<a class="headerlink" href="#googlecloudstoragetogooglecloudstoragetransferoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.gcs_to_gcs_transfer_operator.GoogleCloudStorageToGoogleCloudStorageTransferOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.gcs_to_gcs_transfer_operator.</code><code class="descname">GoogleCloudStorageToGoogleCloudStorageTransferOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/gcs_to_gcs_transfer_operator.html#GoogleCloudStorageToGoogleCloudStorageTransferOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.gcs_to_gcs_transfer_operator.GoogleCloudStorageToGoogleCloudStorageTransferOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copies objects from a bucket to another using the GCP Storage Transfer |
| Service.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>str</em>) – The source Google cloud storage bucket where the |
| object is. (templated)</li> |
| <li><strong>destination_bucket</strong> (<em>str</em>) – The destination Google cloud storage bucket |
| where the object should be. (templated)</li> |
| <li><strong>project_id</strong> (<em>str</em>) – The ID of the Google Cloud Platform Console project that |
| owns the job</li> |
| <li><strong>gcp_conn_id</strong> (<em>str</em>) – Optional connection ID to use when connecting to Google Cloud |
| Storage.</li> |
| <li><strong>delegate_to</strong> (<em>str</em>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have |
| domain-wide delegation enabled.</li> |
| <li><strong>description</strong> (<em>str</em>) – Optional transfer service job description</li> |
| <li><strong>schedule</strong> (<em>dict</em>) – Optional transfer service schedule; see |
| <a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs</a>. |
| If not set, run transfer job once as soon as the operator runs</li> |
| <li><strong>object_conditions</strong> (<em>dict</em>) – Optional transfer service object conditions; see |
| <a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#ObjectConditions">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#ObjectConditions</a></li> |
| <li><strong>transfer_options</strong> (<em>dict</em>) – Optional transfer service transfer options; see |
| <a class="reference external" href="https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#TransferOptions">https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#TransferOptions</a></li> |
| <li><strong>wait</strong> (<em>bool</em>) – Wait for transfer to finish; defaults to <cite>True</cite></li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">gcs_to_gcs_transfer_op</span> <span class="o">=</span> <span class="n">GoogleCloudStorageToGoogleCloudStorageTransferOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'gcs_to_gcs_transfer_example'</span><span class="p">,</span> |
| <span class="n">source_bucket</span><span class="o">=</span><span class="s1">'my-source-bucket'</span><span class="p">,</span> |
| <span class="n">destination_bucket</span><span class="o">=</span><span class="s1">'my-destination-bucket'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'my-gcp-project'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">my_dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="mysqltogooglecloudstorageoperator"> |
| <span id="id127"></span><h5>MySqlToGoogleCloudStorageOperator<a class="headerlink" href="#mysqltogooglecloudstorageoperator" title="Permalink to this headline">¶</a></h5> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.mysql_to_gcs.</code><code class="descname">MySqlToGoogleCloudStorageOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mysql_to_gcs.html#MySqlToGoogleCloudStorageOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Copy data from MySQL to Google cloud storage in JSON format.</p> |
| <dl class="classmethod"> |
| <dt id="airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator.type_map"> |
| <em class="property">classmethod </em><code class="descname">type_map</code><span class="sig-paren">(</span><em>mysql_type</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/mysql_to_gcs.html#MySqlToGoogleCloudStorageOperator.type_map"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.mysql_to_gcs.MySqlToGoogleCloudStorageOperator.type_map" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Helper function that maps from MySQL fields to BigQuery fields. Used |
| when a schema_filename is set.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="googlecloudstoragehook"> |
| <h4>GoogleCloudStorageHook<a class="headerlink" href="#googlecloudstoragehook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcs_hook.</code><code class="descname">GoogleCloudStorageHook</code><span class="sig-paren">(</span><em>google_cloud_storage_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Interact with Google Cloud Storage. This hook uses the Google Cloud Platform |
| connection.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy"> |
| <code class="descname">copy</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket=None</em>, <em>destination_object=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.copy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Copies an object from a bucket to another, with renaming if requested.</p> |
| <p>Either destination_bucket or destination_object can be omitted (but not both), |
| in which case the source bucket/object is used.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>string</em>) – The bucket of the object to copy from.</li> |
| <li><strong>source_object</strong> (<em>string</em>) – The object to copy.</li> |
| <li><strong>destination_bucket</strong> (<em>string</em>) – The destination of the object to be copied to. |
| Can be omitted; then the same bucket is used.</li> |
| <li><strong>destination_object</strong> (<em>string</em>) – The (renamed) path of the object if given. |
| Can be omitted; then the same name is used.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket"> |
| <code class="descname">create_bucket</code><span class="sig-paren">(</span><em>bucket_name</em>, <em>storage_class='MULTI_REGIONAL'</em>, <em>location='US'</em>, <em>project_id=None</em>, <em>labels=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.create_bucket"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new bucket. Google Cloud Storage uses a flat namespace, so |
| you can’t create a bucket with a name that is already in use.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last">For more information, see Bucket Naming Guidelines: |
| <a class="reference external" href="https://cloud.google.com/storage/docs/bucketnaming.html#requirements">https://cloud.google.com/storage/docs/bucketnaming.html#requirements</a></p> |
| </div> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket_name</strong> (<em>string</em>) – The name of the bucket.</li> |
| <li><strong>storage_class</strong> (<em>string</em>) – <p>This defines how objects in the bucket are stored |
| and determines the SLA and the cost of storage. Values include</p> |
| <ul> |
| <li><code class="docutils literal notranslate"><span class="pre">MULTI_REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">REGIONAL</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">STANDARD</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">NEARLINE</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">COLDLINE</span></code>.</li> |
| </ul> |
| <p>If this value is not specified when the bucket is |
| created, it will default to STANDARD.</p> |
| </li> |
| <li><strong>location</strong> (<em>string</em>) – <p>The location of the bucket. |
| Object data for objects in the bucket resides in physical storage |
| within this region. Defaults to US.</p> |
| <div class="admonition seealso"> |
| <p class="first admonition-title">See also</p> |
| <p class="last"><a class="reference external" href="https://developers.google.com/storage/docs/bucket-locations">https://developers.google.com/storage/docs/bucket-locations</a></p> |
| </div> |
| </li> |
| <li><strong>project_id</strong> (<em>string</em>) – The ID of the GCP Project.</li> |
| <li><strong>labels</strong> (<em>dict</em>) – User-provided labels, in key/value pairs.</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">If successful, it returns the <code class="docutils literal notranslate"><span class="pre">id</span></code> of the bucket.</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete"> |
| <code class="descname">delete</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>generation=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.delete"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete an object if versioning is not enabled for the bucket, or if generation |
| parameter is used.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – name of the bucket, where the object resides</li> |
| <li><strong>object</strong> (<em>string</em>) – name of the object to delete</li> |
| <li><strong>generation</strong> (<em>string</em>) – if present, permanently delete the object of this generation</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">True if succeeded</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download"> |
| <code class="descname">download</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.download"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Get a file from Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to fetch from.</li> |
| <li><strong>object</strong> (<em>string</em>) – The object to fetch.</li> |
| <li><strong>filename</strong> (<em>string</em>) – If set, a local file path where the file should be written to.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists"> |
| <code class="descname">exists</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.exists"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks for the existence of a file in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns a Google Cloud Storage service object.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c"> |
| <code class="descname">get_crc32c</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_crc32c"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the CRC32c checksum of an object in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash"> |
| <code class="descname">get_md5hash</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_md5hash"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the MD5 hash of an object in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size"> |
| <code class="descname">get_size</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.get_size"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the size of a file in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud storage bucket.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_bucket_acl"> |
| <code class="descname">insert_bucket_acl</code><span class="sig-paren">(</span><em>bucket</em>, <em>entity</em>, <em>role</em>, <em>user_project</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.insert_bucket_acl"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_bucket_acl" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new ACL entry on the specified bucket. |
| See: <a class="reference external" href="https://cloud.google.com/storage/docs/json_api/v1/bucketAccessControls/insert">https://cloud.google.com/storage/docs/json_api/v1/bucketAccessControls/insert</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>str</em>) – Name of a bucket.</li> |
| <li><strong>entity</strong> (<em>str</em>) – The entity holding the permission, in one of the following forms: |
| user-userId, user-email, group-groupId, group-email, domain-domain, |
| project-team-projectId, allUsers, allAuthenticatedUsers. |
| See: <a class="reference external" href="https://cloud.google.com/storage/docs/access-control/lists#scopes">https://cloud.google.com/storage/docs/access-control/lists#scopes</a></li> |
| <li><strong>role</strong> (<em>str</em>) – The access permission for the entity. |
| Acceptable values are: “OWNER”, “READER”, “WRITER”.</li> |
| <li><strong>user_project</strong> (<em>str</em>) – (Optional) The project to be billed for this request. |
| Required for Requester Pays buckets.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_object_acl"> |
| <code class="descname">insert_object_acl</code><span class="sig-paren">(</span><em>bucket</em>, <em>object_name</em>, <em>entity</em>, <em>role</em>, <em>generation</em>, <em>user_project</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.insert_object_acl"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_object_acl" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a new ACL entry on the specified object. |
| See: <a class="reference external" href="https://cloud.google.com/storage/docs/json_api/v1/objectAccessControls/insert">https://cloud.google.com/storage/docs/json_api/v1/objectAccessControls/insert</a></p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>str</em>) – Name of a bucket.</li> |
| <li><strong>object_name</strong> (<em>str</em>) – Name of the object. For information about how to URL encode |
| object names to be path safe, see: |
| <a class="reference external" href="https://cloud.google.com/storage/docs/json_api/#encoding">https://cloud.google.com/storage/docs/json_api/#encoding</a></li> |
| <li><strong>entity</strong> (<em>str</em>) – The entity holding the permission, in one of the following forms: |
| user-userId, user-email, group-groupId, group-email, domain-domain, |
| project-team-projectId, allUsers, allAuthenticatedUsers |
| See: <a class="reference external" href="https://cloud.google.com/storage/docs/access-control/lists#scopes">https://cloud.google.com/storage/docs/access-control/lists#scopes</a></li> |
| <li><strong>role</strong> (<em>str</em>) – The access permission for the entity. |
| Acceptable values are: “OWNER”, “READER”.</li> |
| <li><strong>generation</strong> (<em>str</em>) – (Optional) If present, selects a specific revision of this |
| object (as opposed to the latest version, the default).</li> |
| <li><strong>user_project</strong> (<em>str</em>) – (Optional) The project to be billed for this request. |
| Required for Requester Pays buckets.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after"> |
| <code class="descname">is_updated_after</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>ts</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.is_updated_after"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks if an object is updated in Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The Google cloud storage bucket where the object is.</li> |
| <li><strong>object</strong> (<em>string</em>) – The name of the object to check in the Google cloud |
| storage bucket.</li> |
| <li><strong>ts</strong> (<em>datetime</em>) – The timestamp to check against.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list"> |
| <code class="descname">list</code><span class="sig-paren">(</span><em>bucket</em>, <em>versions=None</em>, <em>maxResults=None</em>, <em>prefix=None</em>, <em>delimiter=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.list"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list" title="Permalink to this definition">¶</a></dt> |
| <dd><p>List all objects from the bucket with the given string prefix in name</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – bucket name</li> |
| <li><strong>versions</strong> (<em>boolean</em>) – if true, list all versions of the objects</li> |
| <li><strong>maxResults</strong> (<em>integer</em>) – max count of items to return in a single page of responses</li> |
| <li><strong>prefix</strong> (<em>string</em>) – prefix string which filters objects whose name begin with |
| this prefix</li> |
| <li><strong>delimiter</strong> (<em>string</em>) – filters objects based on the delimiter (for e.g ‘.csv’)</li> |
| </ul> |
| </td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">a stream of object names matching the filtering criteria</p> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite"> |
| <code class="descname">rewrite</code><span class="sig-paren">(</span><em>source_bucket</em>, <em>source_object</em>, <em>destination_bucket</em>, <em>destination_object=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.rewrite"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Has the same functionality as copy, except that it will work on files |
| over 5 TB, as well as when copying between locations and/or storage |
| classes.</p> |
| <p>destination_object can be omitted, in which case source_object is used.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>source_bucket</strong> (<em>string</em>) – The bucket of the object to copy from.</li> |
| <li><strong>source_object</strong> (<em>string</em>) – The object to copy.</li> |
| <li><strong>destination_bucket</strong> (<em>string</em>) – The destination of the object to be copied to.</li> |
| <li><strong>destination_object</strong> – The (renamed) path of the object if given. |
| Can be omitted; then the same name is used.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload"> |
| <code class="descname">upload</code><span class="sig-paren">(</span><em>bucket</em>, <em>object</em>, <em>filename</em>, <em>mime_type='application/octet-stream'</em>, <em>gzip=False</em>, <em>multipart=False</em>, <em>num_retries=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcs_hook.html#GoogleCloudStorageHook.upload"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Uploads a local file to Google Cloud Storage.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>bucket</strong> (<em>string</em>) – The bucket to upload to.</li> |
| <li><strong>object</strong> (<em>string</em>) – The object name to set when uploading the local file.</li> |
| <li><strong>filename</strong> (<em>string</em>) – The local file path to the file to be uploaded.</li> |
| <li><strong>mime_type</strong> (<em>str</em>) – The MIME type to set when uploading the file.</li> |
| <li><strong>gzip</strong> (<em>bool</em>) – Option to compress file for upload</li> |
| <li><strong>multipart</strong> (<em>bool</em><em> or </em><em>int</em>) – If True, the upload will be split into multiple HTTP requests. The |
| default size is 256MiB per request. Pass a number instead of True to |
| specify the request size, which must be a multiple of 262144 (256KiB).</li> |
| <li><strong>num_retries</strong> (<em>int</em>) – The number of times to attempt to re-upload the file (or individual |
| chunks, in the case of multipart uploads). Retries are attempted |
| with exponential backoff.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="gcptransferservicehook"> |
| <h4>GCPTransferServiceHook<a class="headerlink" href="#gcptransferservicehook" title="Permalink to this headline">¶</a></h4> |
| <dl class="class"> |
| <dt id="airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.hooks.gcp_transfer_hook.</code><code class="descname">GCPTransferServiceHook</code><span class="sig-paren">(</span><em>api_version='v1'</em>, <em>gcp_conn_id='google_cloud_default'</em>, <em>delegate_to=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_transfer_hook.html#GCPTransferServiceHook"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook" title="airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook</span></code></a></p> |
| <p>Hook for GCP Storage Transfer Service.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook.get_conn"> |
| <code class="descname">get_conn</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/hooks/gcp_transfer_hook.html#GCPTransferServiceHook.get_conn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.hooks.gcp_transfer_hook.GCPTransferServiceHook.get_conn" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Retrieves connection to Google Storage Transfer service.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Google Storage Transfer service object</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">dict</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| <div class="section" id="google-kubernetes-engine"> |
| <h3>Google Kubernetes Engine<a class="headerlink" href="#google-kubernetes-engine" title="Permalink to this headline">¶</a></h3> |
| <div class="section" id="google-kubernetes-engine-cluster-operators"> |
| <h4>Google Kubernetes Engine Cluster Operators<a class="headerlink" href="#google-kubernetes-engine-cluster-operators" title="Permalink to this headline">¶</a></h4> |
| <ul class="simple"> |
<li><a class="reference internal" href="#id128"><span class="std std-ref">GKEClusterDeleteOperator</span></a> : Deletes a Kubernetes Cluster in Google Cloud Platform</li>
<li><a class="reference internal" href="#id129"><span class="std std-ref">GKEPodOperator</span></a> : Executes a task in a Kubernetes Pod in Google Cloud Platform</li>
| </ul> |
| <div class="section" id="gkeclustercreateoperator"> |
| <h5>GKEClusterCreateOperator<a class="headerlink" href="#gkeclustercreateoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="gkeclusterdeleteoperator"> |
| <span id="id128"></span><h5>GKEClusterDeleteOperator<a class="headerlink" href="#gkeclusterdeleteoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| <div class="section" id="gkepodoperator"> |
| <span id="id129"></span><h5>GKEPodOperator<a class="headerlink" href="#gkepodoperator" title="Permalink to this headline">¶</a></h5> |
| </div> |
| </div> |
| <div class="section" id="google-kubernetes-engine-hook"> |
| <span id="id130"></span><h4>Google Kubernetes Engine Hook<a class="headerlink" href="#google-kubernetes-engine-hook" title="Permalink to this headline">¶</a></h4> |
| </div> |
| </div> |
| </div> |
| <div class="section" id="qubole"> |
| <span id="id131"></span><h2>Qubole<a class="headerlink" href="#qubole" title="Permalink to this headline">¶</a></h2> |
| <p>Apache Airflow has a native operator and hooks to talk to <a class="reference external" href="https://qubole.com/">Qubole</a>, |
| which lets you submit your big data jobs directly to Qubole from Apache Airflow.</p> |
| <div class="section" id="quboleoperator"> |
| <h3>QuboleOperator<a class="headerlink" href="#quboleoperator" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.qubole_operator.QuboleOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.qubole_operator.</code><code class="descname">QuboleOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/qubole_operator.html#QuboleOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.qubole_operator.QuboleOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>Execute tasks (commands) on QDS (<a class="reference external" href="https://qubole.com">https://qubole.com</a>).</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</td> |
| </tr> |
| </tbody> |
| </table> |
| <dl class="docutils"> |
| <dt>kwargs:</dt> |
| <dd><table class="first docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">command_type:</th><td class="field-body">type of command to be executed, e.g. hivecmd, shellcmd, hadoopcmd</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">tags:</th><td class="field-body">array of tags to be assigned with the command</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">cluster_label:</th><td class="field-body">cluster label on which the command will be executed</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">name:</th><td class="field-body">name to be given to command</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">notify:</th><td class="field-body">whether to send email on command completion or not (default is False)</td> |
| </tr> |
| </tbody> |
| </table> |
| <p><strong>Arguments specific to command types</strong></p> |
| <dl class="last docutils"> |
| <dt>hivecmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">query:</th><td class="field-body">inline query statement</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">s3 location containing query statement</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">sample_size:</th><td class="field-body">size of sample in bytes on which to run query</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>prestocmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">query:</th><td class="field-body">inline query statement</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">s3 location containing query statement</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>hadoopcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">sub_command:</th><td class="field-body">must be one of these [“jar”, “s3distcp”, “streaming”] followed by
| 1 or more args</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>shellcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">script:</th><td class="field-body">inline command with args</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">s3 location containing query statement</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">files:</th><td class="field-body">list of files in s3 bucket as file1,file2 format. These files will be |
| copied into the working directory where the qubole command is being |
| executed.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">archives:</th><td class="field-body">list of archives in s3 bucket as archive1,archive2 format. These |
will be unarchived into the working directory where the qubole command is
| being executed</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">parameters:</th><td class="field-body">any extra args which need to be passed to script (only when |
| script_location is supplied)</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>pigcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">script:</th><td class="field-body">inline query statement (latin_statements)</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">s3 location containing pig query</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">parameters:</th><td class="field-body">any extra args which need to be passed to script (only when |
script_location is supplied)</td>
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>sparkcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">program:</th><td class="field-body">the complete Spark Program in Scala, SQL, Command, R, or Python</td> |
| </tr> |
<tr class="field-even field"><th class="field-name">cmdline:</th><td class="field-body">spark-submit command line, all required information must be specified
| in cmdline itself.</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">sql:</th><td class="field-body">inline sql query</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">script_location:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">s3 location containing query statement</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">language:</th><td class="field-body">language of the program, Scala, SQL, Command, R, or Python</td> |
| </tr> |
<tr class="field-even field"><th class="field-name">app_id:</th><td class="field-body">ID of a Spark job server app</td>
| </tr> |
| <tr class="field-odd field"><th class="field-name">arguments:</th><td class="field-body">spark-submit command line arguments</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name" colspan="2">user_program_arguments:</th></tr> |
| <tr class="field-even field"><td> </td><td class="field-body">arguments that the user program takes in</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>dbtapquerycmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">db_tap_id:</th><td class="field-body">data store ID of the target database, in Qubole.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">query:</th><td class="field-body">inline query statement</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">macros:</th><td class="field-body">macro values which were used in query</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>dbexportcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">mode:</th><td class="field-body">1 (simple), 2 (advanced)</td>
| </tr> |
| <tr class="field-even field"><th class="field-name">hive_table:</th><td class="field-body">Name of the hive table</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">partition_spec:</th><td class="field-body">partition specification for Hive table.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">dbtap_id:</th><td class="field-body">data store ID of the target database, in Qubole.</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">db_table:</th><td class="field-body">name of the db table</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">db_update_mode:</th><td class="field-body">allowinsert or updateonly</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">db_update_keys:</th><td class="field-body">columns used to determine the uniqueness of rows</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">export_dir:</th><td class="field-body">HDFS/S3 location from which data will be exported.</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name" colspan="2">fields_terminated_by:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">hex of the char used as column separator in the dataset</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| <dt>dbimportcmd:</dt> |
| <dd><table class="first last docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
<tr class="field-odd field"><th class="field-name">mode:</th><td class="field-body">1 (simple), 2 (advanced)</td>
| </tr> |
| <tr class="field-even field"><th class="field-name">hive_table:</th><td class="field-body">Name of the hive table</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">dbtap_id:</th><td class="field-body">data store ID of the target database, in Qubole.</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">db_table:</th><td class="field-body">name of the db table</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">where_clause:</th><td class="field-body">where clause, if any</td> |
| </tr> |
| <tr class="field-even field"><th class="field-name">parallelism:</th><td class="field-body">number of parallel db connections to use for extracting data</td> |
| </tr> |
| <tr class="field-odd field"><th class="field-name">extract_query:</th><td class="field-body">SQL query to extract data from db. $CONDITIONS must be part |
| of the where clause.</td> |
| </tr> |
<tr class="field-even field"><th class="field-name">boundary_query:</th><td class="field-body">Query to be used to get the range of row IDs to be extracted</td>
| </tr> |
| <tr class="field-odd field"><th class="field-name">split_column:</th><td class="field-body">Column used as row ID to split data into ranges (mode 2)</td> |
| </tr> |
| </tbody> |
| </table> |
| </dd> |
| </dl> |
| </dd> |
| </dl> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p>Following fields are template-supported : <code class="docutils literal notranslate"><span class="pre">query</span></code>, <code class="docutils literal notranslate"><span class="pre">script_location</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">sub_command</span></code>, <code class="docutils literal notranslate"><span class="pre">script</span></code>, <code class="docutils literal notranslate"><span class="pre">files</span></code>, <code class="docutils literal notranslate"><span class="pre">archives</span></code>, <code class="docutils literal notranslate"><span class="pre">program</span></code>, <code class="docutils literal notranslate"><span class="pre">cmdline</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">sql</span></code>, <code class="docutils literal notranslate"><span class="pre">where_clause</span></code>, <code class="docutils literal notranslate"><span class="pre">extract_query</span></code>, <code class="docutils literal notranslate"><span class="pre">boundary_query</span></code>, <code class="docutils literal notranslate"><span class="pre">macros</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">tags</span></code>, <code class="docutils literal notranslate"><span class="pre">name</span></code>, <code class="docutils literal notranslate"><span class="pre">parameters</span></code>, <code class="docutils literal notranslate"><span class="pre">dbtap_id</span></code>, <code class="docutils literal notranslate"><span class="pre">hive_table</span></code>, <code class="docutils literal notranslate"><span class="pre">db_table</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">split_column</span></code>, <code class="docutils literal notranslate"><span class="pre">note_id</span></code>, <code class="docutils literal notranslate"><span class="pre">db_update_keys</span></code>, <code class="docutils literal notranslate"><span class="pre">export_dir</span></code>, |
| <code class="docutils literal notranslate"><span class="pre">partition_spec</span></code>, <code class="docutils literal notranslate"><span class="pre">qubole_conn_id</span></code>, <code class="docutils literal notranslate"><span class="pre">arguments</span></code>, <code class="docutils literal notranslate"><span class="pre">user_program_arguments</span></code>.</p> |
| <blockquote class="last"> |
| <div>You can also use <code class="docutils literal notranslate"><span class="pre">.txt</span></code> files for template driven use cases.</div></blockquote> |
| </div> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">In QuboleOperator there is a default handler for task failures and retries, |
| which generally kills the command running at QDS for the corresponding task |
| instance. You can override this behavior by providing your own failure and retry |
| handler in task definition.</p> |
| </div> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="qubolepartitionsensor"> |
| <h3>QubolePartitionSensor<a class="headerlink" href="#qubolepartitionsensor" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.qubole_sensor.QubolePartitionSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.qubole_sensor.</code><code class="descname">QubolePartitionSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/qubole_sensor.html#QubolePartitionSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.qubole_sensor.QubolePartitionSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.sensors.qubole_sensor.QuboleSensor" title="airflow.contrib.sensors.qubole_sensor.QuboleSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.qubole_sensor.QuboleSensor</span></code></a></p> |
| <p>Wait for a Hive partition to show up in QHS (Qubole Hive Service) |
| and check for its presence via QDS APIs</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</li> |
| <li><strong>data</strong> (<em>a JSON object</em>) – a JSON object containing payload, whose presence needs to be checked. |
| Check this <a class="reference external" href="https://github.com/apache/airflow/blob/master/airflow/contrib/example_dags/example_qubole_sensor.py">example</a> for sample payload |
| structure.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">Both <code class="docutils literal notranslate"><span class="pre">data</span></code> and <code class="docutils literal notranslate"><span class="pre">qubole_conn_id</span></code> fields support templating. You can |
| also use <code class="docutils literal notranslate"><span class="pre">.txt</span></code> files for template-driven use cases.</p> |
| </div> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="qubolefilesensor"> |
| <h3>QuboleFileSensor<a class="headerlink" href="#qubolefilesensor" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="airflow.contrib.sensors.qubole_sensor.QuboleFileSensor"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.sensors.qubole_sensor.</code><code class="descname">QuboleFileSensor</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/sensors/qubole_sensor.html#QuboleFileSensor"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.sensors.qubole_sensor.QuboleFileSensor" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.contrib.sensors.qubole_sensor.QuboleSensor" title="airflow.contrib.sensors.qubole_sensor.QuboleSensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.sensors.qubole_sensor.QuboleSensor</span></code></a></p> |
| <p>Wait for a file or folder to be present in cloud storage |
| and check for its presence via QDS APIs</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</li> |
<li><strong>data</strong> (<em>a JSON object</em>) – <p>a JSON object containing payload, whose presence needs to be checked.
| Check this <a class="reference external" href="https://github.com/apache/airflow/blob/master/airflow/contrib/example_dags/example_qubole_sensor.py">example</a> for sample payload |
| structure.</p> |
| </li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">Both <code class="docutils literal notranslate"><span class="pre">data</span></code> and <code class="docutils literal notranslate"><span class="pre">qubole_conn_id</span></code> fields support templating. You can |
| also use <code class="docutils literal notranslate"><span class="pre">.txt</span></code> files for template-driven use cases.</p> |
| </div> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="qubolecheckoperator"> |
| <h3>QuboleCheckOperator<a class="headerlink" href="#qubolecheckoperator" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.qubole_check_operator.QuboleCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.qubole_check_operator.</code><code class="descname">QuboleCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/qubole_check_operator.html#QuboleCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.qubole_check_operator.QuboleCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.CheckOperator" title="airflow.operators.check_operator.CheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.CheckOperator</span></code></a>, <a class="reference internal" href="#airflow.contrib.operators.qubole_operator.QuboleOperator" title="airflow.contrib.operators.qubole_operator.QuboleOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.qubole_operator.QuboleOperator</span></code></a></p> |
| <p>Performs checks against Qubole Commands. <code class="docutils literal notranslate"><span class="pre">QuboleCheckOperator</span></code> expects |
| a command that will be executed on QDS. |
| By default, each value on first row of the result of this Qubole Command |
| is evaluated using python <code class="docutils literal notranslate"><span class="pre">bool</span></code> casting. If any of the |
| values return <code class="docutils literal notranslate"><span class="pre">False</span></code>, the check is failed and errors out.</p> |
| <p>Note that Python bool casting evals the following as <code class="docutils literal notranslate"><span class="pre">False</span></code>:</p> |
| <ul class="simple"> |
| <li><code class="docutils literal notranslate"><span class="pre">False</span></code></li> |
| <li><code class="docutils literal notranslate"><span class="pre">0</span></code></li> |
| <li>Empty string (<code class="docutils literal notranslate"><span class="pre">""</span></code>)</li> |
| <li>Empty list (<code class="docutils literal notranslate"><span class="pre">[]</span></code>)</li> |
| <li>Empty dictionary or set (<code class="docutils literal notranslate"><span class="pre">{}</span></code>)</li> |
| </ul> |
| <p>Given a query like <code class="docutils literal notranslate"><span class="pre">SELECT</span> <span class="pre">COUNT(*)</span> <span class="pre">FROM</span> <span class="pre">foo</span></code>, it will fail only if |
| the count <code class="docutils literal notranslate"><span class="pre">==</span> <span class="pre">0</span></code>. You can craft much more complex query that could, |
| for instance, check that the table has the same number of rows as |
| the source table upstream, or that the count of today’s partition is |
| greater than yesterday’s partition, or that a set of metrics are less |
| than 3 standard deviation for the 7 day average.</p> |
| <p>This operator can be used as a data quality check in your pipeline, and |
| depending on where you put it in your DAG, you have the choice to |
| stop the critical path, preventing from |
| publishing dubious data, or on the side and receive email alerts |
| without stopping the progress of the DAG.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</td> |
| </tr> |
| </tbody> |
| </table> |
| <p>kwargs:</p> |
| <blockquote> |
| <div><p>Arguments specific to Qubole command can be referred from QuboleOperator docs.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name" colspan="2">results_parser_callable:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">This is an optional parameter to |
| extend the flexibility of parsing the results of Qubole |
| command to the users. This is a python callable which |
| can hold the logic to parse list of rows returned by Qubole command. |
| By default, only the values on first row are used for performing checks. |
| This callable should return a list of records on |
| which the checks have to be performed.</td> |
| </tr> |
| </tbody> |
| </table> |
| </div></blockquote> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">All fields in common with template fields of |
| QuboleOperator and CheckOperator are template-supported.</p> |
| </div> |
| </dd></dl> |
| |
| </div> |
| <div class="section" id="qubolevaluecheckoperator"> |
| <h3>QuboleValueCheckOperator<a class="headerlink" href="#qubolevaluecheckoperator" title="Permalink to this headline">¶</a></h3> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.qubole_check_operator.QuboleValueCheckOperator"> |
| <em class="property">class </em><code class="descclassname">airflow.contrib.operators.qubole_check_operator.</code><code class="descname">QuboleValueCheckOperator</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/airflow/contrib/operators/qubole_check_operator.html#QuboleValueCheckOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.qubole_check_operator.QuboleValueCheckOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="code.html#airflow.operators.check_operator.ValueCheckOperator" title="airflow.operators.check_operator.ValueCheckOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.operators.check_operator.ValueCheckOperator</span></code></a>, <a class="reference internal" href="#airflow.contrib.operators.qubole_operator.QuboleOperator" title="airflow.contrib.operators.qubole_operator.QuboleOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.qubole_operator.QuboleOperator</span></code></a></p> |
| <p>Performs a simple value check using Qubole command. |
| By default, each value on the first row of this |
| Qubole command is compared with a pre-defined value. |
| The check fails and errors out if the output of the command |
| is not within the permissible limit of expected value.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple"> |
| <li><strong>qubole_conn_id</strong> (<em>str</em>) – Connection id which consists of qds auth_token</li> |
| <li><strong>pass_value</strong> (<em>str/int/float</em>) – Expected value of the query results.</li> |
| <li><strong>tolerance</strong> (<em>int/float</em>) – Defines the permissible pass_value range; for example, if |
| tolerance is 2, the Qubole command output can be anything between |
| -2*pass_value and 2*pass_value without the operator erroring out.</li> |
| </ul> |
| </td> |
| </tr> |
| </tbody> |
| </table> |
| <p>kwargs:</p> |
| <blockquote> |
| <div><p>Arguments specific to the Qubole command are described in the QuboleOperator documentation.</p> |
| <table class="docutils field-list" frame="void" rules="none"> |
| <col class="field-name" /> |
| <col class="field-body" /> |
| <tbody valign="top"> |
| <tr class="field-odd field"><th class="field-name" colspan="2">results_parser_callable:</th></tr> |
| <tr class="field-odd field"><td> </td><td class="field-body">This is an optional parameter that |
| lets users customize how the results of the Qubole |
| command are parsed. It is a Python callable that |
| holds the logic to parse the list of rows returned by the Qubole command. |
| By default, only the values on the first row are used for performing checks. |
| This callable should return the list of records on |
| which the checks are to be performed.</td> |
| </tr> |
| </tbody> |
| </table> |
| </div></blockquote> |
| <div class="admonition note"> |
| <p class="first admonition-title">Note</p> |
| <p class="last">All fields in common with template fields of |
| QuboleOperator and ValueCheckOperator are template-supported.</p> |
| </div> |
| </dd></dl> |
| |
| </div> |
| </div> |
| </div> |
| |
| |
| </div> |
| |
| </div> |
| <footer> |
| |
| <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| |
| <a href="metrics.html" class="btn btn-neutral float-right" title="Metrics" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a> |
| |
| |
| <a href="api.html" class="btn btn-neutral" title="Experimental Rest API" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a> |
| |
| </div> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p> |
| |
| </p> |
| </div> |
| Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| |
| <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script> |
| <script type="text/javascript" src="_static/jquery.js"></script> |
| <script type="text/javascript" src="_static/underscore.js"></script> |
| <script type="text/javascript" src="_static/doctools.js"></script> |
| <script type="text/javascript" src="_static/language_data.js"></script> |
| |
| |
| |
| |
| <script type="text/javascript" src="_static/js/theme.js"></script> |
| |
| <script type="text/javascript"> |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| </body> |
| </html> |