| |
| |
| <!-- |
| Javascript to render AIRFLOW-XXX and PR references in text |
| as HTML links. |
| |
| Overrides extrahead block from sphinx_rtd_theme |
| https://www.sphinx-doc.org/en/master/templating.html |
| --> |
| |
| |
| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <title>airflow.contrib.operators.dataproc_operator — Airflow Documentation</title> |
| |
| |
| |
| |
| <link rel="shortcut icon" href="../../../../../_static/pin_32.png"/> |
| |
| |
| |
| |
| |
| <script type="text/javascript" src="../../../../../_static/js/modernizr.min.js"></script> |
| |
| |
| <script type="text/javascript" id="documentation_options" data-url_root="../../../../../" src="../../../../../_static/documentation_options.js"></script> |
| <script type="text/javascript" src="../../../../../_static/jquery.js"></script> |
| <script type="text/javascript" src="../../../../../_static/underscore.js"></script> |
| <script type="text/javascript" src="../../../../../_static/doctools.js"></script> |
| <script type="text/javascript" src="../../../../../_static/language_data.js"></script> |
| |
| <script type="text/javascript" src="../../../../../_static/js/theme.js"></script> |
| |
| |
| |
| |
| <link rel="stylesheet" href="../../../../../_static/css/theme.css" type="text/css" /> |
| <link rel="stylesheet" href="../../../../../_static/pygments.css" type="text/css" /> |
| <link rel="stylesheet" href="../../../../../_static/graphviz.css" type="text/css" /> |
| <link rel="index" title="Index" href="../../../../../genindex.html" /> |
| <link rel="search" title="Search" href="../../../../../search.html" /> |
| <link rel="next" title="airflow.contrib.operators.datastore_export_operator" href="../datastore_export_operator/index.html" /> |
| <link rel="prev" title="airflow.contrib.operators.dataflow_operator" href="../dataflow_operator/index.html" /> |
| |
| <script> |
| document.addEventListener('DOMContentLoaded', function() { |
| var el = document.getElementById('changelog'); |
| if (el !== null ) { |
| // [AIRFLOW-...] |
| el.innerHTML = el.innerHTML.replace( |
| /\[(AIRFLOW-[\d]+)\]/g, |
| `<a href="https://issues.apache.org/jira/browse/$1">[$1]</a>` |
| ); |
| // (#...) |
| el.innerHTML = el.innerHTML.replace( |
| /\(#([\d]+)\)/g, |
| `<a href="https://github.com/apache/airflow/pull/$1">(#$1)</a>` |
| ); |
| }; |
| }) |
| </script> |
| <script type="text/javascript"> |
| // Reuse an existing truthy _gaq global if one is already defined; otherwise start with an empty array. |
| var _gaq = _gaq || []; |
| // Queue two commands in order: '_setAccount' with 'UA-140539454-1', then '_trackPageview'. |
| // NOTE(review): presumably the legacy Google Analytics (ga.js) command queue; no script |
| // that consumes _gaq is visible in this part of the file - confirm it is still loaded. |
| _gaq.push(['_setAccount', 'UA-140539454-1']); |
| _gaq.push(['_trackPageview']); |
| </script> |
| <style> |
|   /* Header bar for an example block; positioned so that child elements can be placed absolutely inside it. */ |
|   .example-header { |
|     position: relative; |
|     background: #9AAA7A; |
|     padding: 8px 16px; |
|     margin-bottom: 0; |
|   } |
|   /* Wider right padding; presumably leaves room for .example-header-button. */ |
|   .example-header--with-button { |
|     padding-right: 166px; |
|   } |
|   /* Clearfix: an empty table-display pseudo-element that clears floats inside the header. */ |
|   .example-header:after{ |
|     content: ''; |
|     display: table; |
|     clear: both; |
|   } |
|   /* Title text inside the header; scrolls horizontally instead of overflowing. */ |
|   .example-title { |
|     display:block; |
|     padding: 4px; |
|     margin-right: 16px; |
|     color: white; |
|     overflow-x: auto; |
|   } |
|   /* Button pinned to the top-right corner of its positioned ancestor. */ |
|   .example-header-button { |
|     top: 8px; |
|     right: 16px; |
|     position: absolute; |
|   } |
|   /* Remove the gap between a header and the Python code block that directly follows it. */ |
|   .example-header + .highlight-python { |
|     margin-top: 0 !important; |
|   } |
|   /* Flat, uppercase, single-line button style with a drop shadow. */ |
|   .viewcode-button { |
|     display: inline-block; |
|     padding: 8px 16px; |
|     border: 0; |
|     margin: 0; |
|     outline: 0; |
|     border-radius: 2px; |
|     /* Prefixed fallback now uses the same 6px blur as the standard property (it was 5px). */ |
|     -webkit-box-shadow: 0 3px 6px 0 rgba(0,0,0,.3); |
|     box-shadow: 0 3px 6px 0 rgba(0,0,0,.3); |
|     color: #404040; |
|     background-color: #e7e7e7; |
|     cursor: pointer; |
|     font-size: 16px; |
|     font-weight: 500; |
|     line-height: 1; |
|     text-decoration: none; |
|     text-overflow: ellipsis; |
|     overflow: hidden; |
|     text-transform: uppercase; |
|     -webkit-transition: background-color .2s; |
|     transition: background-color .2s; |
|     vertical-align: middle; |
|     white-space: nowrap; |
|   } |
|   /* Keep the same text colour after the link has been visited. */ |
|   .viewcode-button:visited { |
|     color: #404040; |
|   } |
|   /* Darker background while hovered or focused. */ |
|   .viewcode-button:hover, .viewcode-button:focus { |
|     color: #404040; |
|     background-color: #d6d6d6; |
|   } |
|   /* The base rule sets outline: 0, so restore a clearly visible focus ring for keyboard users. */ |
|   /* Kept as a separate rule so browsers without :focus-visible only drop this rule. */ |
|   .viewcode-button:focus-visible { |
|     outline: 2px solid #404040; |
|     outline-offset: 2px; |
|   } |
| </style> |
| |
| </head> |
| |
| <body class="wy-body-for-nav"> |
| |
| |
| <div class="wy-grid-for-nav"> |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search" > |
| |
| |
| |
| <a href="../../../../../index.html" class="icon icon-home"> Airflow |
| |
| |
| |
| </a> |
| |
| |
| |
| |
| <div class="version"> |
| 1.10.6 |
| </div> |
| |
| |
| |
| |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="../../../../../search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" /> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| |
| |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| |
| |
| |
| |
| |
| |
| <ul class="current"> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../project.html">Project</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../license.html">License</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../start.html">Quick Start</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../installation.html">Installation</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../tutorial.html">Tutorial</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../howto/index.html">How-to Guides</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../ui.html">UI / Screenshots</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../concepts.html">Concepts</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../profiling.html">Data Profiling</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../cli.html">Command Line Interface Reference</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../scheduler.html">Scheduling &amp; Triggers</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../plugins.html">Plugins</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../security.html">Security</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../timezone.html">Time zones</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../api.html">REST API Reference</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../integration.html">Integration</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../metrics.html">Metrics</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../errors.html">Error Tracking</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../kubernetes.html">Kubernetes</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../lineage.html">Lineage</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../changelog.html">Changelog</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../faq.html">FAQ</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../macros.html">Macros reference</a></li> |
| <li class="toctree-l1 current"><a class="reference internal" href="../../../../index.html">Python API Reference</a><ul class="current"> |
| <li class="toctree-l2 current"><a class="reference internal" href="../../../../index.html#operators">Operators</a><ul class="current"> |
| <li class="toctree-l3"><a class="reference internal" href="../../../../index.html#baseoperator">BaseOperator</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="../../../../index.html#basesensoroperator">BaseSensorOperator</a></li> |
| <li class="toctree-l3 current"><a class="reference internal" href="../../../../index.html#operators-packages">Operators packages</a><ul class="current"> |
| <li class="toctree-l4"><a class="reference internal" href="../../../operators/index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.operators</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="../../../sensors/index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.sensors</span></code></a></li> |
| <li class="toctree-l4 current"><a class="reference internal" href="../index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.contrib.operators</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="../../sensors/index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.contrib.sensors</span></code></a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../../index.html#hooks">Hooks</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../../index.html#executors">Executors</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../../index.html#models">Models</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../../index.html#core-and-community-package">Core and community package</a></li> |
| </ul> |
| </li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../../privacy_notice.html">Privacy Notice</a></li> |
| </ul> |
| <p class="caption"><span class="caption-text">References</span></p> |
| <ul class="current"> |
| <li class="toctree-l1 current"><a class="reference internal" href="../../../../index.html">Python API</a><ul class="current"> |
| <li class="toctree-l2 current"><a class="reference internal" href="../../../../index.html#operators">Operators</a><ul class="current"> |
| <li class="toctree-l3"><a class="reference internal" href="../../../../index.html#baseoperator">BaseOperator</a></li> |
| <li class="toctree-l3"><a class="reference internal" href="../../../../index.html#basesensoroperator">BaseSensorOperator</a></li> |
| <li class="toctree-l3 current"><a class="reference internal" href="../../../../index.html#operators-packages">Operators packages</a><ul class="current"> |
| <li class="toctree-l4"><a class="reference internal" href="../../../operators/index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.operators</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="../../../sensors/index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.sensors</span></code></a></li> |
| <li class="toctree-l4 current"><a class="reference internal" href="../index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.contrib.operators</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="../../sensors/index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.contrib.sensors</span></code></a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../../index.html#hooks">Hooks</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../../index.html#executors">Executors</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../../index.html#models">Models</a></li> |
| <li class="toctree-l2"><a class="reference internal" href="../../../../index.html#core-and-community-package">Core and community package</a></li> |
| </ul> |
| </li> |
| </ul> |
| |
| |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" aria-label="top navigation"> |
| |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="../../../../../index.html">Airflow</a> |
| |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| |
| <div class="rst-content"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| |
| <ul class="wy-breadcrumbs"> |
| |
| <li><a href="../../../../../index.html">Docs</a> »</li> |
| |
| <li><a href="../../../../index.html">Python API Reference</a> »</li> |
| |
| <li><a href="../index.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.contrib.operators</span></code></a> »</li> |
| |
| <li><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator</span></code></li> |
| |
| |
| <li class="wy-breadcrumbs-aside"> |
| |
| |
| <a href="../../../../../_sources/_api/airflow/contrib/operators/dataproc_operator/index.rst.txt" rel="nofollow"> View page source</a> |
| |
| |
| </li> |
| |
| </ul> |
| |
| |
| <hr/> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <div class="section" id="module-airflow.contrib.operators.dataproc_operator"> |
| <span id="airflow-contrib-operators-dataproc-operator"></span><h1><a class="reference internal" href="#module-airflow.contrib.operators.dataproc_operator" title="airflow.contrib.operators.dataproc_operator"><code class="xref py py-mod docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator</span></code></a><a class="headerlink" href="#module-airflow.contrib.operators.dataproc_operator" title="Permalink to this headline">¶</a></h1> |
| <p>This module contains Google Dataproc operators.</p> |
| <div class="section" id="module-contents"> |
| <h2>Module Contents<a class="headerlink" href="#module-contents" title="Permalink to this headline">¶</a></h2> |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataprocOperationBaseOperator</code><span class="sig-paren">(</span><em class="sig-param">project_id</em>, <em class="sig-param">region='global'</em>, <em class="sig-param">gcp_conn_id='google_cloud_default'</em>, <em class="sig-param">delegate_to=None</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocOperationBaseOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="../../../models/index.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>The base class for operators that poll on a Dataproc Operation.</p> |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator.execute"> |
| <code class="sig-name descname">execute</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">context</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocOperationBaseOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator.start"> |
| <code class="sig-name descname">start</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">context</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocOperationBaseOperator.start"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator.start" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataprocClusterCreateOperator</code><span class="sig-paren">(</span><em class="sig-param">project_id</em>, <em class="sig-param">cluster_name</em>, <em class="sig-param">num_workers</em>, <em class="sig-param">zone=None</em>, <em class="sig-param">network_uri=None</em>, <em class="sig-param">subnetwork_uri=None</em>, <em class="sig-param">internal_ip_only=None</em>, <em class="sig-param">tags=None</em>, <em class="sig-param">storage_bucket=None</em>, <em class="sig-param">init_actions_uris=None</em>, <em class="sig-param">init_action_timeout='10m'</em>, <em class="sig-param">metadata=None</em>, <em class="sig-param">custom_image=None</em>, <em class="sig-param">image_version=None</em>, <em class="sig-param">autoscaling_policy=None</em>, <em class="sig-param">properties=None</em>, <em class="sig-param">num_masters=1</em>, <em class="sig-param">master_machine_type='n1-standard-4'</em>, <em class="sig-param">master_disk_type='pd-standard'</em>, <em class="sig-param">master_disk_size=500</em>, <em class="sig-param">worker_machine_type='n1-standard-4'</em>, <em class="sig-param">worker_disk_type='pd-standard'</em>, <em class="sig-param">worker_disk_size=500</em>, <em class="sig-param">num_preemptible_workers=0</em>, <em class="sig-param">labels=None</em>, <em class="sig-param">region='global'</em>, <em class="sig-param">service_account=None</em>, <em class="sig-param">service_account_scopes=None</em>, <em class="sig-param">idle_delete_ttl=None</em>, <em class="sig-param">auto_delete_time=None</em>, <em class="sig-param">auto_delete_ttl=None</em>, <em class="sig-param">customer_managed_key=None</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" 
href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator</span></code></a></p> |
| <p>Create a new cluster on Google Cloud Dataproc. The operator will wait until the |
| creation is successful or an error occurs in the creation process.</p> |
| <p>The parameters allow you to configure the cluster. Please refer to</p> |
| <p><a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters">https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters</a></p> |
| <p>for a detailed explanation on the different parameters. Most of the configuration |
| parameters detailed in the link are available as a parameter to this operator.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>cluster_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The name of the DataProc cluster to create. (templated)</p></li> |
| <li><p><strong>project_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The ID of the google cloud project in which |
| to create the cluster. (templated)</p></li> |
| <li><p><strong>num_workers</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The # of workers to spin up. If set to zero, the cluster will |
| be spun up in single-node mode</p></li> |
| <li><p><strong>storage_bucket</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The storage bucket to use, setting to None lets dataproc |
| generate a custom one for you</p></li> |
| <li><p><strong>init_actions_uris</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em>]</em>) – List of GCS URIs containing |
| dataproc initialization scripts</p></li> |
| <li><p><strong>init_action_timeout</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Amount of time executable scripts in |
| init_actions_uris have to complete</p></li> |
| <li><p><strong>metadata</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – dict of key-value google compute engine metadata entries |
| to add to all instances</p></li> |
| <li><p><strong>image_version</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – the version of software inside the Dataproc cluster</p></li> |
| <li><p><strong>custom_image</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – custom Dataproc image; for more info see |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/guides/dataproc-images">https://cloud.google.com/dataproc/docs/guides/dataproc-images</a></p></li> |
| <li><p><strong>autoscaling_policy</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The autoscaling policy used by the cluster. Only resource names |
| including projectid and location (region) are valid. Example: |
| <code class="docutils literal notranslate"><span class="pre">projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]</span></code></p></li> |
| <li><p><strong>properties</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – dict of properties to set on |
| config files (e.g. spark-defaults.conf), see |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig">https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig</a></p></li> |
| <li><p><strong>num_masters</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The # of master nodes to spin up</p></li> |
| <li><p><strong>master_machine_type</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Compute engine machine type to use for the master node</p></li> |
| <li><p><strong>master_disk_type</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Type of the boot disk for the master node |
| (default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>). |
| Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or |
| <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</p></li> |
| <li><p><strong>master_disk_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – Disk size for the master node</p></li> |
| <li><p><strong>worker_machine_type</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Compute engine machine type to use for the worker nodes</p></li> |
| <li><p><strong>worker_disk_type</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Type of the boot disk for the worker node |
| (default is <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code>). |
| Valid values: <code class="docutils literal notranslate"><span class="pre">pd-ssd</span></code> (Persistent Disk Solid State Drive) or |
| <code class="docutils literal notranslate"><span class="pre">pd-standard</span></code> (Persistent Disk Hard Disk Drive).</p></li> |
| <li><p><strong>worker_disk_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – Disk size for the worker nodes</p></li> |
| <li><p><strong>num_preemptible_workers</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The # of preemptible worker nodes to spin up</p></li> |
| <li><p><strong>labels</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – dict of labels to add to the cluster</p></li> |
| <li><p><strong>zone</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The zone where the cluster will be located. Set to None to auto-zone. (templated)</p></li> |
| <li><p><strong>network_uri</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The network uri to be used for machine communication, cannot be |
| specified with subnetwork_uri</p></li> |
| <li><p><strong>subnetwork_uri</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The subnetwork uri to be used for machine communication, |
| cannot be specified with network_uri</p></li> |
| <li><p><strong>internal_ip_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)"><em>bool</em></a>) – If true, all instances in the cluster will only |
| have internal IP addresses. This can only be enabled for subnetwork |
| enabled networks</p></li> |
| <li><p><strong>tags</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em>]</em>) – The GCE tags to add to all instances</p></li> |
| <li><p><strong>region</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – leave as ‘global’, might become relevant in the future. (templated)</p></li> |
| <li><p><strong>gcp_conn_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The connection ID to use connecting to Google Cloud Platform.</p></li> |
| <li><p><strong>delegate_to</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</p></li> |
| <li><p><strong>service_account</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The service account of the dataproc instances.</p></li> |
| <li><p><strong>service_account_scopes</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em>]</em>) – The URIs of service account scopes to be included.</p></li> |
| <li><p><strong>idle_delete_ttl</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The longest duration that the cluster is kept alive while |
| staying idle. Passing this threshold will cause the cluster to be auto-deleted. |
| A duration in seconds.</p></li> |
| <li><p><strong>auto_delete_time</strong> (<a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.datetime" title="(in Python v3.8)"><em>datetime.datetime</em></a>) – The time when cluster will be auto-deleted.</p></li> |
| <li><p><strong>auto_delete_ttl</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The life duration of cluster, the cluster will be |
| auto-deleted at the end of this duration. |
| A duration in seconds. (If auto_delete_time is set this parameter will be ignored)</p></li> |
| <li><p><strong>customer_managed_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The customer-managed key used for disk encryption |
| <code class="docutils literal notranslate"><span class="pre">projects/[PROJECT_STORING_KEYS]/locations/[LOCATION]/keyRings/[KEY_RING_NAME]/cryptoKeys/[KEY_NAME]</span></code></p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['cluster_name', 'project_id', 'zone', 'region']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator._get_init_action_timeout"> |
| <code class="sig-name descname">_get_init_action_timeout</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator._get_init_action_timeout"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator._get_init_action_timeout" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator._build_gce_cluster_config"> |
| <code class="sig-name descname">_build_gce_cluster_config</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">cluster_data</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator._build_gce_cluster_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator._build_gce_cluster_config" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator._build_lifecycle_config"> |
| <code class="sig-name descname">_build_lifecycle_config</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">cluster_data</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator._build_lifecycle_config"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator._build_lifecycle_config" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator._build_cluster_data"> |
| <code class="sig-name descname">_build_cluster_data</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator._build_cluster_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator._build_cluster_data" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator.start"> |
| <code class="sig-name descname">start</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterCreateOperator.start"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterCreateOperator.start" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Create a new cluster on Google Cloud Dataproc.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataprocClusterScaleOperator</code><span class="sig-paren">(</span><em class="sig-param">cluster_name</em>, <em class="sig-param">project_id</em>, <em class="sig-param">region='global'</em>, <em class="sig-param">num_workers=2</em>, <em class="sig-param">num_preemptible_workers=0</em>, <em class="sig-param">graceful_decommission_timeout=None</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator</span></code></a></p> |
| <p>Scale, up or down, a cluster on Google Cloud Dataproc. |
| The operator will wait until the cluster is re-scaled.</p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataprocClusterScaleOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataproc_scale'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="s1">'my-project'</span><span class="p">,</span> |
| <span class="n">cluster_name</span><span class="o">=</span><span class="s1">'cluster-1'</span><span class="p">,</span> |
| <span class="n">num_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">num_preemptible_workers</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">graceful_decommission_timeout</span><span class="o">=</span><span class="s1">'1h'</span><span class="p">,</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <div class="admonition seealso"> |
| <p class="admonition-title">See also</p> |
| <p>For more detail about scaling clusters, have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters">https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters</a></p> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>cluster_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The name of the cluster to scale. (templated)</p></li> |
| <li><p><strong>project_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The ID of the google cloud project in which |
| the cluster runs. (templated)</p></li> |
| <li><p><strong>region</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The region for the dataproc cluster. (templated)</p></li> |
| <li><p><strong>gcp_conn_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The connection ID to use connecting to Google Cloud Platform.</p></li> |
| <li><p><strong>num_workers</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The new number of workers</p></li> |
| <li><p><strong>num_preemptible_workers</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The new number of preemptible workers</p></li> |
| <li><p><strong>graceful_decommission_timeout</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Timeout for graceful YARN decommissioning. |
| Maximum value is 1d</p></li> |
| <li><p><strong>delegate_to</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['cluster_name', 'project_id', 'region']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator._build_scale_cluster_data"> |
| <code class="sig-name descname">_build_scale_cluster_data</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator._build_scale_cluster_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator._build_scale_cluster_data" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator._get_graceful_decommission_timeout"> |
| <em class="property">static </em><code class="sig-name descname">_get_graceful_decommission_timeout</code><span class="sig-paren">(</span><em class="sig-param">timeout</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator._get_graceful_decommission_timeout"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator._get_graceful_decommission_timeout" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator.start"> |
| <code class="sig-name descname">start</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterScaleOperator.start"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterScaleOperator.start" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Scale, up or down, a cluster on Google Cloud Dataproc.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataprocClusterDeleteOperator</code><span class="sig-paren">(</span><em class="sig-param">cluster_name</em>, <em class="sig-param">project_id</em>, <em class="sig-param">region='global'</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterDeleteOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator</span></code></a></p> |
| <p>Delete a cluster on Google Cloud Dataproc. The operator will wait until the |
| cluster is destroyed.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>cluster_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The name of the cluster to delete. (templated)</p></li> |
| <li><p><strong>project_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The ID of the google cloud project in which |
| the cluster runs. (templated)</p></li> |
| <li><p><strong>region</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – leave as ‘global’, might become relevant in the future. (templated)</p></li> |
| <li><p><strong>gcp_conn_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The connection ID to use connecting to Google Cloud Platform.</p></li> |
| <li><p><strong>delegate_to</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['cluster_name', 'project_id', 'region']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterDeleteOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator.start"> |
| <code class="sig-name descname">start</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocClusterDeleteOperator.start"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocClusterDeleteOperator.start" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Delete a cluster on Google Cloud Dataproc.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataProcJobBaseOperator</code><span class="sig-paren">(</span><em class="sig-param">job_name='{{task.task_id}}_{{ds_nodash}}'</em>, <em class="sig-param">cluster_name='cluster-1'</em>, <em class="sig-param">dataproc_properties=None</em>, <em class="sig-param">dataproc_jars=None</em>, <em class="sig-param">gcp_conn_id='google_cloud_default'</em>, <em class="sig-param">delegate_to=None</em>, <em class="sig-param">labels=None</em>, <em class="sig-param">region='global'</em>, <em class="sig-param">job_error_states=None</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcJobBaseOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="../../../models/index.html#airflow.models.BaseOperator" title="airflow.models.BaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.models.BaseOperator</span></code></a></p> |
| <p>The base class for operators that launch jobs on DataProc.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>job_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The job name used in the DataProc cluster. This name by default |
| is the task_id appended with the execution date, but can be templated. The |
| name will always be appended with a random number to avoid name clashes.</p></li> |
| <li><p><strong>cluster_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The name of the DataProc cluster.</p></li> |
| <li><p><strong>dataproc_properties</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – Map for the Hive properties. Ideal to put in |
| default arguments (templated)</p></li> |
| <li><p><strong>dataproc_jars</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop |
| MapReduce (MR) tasks. Can contain Hive SerDes and UDFs. (templated)</p></li> |
| <li><p><strong>gcp_conn_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The connection ID to use connecting to Google Cloud Platform.</p></li> |
| <li><p><strong>delegate_to</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</p></li> |
| <li><p><strong>labels</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – The labels to associate with this job. Label keys must contain 1 to 63 characters, |
| and must conform to RFC 1035. Label values may be empty, but, if present, must contain 1 to 63 |
| characters, and must conform to RFC 1035. No more than 32 labels can be associated with a job.</p></li> |
| <li><p><strong>region</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The specified region where the dataproc cluster is created.</p></li> |
| <li><p><strong>job_error_states</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#set" title="(in Python v3.8)"><em>set</em></a>) – Job states that should be considered error states. |
| Any states in this set will result in an error being raised and failure of the |
| task. Eg, if the <code class="docutils literal notranslate"><span class="pre">CANCELLED</span></code> state should also be considered a task failure, |
| pass in <code class="docutils literal notranslate"><span class="pre">{'ERROR',</span> <span class="pre">'CANCELLED'}</span></code>. Possible values are currently only |
| <code class="docutils literal notranslate"><span class="pre">'ERROR'</span></code> and <code class="docutils literal notranslate"><span class="pre">'CANCELLED'</span></code>, but could change in the future. Defaults to |
| <code class="docutils literal notranslate"><span class="pre">{'ERROR'}</span></code>.</p></li> |
| </ul> |
| </dd> |
| <dt class="field-even">Variables</dt> |
| <dd class="field-even"><p><strong>dataproc_job_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The actual “jobId” as submitted to the Dataproc API. |
| This is useful for identifying or linking to the job in the Google Cloud Console |
| Dataproc UI, as the actual “jobId” submitted to the Dataproc API is appended with |
| an 8 character random string.</p> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator.job_type"> |
| <code class="sig-name descname">job_type</code><em class="property"> =</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcJobBaseOperator.job_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator.job_type" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator.create_job_template"> |
| <code class="sig-name descname">create_job_template</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcJobBaseOperator.create_job_template"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator.create_job_template" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Initialize <cite>self.job_template</cite> with default values</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator.execute"> |
| <code class="sig-name descname">execute</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">context</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcJobBaseOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Build <cite>self.job</cite> based on the job template, and submit it. |
| Raises AirflowException if no template has been initialized (see create_job_template).</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator.on_kill"> |
| <code class="sig-name descname">on_kill</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcJobBaseOperator.on_kill"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator.on_kill" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Callback called when the operator is killed. |
| Cancel any running job.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataProcPigOperator</code><span class="sig-paren">(</span><em class="sig-param">query=None</em>, <em class="sig-param">query_uri=None</em>, <em class="sig-param">variables=None</em>, <em class="sig-param">dataproc_pig_properties=None</em>, <em class="sig-param">dataproc_pig_jars=None</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator</span></code></a></p> |
| <p>Start a Pig query Job on a Cloud DataProc cluster. The parameters of the operation |
| will be passed to the cluster.</p> |
| <p>It’s a good practice to define dataproc_* parameters in the default_args of the dag |
| like the cluster name and UDFs.</p> |
| <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">default_args</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'cluster_name'</span><span class="p">:</span> <span class="s1">'cluster-1'</span><span class="p">,</span> |
| <span class="s1">'dataproc_pig_jars'</span><span class="p">:</span> <span class="p">[</span> |
| <span class="s1">'gs://example/udf/jar/datafu/1.2.0/datafu.jar'</span><span class="p">,</span> |
| <span class="s1">'gs://example/udf/jar/gpig/1.2/gpig.jar'</span> |
| <span class="p">]</span> |
| <span class="p">}</span> |
| </pre></div> |
| </div> |
| <p>You can pass a pig script as string or file reference. Use variables to pass on |
| variables for the pig script to be resolved on the cluster or use the parameters to |
| be resolved in the script as template parameters.</p> |
| <p><strong>Example</strong>:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">t1</span> <span class="o">=</span> <span class="n">DataProcPigOperator</span><span class="p">(</span> |
| <span class="n">task_id</span><span class="o">=</span><span class="s1">'dataproc_pig'</span><span class="p">,</span> |
| <span class="n">query</span><span class="o">=</span><span class="s1">'a_pig_script.pig'</span><span class="p">,</span> |
| <span class="n">variables</span><span class="o">=</span><span class="p">{</span><span class="s1">'out'</span><span class="p">:</span> <span class="s1">'gs://example/output/{{ds}}'</span><span class="p">},</span> |
| <span class="n">dag</span><span class="o">=</span><span class="n">dag</span><span class="p">)</span> |
| </pre></div> |
| </div> |
| <div class="admonition seealso"> |
| <p class="admonition-title">See also</p> |
| <p>For more detail about job submission, have a look at the reference: |
| <a class="reference external" href="https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs">https://cloud.google.com/dataproc/reference/rest/v1/projects.regions.jobs</a></p> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>query</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The query or reference to the query |
| file (pg or pig extension). (templated)</p></li> |
| <li><p><strong>query_uri</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The HCFS URI of the script that contains the Pig queries.</p></li> |
| <li><p><strong>variables</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – Map of named parameters for the query. (templated)</p></li> |
| <li><p><strong>dataproc_pig_properties</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – Map for the Pig properties. Ideal to put in |
| default arguments (templated)</p></li> |
| <li><p><strong>dataproc_pig_jars</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop |
| MapReduce (MR) tasks. Can contain Pig UDFs. (templated)</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['query', 'variables', 'job_name', 'cluster_name', 'region', 'dataproc_jars', 'dataproc_properties']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator.template_ext"> |
| <code class="sig-name descname">template_ext</code><em class="property"> = ['.pg', '.pig']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator.template_ext"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator.template_ext" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator.ui_color"> |
| <code class="sig-name descname">ui_color</code><em class="property"> = #0273d4</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator.ui_color"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator.ui_color" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator.job_type"> |
| <code class="sig-name descname">job_type</code><em class="property"> = pigJob</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator.job_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator.job_type" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPigOperator.execute"> |
| <code class="sig-name descname">execute</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">context</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPigOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPigOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataProcHiveOperator</code><span class="sig-paren">(</span><em class="sig-param">query=None</em>, <em class="sig-param">query_uri=None</em>, <em class="sig-param">variables=None</em>, <em class="sig-param">dataproc_hive_properties=None</em>, <em class="sig-param">dataproc_hive_jars=None</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator</span></code></a></p> |
| <p>Start a Hive query Job on a Cloud DataProc cluster.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>query</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The query or reference to the query file (.q or .hql extension).</p></li> |
| <li><p><strong>query_uri</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The HCFS URI of the script that contains the Hive queries.</p></li> |
| <li><p><strong>variables</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – Map of named parameters for the query.</p></li> |
| <li><p><strong>dataproc_hive_properties</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – Map for the Hive properties. Ideal to put in |
| default arguments (templated)</p></li> |
| <li><p><strong>dataproc_hive_jars</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop |
| MapReduce (MR) tasks. Can contain Hive SerDes and UDFs. (templated)</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['query', 'variables', 'job_name', 'cluster_name', 'region', 'dataproc_jars', 'dataproc_properties']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.template_ext"> |
| <code class="sig-name descname">template_ext</code><em class="property"> = ['.q', '.hql']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator.template_ext"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.template_ext" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.ui_color"> |
| <code class="sig-name descname">ui_color</code><em class="property"> = #0273d4</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator.ui_color"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.ui_color" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.job_type"> |
| <code class="sig-name descname">job_type</code><em class="property"> = hiveJob</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator.job_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.job_type" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.execute"> |
| <code class="sig-name descname">execute</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">context</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHiveOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHiveOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataProcSparkSqlOperator</code><span class="sig-paren">(</span><em class="sig-param">query=None</em>, <em class="sig-param">query_uri=None</em>, <em class="sig-param">variables=None</em>, <em class="sig-param">dataproc_spark_properties=None</em>, <em class="sig-param">dataproc_spark_jars=None</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator</span></code></a></p> |
| <p>Start a Spark SQL query Job on a Cloud DataProc cluster.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>query</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The query or reference to the query file (q extension). (templated)</p></li> |
| <li><p><strong>query_uri</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The HCFS URI of the script that contains the SQL queries.</p></li> |
| <li><p><strong>variables</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – Map of named parameters for the query. (templated)</p></li> |
| <li><p><strong>dataproc_spark_properties</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – Map for the Spark SQL properties. Ideal to put in |
| default arguments (templated)</p></li> |
| <li><p><strong>dataproc_spark_jars</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – HCFS URIs of jar files to be added to the Spark CLASSPATH. (templated)</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['query', 'variables', 'job_name', 'cluster_name', 'region', 'dataproc_jars', 'dataproc_properties']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.template_ext"> |
| <code class="sig-name descname">template_ext</code><em class="property"> = ['.q']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator.template_ext"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.template_ext" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.ui_color"> |
| <code class="sig-name descname">ui_color</code><em class="property"> = #0273d4</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator.ui_color"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.ui_color" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.job_type"> |
| <code class="sig-name descname">job_type</code><em class="property"> = sparkSqlJob</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator.job_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.job_type" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.execute"> |
| <code class="sig-name descname">execute</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">context</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkSqlOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkSqlOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataProcSparkOperator</code><span class="sig-paren">(</span><em class="sig-param">main_jar=None</em>, <em class="sig-param">main_class=None</em>, <em class="sig-param">arguments=None</em>, <em class="sig-param">archives=None</em>, <em class="sig-param">files=None</em>, <em class="sig-param">dataproc_spark_properties=None</em>, <em class="sig-param">dataproc_spark_jars=None</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator</span></code></a></p> |
| <p>Start a Spark Job on a Cloud DataProc cluster.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>main_jar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The HCFS URI of the jar file that contains the main class |
| (use this or the main_class, not both together).</p></li> |
| <li><p><strong>main_class</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Name of the job class. (use this or the main_jar, not both |
| together).</p></li> |
| <li><p><strong>arguments</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – Arguments for the job. (templated)</p></li> |
| <li><p><strong>archives</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</p></li> |
| <li><p><strong>files</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – List of files to be copied to the working directory.</p></li> |
| <li><p><strong>dataproc_spark_properties</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – Map for the Spark properties. Ideal to put in |
| default arguments (templated)</p></li> |
| <li><p><strong>dataproc_spark_jars</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver |
| and tasks. (templated)</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['arguments', 'job_name', 'cluster_name', 'region', 'dataproc_jars', 'dataproc_properties']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.ui_color"> |
| <code class="sig-name descname">ui_color</code><em class="property"> = #0273d4</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator.ui_color"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.ui_color" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.job_type"> |
| <code class="sig-name descname">job_type</code><em class="property"> = sparkJob</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator.job_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.job_type" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.execute"> |
| <code class="sig-name descname">execute</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">context</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcSparkOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcSparkOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataProcHadoopOperator</code><span class="sig-paren">(</span><em class="sig-param">main_jar=None</em>, <em class="sig-param">main_class=None</em>, <em class="sig-param">arguments=None</em>, <em class="sig-param">archives=None</em>, <em class="sig-param">files=None</em>, <em class="sig-param">dataproc_hadoop_properties=None</em>, <em class="sig-param">dataproc_hadoop_jars=None</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator</span></code></a></p> |
| <p>Start a Hadoop Job on a Cloud DataProc cluster.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>main_jar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The HCFS URI of the jar file containing the main class |
| (use this or the main_class, not both together).</p></li> |
| <li><p><strong>main_class</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Name of the job class. (use this or the main_jar, not both |
| together).</p></li> |
| <li><p><strong>arguments</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – Arguments for the job. (templated)</p></li> |
| <li><p><strong>archives</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</p></li> |
| <li><p><strong>files</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – List of files to be copied to the working directory.</p></li> |
| <li><p><strong>dataproc_hadoop_properties</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – Map for the Hadoop properties. Ideal to put in |
| default arguments (templated)</p></li> |
| <li><p><strong>dataproc_hadoop_jars</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – Jar file URIs to add to the CLASSPATHs of the Hadoop driver and |
| tasks. (templated)</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['arguments', 'job_name', 'cluster_name', 'region', 'dataproc_jars', 'dataproc_properties']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.ui_color"> |
| <code class="sig-name descname">ui_color</code><em class="property"> = #0273d4</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator.ui_color"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.ui_color" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.job_type"> |
| <code class="sig-name descname">job_type</code><em class="property"> = hadoopJob</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator.job_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.job_type" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.execute"> |
| <code class="sig-name descname">execute</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">context</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcHadoopOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcHadoopOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataProcPySparkOperator</code><span class="sig-paren">(</span><em class="sig-param">main</em>, <em class="sig-param">arguments=None</em>, <em class="sig-param">archives=None</em>, <em class="sig-param">pyfiles=None</em>, <em class="sig-param">files=None</em>, <em class="sig-param">dataproc_pyspark_properties=None</em>, <em class="sig-param">dataproc_pyspark_jars=None</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataProcJobBaseOperator</span></code></a></p> |
| <p>Start a PySpark Job on a Cloud DataProc cluster.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>main</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main |
| Python file to use as the driver. Must be a .py file.</p></li> |
| <li><p><strong>arguments</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – Arguments for the job. (templated)</p></li> |
| <li><p><strong>archives</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – List of archived files that will be unpacked in the work |
| directory. Should be stored in Cloud Storage.</p></li> |
| <li><p><strong>files</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – List of files to be copied to the working directory.</p></li> |
| <li><p><strong>pyfiles</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – List of Python files to pass to the PySpark framework. |
| Supported file types: .py, .egg, and .zip</p></li> |
| <li><p><strong>dataproc_pyspark_properties</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.8)"><em>dict</em></a>) – Map for the PySpark properties. Ideal to put in |
| default arguments (templated)</p></li> |
| <li><p><strong>dataproc_pyspark_jars</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.8)"><em>list</em></a>) – HCFS URIs of jar files to add to the CLASSPATHs of the Python |
| driver and tasks. (templated)</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['arguments', 'job_name', 'cluster_name', 'region', 'dataproc_jars', 'dataproc_properties']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.ui_color"> |
| <code class="sig-name descname">ui_color</code><em class="property"> = #0273d4</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator.ui_color"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.ui_color" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.job_type"> |
| <code class="sig-name descname">job_type</code><em class="property"> = pysparkJob</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator.job_type"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.job_type" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator._generate_temp_filename"> |
| <em class="property">static </em><code class="sig-name descname">_generate_temp_filename</code><span class="sig-paren">(</span><em class="sig-param">filename</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator._generate_temp_filename"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator._generate_temp_filename" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator._upload_file_temp"> |
| <code class="sig-name descname">_upload_file_temp</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">bucket</em>, <em class="sig-param">local_file</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator._upload_file_temp"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator._upload_file_temp" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Upload a local file to a Google Cloud Storage bucket.</p> |
| </dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.execute"> |
| <code class="sig-name descname">execute</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">context</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataProcPySparkOperator.execute"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataProcPySparkOperator.execute" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataprocWorkflowTemplateInstantiateOperator</code><span class="sig-paren">(</span><em class="sig-param">template_id</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator</span></code></a></p> |
| <p>Instantiate a WorkflowTemplate on Google Cloud Dataproc. The operator will wait |
| until the WorkflowTemplate is finished executing.</p> |
| <div class="admonition seealso"> |
| <p class="admonition-title">See also</p> |
| <p>Please refer to: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate</a></p> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>template_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The id of the template. (templated)</p></li> |
| <li><p><strong>project_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The ID of the Google Cloud project in which |
| the template runs.</p></li> |
| <li><p><strong>region</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Leave as ‘global’; it might become relevant in the future.</p></li> |
| <li><p><strong>gcp_conn_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The connection ID to use when connecting to Google Cloud Platform.</p></li> |
| <li><p><strong>delegate_to</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['template_id']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator.start"> |
| <code class="sig-name descname">start</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateOperator.start"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateOperator.start" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Instantiate a WorkflowTemplate on Google Cloud Dataproc.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="class"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator"> |
| <em class="property">class </em><code class="sig-prename descclassname">airflow.contrib.operators.dataproc_operator.</code><code class="sig-name descname">DataprocWorkflowTemplateInstantiateInlineOperator</code><span class="sig-paren">(</span><em class="sig-param">template</em>, <em class="sig-param">*args</em>, <em class="sig-param">**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateInlineOperator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Bases: <a class="reference internal" href="#airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator" title="airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator"><code class="xref py py-class docutils literal notranslate"><span class="pre">airflow.contrib.operators.dataproc_operator.DataprocOperationBaseOperator</span></code></a></p> |
| <p>Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc. The operator will |
| wait until the WorkflowTemplate is finished executing.</p> |
| <div class="admonition seealso"> |
| <p class="admonition-title">See also</p> |
| <p>Please refer to: |
| <a class="reference external" href="https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline">https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline</a></p> |
| </div> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>template</strong> (<em>map</em>) – The template contents. (templated)</p></li> |
| <li><p><strong>project_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The ID of the Google Cloud project in which |
| the template runs.</p></li> |
| <li><p><strong>region</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – Leave as ‘global’; it might become relevant in the future.</p></li> |
| <li><p><strong>gcp_conn_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The connection ID to use when connecting to Google Cloud Platform.</p></li> |
| <li><p><strong>delegate_to</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The account to impersonate, if any. |
| For this to work, the service account making the request must have domain-wide |
| delegation enabled.</p></li> |
| </ul> |
| </dd> |
| </dl> |
| <dl class="attribute"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator.template_fields"> |
| <code class="sig-name descname">template_fields</code><em class="property"> = ['template']</em><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateInlineOperator.template_fields"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator.template_fields" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="method"> |
| <dt id="airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator.start"> |
| <code class="sig-name descname">start</code><span class="sig-paren">(</span><em class="sig-param">self</em><span class="sig-paren">)</span><a class="reference internal" href="../../../../../_modules/airflow/contrib/operators/dataproc_operator.html#DataprocWorkflowTemplateInstantiateInlineOperator.start"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#airflow.contrib.operators.dataproc_operator.DataprocWorkflowTemplateInstantiateInlineOperator.start" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc.</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| </div> |
| |
| |
| </div> |
| |
| </div> |
| |
| |
| <footer> |
| |
| <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| |
| <a href="../datastore_export_operator/index.html" class="btn btn-neutral float-right" title="airflow.contrib.operators.datastore_export_operator" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a> |
| |
| |
| <a href="../dataflow_operator/index.html" class="btn btn-neutral float-left" title="airflow.contrib.operators.dataflow_operator" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a> |
| |
| </div> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p> |
| |
| </p> |
| </div> |
| Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| <div class="footer">This page uses <a href="https://analytics.google.com/"> |
| Google Analytics</a> to collect statistics. You can disable it by blocking |
| the JavaScript coming from the google-analytics.com domain. Check our |
| <a href="../../../../../privacy_notice.html">Privacy Policy</a> |
| for more details. |
| <script type="text/javascript"> |
| (function() { |
| var ga = document.createElement('script'); |
| ga.src = ('https:' == document.location.protocol ? |
| 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; |
| ga.setAttribute('async', 'true'); |
| var nodes = document.documentElement.childNodes; |
| var i = -1; |
| var node; |
| do { |
| i++; |
| node = nodes[i] |
| } while(node.nodeType !== Node.ELEMENT_NODE); |
| node.appendChild(ga); |
| })(); |
| </script> |
| </div> |
| |
| |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| |
| |
| <script type="text/javascript"> |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| |
| |
| |
| |
| |
| </body> |
| </html> |