<!-- blob: d3a5a506b2fd9372e6d31c053605d559eb6f9caf [file] [log] [blame] -->
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>airflow.contrib.hooks.gcs_hook &mdash; Airflow Documentation</title>
<script type="text/javascript" src="../../../../_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="../../../../" src="../../../../_static/documentation_options.js"></script>
<script type="text/javascript" src="../../../../_static/jquery.js"></script>
<script type="text/javascript" src="../../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../../_static/doctools.js"></script>
<script type="text/javascript" src="../../../../_static/language_data.js"></script>
<script type="text/javascript" src="../../../../_static/js/theme.js"></script>
<link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../../_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<script>
// Once the DOM is ready, turn issue and pull-request references found inside
// the element with id "changelog" into links.
// Plain string literals are used on purpose: template literals are ES2015
// syntax and would make the whole script fail to parse in older browsers.
document.addEventListener('DOMContentLoaded', function () {
  var el = document.getElementById('changelog');
  if (el === null) {
    // No element with id "changelog" on this page: nothing to rewrite.
    return;
  }
  // Apply both substitutions to a single string and assign innerHTML once,
  // so the subtree is serialized and re-parsed only one time.
  el.innerHTML = el.innerHTML
    // [AIRFLOW-...]
    .replace(
      /\[(AIRFLOW-[\d]+)\]/g,
      '<a href="https://issues.apache.org/jira/browse/$1">[$1]</a>'
    )
    // (#...)
    .replace(
      /\(#([\d]+)\)/g,
      '<a href="https://github.com/apache/airflow/pull/$1">(#$1)</a>'
    );
});
</script>
<script type="text/javascript">
// Analytics command queue: reuse the global _gaq array if one already exists,
// otherwise start an empty one, then enqueue the account id followed by the
// page-view command.
// NOTE(review): no analytics loader script is visible in this part of the
// page; confirm one is included elsewhere, otherwise the queued commands are
// never processed.
var _gaq = _gaq || [];
_gaq.push(
  ['_setAccount', 'UA-140539454-1'],
  ['_trackPageview']
);
</script>
<style>
  /* Coloured header bar for an example block. */
  .example-header {
    /* presumably the positioning context for .example-header-button; the
       markup using these classes is not on this page, so confirm */
    position: relative;
    background: #9AAA7A;
    padding: 8px 16px;
    margin-bottom: 0;
  }

  /* Modifier with extra right padding; presumably reserves room for the
     absolutely positioned button (confirm against the markup). */
  .example-header--with-button {
    padding-right: 166px;
  }

  /* Generated element that clears floats at the end of the header. */
  .example-header:after {
    content: '';
    display: table;
    clear: both;
  }

  /* Title text inside the header; scrolls horizontally when it overflows. */
  .example-title {
    display: block;
    padding: 4px;
    margin-right: 16px;
    color: white;
    overflow-x: auto;
  }

  /* Button pinned to the top-right corner of its positioned ancestor. */
  .example-header-button {
    top: 8px;
    right: 16px;
    position: absolute;
  }

  /* Remove the gap between a header and the Python listing right after it. */
  .example-header + .highlight-python {
    margin-top: 0 !important;
  }

  /* Flat, uppercase button look for "view code" links. */
  .viewcode-button {
    display: inline-block;
    padding: 8px 16px;
    border: 0;
    margin: 0;
    /* NOTE(review): the focus outline is removed here and the only focus cue
       is the background change in the :hover/:focus rule below; confirm that
       is visible enough for keyboard users. */
    outline: 0;
    border-radius: 2px;
    /* Prefixed and standard shadows now use the same 6px blur so every
       engine renders the same shadow. */
    -webkit-box-shadow: 0 3px 6px 0 rgba(0,0,0,.3);
    box-shadow: 0 3px 6px 0 rgba(0,0,0,.3);
    color: #404040;
    background-color: #e7e7e7;
    cursor: pointer;
    font-size: 16px;
    font-weight: 500;
    line-height: 1;
    text-decoration: none;
    /* Single-line label, truncated with an ellipsis when too long. */
    text-overflow: ellipsis;
    overflow: hidden;
    text-transform: uppercase;
    -webkit-transition: background-color .2s;
    transition: background-color .2s;
    vertical-align: middle;
    white-space: nowrap;
  }

  /* Keep the text colour unchanged for visited links. */
  .viewcode-button:visited {
    color: #404040;
  }

  /* Darker background on hover and keyboard focus. */
  .viewcode-button:hover, .viewcode-button:focus {
    color: #404040;
    background-color: #d6d6d6;
  }
</style>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<!-- Side navigation: home link, version label, search form and the top-level table of contents. -->
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
  <div class="wy-side-scroll">
    <div class="wy-side-nav-search">
      <a href="../../../../index.html" class="icon icon-home"> Airflow
      </a>
      <div class="version">
        1.10.5
      </div>
      <div role="search">
        <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
          <!-- The placeholder is not an accessible name, so the field is labelled explicitly. -->
          <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
          <input type="hidden" name="check_keywords" value="yes" />
          <input type="hidden" name="area" value="default" />
        </form>
      </div>
    </div>
    <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
      <ul>
        <li class="toctree-l1"><a class="reference internal" href="../../../../project.html">Project</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../license.html">License</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../start.html">Quick Start</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../installation.html">Installation</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../tutorial.html">Tutorial</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../howto/index.html">How-to Guides</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../ui.html">UI / Screenshots</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../concepts.html">Concepts</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../profiling.html">Data Profiling</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../cli.html">Command Line Interface Reference</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../scheduler.html">Scheduling &amp; Triggers</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../plugins.html">Plugins</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../security.html">Security</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../timezone.html">Time zones</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../api.html">REST API Reference</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../integration.html">Integration</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../metrics.html">Metrics</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../kubernetes.html">Kubernetes</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../lineage.html">Lineage</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../changelog.html">Changelog</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../faq.html">FAQ</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../macros.html">Macros reference</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../_api/index.html">Python API Reference</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../../../privacy_notice.html">Privacy Notice</a></li>
      </ul>
    </div>
  </div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<!-- Top bar: a bars icon carrying data-toggle="wy-nav-top" and a link back to the documentation home. -->
<!-- NOTE(review): the icon is presumably wired up by the theme script as the menu toggle (confirm);
     as a plain <i> it cannot receive keyboard focus. -->
<nav class="wy-nav-top" aria-label="top navigation">
  <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  <a href="../../../../index.html">Airflow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<!-- Breadcrumb trail: Docs, Module code, airflow.contrib.hooks, then the current module (not linked). -->
<div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
    <li><a href="../../../../index.html">Docs</a> &raquo;</li>
    <li><a href="../../../index.html">Module code</a> &raquo;</li>
    <li><a href="../hooks.html">airflow.contrib.hooks</a> &raquo;</li>
    <li>airflow.contrib.hooks.gcs_hook</li>
    <!-- Aside slot of the breadcrumb bar; it has no content on this page. -->
    <li class="wy-breadcrumbs-aside">
    </li>
  </ul>
  <hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for airflow.contrib.hooks.gcs_hook</h1><div class="highlight"><pre>
<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
<span class="c1">#</span>
<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one</span>
<span class="c1"># or more contributor license agreements. See the NOTICE file</span>
<span class="c1"># distributed with this work for additional information</span>
<span class="c1"># regarding copyright ownership. The ASF licenses this file</span>
<span class="c1"># to you under the Apache License, Version 2.0 (the</span>
<span class="c1"># &quot;License&quot;); you may not use this file except in compliance</span>
<span class="c1"># with the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing,</span>
<span class="c1"># software distributed under the License is distributed on an</span>
<span class="c1"># &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY</span>
<span class="c1"># KIND, either express or implied. See the License for the</span>
<span class="c1"># specific language governing permissions and limitations</span>
<span class="c1"># under the License.</span>
<span class="c1">#</span>
<span class="kn">import</span> <span class="nn">gzip</span> <span class="k">as</span> <span class="nn">gz</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">shutil</span>
<span class="kn">import</span> <span class="nn">warnings</span>
<span class="kn">from</span> <span class="nn">google.cloud</span> <span class="k">import</span> <span class="n">storage</span>
<span class="kn">from</span> <span class="nn">airflow.contrib.hooks.gcp_api_base_hook</span> <span class="k">import</span> <span class="n">GoogleCloudBaseHook</span>
<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="k">import</span> <span class="n">AirflowException</span>
<div class="viewcode-block" id="GoogleCloudStorageHook"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook">[docs]</a><span class="k">class</span> <span class="nc">GoogleCloudStorageHook</span><span class="p">(</span><span class="n">GoogleCloudBaseHook</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Interact with Google Cloud Storage. This hook uses the Google Cloud Platform</span>
<span class="sd"> connection.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="GoogleCloudStorageHook._conn"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook._conn">[docs]</a> <span class="n">_conn</span> <span class="o">=</span> <span class="kc">None</span></div>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">&#39;google_cloud_default&#39;</span><span class="p">,</span>
<span class="n">delegate_to</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">GoogleCloudStorageHook</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">google_cloud_storage_conn_id</span><span class="p">,</span>
<span class="n">delegate_to</span><span class="p">)</span>
<div class="viewcode-block" id="GoogleCloudStorageHook.get_conn"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn">[docs]</a> <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a Google Cloud Storage service object.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_conn</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_conn</span> <span class="o">=</span> <span class="n">storage</span><span class="o">.</span><span class="n">Client</span><span class="p">(</span><span class="n">credentials</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_get_credentials</span><span class="p">(),</span>
<span class="n">project</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_conn</span></div>
<span class="c1"># pylint:disable=redefined-builtin</span>
<div class="viewcode-block" id="GoogleCloudStorageHook.copy"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy">[docs]</a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source_bucket</span><span class="p">,</span> <span class="n">source_object</span><span class="p">,</span> <span class="n">destination_bucket</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">destination_object</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Copies an object from a bucket to another, with renaming if requested.</span>
<span class="sd"> destination_bucket or destination_object can be omitted, in which case</span>
<span class="sd"> source bucket/object is used, but not both.</span>
<span class="sd"> :param source_bucket: The bucket of the object to copy from.</span>
<span class="sd"> :type source_bucket: str</span>
<span class="sd"> :param source_object: The object to copy.</span>
<span class="sd"> :type source_object: str</span>
<span class="sd"> :param destination_bucket: The destination of the object to copied to.</span>
<span class="sd"> Can be omitted; then the same bucket is used.</span>
<span class="sd"> :type destination_bucket: str</span>
<span class="sd"> :param destination_object: The (renamed) path of the object if given.</span>
<span class="sd"> Can be omitted; then the same name is used.</span>
<span class="sd"> :type destination_object: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">destination_bucket</span> <span class="o">=</span> <span class="n">destination_bucket</span> <span class="ow">or</span> <span class="n">source_bucket</span>
<span class="n">destination_object</span> <span class="o">=</span> <span class="n">destination_object</span> <span class="ow">or</span> <span class="n">source_object</span>
<span class="k">if</span> <span class="n">source_bucket</span> <span class="o">==</span> <span class="n">destination_bucket</span> <span class="ow">and</span> \
<span class="n">source_object</span> <span class="o">==</span> <span class="n">destination_object</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s1">&#39;Either source/destination bucket or source/destination object &#39;</span>
<span class="s1">&#39;must be different, not both the same: bucket=</span><span class="si">%s</span><span class="s1">, object=</span><span class="si">%s</span><span class="s1">&#39;</span> <span class="o">%</span>
<span class="p">(</span><span class="n">source_bucket</span><span class="p">,</span> <span class="n">source_object</span><span class="p">))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">source_bucket</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">source_object</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;source_bucket and source_object cannot be empty.&#39;</span><span class="p">)</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">source_bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">source_bucket</span><span class="p">)</span>
<span class="n">source_object</span> <span class="o">=</span> <span class="n">source_bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span><span class="n">source_object</span><span class="p">)</span>
<span class="n">destination_bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">destination_bucket</span><span class="p">)</span>
<span class="n">destination_object</span> <span class="o">=</span> <span class="n">source_bucket</span><span class="o">.</span><span class="n">copy_blob</span><span class="p">(</span>
<span class="n">blob</span><span class="o">=</span><span class="n">source_object</span><span class="p">,</span>
<span class="n">destination_bucket</span><span class="o">=</span><span class="n">destination_bucket</span><span class="p">,</span>
<span class="n">new_name</span><span class="o">=</span><span class="n">destination_object</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Object </span><span class="si">%s</span><span class="s1"> in bucket </span><span class="si">%s</span><span class="s1"> copied to object </span><span class="si">%s</span><span class="s1"> in bucket </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">source_object</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">source_bucket</span><span class="o">.</span><span class="n">name</span><span class="p">,</span>
<span class="n">destination_object</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">destination_bucket</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="GoogleCloudStorageHook.rewrite"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite">[docs]</a> <span class="k">def</span> <span class="nf">rewrite</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source_bucket</span><span class="p">,</span> <span class="n">source_object</span><span class="p">,</span> <span class="n">destination_bucket</span><span class="p">,</span>
<span class="n">destination_object</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Has the same functionality as copy, except that will work on files</span>
<span class="sd"> over 5 TB, as well as when copying between locations and/or storage</span>
<span class="sd"> classes.</span>
<span class="sd"> destination_object can be omitted, in which case source_object is used.</span>
<span class="sd"> :param source_bucket: The bucket of the object to copy from.</span>
<span class="sd"> :type source_bucket: str</span>
<span class="sd"> :param source_object: The object to copy.</span>
<span class="sd"> :type source_object: str</span>
<span class="sd"> :param destination_bucket: The destination of the object to copied to.</span>
<span class="sd"> :type destination_bucket: str</span>
<span class="sd"> :param destination_object: The (renamed) path of the object if given.</span>
<span class="sd"> Can be omitted; then the same name is used.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">destination_object</span> <span class="o">=</span> <span class="n">destination_object</span> <span class="ow">or</span> <span class="n">source_object</span>
<span class="k">if</span> <span class="p">(</span><span class="n">source_bucket</span> <span class="o">==</span> <span class="n">destination_bucket</span> <span class="ow">and</span>
<span class="n">source_object</span> <span class="o">==</span> <span class="n">destination_object</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s1">&#39;Either source/destination bucket or source/destination object &#39;</span>
<span class="s1">&#39;must be different, not both the same: bucket=</span><span class="si">%s</span><span class="s1">, object=</span><span class="si">%s</span><span class="s1">&#39;</span> <span class="o">%</span>
<span class="p">(</span><span class="n">source_bucket</span><span class="p">,</span> <span class="n">source_object</span><span class="p">))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">source_bucket</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">source_object</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;source_bucket and source_object cannot be empty.&#39;</span><span class="p">)</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">source_bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">source_bucket</span><span class="p">)</span>
<span class="n">source_object</span> <span class="o">=</span> <span class="n">source_bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span><span class="n">blob_name</span><span class="o">=</span><span class="n">source_object</span><span class="p">)</span>
<span class="n">destination_bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">destination_bucket</span><span class="p">)</span>
<span class="n">token</span><span class="p">,</span> <span class="n">bytes_rewritten</span><span class="p">,</span> <span class="n">total_bytes</span> <span class="o">=</span> <span class="n">destination_bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span>
<span class="n">blob_name</span><span class="o">=</span><span class="n">destination_object</span><span class="p">)</span><span class="o">.</span><span class="n">rewrite</span><span class="p">(</span>
<span class="n">source</span><span class="o">=</span><span class="n">source_object</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Total Bytes: </span><span class="si">%s</span><span class="s1"> | Bytes Written: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">total_bytes</span><span class="p">,</span> <span class="n">bytes_rewritten</span><span class="p">)</span>
<span class="k">while</span> <span class="n">token</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">token</span><span class="p">,</span> <span class="n">bytes_rewritten</span><span class="p">,</span> <span class="n">total_bytes</span> <span class="o">=</span> <span class="n">destination_bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span>
<span class="n">blob_name</span><span class="o">=</span><span class="n">destination_object</span><span class="p">)</span><span class="o">.</span><span class="n">rewrite</span><span class="p">(</span>
<span class="n">source</span><span class="o">=</span><span class="n">source_object</span><span class="p">,</span> <span class="n">token</span><span class="o">=</span><span class="n">token</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Total Bytes: </span><span class="si">%s</span><span class="s1"> | Bytes Written: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">total_bytes</span><span class="p">,</span> <span class="n">bytes_rewritten</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Object </span><span class="si">%s</span><span class="s1"> in bucket </span><span class="si">%s</span><span class="s1"> copied to object </span><span class="si">%s</span><span class="s1"> in bucket </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">source_object</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">source_bucket</span><span class="o">.</span><span class="n">name</span><span class="p">,</span>
<span class="n">destination_object</span><span class="p">,</span> <span class="n">destination_bucket</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="c1"># pylint:disable=redefined-builtin</span>
<div class="viewcode-block" id="GoogleCloudStorageHook.download"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download">[docs]</a> <span class="k">def</span> <span class="nf">download</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">filename</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Get a file from Google Cloud Storage.</span>
<span class="sd"> :param bucket: The bucket to fetch from.</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param object: The object to fetch.</span>
<span class="sd"> :type object: str</span>
<span class="sd"> :param filename: If set, a local file path where the file should be written to.</span>
<span class="sd"> :type filename: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">blob</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span><span class="n">blob_name</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="k">if</span> <span class="n">filename</span><span class="p">:</span>
<span class="n">blob</span><span class="o">.</span><span class="n">download_to_filename</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;File downloaded to </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
<span class="k">return</span> <span class="n">blob</span><span class="o">.</span><span class="n">download_as_string</span><span class="p">()</span></div>
<span class="c1"># pylint:disable=redefined-builtin</span>
<div class="viewcode-block" id="GoogleCloudStorageHook.upload"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload">[docs]</a> <span class="k">def</span> <span class="nf">upload</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">filename</span><span class="p">,</span>
<span class="n">mime_type</span><span class="o">=</span><span class="s1">&#39;application/octet-stream&#39;</span><span class="p">,</span> <span class="n">gzip</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">multipart</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">num_retries</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Uploads a local file to Google Cloud Storage.</span>
<span class="sd"> :param bucket: The bucket to upload to.</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param object: The object name to set when uploading the local file.</span>
<span class="sd"> :type object: str</span>
<span class="sd"> :param filename: The local file path to the file to be uploaded.</span>
<span class="sd"> :type filename: str</span>
<span class="sd"> :param mime_type: The MIME type to set when uploading the file.</span>
<span class="sd"> :type mime_type: str</span>
<span class="sd"> :param gzip: Option to compress file for upload</span>
<span class="sd"> :type gzip: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">multipart</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s2">&quot;&#39;multipart&#39; parameter is deprecated.&quot;</span>
<span class="s2">&quot; It is handled automatically by the Storage client&quot;</span><span class="p">,</span> <span class="ne">DeprecationWarning</span><span class="p">)</span>
<span class="k">if</span> <span class="n">num_retries</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s2">&quot;&#39;num_retries&#39; parameter is deprecated.&quot;</span>
<span class="s2">&quot; It is handled automatically by the Storage client&quot;</span><span class="p">,</span> <span class="ne">DeprecationWarning</span><span class="p">)</span>
<span class="k">if</span> <span class="n">gzip</span><span class="p">:</span>
<span class="n">filename_gz</span> <span class="o">=</span> <span class="n">filename</span> <span class="o">+</span> <span class="s1">&#39;.gz&#39;</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f_in</span><span class="p">:</span>
<span class="k">with</span> <span class="n">gz</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">filename_gz</span><span class="p">,</span> <span class="s1">&#39;wb&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f_out</span><span class="p">:</span>
<span class="n">shutil</span><span class="o">.</span><span class="n">copyfileobj</span><span class="p">(</span><span class="n">f_in</span><span class="p">,</span> <span class="n">f_out</span><span class="p">)</span>
<span class="n">filename</span> <span class="o">=</span> <span class="n">filename_gz</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">blob</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span><span class="n">blob_name</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="n">blob</span><span class="o">.</span><span class="n">upload_from_filename</span><span class="p">(</span><span class="n">filename</span><span class="o">=</span><span class="n">filename</span><span class="p">,</span>
<span class="n">content_type</span><span class="o">=</span><span class="n">mime_type</span><span class="p">)</span>
<span class="k">if</span> <span class="n">gzip</span><span class="p">:</span>
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;File </span><span class="si">%s</span><span class="s1"> uploaded to </span><span class="si">%s</span><span class="s1"> in </span><span class="si">%s</span><span class="s1"> bucket&#39;</span><span class="p">,</span> <span class="n">filename</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">bucket</span><span class="p">)</span></div>
<span class="c1"># pylint:disable=redefined-builtin</span>
<!-- Source listing for GoogleCloudStorageHook.exists.
     The div id is the anchor of this listing; the [docs] link leads back to the
     matching entry of the API reference page.
     Displayed method: rebinds the `bucket` argument to the result of
     client.bucket(bucket), builds a blob handle with bucket.blob(blob_name=object)
     and returns blob.exists().
     NOTE(review): the docstring shown has no return section; any wording change
     belongs in the Python module this page is presumably generated from.
--><div class="viewcode-block" id="GoogleCloudStorageHook.exists"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists">[docs]</a> <span class="k">def</span> <span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks for the existence of a file in Google Cloud Storage.</span>
<span class="sd"> :param bucket: The Google cloud storage bucket where the object is.</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param object: The name of the object to check in the Google cloud</span>
<span class="sd"> storage bucket.</span>
<span class="sd"> :type object: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">blob</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span><span class="n">blob_name</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="k">return</span> <span class="n">blob</span><span class="o">.</span><span class="n">exists</span><span class="p">()</span></div>
<span class="c1"># pylint:disable=redefined-builtin</span>
<!-- Source listing for GoogleCloudStorageHook.is_updated_after.
     The div id is the anchor of this listing; the [docs] link leads back to the
     matching entry of the API reference page.
     Displayed method: fetches the blob with bucket.get_blob(blob_name=object),
     raises ValueError when that call yields None, gives a `ts` without tzinfo a
     UTC tzinfo (dateutil.tz.tzutc()), logs both timestamps and returns True only
     when blob.updated is greater than `ts`; every other path returns False.
     NOTE(review): `bucket` is rebound to the client.bucket(...) result before the
     ValueError message is formatted, so the message receives that object rather
     than the name string passed by the caller. Confirm and fix upstream.
--><div class="viewcode-block" id="GoogleCloudStorageHook.is_updated_after"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after">[docs]</a> <span class="k">def</span> <span class="nf">is_updated_after</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">ts</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks if an object is updated in Google Cloud Storage.</span>
<span class="sd"> :param bucket: The Google cloud storage bucket where the object is.</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param object: The name of the object to check in the Google cloud</span>
<span class="sd"> storage bucket.</span>
<span class="sd"> :type object: str</span>
<span class="sd"> :param ts: The timestamp to check against.</span>
<span class="sd"> :type ts: datetime.datetime</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">blob</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_blob</span><span class="p">(</span><span class="n">blob_name</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="k">if</span> <span class="n">blob</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Object (</span><span class="si">{}</span><span class="s2">) not found in Bucket (</span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="nb">object</span><span class="p">,</span> <span class="n">bucket</span><span class="p">))</span>
<span class="n">blob_update_time</span> <span class="o">=</span> <span class="n">blob</span><span class="o">.</span><span class="n">updated</span>
<span class="k">if</span> <span class="n">blob_update_time</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">dateutil.tz</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">ts</span><span class="o">.</span><span class="n">tzinfo</span><span class="p">:</span>
<span class="n">ts</span> <span class="o">=</span> <span class="n">ts</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">tzinfo</span><span class="o">=</span><span class="n">dateutil</span><span class="o">.</span><span class="n">tz</span><span class="o">.</span><span class="n">tzutc</span><span class="p">())</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Verify object date: </span><span class="si">%s</span><span class="s2"> &gt; </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">blob_update_time</span><span class="p">,</span> <span class="n">ts</span><span class="p">)</span>
<span class="k">if</span> <span class="n">blob_update_time</span> <span class="o">&gt;</span> <span class="n">ts</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">return</span> <span class="kc">False</span></div>
<!-- Source listing for GoogleCloudStorageHook.delete.
     The div id is the anchor of this listing; the [docs] link leads back to the
     matching entry of the API reference page.
     Displayed method: builds a blob handle with bucket.blob(blob_name=object),
     calls blob.delete() and logs the object name.
     NOTE(review): the `generation` parameter only triggers a DeprecationWarning
     when it is not None and is otherwise unused; the docstring shown does not
     mention it. A docstring fix belongs in the upstream Python module.
--><div class="viewcode-block" id="GoogleCloudStorageHook.delete"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete">[docs]</a> <span class="k">def</span> <span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">generation</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Deletes an object from the bucket.</span>
<span class="sd"> :param bucket: name of the bucket, where the object resides</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param object: name of the object to delete</span>
<span class="sd"> :type object: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">generation</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s2">&quot;&#39;generation&#39; parameter is no longer supported&quot;</span><span class="p">,</span> <span class="ne">DeprecationWarning</span><span class="p">)</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">blob</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span><span class="n">blob_name</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="n">blob</span><span class="o">.</span><span class="n">delete</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Blob </span><span class="si">%s</span><span class="s1"> deleted.&#39;</span><span class="p">,</span> <span class="nb">object</span><span class="p">)</span></div>
<!-- Source listing for GoogleCloudStorageHook.list.
     The div id is the anchor of this listing; the [docs] link leads back to the
     matching entry of the API reference page.
     Displayed method: loops over bucket.list_blobs(...) calls, passing
     maxResults as max_results and the previous next_page_token as page_token,
     and stops once next_page_token is None. For each call it collects blob.name
     values; when blobs.prefixes is non-empty the prefixes are appended to the
     result instead of those names. The accumulated list `ids` is returned.
     NOTE(review): the docstring shown reads "with the give string prefix"
     (presumably "given") and promises "a stream of object names" although a
     list is returned. Wording fixes belong in the upstream Python module.
--><div class="viewcode-block" id="GoogleCloudStorageHook.list"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list">[docs]</a> <span class="k">def</span> <span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="n">versions</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">maxResults</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> List all objects from the bucket with the give string prefix in name</span>
<span class="sd"> :param bucket: bucket name</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param versions: if true, list all versions of the objects</span>
<span class="sd"> :type versions: bool</span>
<span class="sd"> :param maxResults: max count of items to return in a single page of responses</span>
<span class="sd"> :type maxResults: int</span>
<span class="sd"> :param prefix: prefix string which filters objects whose name begin with</span>
<span class="sd"> this prefix</span>
<span class="sd"> :type prefix: str</span>
<span class="sd"> :param delimiter: filters objects based on the delimiter (for e.g &#39;.csv&#39;)</span>
<span class="sd"> :type delimiter: str</span>
<span class="sd"> :return: a stream of object names matching the filtering criteria</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">ids</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">pageToken</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="n">blobs</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">list_blobs</span><span class="p">(</span>
<span class="n">max_results</span><span class="o">=</span><span class="n">maxResults</span><span class="p">,</span>
<span class="n">page_token</span><span class="o">=</span><span class="n">pageToken</span><span class="p">,</span>
<span class="n">prefix</span><span class="o">=</span><span class="n">prefix</span><span class="p">,</span>
<span class="n">delimiter</span><span class="o">=</span><span class="n">delimiter</span><span class="p">,</span>
<span class="n">versions</span><span class="o">=</span><span class="n">versions</span>
<span class="p">)</span>
<span class="n">blob_names</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">blob</span> <span class="ow">in</span> <span class="n">blobs</span><span class="p">:</span>
<span class="n">blob_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">blob</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="n">prefixes</span> <span class="o">=</span> <span class="n">blobs</span><span class="o">.</span><span class="n">prefixes</span>
<span class="k">if</span> <span class="n">prefixes</span><span class="p">:</span>
<span class="n">ids</span> <span class="o">+=</span> <span class="nb">list</span><span class="p">(</span><span class="n">prefixes</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">ids</span> <span class="o">+=</span> <span class="n">blob_names</span>
<span class="n">pageToken</span> <span class="o">=</span> <span class="n">blobs</span><span class="o">.</span><span class="n">next_page_token</span>
<span class="k">if</span> <span class="n">pageToken</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># empty next page token</span>
<span class="k">break</span>
<span class="k">return</span> <span class="n">ids</span></div>
<!-- Source listing for GoogleCloudStorageHook.get_size.
     The div id is the anchor of this listing; the [docs] link leads back to the
     matching entry of the API reference page.
     Displayed method: logs the lookup, fetches the blob with
     bucket.get_blob(blob_name=object), reads blob.size, logs it and returns it.
     NOTE(review): is_updated_after in this same listing checks the get_blob
     result for None before using it; this method reads .size without such a
     check, so a missing object presumably ends in an AttributeError. Confirm
     against the storage client documentation and fix in the upstream module.
--><div class="viewcode-block" id="GoogleCloudStorageHook.get_size"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size">[docs]</a> <span class="k">def</span> <span class="nf">get_size</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets the size of a file in Google Cloud Storage in bytes.</span>
<span class="sd"> :param bucket: The Google cloud storage bucket where the object is.</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param object: The name of the object to check in the Google cloud storage bucket.</span>
<span class="sd"> :type object: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Checking the file size of object: </span><span class="si">%s</span><span class="s1"> in bucket: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="nb">object</span><span class="p">,</span>
<span class="n">bucket</span><span class="p">)</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">blob</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_blob</span><span class="p">(</span><span class="n">blob_name</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="n">blob_size</span> <span class="o">=</span> <span class="n">blob</span><span class="o">.</span><span class="n">size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;The file size of </span><span class="si">%s</span><span class="s1"> is </span><span class="si">%s</span><span class="s1"> bytes.&#39;</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">blob_size</span><span class="p">)</span>
<span class="k">return</span> <span class="n">blob_size</span></div>
<!-- Source listing for GoogleCloudStorageHook.get_crc32c.
     The div id is the anchor of this listing; the [docs] link leads back to the
     matching entry of the API reference page.
     Displayed method: logs the lookup, fetches the blob with
     bucket.get_blob(blob_name=object), reads blob.crc32c, logs it and returns it.
     NOTE(review): is_updated_after in this same listing checks the get_blob
     result for None before using it; this method reads .crc32c without such a
     check, so a missing object presumably ends in an AttributeError. Confirm
     against the storage client documentation and fix in the upstream module.
--><div class="viewcode-block" id="GoogleCloudStorageHook.get_crc32c"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c">[docs]</a> <span class="k">def</span> <span class="nf">get_crc32c</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets the CRC32c checksum of an object in Google Cloud Storage.</span>
<span class="sd"> :param bucket: The Google cloud storage bucket where the object is.</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param object: The name of the object to check in the Google cloud</span>
<span class="sd"> storage bucket.</span>
<span class="sd"> :type object: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Retrieving the crc32c checksum of &#39;</span>
<span class="s1">&#39;object: </span><span class="si">%s</span><span class="s1"> in bucket: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">bucket</span><span class="p">)</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">blob</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_blob</span><span class="p">(</span><span class="n">blob_name</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="n">blob_crc32c</span> <span class="o">=</span> <span class="n">blob</span><span class="o">.</span><span class="n">crc32c</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;The crc32c checksum of </span><span class="si">%s</span><span class="s1"> is </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">blob_crc32c</span><span class="p">)</span>
<span class="k">return</span> <span class="n">blob_crc32c</span></div>
<!-- Source listing for GoogleCloudStorageHook.get_md5hash.
     The div id is the anchor of this listing; the [docs] link leads back to the
     matching entry of the API reference page.
     Displayed method: logs the lookup, fetches the blob with
     bucket.get_blob(blob_name=object), reads blob.md5_hash, logs it and returns it.
     NOTE(review): is_updated_after in this same listing checks the get_blob
     result for None before using it; this method reads .md5_hash without such a
     check, so a missing object presumably ends in an AttributeError. Confirm
     against the storage client documentation and fix in the upstream module.
--><div class="viewcode-block" id="GoogleCloudStorageHook.get_md5hash"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash">[docs]</a> <span class="k">def</span> <span class="nf">get_md5hash</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets the MD5 hash of an object in Google Cloud Storage.</span>
<span class="sd"> :param bucket: The Google cloud storage bucket where the object is.</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param object: The name of the object to check in the Google cloud</span>
<span class="sd"> storage bucket.</span>
<span class="sd"> :type object: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Retrieving the MD5 hash of &#39;</span>
<span class="s1">&#39;object: </span><span class="si">%s</span><span class="s1"> in bucket: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">bucket</span><span class="p">)</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">blob</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">get_blob</span><span class="p">(</span><span class="n">blob_name</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
<span class="n">blob_md5hash</span> <span class="o">=</span> <span class="n">blob</span><span class="o">.</span><span class="n">md5_hash</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;The md5Hash of </span><span class="si">%s</span><span class="s1"> is </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">blob_md5hash</span><span class="p">)</span>
<span class="k">return</span> <span class="n">blob_md5hash</span></div>
<!-- Source listing for GoogleCloudStorageHook.create_bucket.
     The two decorator lines come before the viewcode-block div, so they are not
     part of the element that carries the anchor id; the [docs] link inside the
     div leads back to the matching entry of the API reference page.
     Displayed method: logs the request, creates a bucket handle with
     client.bucket(bucket_name=bucket_name), applies every key of `resource`
     except "name" through bucket._patch_property (a private-looking helper),
     sets storage_class and labels, calls bucket.create(project=project_id,
     location=location) and returns bucket.id.
     NOTE(review): the docstring shown says an unspecified storage class defaults
     to STANDARD, while the signature default shown is 'MULTI_REGIONAL'. One of
     the two is presumably stale; resolve in the upstream Python module.
--><span class="nd">@GoogleCloudBaseHook</span><span class="o">.</span><span class="n">catch_http_exception</span>
<span class="nd">@GoogleCloudBaseHook</span><span class="o">.</span><span class="n">fallback_to_default_project_id</span>
<div class="viewcode-block" id="GoogleCloudStorageHook.create_bucket"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket">[docs]</a> <span class="k">def</span> <span class="nf">create_bucket</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">bucket_name</span><span class="p">,</span>
<span class="n">resource</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">storage_class</span><span class="o">=</span><span class="s1">&#39;MULTI_REGIONAL&#39;</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="s1">&#39;US&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">labels</span><span class="o">=</span><span class="kc">None</span>
<span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates a new bucket. Google Cloud Storage uses a flat namespace, so</span>
<span class="sd"> you can&#39;t create a bucket with a name that is already in use.</span>
<span class="sd"> .. seealso::</span>
<span class="sd"> For more information, see Bucket Naming Guidelines:</span>
<span class="sd"> https://cloud.google.com/storage/docs/bucketnaming.html#requirements</span>
<span class="sd"> :param bucket_name: The name of the bucket.</span>
<span class="sd"> :type bucket_name: str</span>
<span class="sd"> :param resource: An optional dict with parameters for creating the bucket.</span>
<span class="sd"> For information on available parameters, see Cloud Storage API doc:</span>
<span class="sd"> https://cloud.google.com/storage/docs/json_api/v1/buckets/insert</span>
<span class="sd"> :type resource: dict</span>
<span class="sd"> :param storage_class: This defines how objects in the bucket are stored</span>
<span class="sd"> and determines the SLA and the cost of storage. Values include</span>
<span class="sd"> - ``MULTI_REGIONAL``</span>
<span class="sd"> - ``REGIONAL``</span>
<span class="sd"> - ``STANDARD``</span>
<span class="sd"> - ``NEARLINE``</span>
<span class="sd"> - ``COLDLINE``.</span>
<span class="sd"> If this value is not specified when the bucket is</span>
<span class="sd"> created, it will default to STANDARD.</span>
<span class="sd"> :type storage_class: str</span>
<span class="sd"> :param location: The location of the bucket.</span>
<span class="sd"> Object data for objects in the bucket resides in physical storage</span>
<span class="sd"> within this region. Defaults to US.</span>
<span class="sd"> .. seealso::</span>
<span class="sd"> https://developers.google.com/storage/docs/bucket-locations</span>
<span class="sd"> :type location: str</span>
<span class="sd"> :param project_id: The ID of the GCP Project.</span>
<span class="sd"> :type project_id: str</span>
<span class="sd"> :param labels: User-provided labels, in key/value pairs.</span>
<span class="sd"> :type labels: dict</span>
<span class="sd"> :return: If successful, it returns the ``id`` of the bucket.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Creating Bucket: </span><span class="si">%s</span><span class="s1">; Location: </span><span class="si">%s</span><span class="s1">; Storage Class: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">bucket_name</span><span class="p">,</span> <span class="n">location</span><span class="p">,</span> <span class="n">storage_class</span><span class="p">)</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="o">=</span><span class="n">bucket_name</span><span class="p">)</span>
<span class="n">bucket_resource</span> <span class="o">=</span> <span class="n">resource</span> <span class="ow">or</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">bucket_resource</span><span class="p">:</span>
<span class="k">if</span> <span class="n">item</span> <span class="o">!=</span> <span class="s2">&quot;name&quot;</span><span class="p">:</span>
<span class="n">bucket</span><span class="o">.</span><span class="n">_patch_property</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">item</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">resource</span><span class="p">[</span><span class="n">item</span><span class="p">])</span>
<span class="n">bucket</span><span class="o">.</span><span class="n">storage_class</span> <span class="o">=</span> <span class="n">storage_class</span>
<span class="n">bucket</span><span class="o">.</span><span class="n">labels</span> <span class="o">=</span> <span class="n">labels</span> <span class="ow">or</span> <span class="p">{}</span>
<span class="n">bucket</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">project</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span> <span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">)</span>
<span class="k">return</span> <span class="n">bucket</span><span class="o">.</span><span class="n">id</span></div>
<!-- Source listing for GoogleCloudStorageHook.insert_bucket_acl.
     The div id is the anchor of this listing; the [docs] link leads back to the
     matching entry of the API reference page.
     Displayed method: logs the request, creates a bucket handle with
     client.bucket(bucket_name=bucket), reloads bucket.acl, adds the entry via
     bucket.acl.entity_from_dict with the given entity and role, sets
     bucket.acl.user_project when `user_project` is truthy, saves the ACL and
     logs completion. Nothing is returned.
     NOTE(review): the first log call receives the bucket name passed by the
     caller, but the final one runs after `bucket` has been rebound to the
     client.bucket(...) result, so the two messages presumably render
     differently. Confirm and fix in the upstream Python module.
--><div class="viewcode-block" id="GoogleCloudStorageHook.insert_bucket_acl"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_bucket_acl">[docs]</a> <span class="k">def</span> <span class="nf">insert_bucket_acl</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="n">entity</span><span class="p">,</span> <span class="n">role</span><span class="p">,</span> <span class="n">user_project</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates a new ACL entry on the specified bucket.</span>
<span class="sd"> See: https://cloud.google.com/storage/docs/json_api/v1/bucketAccessControls/insert</span>
<span class="sd"> :param bucket: Name of a bucket.</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param entity: The entity holding the permission, in one of the following forms:</span>
<span class="sd"> user-userId, user-email, group-groupId, group-email, domain-domain,</span>
<span class="sd"> project-team-projectId, allUsers, allAuthenticatedUsers.</span>
<span class="sd"> See: https://cloud.google.com/storage/docs/access-control/lists#scopes</span>
<span class="sd"> :type entity: str</span>
<span class="sd"> :param role: The access permission for the entity.</span>
<span class="sd"> Acceptable values are: &quot;OWNER&quot;, &quot;READER&quot;, &quot;WRITER&quot;.</span>
<span class="sd"> :type role: str</span>
<span class="sd"> :param user_project: (Optional) The project to be billed for this request.</span>
<span class="sd"> Required for Requester Pays buckets.</span>
<span class="sd"> :type user_project: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Creating a new ACL entry in bucket: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">bucket</span><span class="p">)</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="o">=</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">bucket</span><span class="o">.</span><span class="n">acl</span><span class="o">.</span><span class="n">reload</span><span class="p">()</span>
<span class="n">bucket</span><span class="o">.</span><span class="n">acl</span><span class="o">.</span><span class="n">entity_from_dict</span><span class="p">(</span><span class="n">entity_dict</span><span class="o">=</span><span class="p">{</span><span class="s2">&quot;entity&quot;</span><span class="p">:</span> <span class="n">entity</span><span class="p">,</span> <span class="s2">&quot;role&quot;</span><span class="p">:</span> <span class="n">role</span><span class="p">})</span>
<span class="k">if</span> <span class="n">user_project</span><span class="p">:</span>
<span class="n">bucket</span><span class="o">.</span><span class="n">acl</span><span class="o">.</span><span class="n">user_project</span> <span class="o">=</span> <span class="n">user_project</span>
<span class="n">bucket</span><span class="o">.</span><span class="n">acl</span><span class="o">.</span><span class="n">save</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;A new ACL entry created in bucket: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">bucket</span><span class="p">)</span></div>
<div class="viewcode-block" id="GoogleCloudStorageHook.insert_object_acl"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.insert_object_acl">[docs]</a> <span class="k">def</span> <span class="nf">insert_object_acl</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="n">object_name</span><span class="p">,</span> <span class="n">entity</span><span class="p">,</span> <span class="n">role</span><span class="p">,</span> <span class="n">generation</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">user_project</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates a new ACL entry on the specified object.</span>
<span class="sd"> See: https://cloud.google.com/storage/docs/json_api/v1/objectAccessControls/insert</span>
<span class="sd"> :param bucket: Name of a bucket.</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param object_name: Name of the object. For information about how to URL encode</span>
<span class="sd"> object names to be path safe, see:</span>
<span class="sd"> https://cloud.google.com/storage/docs/json_api/#encoding</span>
<span class="sd"> :type object_name: str</span>
<span class="sd"> :param entity: The entity holding the permission, in one of the following forms:</span>
<span class="sd"> user-userId, user-email, group-groupId, group-email, domain-domain,</span>
<span class="sd"> project-team-projectId, allUsers, allAuthenticatedUsers</span>
<span class="sd"> See: https://cloud.google.com/storage/docs/access-control/lists#scopes</span>
<span class="sd"> :type entity: str</span>
<span class="sd"> :param role: The access permission for the entity.</span>
<span class="sd"> Acceptable values are: &quot;OWNER&quot;, &quot;READER&quot;.</span>
<span class="sd"> :type role: str</span>
<span class="sd"> :param user_project: (Optional) The project to be billed for this request.</span>
<span class="sd"> Required for Requester Pays buckets.</span>
<span class="sd"> :type user_project: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">generation</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s2">&quot;&#39;generation&#39; parameter is no longer supported&quot;</span><span class="p">,</span> <span class="ne">DeprecationWarning</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Creating a new ACL entry for object: </span><span class="si">%s</span><span class="s1"> in bucket: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">object_name</span><span class="p">,</span> <span class="n">bucket</span><span class="p">)</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket_name</span><span class="o">=</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">blob</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span><span class="n">object_name</span><span class="p">)</span>
<span class="c1"># Reload fetches the current ACL from Cloud Storage.</span>
<span class="n">blob</span><span class="o">.</span><span class="n">acl</span><span class="o">.</span><span class="n">reload</span><span class="p">()</span>
<span class="n">blob</span><span class="o">.</span><span class="n">acl</span><span class="o">.</span><span class="n">entity_from_dict</span><span class="p">(</span><span class="n">entity_dict</span><span class="o">=</span><span class="p">{</span><span class="s2">&quot;entity&quot;</span><span class="p">:</span> <span class="n">entity</span><span class="p">,</span> <span class="s2">&quot;role&quot;</span><span class="p">:</span> <span class="n">role</span><span class="p">})</span>
<span class="k">if</span> <span class="n">user_project</span><span class="p">:</span>
<span class="n">blob</span><span class="o">.</span><span class="n">acl</span><span class="o">.</span><span class="n">user_project</span> <span class="o">=</span> <span class="n">user_project</span>
<span class="n">blob</span><span class="o">.</span><span class="n">acl</span><span class="o">.</span><span class="n">save</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;A new ACL entry created for object: </span><span class="si">%s</span><span class="s1"> in bucket: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">object_name</span><span class="p">,</span> <span class="n">bucket</span><span class="p">)</span></div>
<div class="viewcode-block" id="GoogleCloudStorageHook.compose"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.compose">[docs]</a> <span class="k">def</span> <span class="nf">compose</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="n">source_objects</span><span class="p">,</span> <span class="n">destination_object</span><span class="p">,</span> <span class="n">num_retries</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Composes a list of existing objects into a new object in the same storage bucket.</span>
<span class="sd"> Currently it only supports up to 32 objects that can be concatenated</span>
<span class="sd"> in a single operation.</span>
<span class="sd"> https://cloud.google.com/storage/docs/json_api/v1/objects/compose</span>
<span class="sd"> :param bucket: The name of the bucket containing the source objects.</span>
<span class="sd"> This is also the same bucket to store the composed destination object.</span>
<span class="sd"> :type bucket: str</span>
<span class="sd"> :param source_objects: The list of source objects that will be composed</span>
<span class="sd"> into a single object.</span>
<span class="sd"> :type source_objects: list</span>
<span class="sd"> :param destination_object: The path of the composed object to create; must not be empty.</span>
<span class="sd"> :type destination_object: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">num_retries</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s2">&quot;&#39;num_retries&#39; parameter is Deprecated. Retries are &quot;</span>
<span class="s2">&quot;now handled automatically&quot;</span><span class="p">,</span> <span class="ne">DeprecationWarning</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">source_objects</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">len</span><span class="p">(</span><span class="n">source_objects</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;source_objects cannot be empty.&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">bucket</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">destination_object</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;bucket and destination_object cannot be empty.&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Composing </span><span class="si">%s</span><span class="s2"> to </span><span class="si">%s</span><span class="s2"> in the bucket </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span>
<span class="n">source_objects</span><span class="p">,</span> <span class="n">destination_object</span><span class="p">,</span> <span class="n">bucket</span><span class="p">)</span>
<span class="n">client</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">bucket</span><span class="p">(</span><span class="n">bucket</span><span class="p">)</span>
<span class="n">destination_blob</span> <span class="o">=</span> <span class="n">bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span><span class="n">destination_object</span><span class="p">)</span>
<span class="n">destination_blob</span><span class="o">.</span><span class="n">compose</span><span class="p">(</span>
<span class="n">sources</span><span class="o">=</span><span class="p">[</span>
<span class="n">bucket</span><span class="o">.</span><span class="n">blob</span><span class="p">(</span><span class="n">blob_name</span><span class="o">=</span><span class="n">source_object</span><span class="p">)</span> <span class="k">for</span> <span class="n">source_object</span> <span class="ow">in</span> <span class="n">source_objects</span>
<span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Completed successfully.&quot;</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="_parse_gcs_url"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/gcs_hook/index.html#airflow.contrib.hooks.gcs_hook._parse_gcs_url">[docs]</a><span class="k">def</span> <span class="nf">_parse_gcs_url</span><span class="p">(</span><span class="n">gsurl</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Given a Google Cloud Storage URL (gs://&lt;bucket&gt;/&lt;blob&gt;), returns a</span>
<span class="sd"> tuple containing the corresponding bucket and blob.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Python 3</span>
<span class="k">try</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="k">import</span> <span class="n">urlparse</span>
<span class="c1"># Python 2</span>
<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">urlparse</span> <span class="k">import</span> <span class="n">urlparse</span>
<span class="n">parsed_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">gsurl</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s1">&#39;Please provide a bucket name&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">bucket</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span>
<span class="c1"># Remove leading &#39;/&#39; but NOT trailing one</span>
<span class="n">blob</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">lstrip</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">bucket</span><span class="p">,</span> <span class="n">blob</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>
</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
<div class="footer">This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics. You can disable it by blocking
the JavaScript coming from google-analytics.com and its subdomains. Check our
<a href="../../../../privacy_notice.html">Privacy Policy</a>
for more details.
<script type="text/javascript">
// Asynchronously load the legacy Google Analytics library (ga.js) by appending
// a <script> element to the first element child of <html>.
(function () {
  var ga = document.createElement('script');
  // Pick the analytics host that matches the protocol this page was served over.
  var host = document.location.protocol === 'https:' ? 'https://ssl' : 'http://www';
  ga.src = host + '.google-analytics.com/ga.js';
  ga.setAttribute('async', 'true');
  // firstElementChild skips comment and whitespace text nodes. It replaces a
  // manual scan over childNodes that threw a TypeError when it ran past the end
  // of the list without finding an element.
  var target = document.documentElement.firstElementChild;
  if (target) {
    target.appendChild(ga);
  }
})();
</script>
</div>
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
// Once the DOM is ready, switch on the theme's navigation, passing `true`
// to SphinxRtdTheme.Navigation.enable.
jQuery(function enableThemeNavigation() {
  var navigation = SphinxRtdTheme.Navigation;
  navigation.enable(true);
});
</script>
</body>
</html>