| |
| |
| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <!-- Document metadata: character encoding first, then viewport and page title. --> |
| <meta charset="utf-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>airflow.contrib.hooks.gcs_hook — Airflow Documentation</title> |
| |
| <!-- Theme and Pygments stylesheets, resolved relative to this page. --> |
| <link rel="stylesheet" href="../../../../_static/css/theme.css"> |
| <link rel="stylesheet" href="../../../../_static/pygments.css"> |
| |
| <!-- Links to the general index and the search page. --> |
| <link rel="index" href="../../../../genindex.html" title="Index"> |
| <link rel="search" href="../../../../search.html" title="Search"> |
| |
| <!-- Kept as a plain synchronous script in the head, exactly as before. --> |
| <script src="../../../../_static/js/modernizr.min.js"></script> |
| </head> |
| |
| <body class="wy-body-for-nav"> |
| |
| |
| <div class="wy-grid-for-nav"> |
| |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search"> |
| |
| <!-- Project home link shown at the top of the sidebar. --> |
| <a href="../../../../index.html" class="icon icon-home"> Airflow |
| </a> |
| |
| <!-- Documentation search form; submits the query to search.html via GET. --> |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get"> |
| <!-- A placeholder is not an accessible name, so the field also carries an aria-label. --> |
| <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" /> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| |
| </div> |
| |
| <!-- Top-level table of contents; every entry links to a page at the documentation root. --> |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../project.html">Project</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../license.html">License</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../start.html">Quick Start</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../installation.html">Installation</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../tutorial.html">Tutorial</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../howto/index.html">How-to Guides</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../ui.html">UI / Screenshots</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../concepts.html">Concepts</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../profiling.html">Data Profiling</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../cli.html">Command Line Interface</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../scheduler.html">Scheduling &amp; Triggers</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../plugins.html">Plugins</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../security.html">Security</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../timezone.html">Time zones</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../api.html">Experimental Rest API</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../integration.html">Integration</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../lineage.html">Lineage</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../faq.html">FAQ</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="../../../../code.html">API Reference</a></li> |
| </ul> |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" aria-label="top navigation"> |
| <!-- Bars icon carrying the wy-nav-top toggle hook, followed by the project home link. --> |
| <i class="fa fa-bars" data-toggle="wy-nav-top"></i> |
| <a href="../../../../index.html">Airflow</a> |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| |
| <div class="rst-content"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| <!-- Breadcrumb trail: docs root, module-code index, then the current module name. --> |
| <ul class="wy-breadcrumbs"> |
| <li><a href="../../../../index.html">Docs</a> »</li> |
| <li><a href="../../../index.html">Module code</a> »</li> |
| <li>airflow.contrib.hooks.gcs_hook</li> |
| <!-- Aside slot; it holds no content on this page. --> |
| <li class="wy-breadcrumbs-aside"> |
| </li> |
| </ul> |
| <hr> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <h1>Source code for airflow.contrib.hooks.gcs_hook</h1><div class="highlight"><pre> |
| <span></span><span class="c1"># -*- coding: utf-8 -*-</span> |
| <span class="c1">#</span> |
| <span class="c1"># Licensed to the Apache Software Foundation (ASF) under one</span> |
| <span class="c1"># or more contributor license agreements. See the NOTICE file</span> |
| <span class="c1"># distributed with this work for additional information</span> |
| <span class="c1"># regarding copyright ownership. The ASF licenses this file</span> |
| <span class="c1"># to you under the Apache License, Version 2.0 (the</span> |
| <span class="c1"># "License"); you may not use this file except in compliance</span> |
| <span class="c1"># with the License. You may obtain a copy of the License at</span> |
| <span class="c1">#</span> |
| <span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="c1">#</span> |
| <span class="c1"># Unless required by applicable law or agreed to in writing,</span> |
| <span class="c1"># software distributed under the License is distributed on an</span> |
| <span class="c1"># "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY</span> |
| <span class="c1"># KIND, either express or implied. See the License for the</span> |
| <span class="c1"># specific language governing permissions and limitations</span> |
| <span class="c1"># under the License.</span> |
| <span class="c1">#</span> |
| <span class="kn">from</span> <span class="nn">apiclient.discovery</span> <span class="k">import</span> <span class="n">build</span> |
| <span class="kn">from</span> <span class="nn">apiclient.http</span> <span class="k">import</span> <span class="n">MediaFileUpload</span> |
| <span class="kn">from</span> <span class="nn">googleapiclient</span> <span class="k">import</span> <span class="n">errors</span> |
| |
| <span class="kn">from</span> <span class="nn">airflow.contrib.hooks.gcp_api_base_hook</span> <span class="k">import</span> <span class="n">GoogleCloudBaseHook</span> |
| <span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="k">import</span> <span class="n">AirflowException</span> |
| |
| <span class="kn">import</span> <span class="nn">gzip</span> <span class="k">as</span> <span class="nn">gz</span> |
| <span class="kn">import</span> <span class="nn">shutil</span> |
| <span class="kn">import</span> <span class="nn">re</span> |
| <span class="kn">import</span> <span class="nn">os</span> |
| |
| |
| <div class="viewcode-block" id="GoogleCloudStorageHook"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook">[docs]</a><span class="k">class</span> <span class="nc">GoogleCloudStorageHook</span><span class="p">(</span><span class="n">GoogleCloudBaseHook</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Interact with Google Cloud Storage. This hook uses the Google Cloud Platform</span> |
| <span class="sd"> connection.</span> |
| <span class="sd"> """</span> |
| |
| <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> |
| <span class="n">google_cloud_storage_conn_id</span><span class="o">=</span><span class="s1">'google_cloud_default'</span><span class="p">,</span> |
| <span class="n">delegate_to</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">GoogleCloudStorageHook</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">google_cloud_storage_conn_id</span><span class="p">,</span> |
| <span class="n">delegate_to</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="GoogleCloudStorageHook.get_conn"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_conn">[docs]</a> <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Returns a Google Cloud Storage service object.</span> |
| <span class="sd"> """</span> |
| <span class="n">http_authorized</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_authorize</span><span class="p">()</span> |
| <span class="k">return</span> <span class="n">build</span><span class="p">(</span> |
| <span class="s1">'storage'</span><span class="p">,</span> <span class="s1">'v1'</span><span class="p">,</span> <span class="n">http</span><span class="o">=</span><span class="n">http_authorized</span><span class="p">,</span> <span class="n">cache_discovery</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></div> |
| |
| <span class="c1"># pylint:disable=redefined-builtin</span> |
| <div class="viewcode-block" id="GoogleCloudStorageHook.copy"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.copy">[docs]</a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source_bucket</span><span class="p">,</span> <span class="n">source_object</span><span class="p">,</span> <span class="n">destination_bucket</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> |
| <span class="n">destination_object</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Copies an object from a bucket to another, with renaming if requested.</span> |
| |
| <span class="sd"> destination_bucket or destination_object can be omitted, in which case</span> |
| <span class="sd"> source bucket/object is used, but not both.</span> |
| |
| <span class="sd"> :param source_bucket: The bucket of the object to copy from.</span> |
| <span class="sd"> :type source_bucket: string</span> |
| <span class="sd"> :param source_object: The object to copy.</span> |
| <span class="sd"> :type source_object: string</span> |
| <span class="sd"> :param destination_bucket: The destination of the object to be copied to.</span> |
| <span class="sd"> Can be omitted; then the same bucket is used.</span> |
| <span class="sd"> :type destination_bucket: string</span> |
| <span class="sd"> :param destination_object: The (renamed) path of the object if given.</span> |
| <span class="sd"> Can be omitted; then the same name is used.</span> |
| <span class="sd"> """</span> |
| <span class="n">destination_bucket</span> <span class="o">=</span> <span class="n">destination_bucket</span> <span class="ow">or</span> <span class="n">source_bucket</span> |
| <span class="n">destination_object</span> <span class="o">=</span> <span class="n">destination_object</span> <span class="ow">or</span> <span class="n">source_object</span> |
| <span class="k">if</span> <span class="n">source_bucket</span> <span class="o">==</span> <span class="n">destination_bucket</span> <span class="ow">and</span> \ |
| <span class="n">source_object</span> <span class="o">==</span> <span class="n">destination_object</span><span class="p">:</span> |
| |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> |
| <span class="s1">'Either source/destination bucket or source/destination object '</span> |
| <span class="s1">'must be different, not both the same: bucket=</span><span class="si">%s</span><span class="s1">, object=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> |
| <span class="p">(</span><span class="n">source_bucket</span><span class="p">,</span> <span class="n">source_object</span><span class="p">))</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">source_bucket</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">source_object</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'source_bucket and source_object cannot be empty.'</span><span class="p">)</span> |
| |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">service</span> \ |
| <span class="o">.</span><span class="n">objects</span><span class="p">()</span> \ |
| <span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">sourceBucket</span><span class="o">=</span><span class="n">source_bucket</span><span class="p">,</span> <span class="n">sourceObject</span><span class="o">=</span><span class="n">source_object</span><span class="p">,</span> |
| <span class="n">destinationBucket</span><span class="o">=</span><span class="n">destination_bucket</span><span class="p">,</span> |
| <span class="n">destinationObject</span><span class="o">=</span><span class="n">destination_object</span><span class="p">,</span> <span class="n">body</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span> \ |
| <span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| <span class="k">return</span> <span class="kc">True</span> |
| <span class="k">except</span> <span class="n">errors</span><span class="o">.</span><span class="n">HttpError</span> <span class="k">as</span> <span class="n">ex</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">ex</span><span class="o">.</span><span class="n">resp</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'404'</span><span class="p">:</span> |
| <span class="k">return</span> <span class="kc">False</span> |
| <span class="k">raise</span></div> |
| |
| <div class="viewcode-block" id="GoogleCloudStorageHook.rewrite"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.rewrite">[docs]</a> <span class="k">def</span> <span class="nf">rewrite</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source_bucket</span><span class="p">,</span> <span class="n">source_object</span><span class="p">,</span> <span class="n">destination_bucket</span><span class="p">,</span> |
| <span class="n">destination_object</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Has the same functionality as copy, except that it will work on files</span> |
| <span class="sd"> over 5 TB, as well as when copying between locations and/or storage</span> |
| <span class="sd"> classes.</span> |
| |
| <span class="sd"> destination_object can be omitted, in which case source_object is used.</span> |
| |
| <span class="sd"> :param source_bucket: The bucket of the object to copy from.</span> |
| <span class="sd"> :type source_bucket: string</span> |
| <span class="sd"> :param source_object: The object to copy.</span> |
| <span class="sd"> :type source_object: string</span> |
| <span class="sd"> :param destination_bucket: The destination of the object to be copied to.</span> |
| <span class="sd"> :type destination_bucket: string</span> |
| <span class="sd"> :param destination_object: The (renamed) path of the object if given.</span> |
| <span class="sd"> Can be omitted; then the same name is used.</span> |
| <span class="sd"> """</span> |
| <span class="n">destination_object</span> <span class="o">=</span> <span class="n">destination_object</span> <span class="ow">or</span> <span class="n">source_object</span> |
| <span class="k">if</span> <span class="p">(</span><span class="n">source_bucket</span> <span class="o">==</span> <span class="n">destination_bucket</span> <span class="ow">and</span> |
| <span class="n">source_object</span> <span class="o">==</span> <span class="n">destination_object</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> |
| <span class="s1">'Either source/destination bucket or source/destination object '</span> |
| <span class="s1">'must be different, not both the same: bucket=</span><span class="si">%s</span><span class="s1">, object=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> |
| <span class="p">(</span><span class="n">source_bucket</span><span class="p">,</span> <span class="n">source_object</span><span class="p">))</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">source_bucket</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">source_object</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'source_bucket and source_object cannot be empty.'</span><span class="p">)</span> |
| |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| <span class="n">request_count</span> <span class="o">=</span> <span class="mi">1</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">result</span> <span class="o">=</span> <span class="n">service</span><span class="o">.</span><span class="n">objects</span><span class="p">()</span> \ |
| <span class="o">.</span><span class="n">rewrite</span><span class="p">(</span><span class="n">sourceBucket</span><span class="o">=</span><span class="n">source_bucket</span><span class="p">,</span> <span class="n">sourceObject</span><span class="o">=</span><span class="n">source_object</span><span class="p">,</span> |
| <span class="n">destinationBucket</span><span class="o">=</span><span class="n">destination_bucket</span><span class="p">,</span> |
| <span class="n">destinationObject</span><span class="o">=</span><span class="n">destination_object</span><span class="p">,</span> <span class="n">body</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span> \ |
| <span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Rewrite request #</span><span class="si">%s</span><span class="s1">: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">request_count</span><span class="p">,</span> <span class="n">result</span><span class="p">)</span> |
| <span class="k">while</span> <span class="ow">not</span> <span class="n">result</span><span class="p">[</span><span class="s1">'done'</span><span class="p">]:</span> |
| <span class="n">request_count</span> <span class="o">+=</span> <span class="mi">1</span> |
| <span class="n">result</span> <span class="o">=</span> <span class="n">service</span><span class="o">.</span><span class="n">objects</span><span class="p">()</span> \ |
| <span class="o">.</span><span class="n">rewrite</span><span class="p">(</span><span class="n">sourceBucket</span><span class="o">=</span><span class="n">source_bucket</span><span class="p">,</span> <span class="n">sourceObject</span><span class="o">=</span><span class="n">source_object</span><span class="p">,</span> |
| <span class="n">destinationBucket</span><span class="o">=</span><span class="n">destination_bucket</span><span class="p">,</span> |
| <span class="n">destinationObject</span><span class="o">=</span><span class="n">destination_object</span><span class="p">,</span> |
| <span class="n">rewriteToken</span><span class="o">=</span><span class="n">result</span><span class="p">[</span><span class="s1">'rewriteToken'</span><span class="p">],</span> <span class="n">body</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span> \ |
| <span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Rewrite request #</span><span class="si">%s</span><span class="s1">: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">request_count</span><span class="p">,</span> <span class="n">result</span><span class="p">)</span> |
| <span class="k">return</span> <span class="kc">True</span> |
| <span class="k">except</span> <span class="n">errors</span><span class="o">.</span><span class="n">HttpError</span> <span class="k">as</span> <span class="n">ex</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">ex</span><span class="o">.</span><span class="n">resp</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'404'</span><span class="p">:</span> |
| <span class="k">return</span> <span class="kc">False</span> |
| <span class="k">raise</span></div> |
| |
| <span class="c1"># pylint:disable=redefined-builtin</span> |
| <div class="viewcode-block" id="GoogleCloudStorageHook.download"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.download">[docs]</a> <span class="k">def</span> <span class="nf">download</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">filename</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Get a file from Google Cloud Storage.</span> |
| |
| <span class="sd"> :param bucket: The bucket to fetch from.</span> |
| <span class="sd"> :type bucket: string</span> |
| <span class="sd"> :param object: The object to fetch.</span> |
| <span class="sd"> :type object: string</span> |
| <span class="sd"> :param filename: If set, a local file path where the file should be written to.</span> |
| <span class="sd"> :type filename: string</span> |
| <span class="sd"> """</span> |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| <span class="n">downloaded_file_bytes</span> <span class="o">=</span> <span class="n">service</span> \ |
| <span class="o">.</span><span class="n">objects</span><span class="p">()</span> \ |
| <span class="o">.</span><span class="n">get_media</span><span class="p">(</span><span class="n">bucket</span><span class="o">=</span><span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span> \ |
| <span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| |
| <span class="c1"># Write the file to local file path, if requested.</span> |
| <span class="k">if</span> <span class="n">filename</span><span class="p">:</span> |
| <span class="n">write_argument</span> <span class="o">=</span> <span class="s1">'wb'</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">downloaded_file_bytes</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">)</span> <span class="k">else</span> <span class="s1">'w'</span> |
| <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="n">write_argument</span><span class="p">)</span> <span class="k">as</span> <span class="n">file_fd</span><span class="p">:</span> |
| <span class="n">file_fd</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">downloaded_file_bytes</span><span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">downloaded_file_bytes</span></div> |
| |
| <span class="c1"># pylint:disable=redefined-builtin</span> |
| <div class="viewcode-block" id="GoogleCloudStorageHook.upload"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.upload">[docs]</a> <span class="k">def</span> <span class="nf">upload</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">filename</span><span class="p">,</span> |
| <span class="n">mime_type</span><span class="o">=</span><span class="s1">'application/octet-stream'</span><span class="p">,</span> <span class="n">gzip</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Uploads a local file to Google Cloud Storage.</span> |
| |
| <span class="sd"> :param bucket: The bucket to upload to.</span> |
| <span class="sd"> :type bucket: string</span> |
| <span class="sd"> :param object: The object name to set when uploading the local file.</span> |
| <span class="sd"> :type object: string</span> |
| <span class="sd"> :param filename: The local file path to the file to be uploaded.</span> |
| <span class="sd"> :type filename: string</span> |
| <span class="sd"> :param mime_type: The MIME type to set when uploading the file.</span> |
| <span class="sd"> :type mime_type: str</span> |
| <span class="sd"> :param gzip: Option to compress file for upload</span> |
| <span class="sd"> :type gzip: bool</span> |
| <span class="sd"> """</span> |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| |
| <span class="k">if</span> <span class="n">gzip</span><span class="p">:</span> |
| <span class="n">filename_gz</span> <span class="o">=</span> <span class="n">filename</span> <span class="o">+</span> <span class="s1">'.gz'</span> |
| |
| <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="s1">'rb'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f_in</span><span class="p">:</span> |
| <span class="k">with</span> <span class="n">gz</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">filename_gz</span><span class="p">,</span> <span class="s1">'wb'</span><span class="p">)</span> <span class="k">as</span> <span class="n">f_out</span><span class="p">:</span> |
| <span class="n">shutil</span><span class="o">.</span><span class="n">copyfileobj</span><span class="p">(</span><span class="n">f_in</span><span class="p">,</span> <span class="n">f_out</span><span class="p">)</span> |
| <span class="n">filename</span> <span class="o">=</span> <span class="n">filename_gz</span> |
| |
| <span class="n">media</span> <span class="o">=</span> <span class="n">MediaFileUpload</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="n">mime_type</span><span class="p">)</span> |
| |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">service</span> \ |
| <span class="o">.</span><span class="n">objects</span><span class="p">()</span> \ |
| <span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="n">bucket</span><span class="o">=</span><span class="n">bucket</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="nb">object</span><span class="p">,</span> <span class="n">media_body</span><span class="o">=</span><span class="n">media</span><span class="p">)</span> \ |
| <span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| |
| <span class="c1"># Clean up gzip file</span> |
| <span class="k">if</span> <span class="n">gzip</span><span class="p">:</span> |
| <span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span> |
| <span class="k">return</span> <span class="kc">True</span> |
| <span class="k">except</span> <span class="n">errors</span><span class="o">.</span><span class="n">HttpError</span> <span class="k">as</span> <span class="n">ex</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">ex</span><span class="o">.</span><span class="n">resp</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'404'</span><span class="p">:</span> |
| <span class="k">return</span> <span class="kc">False</span> |
| <span class="k">raise</span></div> |
| |
| <span class="c1"># pylint:disable=redefined-builtin</span> |
| <div class="viewcode-block" id="GoogleCloudStorageHook.exists"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.exists">[docs]</a> <span class="k">def</span> <span class="nf">exists</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Checks for the existence of a file in Google Cloud Storage.</span> |
| |
| <span class="sd"> :param bucket: The Google cloud storage bucket where the object is.</span> |
| <span class="sd"> :type bucket: string</span> |
| <span class="sd"> :param object: The name of the object to check in the Google cloud</span> |
| <span class="sd"> storage bucket.</span> |
| <span class="sd"> :type object: string</span> |
| <span class="sd"> """</span> |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">service</span> \ |
| <span class="o">.</span><span class="n">objects</span><span class="p">()</span> \ |
| <span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">bucket</span><span class="o">=</span><span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span> \ |
| <span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| <span class="k">return</span> <span class="kc">True</span> |
| <span class="k">except</span> <span class="n">errors</span><span class="o">.</span><span class="n">HttpError</span> <span class="k">as</span> <span class="n">ex</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">ex</span><span class="o">.</span><span class="n">resp</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'404'</span><span class="p">:</span> |
| <span class="k">return</span> <span class="kc">False</span> |
| <span class="k">raise</span></div> |
| |
| <span class="c1"># pylint:disable=redefined-builtin</span> |
| <div class="viewcode-block" id="GoogleCloudStorageHook.is_updated_after"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.is_updated_after">[docs]</a> <span class="k">def</span> <span class="nf">is_updated_after</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">ts</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Checks if an object was updated after the given timestamp in Google Cloud Storage.</span> |
| |
| <span class="sd"> :param bucket: The Google cloud storage bucket where the object is.</span> |
| <span class="sd"> :type bucket: string</span> |
| <span class="sd"> :param object: The name of the object to check in the Google cloud</span> |
| <span class="sd"> storage bucket.</span> |
| <span class="sd"> :type object: string</span> |
| <span class="sd"> :param ts: The timestamp to check against.</span> |
| <span class="sd"> :type ts: datetime</span> |
| <span class="sd"> """</span> |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">response</span> <span class="o">=</span> <span class="p">(</span><span class="n">service</span> |
| <span class="o">.</span><span class="n">objects</span><span class="p">()</span> |
| <span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">bucket</span><span class="o">=</span><span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span> |
| <span class="o">.</span><span class="n">execute</span><span class="p">())</span> |
| |
| <span class="k">if</span> <span class="s1">'updated'</span> <span class="ow">in</span> <span class="n">response</span><span class="p">:</span> |
| <span class="kn">import</span> <span class="nn">dateutil.parser</span> |
| <span class="kn">import</span> <span class="nn">dateutil.tz</span> |
| |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">ts</span><span class="o">.</span><span class="n">tzinfo</span><span class="p">:</span> |
| <span class="n">ts</span> <span class="o">=</span> <span class="n">ts</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">tzinfo</span><span class="o">=</span><span class="n">dateutil</span><span class="o">.</span><span class="n">tz</span><span class="o">.</span><span class="n">tzutc</span><span class="p">())</span> |
| |
| <span class="n">updated</span> <span class="o">=</span> <span class="n">dateutil</span><span class="o">.</span><span class="n">parser</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">response</span><span class="p">[</span><span class="s1">'updated'</span><span class="p">])</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Verify object date: </span><span class="si">%s</span><span class="s2"> > </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">updated</span><span class="p">,</span> <span class="n">ts</span><span class="p">)</span> |
| |
| <span class="k">if</span> <span class="n">updated</span> <span class="o">></span> <span class="n">ts</span><span class="p">:</span> |
| <span class="k">return</span> <span class="kc">True</span> |
| |
| <span class="k">except</span> <span class="n">errors</span><span class="o">.</span><span class="n">HttpError</span> <span class="k">as</span> <span class="n">ex</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">ex</span><span class="o">.</span><span class="n">resp</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">!=</span> <span class="s1">'404'</span><span class="p">:</span> |
| <span class="k">raise</span> |
| |
| <span class="k">return</span> <span class="kc">False</span></div> |
| |
| <div class="viewcode-block" id="GoogleCloudStorageHook.delete"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.delete">[docs]</a> <span class="k">def</span> <span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">generation</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Delete an object if versioning is not enabled for the bucket, or if the generation</span> |
| <span class="sd"> parameter is used.</span> |
| |
| <span class="sd"> :param bucket: name of the bucket, where the object resides</span> |
| <span class="sd"> :type bucket: string</span> |
| <span class="sd"> :param object: name of the object to delete</span> |
| <span class="sd"> :type object: string</span> |
| <span class="sd"> :param generation: if present, permanently delete the object of this generation</span> |
| <span class="sd"> :type generation: string</span> |
| <span class="sd"> :return: True if succeeded, False if the object was not found</span> |
| <span class="sd"> """</span> |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">service</span> \ |
| <span class="o">.</span><span class="n">objects</span><span class="p">()</span> \ |
| <span class="o">.</span><span class="n">delete</span><span class="p">(</span><span class="n">bucket</span><span class="o">=</span><span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="o">=</span><span class="nb">object</span><span class="p">,</span> <span class="n">generation</span><span class="o">=</span><span class="n">generation</span><span class="p">)</span> \ |
| <span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| <span class="k">return</span> <span class="kc">True</span> |
| <span class="k">except</span> <span class="n">errors</span><span class="o">.</span><span class="n">HttpError</span> <span class="k">as</span> <span class="n">ex</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">ex</span><span class="o">.</span><span class="n">resp</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'404'</span><span class="p">:</span> |
| <span class="k">return</span> <span class="kc">False</span> |
| <span class="k">raise</span></div> |
| |
| <div class="viewcode-block" id="GoogleCloudStorageHook.list"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.list">[docs]</a> <span class="k">def</span> <span class="nf">list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="n">versions</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">maxResults</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">prefix</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delimiter</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> List all objects from the bucket with the given string prefix in name</span> |
| |
| <span class="sd"> :param bucket: bucket name</span> |
| <span class="sd"> :type bucket: string</span> |
| <span class="sd"> :param versions: if true, list all versions of the objects</span> |
| <span class="sd"> :type versions: boolean</span> |
| <span class="sd"> :param maxResults: max count of items to return in a single page of responses</span> |
| <span class="sd"> :type maxResults: integer</span> |
| <span class="sd"> :param prefix: prefix string which filters objects whose names begin with</span> |
| <span class="sd"> this prefix</span> |
| <span class="sd"> :type prefix: string</span> |
| <span class="sd"> :param delimiter: filters objects based on the delimiter (e.g. '.csv')</span> |
| <span class="sd"> :type delimiter: string</span> |
| <span class="sd"> :return: a list of object names matching the filtering criteria</span> |
| <span class="sd"> """</span> |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| |
| <span class="n">ids</span> <span class="o">=</span> <span class="nb">list</span><span class="p">()</span> |
| <span class="n">pageToken</span> <span class="o">=</span> <span class="kc">None</span> |
| <span class="k">while</span><span class="p">(</span><span class="kc">True</span><span class="p">):</span> |
| <span class="n">response</span> <span class="o">=</span> <span class="n">service</span><span class="o">.</span><span class="n">objects</span><span class="p">()</span><span class="o">.</span><span class="n">list</span><span class="p">(</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="n">bucket</span><span class="p">,</span> |
| <span class="n">versions</span><span class="o">=</span><span class="n">versions</span><span class="p">,</span> |
| <span class="n">maxResults</span><span class="o">=</span><span class="n">maxResults</span><span class="p">,</span> |
| <span class="n">pageToken</span><span class="o">=</span><span class="n">pageToken</span><span class="p">,</span> |
| <span class="n">prefix</span><span class="o">=</span><span class="n">prefix</span><span class="p">,</span> |
| <span class="n">delimiter</span><span class="o">=</span><span class="n">delimiter</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| |
| <span class="k">if</span> <span class="s1">'prefixes'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">response</span><span class="p">:</span> |
| <span class="k">if</span> <span class="s1">'items'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">response</span><span class="p">:</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"No items found for prefix: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">prefix</span><span class="p">)</span> |
| <span class="k">break</span> |
| |
| <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">response</span><span class="p">[</span><span class="s1">'items'</span><span class="p">]:</span> |
| <span class="k">if</span> <span class="n">item</span> <span class="ow">and</span> <span class="s1">'name'</span> <span class="ow">in</span> <span class="n">item</span><span class="p">:</span> |
| <span class="n">ids</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="s1">'name'</span><span class="p">])</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">response</span><span class="p">[</span><span class="s1">'prefixes'</span><span class="p">]:</span> |
| <span class="n">ids</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> |
| |
| <span class="k">if</span> <span class="s1">'nextPageToken'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">response</span><span class="p">:</span> |
| <span class="c1"># no further pages of results, so stop the loop</span> |
| <span class="k">break</span> |
| |
| <span class="n">pageToken</span> <span class="o">=</span> <span class="n">response</span><span class="p">[</span><span class="s1">'nextPageToken'</span><span class="p">]</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">pageToken</span><span class="p">:</span> |
| <span class="c1"># empty next page token</span> |
| <span class="k">break</span> |
| <span class="k">return</span> <span class="n">ids</span></div> |
| |
| <div class="viewcode-block" id="GoogleCloudStorageHook.get_size"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_size">[docs]</a> <span class="k">def</span> <span class="nf">get_size</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Gets the size of a file in Google Cloud Storage.</span> |
| |
| <span class="sd"> :param bucket: The Google cloud storage bucket where the object is.</span> |
| <span class="sd"> :type bucket: string</span> |
| <span class="sd"> :param object: The name of the object to check in the Google cloud storage bucket.</span> |
| <span class="sd"> :type object: string</span> |
| |
| <span class="sd"> """</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Checking the file size of object: </span><span class="si">%s</span><span class="s1"> in bucket: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> |
| <span class="nb">object</span><span class="p">,</span> |
| <span class="n">bucket</span><span class="p">)</span> |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">response</span> <span class="o">=</span> <span class="n">service</span><span class="o">.</span><span class="n">objects</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="n">bucket</span><span class="p">,</span> |
| <span class="nb">object</span><span class="o">=</span><span class="nb">object</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| |
| <span class="k">if</span> <span class="s1">'name'</span> <span class="ow">in</span> <span class="n">response</span> <span class="ow">and</span> <span class="n">response</span><span class="p">[</span><span class="s1">'name'</span><span class="p">][</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">!=</span> <span class="s1">'/'</span><span class="p">:</span> |
| <span class="c1"># Remove Directories & Just check size of files</span> |
| <span class="n">size</span> <span class="o">=</span> <span class="n">response</span><span class="p">[</span><span class="s1">'size'</span><span class="p">]</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'The file size of </span><span class="si">%s</span><span class="s1"> is </span><span class="si">%s</span><span class="s1"> bytes.'</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">size</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">size</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Object is not a file'</span><span class="p">)</span> |
| <span class="k">except</span> <span class="n">errors</span><span class="o">.</span><span class="n">HttpError</span> <span class="k">as</span> <span class="n">ex</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">ex</span><span class="o">.</span><span class="n">resp</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'404'</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Object Not Found'</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GoogleCloudStorageHook.get_crc32c"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_crc32c">[docs]</a> <span class="k">def</span> <span class="nf">get_crc32c</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Gets the CRC32c checksum of an object in Google Cloud Storage.</span> |
| |
| <span class="sd"> :param bucket: The Google cloud storage bucket where the object is.</span> |
| <span class="sd"> :type bucket: string</span> |
| <span class="sd"> :param object: The name of the object to check in the Google cloud</span> |
| <span class="sd"> storage bucket.</span> |
| <span class="sd"> :type object: string</span> |
| <span class="sd"> """</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Retrieving the crc32c checksum of '</span> |
| <span class="s1">'object: </span><span class="si">%s</span><span class="s1"> in bucket: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">bucket</span><span class="p">)</span> |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">response</span> <span class="o">=</span> <span class="n">service</span><span class="o">.</span><span class="n">objects</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="n">bucket</span><span class="p">,</span> |
| <span class="nb">object</span><span class="o">=</span><span class="nb">object</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| |
| <span class="n">crc32c</span> <span class="o">=</span> <span class="n">response</span><span class="p">[</span><span class="s1">'crc32c'</span><span class="p">]</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'The crc32c checksum of </span><span class="si">%s</span><span class="s1"> is </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">crc32c</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">crc32c</span> |
| |
| <span class="k">except</span> <span class="n">errors</span><span class="o">.</span><span class="n">HttpError</span> <span class="k">as</span> <span class="n">ex</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">ex</span><span class="o">.</span><span class="n">resp</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'404'</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Object Not Found'</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GoogleCloudStorageHook.get_md5hash"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.get_md5hash">[docs]</a> <span class="k">def</span> <span class="nf">get_md5hash</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="nb">object</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Gets the MD5 hash of an object in Google Cloud Storage.</span> |
| |
| <span class="sd"> :param bucket: The Google cloud storage bucket where the object is.</span> |
| <span class="sd"> :type bucket: string</span> |
| <span class="sd"> :param object: The name of the object to check in the Google cloud</span> |
| <span class="sd"> storage bucket.</span> |
| <span class="sd"> :type object: string</span> |
| <span class="sd"> """</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Retrieving the MD5 hash of '</span> |
| <span class="s1">'object: </span><span class="si">%s</span><span class="s1"> in bucket: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">bucket</span><span class="p">)</span> |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">response</span> <span class="o">=</span> <span class="n">service</span><span class="o">.</span><span class="n">objects</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span> |
| <span class="n">bucket</span><span class="o">=</span><span class="n">bucket</span><span class="p">,</span> |
| <span class="nb">object</span><span class="o">=</span><span class="nb">object</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| |
| <span class="n">md5hash</span> <span class="o">=</span> <span class="n">response</span><span class="p">[</span><span class="s1">'md5Hash'</span><span class="p">]</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'The md5Hash of </span><span class="si">%s</span><span class="s1"> is </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="nb">object</span><span class="p">,</span> <span class="n">md5hash</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">md5hash</span> |
| |
| <span class="k">except</span> <span class="n">errors</span><span class="o">.</span><span class="n">HttpError</span> <span class="k">as</span> <span class="n">ex</span><span class="p">:</span> |
| <span class="k">if</span> <span class="n">ex</span><span class="o">.</span><span class="n">resp</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'404'</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Object Not Found'</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GoogleCloudStorageHook.create_bucket"><a class="viewcode-back" href="../../../../integration.html#airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook.create_bucket">[docs]</a> <span class="k">def</span> <span class="nf">create_bucket</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> |
| <span class="n">bucket_name</span><span class="p">,</span> |
| <span class="n">storage_class</span><span class="o">=</span><span class="s1">'MULTI_REGIONAL'</span><span class="p">,</span> |
| <span class="n">location</span><span class="o">=</span><span class="s1">'US'</span><span class="p">,</span> |
| <span class="n">project_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> |
| <span class="n">labels</span><span class="o">=</span><span class="kc">None</span> |
| <span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Creates a new bucket. Google Cloud Storage uses a flat namespace, so</span> |
| <span class="sd"> you can't create a bucket with a name that is already in use.</span> |
| |
| <span class="sd"> .. seealso::</span> |
| <span class="sd"> For more information, see Bucket Naming Guidelines:</span> |
| <span class="sd"> https://cloud.google.com/storage/docs/bucketnaming.html#requirements</span> |
| |
| <span class="sd"> :param bucket_name: The name of the bucket.</span> |
| <span class="sd"> :type bucket_name: string</span> |
| <span class="sd"> :param storage_class: This defines how objects in the bucket are stored</span> |
| <span class="sd"> and determines the SLA and the cost of storage. Values include</span> |
| |
| <span class="sd"> - ``MULTI_REGIONAL``</span> |
| <span class="sd"> - ``REGIONAL``</span> |
| <span class="sd"> - ``STANDARD``</span> |
| <span class="sd"> - ``NEARLINE``</span> |
| <span class="sd"> - ``COLDLINE``.</span> |
| <span class="sd"> If this value is not specified when the bucket is</span> |
| <span class="sd"> created, it will default to STANDARD.</span> |
| <span class="sd"> :type storage_class: string</span> |
| <span class="sd"> :param location: The location of the bucket.</span> |
| <span class="sd"> Object data for objects in the bucket resides in physical storage</span> |
| <span class="sd"> within this region. Defaults to US.</span> |
| |
| <span class="sd"> .. seealso::</span> |
| <span class="sd"> https://developers.google.com/storage/docs/bucket-locations</span> |
| |
| <span class="sd"> :type location: string</span> |
| <span class="sd"> :param project_id: The ID of the GCP Project.</span> |
| <span class="sd"> :type project_id: string</span> |
| <span class="sd"> :param labels: User-provided labels, in key/value pairs.</span> |
| <span class="sd"> :type labels: dict</span> |
| <span class="sd"> :return: If successful, it returns the ``id`` of the bucket.</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">project_id</span> <span class="o">=</span> <span class="n">project_id</span> <span class="k">if</span> <span class="n">project_id</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span> |
| <span class="n">storage_classes</span> <span class="o">=</span> <span class="p">[</span> |
| <span class="s1">'MULTI_REGIONAL'</span><span class="p">,</span> |
| <span class="s1">'REGIONAL'</span><span class="p">,</span> |
| <span class="s1">'NEARLINE'</span><span class="p">,</span> |
| <span class="s1">'COLDLINE'</span><span class="p">,</span> |
| <span class="s1">'STANDARD'</span><span class="p">,</span> <span class="c1"># alias for MULTI_REGIONAL/REGIONAL, based on location</span> |
| <span class="p">]</span> |
| |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Creating Bucket: </span><span class="si">%s</span><span class="s1">; Location: </span><span class="si">%s</span><span class="s1">; Storage Class: </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> |
| <span class="n">bucket_name</span><span class="p">,</span> <span class="n">location</span><span class="p">,</span> <span class="n">storage_class</span><span class="p">)</span> |
| <span class="k">if</span> <span class="n">storage_class</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">storage_classes</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span> |
| <span class="s1">'Invalid value (</span><span class="si">{}</span><span class="s1">) passed to storage_class. Value should be '</span> |
| <span class="s1">'one of </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">storage_class</span><span class="p">,</span> <span class="n">storage_classes</span><span class="p">))</span> |
| |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'[a-zA-Z0-9]+'</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">[</span><span class="mi">0</span><span class="p">]):</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Bucket names must start with a number or letter.'</span><span class="p">)</span> |
| |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'[a-zA-Z0-9]+'</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]):</span> |
| <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Bucket names must end with a number or letter.'</span><span class="p">)</span> |
| |
| <span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> |
| <span class="n">bucket_resource</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'name'</span><span class="p">:</span> <span class="n">bucket_name</span><span class="p">,</span> |
| <span class="s1">'location'</span><span class="p">:</span> <span class="n">location</span><span class="p">,</span> |
| <span class="s1">'storageClass'</span><span class="p">:</span> <span class="n">storage_class</span> |
| <span class="p">}</span> |
| |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'The Default Project ID is </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">)</span> |
| |
| <span class="k">if</span> <span class="n">labels</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| <span class="n">bucket_resource</span><span class="p">[</span><span class="s1">'labels'</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span> |
| |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">response</span> <span class="o">=</span> <span class="n">service</span><span class="o">.</span><span class="n">buckets</span><span class="p">()</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span> |
| <span class="n">project</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span> |
| <span class="n">body</span><span class="o">=</span><span class="n">bucket_resource</span> |
| <span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">()</span> |
| |
| <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Bucket: </span><span class="si">%s</span><span class="s1"> created successfully.'</span><span class="p">,</span> <span class="n">bucket_name</span><span class="p">)</span> |
| |
| <span class="k">return</span> <span class="n">response</span><span class="p">[</span><span class="s1">'id'</span><span class="p">]</span> |
| |
| <span class="k">except</span> <span class="n">errors</span><span class="o">.</span><span class="n">HttpError</span> <span class="k">as</span> <span class="n">ex</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span> |
| <span class="s1">'Bucket creation failed. Error was: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">ex</span><span class="o">.</span><span class="n">content</span><span class="p">)</span> |
| <span class="p">)</span></div></div> |
| |
| |
| <span class="k">def</span> <span class="nf">_parse_gcs_url</span><span class="p">(</span><span class="n">gsurl</span><span class="p">):</span> |
| <span class="sd">"""</span> |
| <span class="sd"> Given a Google Cloud Storage URL (gs://&lt;bucket&gt;/&lt;blob&gt;), returns a</span> |
| <span class="sd"> tuple containing the corresponding bucket and blob.</span> |
| <span class="sd"> """</span> |
| <span class="c1"># Python 3</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">urllib.parse</span> <span class="k">import</span> <span class="n">urlparse</span> |
| <span class="c1"># Python 2</span> |
| <span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">urlparse</span> <span class="k">import</span> <span class="n">urlparse</span> |
| |
| <span class="n">parsed_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">gsurl</span><span class="p">)</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s1">'Please provide a bucket name'</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="n">bucket</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> |
| <span class="c1"># Remove leading '/' but NOT trailing one</span> |
| <span class="n">blob</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">lstrip</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">bucket</span><span class="p">,</span> <span class="n">blob</span> |
| </pre></div> |
| |
| </div> |
| |
| </div> |
| <footer> |
| <!-- Page footer: a rule, the contentinfo block (its paragraph is empty in this build), and the build attribution. All attribution links use HTTPS. --> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p> |
| |
| </p> |
| </div> |
| Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| |
| <!-- External scripts, kept in their original order. The inline initializer later in this page calls jQuery and SphinxRtdTheme, presumably provided by jquery.js and js/theme.js. --> |
| <script id="documentation_options" data-url_root="../../../../" src="../../../../_static/documentation_options.js"></script> |
| <script src="../../../../_static/jquery.js"></script> |
| <script src="../../../../_static/underscore.js"></script> |
| <script src="../../../../_static/doctools.js"></script> |
| |
| |
| |
| |
| <script src="../../../../_static/js/theme.js"></script> |
| |
| <!-- Inline initializer: hands jQuery a callback that calls SphinxRtdTheme.Navigation.enable(true); the meaning of the `true` argument is defined by the theme script, not here. --> |
| <script> |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| </body> |
| </html> |