<!-- blob: 81fd29d3b639bafdd53d83a7bae3717277db6d76 [file] [log] [blame] -->
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>airflow.contrib.hooks.bigquery_hook &mdash; Airflow Documentation</title>
<script type="text/javascript" src="../../../../_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="../../../../" src="../../../../_static/documentation_options.js"></script>
<script type="text/javascript" src="../../../../_static/jquery.js"></script>
<script type="text/javascript" src="../../../../_static/underscore.js"></script>
<script type="text/javascript" src="../../../../_static/doctools.js"></script>
<script type="text/javascript" src="../../../../_static/language_data.js"></script>
<script type="text/javascript" src="../../../../_static/js/theme.js"></script>
<link rel="stylesheet" href="../../../../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../../../../_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<script>
  // Once the DOM is parsed, turn issue and pull-request references inside the
  // element with id "changelog" into hyperlinks. Pages without that element
  // are left untouched.
  document.addEventListener('DOMContentLoaded', function () {
    var changelog = document.getElementById('changelog');
    if (changelog === null) {
      return;
    }

    // Each rule pairs a global pattern with the anchor markup that replaces a match.
    var linkRules = [
      // [AIRFLOW-...]
      [
        /\[(AIRFLOW-[\d]+)\]/g,
        '<a href="https://issues.apache.org/jira/browse/$1">[$1]</a>'
      ],
      // (#...)
      [
        /\(#([\d]+)\)/g,
        '<a href="https://github.com/apache/airflow/pull/$1">(#$1)</a>'
      ]
    ];

    // Rules run in order; every rule re-reads and re-writes innerHTML.
    linkRules.forEach(function (rule) {
      changelog.innerHTML = changelog.innerHTML.replace(rule[0], rule[1]);
    });
  });
</script>
<style>
  /* Coloured header bar; position: relative makes it the containing block
     for absolutely positioned descendants. */
  .example-header {
    position: relative;
    background: #9AAA7A;
    padding: 8px 16px;
    margin-bottom: 0;
  }

  /* Variant with wide right padding. NOTE(review): presumably reserves room
     for .example-header-button; confirm against the templates that use it. */
  .example-header--with-button {
    padding-right: 166px;
  }

  /* Clearfix so the header wraps any floated children. */
  .example-header:after {
    content: '';
    display: table;
    clear: both;
  }

  /* White title text; scrolls horizontally instead of overflowing. */
  .example-title {
    display: block;
    padding: 4px;
    margin-right: 16px;
    color: white;
    overflow-x: auto;
  }

  /* Pinned 8px from the top and 16px from the right of its containing block. */
  .example-header-button {
    top: 8px;
    right: 16px;
    position: absolute;
  }

  /* Remove the gap between a header and the Python highlight block that
     immediately follows it. */
  .example-header + .highlight-python {
    margin-top: 0 !important;
  }

  /* Flat grey, upper-case button; long labels are clipped with an ellipsis. */
  .viewcode-button {
    display: inline-block;
    padding: 8px 16px;
    border: 0;
    margin: 0;
    outline: 0;
    border-radius: 2px;
    /* The prefixed fallback now uses the same 6px blur as the standard
       property, so prefixed-only engines draw the same shadow. */
    -webkit-box-shadow: 0 3px 6px 0 rgba(0,0,0,.3);
    box-shadow: 0 3px 6px 0 rgba(0,0,0,.3);
    color: #404040;
    background-color: #e7e7e7;
    cursor: pointer;
    font-size: 16px;
    font-weight: 500;
    line-height: 1;
    text-decoration: none;
    text-overflow: ellipsis;
    overflow: hidden;
    text-transform: uppercase;
    -webkit-transition: background-color .2s;
    transition: background-color .2s;
    vertical-align: middle;
    white-space: nowrap;
  }

  /* Keep the text colour the same for visited links. */
  .viewcode-button:visited {
    color: #404040;
  }

  /* Darker background is the hover and keyboard-focus indication
     (the default outline is removed above). */
  .viewcode-button:hover, .viewcode-button:focus {
    color: #404040;
    background-color: #d6d6d6;
  }
</style>
<script type="application/javascript">
// Define a stub ga() (unless window.ga already exists) that appends each
// call's arguments to the ga.q array, and store the current time as a number in ga.l.
window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;
// Issue the "create" command for this tracking ID, then a "pageview" hit.
// NOTE(review): with the stub in place these calls are only queued; presumably
// analytics.js processes ga.q once it has loaded - confirm against Google's docs.
ga("create", "UA-140539454-1", "auto");
ga("send", "pageview");
</script>
<!-- Loaded with the async attribute, so fetching analytics.js does not block HTML parsing. -->
<script async src="https://www.google-analytics.com/analytics.js"></script>
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<!-- Sidebar header: home link, documentation version, and the search form. -->
<div class="wy-side-nav-search" >
  <a href="../../../../index.html" class="icon icon-home"> Airflow
  </a>
  <div class="version">
    1.10.4
  </div>
  <div role="search">
    <!-- GET form that submits the query as "q" to search.html, together with
         the two hidden fields below. -->
    <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
      <!-- A placeholder alone is not an accessible name, so aria-label supplies
           one without changing the visual design. -->
      <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
      <input type="hidden" name="check_keywords" value="yes" />
      <input type="hidden" name="area" value="default" />
    </form>
  </div>
</div>
<!-- Sidebar table of contents: one toctree-l1 entry per documentation section.
     Every href climbs four directories (../../../../) before naming its target page. -->
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../project.html">Project</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../license.html">License</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../start.html">Quick Start</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../tutorial.html">Tutorial</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../howto/index.html">How-to Guides</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../ui.html">UI / Screenshots</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../concepts.html">Concepts</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../profiling.html">Data Profiling</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../cli.html">Command Line Interface</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../scheduler.html">Scheduling &amp; Triggers</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../plugins.html">Plugins</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../security.html">Security</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../timezone.html">Time zones</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api.html">Experimental Rest API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../integration.html">Integration</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../metrics.html">Metrics</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../kubernetes.html">Kubernetes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../lineage.html">Lineage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../changelog.html">Changelog</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../faq.html">FAQ</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../macros.html">Macros reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../_api/index.html">API Reference</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<!-- Top bar: an icon element tagged data-toggle="wy-nav-top" followed by a link to the docs index.
     NOTE(review): the data-toggle hook is presumably what the theme script binds to in order
     to open the sidebar; confirm before changing the element type. -->
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../../index.html">Airflow</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<!-- Breadcrumb trail: Docs, Module code, airflow.contrib.hooks, then the current module as
     plain text. The trailing wy-breadcrumbs-aside item is empty on this page. -->
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../../index.html">Docs</a> &raquo;</li>
<li><a href="../../../index.html">Module code</a> &raquo;</li>
<li><a href="../hooks.html">airflow.contrib.hooks</a> &raquo;</li>
<li>airflow.contrib.hooks.bigquery_hook</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for airflow.contrib.hooks.bigquery_hook</h1><div class="highlight"><pre>
<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
<span class="c1">#</span>
<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one</span>
<span class="c1"># or more contributor license agreements. See the NOTICE file</span>
<span class="c1"># distributed with this work for additional information</span>
<span class="c1"># regarding copyright ownership. The ASF licenses this file</span>
<span class="c1"># to you under the Apache License, Version 2.0 (the</span>
<span class="c1"># &quot;License&quot;); you may not use this file except in compliance</span>
<span class="c1"># with the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing,</span>
<span class="c1"># software distributed under the License is distributed on an</span>
<span class="c1"># &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY</span>
<span class="c1"># KIND, either express or implied. See the License for the</span>
<span class="c1"># specific language governing permissions and limitations</span>
<span class="c1"># under the License.</span>
<span class="c1">#</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd">This module contains a BigQuery Hook, as well as a very basic PEP 249</span>
<span class="sd">implementation for BigQuery.</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="kn">import</span> <span class="nn">time</span>
<span class="kn">import</span> <span class="nn">six</span>
<span class="kn">from</span> <span class="nn">builtins</span> <span class="k">import</span> <span class="nb">range</span>
<span class="kn">from</span> <span class="nn">copy</span> <span class="k">import</span> <span class="n">deepcopy</span>
<span class="kn">from</span> <span class="nn">six</span> <span class="k">import</span> <span class="n">iteritems</span>
<span class="kn">from</span> <span class="nn">past.builtins</span> <span class="k">import</span> <span class="n">basestring</span>
<span class="kn">from</span> <span class="nn">airflow</span> <span class="k">import</span> <span class="n">AirflowException</span>
<span class="kn">from</span> <span class="nn">airflow.contrib.hooks.gcp_api_base_hook</span> <span class="k">import</span> <span class="n">GoogleCloudBaseHook</span>
<span class="kn">from</span> <span class="nn">airflow.hooks.dbapi_hook</span> <span class="k">import</span> <span class="n">DbApiHook</span>
<span class="kn">from</span> <span class="nn">airflow.utils.log.logging_mixin</span> <span class="k">import</span> <span class="n">LoggingMixin</span>
<span class="kn">from</span> <span class="nn">googleapiclient.discovery</span> <span class="k">import</span> <span class="n">build</span>
<span class="kn">from</span> <span class="nn">googleapiclient.errors</span> <span class="k">import</span> <span class="n">HttpError</span>
<span class="kn">from</span> <span class="nn">pandas_gbq.gbq</span> <span class="k">import</span> \
<span class="n">_check_google_client_version</span> <span class="k">as</span> <span class="n">gbq_check_google_client_version</span>
<span class="kn">from</span> <span class="nn">pandas_gbq</span> <span class="k">import</span> <span class="n">read_gbq</span>
<span class="kn">from</span> <span class="nn">pandas_gbq.gbq</span> <span class="k">import</span> \
<span class="n">_test_google_api_imports</span> <span class="k">as</span> <span class="n">gbq_test_google_api_imports</span>
<span class="kn">from</span> <span class="nn">pandas_gbq.gbq</span> <span class="k">import</span> <span class="n">GbqConnector</span>
<div class="viewcode-block" id="BigQueryHook"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryHook">[docs]</a><span class="k">class</span> <span class="nc">BigQueryHook</span><span class="p">(</span><span class="n">GoogleCloudBaseHook</span><span class="p">,</span> <span class="n">DbApiHook</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Interact with BigQuery. This hook uses the Google Cloud Platform</span>
<span class="sd"> connection.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="BigQueryHook.conn_name_attr"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryHook.conn_name_attr">[docs]</a> <span class="n">conn_name_attr</span> <span class="o">=</span> <span class="s1">&#39;bigquery_conn_id&#39;</span></div>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">bigquery_conn_id</span><span class="o">=</span><span class="s1">&#39;bigquery_default&#39;</span><span class="p">,</span>
<span class="n">delegate_to</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">use_legacy_sql</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">BigQueryHook</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span>
<span class="n">gcp_conn_id</span><span class="o">=</span><span class="n">bigquery_conn_id</span><span class="p">,</span> <span class="n">delegate_to</span><span class="o">=</span><span class="n">delegate_to</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">use_legacy_sql</span> <span class="o">=</span> <span class="n">use_legacy_sql</span>
<span class="bp">self</span><span class="o">.</span><span class="n">location</span> <span class="o">=</span> <span class="n">location</span>
<span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_field</span><span class="p">(</span><span class="s1">&#39;num_retries&#39;</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
<div class="viewcode-block" id="BigQueryHook.get_conn"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_conn">[docs]</a> <span class="k">def</span> <span class="nf">get_conn</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a BigQuery PEP 249 connection object.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_service</span><span class="p">()</span>
<span class="n">project</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_field</span><span class="p">(</span><span class="s1">&#39;project&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">BigQueryConnection</span><span class="p">(</span>
<span class="n">service</span><span class="o">=</span><span class="n">service</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="n">project</span><span class="p">,</span>
<span class="n">use_legacy_sql</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">use_legacy_sql</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="p">,</span>
<span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span></div>
<span class="p">)</span>
<div class="viewcode-block" id="BigQueryHook.get_service"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_service">[docs]</a> <span class="k">def</span> <span class="nf">get_service</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a BigQuery service object.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">http_authorized</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_authorize</span><span class="p">()</span>
<span class="k">return</span> <span class="n">build</span><span class="p">(</span>
<span class="s1">&#39;bigquery&#39;</span><span class="p">,</span> <span class="s1">&#39;v2&#39;</span><span class="p">,</span> <span class="n">http</span><span class="o">=</span><span class="n">http_authorized</span><span class="p">,</span> <span class="n">cache_discovery</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryHook.insert_rows"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryHook.insert_rows">[docs]</a> <span class="k">def</span> <span class="nf">insert_rows</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">table</span><span class="p">,</span> <span class="n">rows</span><span class="p">,</span> <span class="n">target_fields</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">commit_every</span><span class="o">=</span><span class="mi">1000</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Insertion is currently unsupported. Theoretically, you could use</span>
<span class="sd"> BigQuery&#39;s streaming API to insert rows into a table, but this hasn&#39;t</span>
<span class="sd"> been implemented.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
<div class="viewcode-block" id="BigQueryHook.get_pandas_df"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryHook.get_pandas_df">[docs]</a> <span class="k">def</span> <span class="nf">get_pandas_df</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sql</span><span class="p">,</span> <span class="n">parameters</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">dialect</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns a Pandas DataFrame for the results produced by a BigQuery</span>
<span class="sd"> query. The DbApiHook method must be overridden because Pandas</span>
<span class="sd"> doesn&#39;t support PEP 249 connections, except for SQLite. See:</span>
<span class="sd"> https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447</span>
<span class="sd"> https://github.com/pydata/pandas/issues/6900</span>
<span class="sd"> :param sql: The BigQuery SQL to execute.</span>
<span class="sd"> :type sql: str</span>
<span class="sd"> :param parameters: The parameters to render the SQL query with (not</span>
<span class="sd"> used, leave to override superclass method)</span>
<span class="sd"> :type parameters: mapping or iterable</span>
<span class="sd"> :param dialect: Dialect of BigQuery SQL – legacy SQL or standard SQL</span>
<span class="sd"> defaults to use `self.use_legacy_sql` if not specified</span>
<span class="sd"> :type dialect: str in {&#39;legacy&#39;, &#39;standard&#39;}</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">private_key</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_field</span><span class="p">(</span><span class="s1">&#39;key_path&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_field</span><span class="p">(</span><span class="s1">&#39;keyfile_dict&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="n">dialect</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">dialect</span> <span class="o">=</span> <span class="s1">&#39;legacy&#39;</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">use_legacy_sql</span> <span class="k">else</span> <span class="s1">&#39;standard&#39;</span>
<span class="k">return</span> <span class="n">read_gbq</span><span class="p">(</span><span class="n">sql</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_get_field</span><span class="p">(</span><span class="s1">&#39;project&#39;</span><span class="p">),</span>
<span class="n">dialect</span><span class="o">=</span><span class="n">dialect</span><span class="p">,</span>
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">private_key</span><span class="o">=</span><span class="n">private_key</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryHook.table_exists"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryHook.table_exists">[docs]</a> <span class="k">def</span> <span class="nf">table_exists</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks for the existence of a table in Google BigQuery.</span>
<span class="sd"> :param project_id: The Google cloud project in which to look for the</span>
<span class="sd"> table. The connection supplied to the hook must provide access to</span>
<span class="sd"> the specified project.</span>
<span class="sd"> :type project_id: str</span>
<span class="sd"> :param dataset_id: The name of the dataset in which to look for the</span>
<span class="sd"> table.</span>
<span class="sd"> :type dataset_id: str</span>
<span class="sd"> :param table_id: The name of the table to check the existence of.</span>
<span class="sd"> :type table_id: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">service</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_service</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">service</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span> <span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span>
<span class="n">tableId</span><span class="o">=</span><span class="n">table_id</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="k">if</span> <span class="n">e</span><span class="o">.</span><span class="n">resp</span><span class="p">[</span><span class="s1">&#39;status&#39;</span><span class="p">]</span> <span class="o">==</span> <span class="s1">&#39;404&#39;</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">raise</span></div></div>
<div class="viewcode-block" id="BigQueryPandasConnector"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryPandasConnector">[docs]</a><span class="k">class</span> <span class="nc">BigQueryPandasConnector</span><span class="p">(</span><span class="n">GbqConnector</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This connector behaves identically to GbqConnector (from Pandas), except</span>
<span class="sd"> that it allows the service to be injected, and disables a call to</span>
<span class="sd"> self.get_credentials(). This allows Airflow to use BigQuery with Pandas</span>
<span class="sd"> without forcing a three legged OAuth connection. Instead, we can inject</span>
<span class="sd"> service account credentials into the binding.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">project_id</span><span class="p">,</span>
<span class="n">service</span><span class="p">,</span>
<span class="n">reauth</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">dialect</span><span class="o">=</span><span class="s1">&#39;legacy&#39;</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">BigQueryPandasConnector</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">project_id</span><span class="p">)</span>
<span class="n">gbq_check_google_client_version</span><span class="p">()</span>
<span class="n">gbq_test_google_api_imports</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">project_id</span> <span class="o">=</span> <span class="n">project_id</span>
<span class="bp">self</span><span class="o">.</span><span class="n">reauth</span> <span class="o">=</span> <span class="n">reauth</span>
<span class="bp">self</span><span class="o">.</span><span class="n">service</span> <span class="o">=</span> <span class="n">service</span>
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dialect</span> <span class="o">=</span> <span class="n">dialect</span></div>
<div class="viewcode-block" id="BigQueryConnection"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryConnection">[docs]</a><span class="k">class</span> <span class="nc">BigQueryConnection</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> BigQuery does not have a notion of a persistent connection. Thus, these</span>
<span class="sd"> objects are small stateless factories for cursors, which do all the real</span>
<span class="sd"> work.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_args</span> <span class="o">=</span> <span class="n">args</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_kwargs</span> <span class="o">=</span> <span class="n">kwargs</span>
<div class="viewcode-block" id="BigQueryConnection.close"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryConnection.close">[docs]</a> <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; BigQueryConnection does not have anything to close. &quot;&quot;&quot;</span>
<span class="k">pass</span></div>
<div class="viewcode-block" id="BigQueryConnection.commit"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryConnection.commit">[docs]</a> <span class="k">def</span> <span class="nf">commit</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; BigQueryConnection does not support transactions. &quot;&quot;&quot;</span>
<span class="k">pass</span></div>
<div class="viewcode-block" id="BigQueryConnection.cursor"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryConnection.cursor">[docs]</a> <span class="k">def</span> <span class="nf">cursor</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; Return a new :py:class:`Cursor` object using the connection. &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">BigQueryCursor</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">_args</span><span class="p">,</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">_kwargs</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryConnection.rollback"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryConnection.rollback">[docs]</a> <span class="k">def</span> <span class="nf">rollback</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span>
<span class="s2">&quot;BigQueryConnection does not have transactions&quot;</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="BigQueryBaseCursor"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor">[docs]</a><span class="k">class</span> <span class="nc">BigQueryBaseCursor</span><span class="p">(</span><span class="n">LoggingMixin</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The BigQuery base cursor contains helper methods to execute queries against</span>
<span class="sd"> BigQuery. The methods can be used directly by operators, in cases where a</span>
<span class="sd"> PEP 249 cursor isn&#39;t needed.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">service</span><span class="p">,</span>
<span class="n">project_id</span><span class="p">,</span>
<span class="n">use_legacy_sql</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">api_resource_configs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">num_retries</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">service</span> <span class="o">=</span> <span class="n">service</span>
<span class="bp">self</span><span class="o">.</span><span class="n">project_id</span> <span class="o">=</span> <span class="n">project_id</span>
<span class="bp">self</span><span class="o">.</span><span class="n">use_legacy_sql</span> <span class="o">=</span> <span class="n">use_legacy_sql</span>
<span class="k">if</span> <span class="n">api_resource_configs</span><span class="p">:</span>
<span class="n">_validate_value</span><span class="p">(</span><span class="s2">&quot;api_resource_configs&quot;</span><span class="p">,</span> <span class="n">api_resource_configs</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">api_resource_configs</span> <span class="o">=</span> <span class="n">api_resource_configs</span> \
<span class="k">if</span> <span class="n">api_resource_configs</span> <span class="k">else</span> <span class="p">{}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">location</span> <span class="o">=</span> <span class="n">location</span>
<span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span> <span class="o">=</span> <span class="n">num_retries</span>
<div class="viewcode-block" id="BigQueryBaseCursor.create_empty_table"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.create_empty_table">[docs]</a> <span class="k">def</span> <span class="nf">create_empty_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">project_id</span><span class="p">,</span>
<span class="n">dataset_id</span><span class="p">,</span>
<span class="n">table_id</span><span class="p">,</span>
<span class="n">schema_fields</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">time_partitioning</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">cluster_fields</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">labels</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">view</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">num_retries</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates a new, empty table in the dataset.</span>
<span class="sd"> To create a view, which is defined by a SQL query, pass a dictionary to the &#39;view&#39; kwarg.</span>
<span class="sd"> :param project_id: The project to create the table into.</span>
<span class="sd"> :type project_id: str</span>
<span class="sd"> :param dataset_id: The dataset to create the table into.</span>
<span class="sd"> :type dataset_id: str</span>
<span class="sd"> :param table_id: The name of the table to be created.</span>
<span class="sd"> :type table_id: str</span>
<span class="sd"> :param schema_fields: If set, the schema field list as defined here:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</span>
<span class="sd"> :type schema_fields: list</span>
<span class="sd"> :param labels: a dictionary containing labels for the table, passed to BigQuery</span>
<span class="sd"> :type labels: dict</span>
<span class="sd"> **Example**: ::</span>
<span class="sd"> schema_fields=[{&quot;name&quot;: &quot;emp_name&quot;, &quot;type&quot;: &quot;STRING&quot;, &quot;mode&quot;: &quot;REQUIRED&quot;},</span>
<span class="sd"> {&quot;name&quot;: &quot;salary&quot;, &quot;type&quot;: &quot;INTEGER&quot;, &quot;mode&quot;: &quot;NULLABLE&quot;}]</span>
<span class="sd"> :param time_partitioning: configure optional time partitioning fields i.e.</span>
<span class="sd"> partition by field, type and expiration as per API specifications.</span>
<span class="sd"> .. seealso::</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning</span>
<span class="sd"> :type time_partitioning: dict</span>
<span class="sd"> :param cluster_fields: [Optional] The fields used for clustering.</span>
<span class="sd"> Must be specified with time_partitioning, data in the table will be first</span>
<span class="sd"> partitioned and subsequently clustered.</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clustering.fields</span>
<span class="sd"> :type cluster_fields: list</span>
<span class="sd"> :param view: [Optional] A dictionary containing definition for the view.</span>
<span class="sd"> If set, it will create a view instead of a table:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#view</span>
<span class="sd"> :type view: dict</span>
<span class="sd"> **Example**: ::</span>
<span class="sd"> view = {</span>
<span class="sd"> &quot;query&quot;: &quot;SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 1000&quot;,</span>
<span class="sd"> &quot;useLegacySql&quot;: False</span>
<span class="sd"> }</span>
<span class="sd"> :return: None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="n">project_id</span> <span class="k">if</span> <span class="n">project_id</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span>
<span class="n">table_resource</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;tableReference&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;tableId&#39;</span><span class="p">:</span> <span class="n">table_id</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="k">if</span> <span class="n">schema_fields</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;schema&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;fields&#39;</span><span class="p">:</span> <span class="n">schema_fields</span><span class="p">}</span>
<span class="k">if</span> <span class="n">time_partitioning</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;timePartitioning&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_partitioning</span>
<span class="k">if</span> <span class="n">cluster_fields</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;clustering&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;fields&#39;</span><span class="p">:</span> <span class="n">cluster_fields</span>
<span class="p">}</span>
<span class="k">if</span> <span class="n">labels</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;labels&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span>
<span class="k">if</span> <span class="n">view</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;view&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">view</span>
<span class="n">num_retries</span> <span class="o">=</span> <span class="n">num_retries</span> <span class="k">if</span> <span class="n">num_retries</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Creating Table </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span>
<span class="n">body</span><span class="o">=</span><span class="n">table_resource</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="n">num_retries</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Table created successfully: </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">)</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job failed. Error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="o">.</span><span class="n">content</span><span class="p">)</span></div>
<span class="p">)</span>
<div class="viewcode-block" id="BigQueryBaseCursor.create_external_table"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.create_external_table">[docs]</a> <span class="k">def</span> <span class="nf">create_external_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">external_project_dataset_table</span><span class="p">,</span>
<span class="n">schema_fields</span><span class="p">,</span>
<span class="n">source_uris</span><span class="p">,</span>
<span class="n">source_format</span><span class="o">=</span><span class="s1">&#39;CSV&#39;</span><span class="p">,</span>
<span class="n">autodetect</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">compression</span><span class="o">=</span><span class="s1">&#39;NONE&#39;</span><span class="p">,</span>
<span class="n">ignore_unknown_values</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">max_bad_records</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
<span class="n">skip_leading_rows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
<span class="n">field_delimiter</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">,</span>
<span class="n">quote_character</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">allow_quoted_newlines</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">allow_jagged_rows</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">src_fmt_configs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">labels</span><span class="o">=</span><span class="kc">None</span>
<span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates a new external table in the dataset with the data in Google</span>
<span class="sd"> Cloud Storage. See here:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource</span>
<span class="sd"> for more details about these parameters.</span>
<span class="sd"> :param external_project_dataset_table:</span>
<span class="sd"> The dotted ``(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt;($&lt;partition&gt;)`` BigQuery</span>
<span class="sd"> table name to create external table.</span>
<span class="sd"> If ``&lt;project&gt;`` is not included, project will be the</span>
<span class="sd"> project defined in the connection json.</span>
<span class="sd"> :type external_project_dataset_table: str</span>
<span class="sd"> :param schema_fields: The schema field list as defined here:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource</span>
<span class="sd"> :type schema_fields: list</span>
<span class="sd"> :param source_uris: The source Google Cloud</span>
<span class="sd"> Storage URI (e.g. gs://some-bucket/some-file.txt). A single wildcard</span>
<span class="sd"> per object name can be used.</span>
<span class="sd"> :type source_uris: list</span>
<span class="sd"> :param source_format: File format of the source data.</span>
<span class="sd"> :type source_format: str</span>
<span class="sd"> :param autodetect: Try to detect schema and format options automatically.</span>
<span class="sd"> Any option specified explicitly will be honored.</span>
<span class="sd"> :type autodetect: bool</span>
<span class="sd"> :param compression: [Optional] The compression type of the data source.</span>
<span class="sd"> Possible values include GZIP and NONE.</span>
<span class="sd"> The default value is NONE.</span>
<span class="sd"> This setting is ignored for Google Cloud Bigtable,</span>
<span class="sd"> Google Cloud Datastore backups and Avro formats.</span>
<span class="sd"> :type compression: str</span>
<span class="sd"> :param ignore_unknown_values: [Optional] Indicates if BigQuery should allow</span>
<span class="sd"> extra values that are not represented in the table schema.</span>
<span class="sd"> If true, the extra values are ignored. If false, records with extra columns</span>
<span class="sd"> are treated as bad records, and if there are too many bad records, an</span>
<span class="sd"> invalid error is returned in the job result.</span>
<span class="sd"> :type ignore_unknown_values: bool</span>
<span class="sd"> :param max_bad_records: The maximum number of bad records that BigQuery can</span>
<span class="sd"> ignore when running the job.</span>
<span class="sd"> :type max_bad_records: int</span>
<span class="sd"> :param skip_leading_rows: Number of rows to skip when loading from a CSV.</span>
<span class="sd"> :type skip_leading_rows: int</span>
<span class="sd"> :param field_delimiter: The delimiter to use when loading from a CSV.</span>
<span class="sd"> :type field_delimiter: str</span>
<span class="sd"> :param quote_character: The value that is used to quote data sections in a CSV</span>
<span class="sd"> file.</span>
<span class="sd"> :type quote_character: str</span>
<span class="sd"> :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not</span>
<span class="sd"> (false).</span>
<span class="sd"> :type allow_quoted_newlines: bool</span>
<span class="sd"> :param allow_jagged_rows: Accept rows that are missing trailing optional columns.</span>
<span class="sd"> The missing values are treated as nulls. If false, records with missing</span>
<span class="sd"> trailing columns are treated as bad records, and if there are too many bad</span>
<span class="sd"> records, an invalid error is returned in the job result. Only applicable when</span>
<span class="sd"> source_format is CSV.</span>
<span class="sd"> :type allow_jagged_rows: bool</span>
<span class="sd"> :param src_fmt_configs: configure optional fields specific to the source format</span>
<span class="sd"> :type src_fmt_configs: dict</span>
<span class="sd"> :param labels: a dictionary containing labels for the table, passed to BigQuery</span>
<span class="sd"> :type labels: dict</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">src_fmt_configs</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">src_fmt_configs</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">external_table_id</span> <span class="o">=</span> \
<span class="n">_split_tablename</span><span class="p">(</span><span class="n">table_input</span><span class="o">=</span><span class="n">external_project_dataset_table</span><span class="p">,</span>
<span class="n">default_project_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">var_name</span><span class="o">=</span><span class="s1">&#39;external_project_dataset_table&#39;</span><span class="p">)</span>
<span class="c1"># bigquery only allows certain source formats</span>
<span class="c1"># we check to make sure the passed source format is valid</span>
<span class="c1"># if it&#39;s not, we raise a ValueError</span>
<span class="c1"># Refer to this link for more details:</span>
<span class="c1"># https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat</span>
<span class="n">source_format</span> <span class="o">=</span> <span class="n">source_format</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
<span class="n">allowed_formats</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;CSV&quot;</span><span class="p">,</span> <span class="s2">&quot;NEWLINE_DELIMITED_JSON&quot;</span><span class="p">,</span> <span class="s2">&quot;AVRO&quot;</span><span class="p">,</span> <span class="s2">&quot;GOOGLE_SHEETS&quot;</span><span class="p">,</span>
<span class="s2">&quot;DATASTORE_BACKUP&quot;</span><span class="p">,</span> <span class="s2">&quot;PARQUET&quot;</span>
<span class="p">]</span>
<span class="k">if</span> <span class="n">source_format</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">allowed_formats</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{0}</span><span class="s2"> is not a valid source format. &quot;</span>
<span class="s2">&quot;Please use one of the following types: </span><span class="si">{1}</span><span class="s2">&quot;</span>
<span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">source_format</span><span class="p">,</span> <span class="n">allowed_formats</span><span class="p">))</span>
<span class="n">compression</span> <span class="o">=</span> <span class="n">compression</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
<span class="n">allowed_compressions</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;NONE&#39;</span><span class="p">,</span> <span class="s1">&#39;GZIP&#39;</span><span class="p">]</span>
<span class="k">if</span> <span class="n">compression</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">allowed_compressions</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{0}</span><span class="s2"> is not a valid compression format. &quot;</span>
<span class="s2">&quot;Please use one of the following types: </span><span class="si">{1}</span><span class="s2">&quot;</span>
<span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">compression</span><span class="p">,</span> <span class="n">allowed_compressions</span><span class="p">))</span>
<span class="n">table_resource</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;externalDataConfiguration&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;autodetect&#39;</span><span class="p">:</span> <span class="n">autodetect</span><span class="p">,</span>
<span class="s1">&#39;sourceFormat&#39;</span><span class="p">:</span> <span class="n">source_format</span><span class="p">,</span>
<span class="s1">&#39;sourceUris&#39;</span><span class="p">:</span> <span class="n">source_uris</span><span class="p">,</span>
<span class="s1">&#39;compression&#39;</span><span class="p">:</span> <span class="n">compression</span><span class="p">,</span>
<span class="s1">&#39;ignoreUnknownValues&#39;</span><span class="p">:</span> <span class="n">ignore_unknown_values</span>
<span class="p">},</span>
<span class="s1">&#39;tableReference&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;projectId&#39;</span><span class="p">:</span> <span class="n">project_id</span><span class="p">,</span>
<span class="s1">&#39;datasetId&#39;</span><span class="p">:</span> <span class="n">dataset_id</span><span class="p">,</span>
<span class="s1">&#39;tableId&#39;</span><span class="p">:</span> <span class="n">external_table_id</span><span class="p">,</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="k">if</span> <span class="n">schema_fields</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;externalDataConfiguration&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">({</span>
<span class="s1">&#39;schema&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;fields&#39;</span><span class="p">:</span> <span class="n">schema_fields</span>
<span class="p">}</span>
<span class="p">})</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Creating external table: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">external_project_dataset_table</span><span class="p">)</span>
<span class="k">if</span> <span class="n">max_bad_records</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;externalDataConfiguration&#39;</span><span class="p">][</span><span class="s1">&#39;maxBadRecords&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">max_bad_records</span>
<span class="c1"># if following fields are not specified in src_fmt_configs,</span>
<span class="c1"># honor the top-level params for backward-compatibility</span>
<span class="k">if</span> <span class="s1">&#39;skipLeadingRows&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">src_fmt_configs</span><span class="p">:</span>
<span class="n">src_fmt_configs</span><span class="p">[</span><span class="s1">&#39;skipLeadingRows&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">skip_leading_rows</span>
<span class="k">if</span> <span class="s1">&#39;fieldDelimiter&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">src_fmt_configs</span><span class="p">:</span>
<span class="n">src_fmt_configs</span><span class="p">[</span><span class="s1">&#39;fieldDelimiter&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">field_delimiter</span>
<span class="k">if</span> <span class="s1">&#39;quote_character&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">src_fmt_configs</span><span class="p">:</span>
<span class="n">src_fmt_configs</span><span class="p">[</span><span class="s1">&#39;quote&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">quote_character</span>
<span class="k">if</span> <span class="s1">&#39;allowQuotedNewlines&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">src_fmt_configs</span><span class="p">:</span>
<span class="n">src_fmt_configs</span><span class="p">[</span><span class="s1">&#39;allowQuotedNewlines&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">allow_quoted_newlines</span>
<span class="k">if</span> <span class="s1">&#39;allowJaggedRows&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">src_fmt_configs</span><span class="p">:</span>
<span class="n">src_fmt_configs</span><span class="p">[</span><span class="s1">&#39;allowJaggedRows&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">allow_jagged_rows</span>
<span class="n">src_fmt_to_param_mapping</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;CSV&#39;</span><span class="p">:</span> <span class="s1">&#39;csvOptions&#39;</span><span class="p">,</span>
<span class="s1">&#39;GOOGLE_SHEETS&#39;</span><span class="p">:</span> <span class="s1">&#39;googleSheetsOptions&#39;</span>
<span class="p">}</span>
<span class="n">src_fmt_to_configs_mapping</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;csvOptions&#39;</span><span class="p">:</span> <span class="p">[</span>
<span class="s1">&#39;allowJaggedRows&#39;</span><span class="p">,</span> <span class="s1">&#39;allowQuotedNewlines&#39;</span><span class="p">,</span>
<span class="s1">&#39;fieldDelimiter&#39;</span><span class="p">,</span> <span class="s1">&#39;skipLeadingRows&#39;</span><span class="p">,</span>
<span class="s1">&#39;quote&#39;</span>
<span class="p">],</span>
<span class="s1">&#39;googleSheetsOptions&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;skipLeadingRows&#39;</span><span class="p">]</span>
<span class="p">}</span>
<span class="k">if</span> <span class="n">source_format</span> <span class="ow">in</span> <span class="n">src_fmt_to_param_mapping</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="n">valid_configs</span> <span class="o">=</span> <span class="n">src_fmt_to_configs_mapping</span><span class="p">[</span>
<span class="n">src_fmt_to_param_mapping</span><span class="p">[</span><span class="n">source_format</span><span class="p">]</span>
<span class="p">]</span>
<span class="n">src_fmt_configs</span> <span class="o">=</span> <span class="p">{</span>
<span class="n">k</span><span class="p">:</span> <span class="n">v</span>
<span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">src_fmt_configs</span><span class="o">.</span><span class="n">items</span><span class="p">()</span> <span class="k">if</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">valid_configs</span>
<span class="p">}</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;externalDataConfiguration&#39;</span><span class="p">][</span><span class="n">src_fmt_to_param_mapping</span><span class="p">[</span>
<span class="n">source_format</span><span class="p">]]</span> <span class="o">=</span> <span class="n">src_fmt_configs</span>
<span class="k">if</span> <span class="n">labels</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;labels&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span>
<span class="k">try</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span>
<span class="n">body</span><span class="o">=</span><span class="n">table_resource</span>
<span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;External table created successfully: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">external_project_dataset_table</span><span class="p">)</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job failed. Error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="o">.</span><span class="n">content</span><span class="p">)</span></div>
<span class="p">)</span>
<div class="viewcode-block" id="BigQueryBaseCursor.patch_table"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.patch_table">[docs]</a> <span class="k">def</span> <span class="nf">patch_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">dataset_id</span><span class="p">,</span>
<span class="n">table_id</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">description</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">expiration_time</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">external_data_configuration</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">friendly_name</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">labels</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">schema</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">time_partitioning</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">view</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">require_partition_filter</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Patch information in an existing table.</span>
<span class="sd"> It only updates fields that are provided in the request object.</span>
<span class="sd"> Reference: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/patch</span>
<span class="sd"> :param dataset_id: The dataset containing the table to be patched.</span>
<span class="sd"> :type dataset_id: str</span>
<span class="sd"> :param table_id: The name of the table to be patched.</span>
<span class="sd"> :type table_id: str</span>
<span class="sd"> :param project_id: The project containing the table to be patched.</span>
<span class="sd"> :type project_id: str</span>
<span class="sd"> :param description: [Optional] A user-friendly description of this table.</span>
<span class="sd"> :type description: str</span>
<span class="sd"> :param expiration_time: [Optional] The time when this table expires,</span>
<span class="sd"> in milliseconds since the epoch.</span>
<span class="sd"> :type expiration_time: int</span>
<span class="sd"> :param external_data_configuration: [Optional] A dictionary containing</span>
<span class="sd"> properties of a table stored outside of BigQuery.</span>
<span class="sd"> :type external_data_configuration: dict</span>
<span class="sd"> :param friendly_name: [Optional] A descriptive name for this table.</span>
<span class="sd"> :type friendly_name: str</span>
<span class="sd"> :param labels: [Optional] A dictionary containing labels associated with this table.</span>
<span class="sd"> :type labels: dict</span>
<span class="sd"> :param schema: [Optional] If set, the schema field list as defined here:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema</span>
<span class="sd"> The supported and unsupported schema modifications are listed here:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/managing-table-schemas</span>
<span class="sd"> **Example**: ::</span>
<span class="sd"> schema=[{&quot;name&quot;: &quot;emp_name&quot;, &quot;type&quot;: &quot;STRING&quot;, &quot;mode&quot;: &quot;REQUIRED&quot;},</span>
<span class="sd"> {&quot;name&quot;: &quot;salary&quot;, &quot;type&quot;: &quot;INTEGER&quot;, &quot;mode&quot;: &quot;NULLABLE&quot;}]</span>
<span class="sd"> :type schema: list</span>
<span class="sd"> :param time_partitioning: [Optional] A dictionary containing time-based partitioning</span>
<span class="sd"> definition for the table.</span>
<span class="sd"> :type time_partitioning: dict</span>
<span class="sd"> :param view: [Optional] A dictionary containing definition for the view.</span>
<span class="sd"> If set, it will patch a view instead of a table:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#view</span>
<span class="sd"> **Example**: ::</span>
<span class="sd"> view = {</span>
<span class="sd"> &quot;query&quot;: &quot;SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 500&quot;,</span>
<span class="sd"> &quot;useLegacySql&quot;: False</span>
<span class="sd"> }</span>
<span class="sd"> :type view: dict</span>
<span class="sd"> :param require_partition_filter: [Optional] If true, queries over this table require a</span>
<span class="sd"> partition filter. If false, queries over the table do not require one.</span>
<span class="sd"> :type require_partition_filter: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="n">project_id</span> <span class="k">if</span> <span class="n">project_id</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span>
<span class="n">table_resource</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">if</span> <span class="n">description</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;description&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">description</span>
<span class="k">if</span> <span class="n">expiration_time</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;expirationTime&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">expiration_time</span>
<span class="k">if</span> <span class="n">external_data_configuration</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;externalDataConfiguration&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">external_data_configuration</span>
<span class="k">if</span> <span class="n">friendly_name</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;friendlyName&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">friendly_name</span>
<span class="k">if</span> <span class="n">labels</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;labels&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span>
<span class="k">if</span> <span class="n">schema</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;schema&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;fields&#39;</span><span class="p">:</span> <span class="n">schema</span><span class="p">}</span>
<span class="k">if</span> <span class="n">time_partitioning</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;timePartitioning&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">time_partitioning</span>
<span class="k">if</span> <span class="n">view</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;view&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">view</span>
<span class="k">if</span> <span class="n">require_partition_filter</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;requirePartitionFilter&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">require_partition_filter</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Patching Table </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span><span class="o">.</span><span class="n">patch</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span>
<span class="n">tableId</span><span class="o">=</span><span class="n">table_id</span><span class="p">,</span>
<span class="n">body</span><span class="o">=</span><span class="n">table_resource</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Table patched successfully: </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">)</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job failed. Error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="o">.</span><span class="n">content</span><span class="p">)</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.run_query"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.run_query">[docs]</a> <span class="k">def</span> <span class="nf">run_query</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">bql</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">sql</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">destination_dataset_table</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">write_disposition</span><span class="o">=</span><span class="s1">&#39;WRITE_EMPTY&#39;</span><span class="p">,</span>
<span class="n">allow_large_results</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">flatten_results</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">udf_config</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">use_legacy_sql</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">maximum_billing_tier</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">maximum_bytes_billed</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">create_disposition</span><span class="o">=</span><span class="s1">&#39;CREATE_IF_NEEDED&#39;</span><span class="p">,</span>
<span class="n">query_params</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">labels</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">schema_update_options</span><span class="o">=</span><span class="p">(),</span>
<span class="n">priority</span><span class="o">=</span><span class="s1">&#39;INTERACTIVE&#39;</span><span class="p">,</span>
<span class="n">time_partitioning</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">api_resource_configs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">cluster_fields</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Executes a BigQuery SQL query. Optionally persists results in a BigQuery</span>
<span class="sd"> table. See here:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/v2/jobs</span>
<span class="sd"> for more details about these parameters.</span>
<span class="sd"> :param bql: (Deprecated. Use `sql` parameter instead) The BigQuery SQL</span>
<span class="sd"> to execute.</span>
<span class="sd"> :type bql: str</span>
<span class="sd"> :param sql: The BigQuery SQL to execute.</span>
<span class="sd"> :type sql: str</span>
<span class="sd"> :param destination_dataset_table: The dotted ``&lt;dataset&gt;.&lt;table&gt;``</span>
<span class="sd"> BigQuery table to save the query results.</span>
<span class="sd"> :type destination_dataset_table: str</span>
<span class="sd"> :param write_disposition: What to do if the table already exists in</span>
<span class="sd"> BigQuery.</span>
<span class="sd"> :type write_disposition: str</span>
<span class="sd"> :param allow_large_results: Whether to allow large results.</span>
<span class="sd"> :type allow_large_results: bool</span>
<span class="sd"> :param flatten_results: If true and query uses legacy SQL dialect, flattens</span>
<span class="sd"> all nested and repeated fields in the query results. ``allowLargeResults``</span>
<span class="sd"> must be true if this is set to false. For standard SQL queries, this</span>
<span class="sd"> flag is ignored and results are never flattened.</span>
<span class="sd"> :type flatten_results: bool</span>
<span class="sd"> :param udf_config: The User Defined Function configuration for the query.</span>
<span class="sd"> See https://cloud.google.com/bigquery/user-defined-functions for details.</span>
<span class="sd"> :type udf_config: list</span>
<span class="sd"> :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false).</span>
<span class="sd"> If `None`, defaults to `self.use_legacy_sql`.</span>
<span class="sd"> :type use_legacy_sql: bool</span>
<span class="sd"> :param api_resource_configs: a dictionary that contains params</span>
<span class="sd"> &#39;configuration&#39; applied for Google BigQuery Jobs API:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs</span>
<span class="sd"> for example, {&#39;query&#39;: {&#39;useQueryCache&#39;: False}}. You could use it</span>
<span class="sd"> if you need to provide some params that are not supported by the</span>
<span class="sd"> BigQueryHook like args.</span>
<span class="sd"> :type api_resource_configs: dict</span>
<span class="sd"> :param maximum_billing_tier: Positive integer that serves as a</span>
<span class="sd"> multiplier of the basic price.</span>
<span class="sd"> :type maximum_billing_tier: int</span>
<span class="sd"> :param maximum_bytes_billed: Limits the bytes billed for this job.</span>
<span class="sd"> Queries that will have bytes billed beyond this limit will fail</span>
<span class="sd"> (without incurring a charge). If unspecified, this will be</span>
<span class="sd"> set to your project default.</span>
<span class="sd"> :type maximum_bytes_billed: float</span>
<span class="sd"> :param create_disposition: Specifies whether the job is allowed to</span>
<span class="sd"> create new tables.</span>
<span class="sd"> :type create_disposition: str</span>
<span class="sd"> :param query_params: a list of dictionaries containing query parameter types and</span>
<span class="sd"> values, passed to BigQuery</span>
<span class="sd"> :type query_params: list</span>
<span class="sd"> :param labels: a dictionary containing labels for the job/query,</span>
<span class="sd"> passed to BigQuery</span>
<span class="sd"> :type labels: dict</span>
<span class="sd"> :param schema_update_options: Allows the schema of the destination</span>
<span class="sd"> table to be updated as a side effect of the query job.</span>
<span class="sd"> :type schema_update_options: tuple</span>
<span class="sd"> :param priority: Specifies a priority for the query.</span>
<span class="sd"> Possible values include INTERACTIVE and BATCH.</span>
<span class="sd"> The default value is INTERACTIVE.</span>
<span class="sd"> :type priority: str</span>
<span class="sd"> :param time_partitioning: configure optional time partitioning fields i.e.</span>
<span class="sd"> partition by field, type and expiration as per API specifications.</span>
<span class="sd"> :type time_partitioning: dict</span>
<span class="sd"> :param cluster_fields: Request that the result of this query be stored sorted</span>
<span class="sd"> by one or more columns. This is only available in combination with</span>
<span class="sd"> time_partitioning. The order of columns given determines the sort order.</span>
<span class="sd"> :type cluster_fields: list[str]</span>
<span class="sd"> :param location: The geographic location of the job. Required except for</span>
<span class="sd"> US and EU. See details at</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/locations#specifying_your_location</span>
<span class="sd"> :type location: str</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">time_partitioning</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">time_partitioning</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">if</span> <span class="n">location</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">location</span> <span class="o">=</span> <span class="n">location</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">api_resource_configs</span><span class="p">:</span>
<span class="n">api_resource_configs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">api_resource_configs</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">_validate_value</span><span class="p">(</span><span class="s1">&#39;api_resource_configs&#39;</span><span class="p">,</span>
<span class="n">api_resource_configs</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span>
<span class="n">configuration</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">api_resource_configs</span><span class="p">)</span>
<span class="k">if</span> <span class="s1">&#39;query&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">configuration</span><span class="p">:</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">_validate_value</span><span class="p">(</span><span class="s2">&quot;api_resource_configs[&#39;query&#39;]&quot;</span><span class="p">,</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">],</span> <span class="nb">dict</span><span class="p">)</span>
<span class="n">sql</span> <span class="o">=</span> <span class="n">bql</span> <span class="k">if</span> <span class="n">sql</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">sql</span>
<span class="c1"># TODO remove `bql` in Airflow 2.0 - Jira: [AIRFLOW-2513]</span>
<span class="k">if</span> <span class="n">bql</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">warnings</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s1">&#39;Deprecated parameter `bql` used in &#39;</span>
<span class="s1">&#39;`BigQueryBaseCursor.run_query` &#39;</span>
<span class="s1">&#39;Use `sql` parameter instead to pass the sql to be &#39;</span>
<span class="s1">&#39;executed. `bql` parameter is deprecated and &#39;</span>
<span class="s1">&#39;will be removed in a future version of &#39;</span>
<span class="s1">&#39;Airflow.&#39;</span><span class="p">,</span>
<span class="n">category</span><span class="o">=</span><span class="ne">DeprecationWarning</span><span class="p">)</span>
<span class="k">if</span> <span class="n">sql</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;query&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">&#39;`BigQueryBaseCursor.run_query` &#39;</span>
<span class="s1">&#39;missing 1 required positional argument: `sql`&#39;</span><span class="p">)</span>
<span class="c1"># BigQuery also allows you to define how you want a table&#39;s schema to change</span>
<span class="c1"># as a side effect of a query job</span>
<span class="c1"># for more details:</span>
<span class="c1"># https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.schemaUpdateOptions</span>
<span class="n">allowed_schema_update_options</span> <span class="o">=</span> <span class="p">[</span>
<span class="s1">&#39;ALLOW_FIELD_ADDITION&#39;</span><span class="p">,</span> <span class="s2">&quot;ALLOW_FIELD_RELAXATION&quot;</span>
<span class="p">]</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">set</span><span class="p">(</span><span class="n">allowed_schema_update_options</span>
<span class="p">)</span><span class="o">.</span><span class="n">issuperset</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="n">schema_update_options</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{0}</span><span class="s2"> contains invalid schema update options. &quot;</span>
<span class="s2">&quot;Please only use one or more of the following &quot;</span>
<span class="s2">&quot;options: </span><span class="si">{1}</span><span class="s2">&quot;</span>
<span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">schema_update_options</span><span class="p">,</span>
<span class="n">allowed_schema_update_options</span><span class="p">))</span>
<span class="k">if</span> <span class="n">schema_update_options</span><span class="p">:</span>
<span class="k">if</span> <span class="n">write_disposition</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;WRITE_APPEND&quot;</span><span class="p">,</span> <span class="s2">&quot;WRITE_TRUNCATE&quot;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;schema_update_options is only &quot;</span>
<span class="s2">&quot;allowed if write_disposition is &quot;</span>
<span class="s2">&quot;&#39;WRITE_APPEND&#39; or &#39;WRITE_TRUNCATE&#39;.&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">destination_dataset_table</span><span class="p">:</span>
<span class="n">destination_project</span><span class="p">,</span> <span class="n">destination_dataset</span><span class="p">,</span> <span class="n">destination_table</span> <span class="o">=</span> \
<span class="n">_split_tablename</span><span class="p">(</span><span class="n">table_input</span><span class="o">=</span><span class="n">destination_dataset_table</span><span class="p">,</span>
<span class="n">default_project_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">)</span>
<span class="n">destination_dataset_table</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;projectId&#39;</span><span class="p">:</span> <span class="n">destination_project</span><span class="p">,</span>
<span class="s1">&#39;datasetId&#39;</span><span class="p">:</span> <span class="n">destination_dataset</span><span class="p">,</span>
<span class="s1">&#39;tableId&#39;</span><span class="p">:</span> <span class="n">destination_table</span><span class="p">,</span>
<span class="p">}</span>
<span class="k">if</span> <span class="n">cluster_fields</span><span class="p">:</span>
<span class="n">cluster_fields</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;fields&#39;</span><span class="p">:</span> <span class="n">cluster_fields</span><span class="p">}</span>
<span class="n">query_param_list</span> <span class="o">=</span> <span class="p">[</span>
<span class="p">(</span><span class="n">sql</span><span class="p">,</span> <span class="s1">&#39;query&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">),</span>
<span class="p">(</span><span class="n">priority</span><span class="p">,</span> <span class="s1">&#39;priority&#39;</span><span class="p">,</span> <span class="s1">&#39;INTERACTIVE&#39;</span><span class="p">,</span> <span class="n">six</span><span class="o">.</span><span class="n">string_types</span><span class="p">),</span>
<span class="p">(</span><span class="n">use_legacy_sql</span><span class="p">,</span> <span class="s1">&#39;useLegacySql&#39;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">use_legacy_sql</span><span class="p">,</span> <span class="nb">bool</span><span class="p">),</span>
<span class="p">(</span><span class="n">query_params</span><span class="p">,</span> <span class="s1">&#39;queryParameters&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="nb">list</span><span class="p">),</span>
<span class="p">(</span><span class="n">udf_config</span><span class="p">,</span> <span class="s1">&#39;userDefinedFunctionResources&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="nb">list</span><span class="p">),</span>
<span class="p">(</span><span class="n">maximum_billing_tier</span><span class="p">,</span> <span class="s1">&#39;maximumBillingTier&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="nb">int</span><span class="p">),</span>
<span class="p">(</span><span class="n">maximum_bytes_billed</span><span class="p">,</span> <span class="s1">&#39;maximumBytesBilled&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="nb">float</span><span class="p">),</span>
<span class="p">(</span><span class="n">time_partitioning</span><span class="p">,</span> <span class="s1">&#39;timePartitioning&#39;</span><span class="p">,</span> <span class="p">{},</span> <span class="nb">dict</span><span class="p">),</span>
<span class="p">(</span><span class="n">schema_update_options</span><span class="p">,</span> <span class="s1">&#39;schemaUpdateOptions&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">),</span>
<span class="p">(</span><span class="n">destination_dataset_table</span><span class="p">,</span> <span class="s1">&#39;destinationTable&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="nb">dict</span><span class="p">),</span>
<span class="p">(</span><span class="n">cluster_fields</span><span class="p">,</span> <span class="s1">&#39;clustering&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="nb">dict</span><span class="p">),</span>
<span class="p">]</span>
<span class="k">for</span> <span class="n">param_tuple</span> <span class="ow">in</span> <span class="n">query_param_list</span><span class="p">:</span>
<span class="n">param</span><span class="p">,</span> <span class="n">param_name</span><span class="p">,</span> <span class="n">param_default</span><span class="p">,</span> <span class="n">param_type</span> <span class="o">=</span> <span class="n">param_tuple</span>
<span class="k">if</span> <span class="n">param_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">]</span> <span class="ow">and</span> <span class="n">param</span> <span class="ow">in</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="p">{},</span> <span class="p">()]:</span>
<span class="k">if</span> <span class="n">param_name</span> <span class="o">==</span> <span class="s1">&#39;timePartitioning&#39;</span><span class="p">:</span>
<span class="n">param_default</span> <span class="o">=</span> <span class="n">_cleanse_time_partitioning</span><span class="p">(</span>
<span class="n">destination_dataset_table</span><span class="p">,</span> <span class="n">time_partitioning</span><span class="p">)</span>
<span class="n">param</span> <span class="o">=</span> <span class="n">param_default</span>
<span class="k">if</span> <span class="n">param</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="p">{},</span> <span class="p">()]:</span>
<span class="n">_api_resource_configs_duplication_check</span><span class="p">(</span>
<span class="n">param_name</span><span class="p">,</span> <span class="n">param</span><span class="p">,</span> <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">])</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">][</span><span class="n">param_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">param</span>
<span class="c1"># validate the type of the provided param;</span>
<span class="c1"># this is the last step because the param can come from 2 sources,</span>
<span class="c1"># so it has to be resolved first</span>
<span class="n">_validate_value</span><span class="p">(</span><span class="n">param_name</span><span class="p">,</span> <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">][</span><span class="n">param_name</span><span class="p">],</span>
<span class="n">param_type</span><span class="p">)</span>
<span class="k">if</span> <span class="n">param_name</span> <span class="o">==</span> <span class="s1">&#39;schemaUpdateOptions&#39;</span> <span class="ow">and</span> <span class="n">param</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Adding experimental &#39;schemaUpdateOptions&#39;: &quot;</span>
<span class="s2">&quot;</span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">schema_update_options</span><span class="p">)</span>
<span class="k">if</span> <span class="n">param_name</span> <span class="o">==</span> <span class="s1">&#39;destinationTable&#39;</span><span class="p">:</span>
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;projectId&#39;</span><span class="p">,</span> <span class="s1">&#39;datasetId&#39;</span><span class="p">,</span> <span class="s1">&#39;tableId&#39;</span><span class="p">]:</span>
<span class="k">if</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">][</span><span class="s1">&#39;destinationTable&#39;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;Not correct &#39;destinationTable&#39; in &quot;</span>
<span class="s2">&quot;api_resource_configs. &#39;destinationTable&#39; &quot;</span>
<span class="s2">&quot;must be a dict with {&#39;projectId&#39;:&#39;&#39;, &quot;</span>
<span class="s2">&quot;&#39;datasetId&#39;:&#39;&#39;, &#39;tableId&#39;:&#39;&#39;}&quot;</span><span class="p">)</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">({</span>
<span class="s1">&#39;allowLargeResults&#39;</span><span class="p">:</span> <span class="n">allow_large_results</span><span class="p">,</span>
<span class="s1">&#39;flattenResults&#39;</span><span class="p">:</span> <span class="n">flatten_results</span><span class="p">,</span>
<span class="s1">&#39;writeDisposition&#39;</span><span class="p">:</span> <span class="n">write_disposition</span><span class="p">,</span>
<span class="s1">&#39;createDisposition&#39;</span><span class="p">:</span> <span class="n">create_disposition</span><span class="p">,</span>
<span class="p">})</span>
<span class="k">if</span> <span class="s1">&#39;useLegacySql&#39;</span> <span class="ow">in</span> <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">]</span> <span class="ow">and</span> <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">][</span><span class="s1">&#39;useLegacySql&#39;</span><span class="p">]</span> <span class="ow">and</span>\
<span class="s1">&#39;queryParameters&#39;</span> <span class="ow">in</span> <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;query&#39;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Query parameters are not allowed &quot;</span>
<span class="s2">&quot;when using legacy SQL&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">labels</span><span class="p">:</span>
<span class="n">_api_resource_configs_duplication_check</span><span class="p">(</span>
<span class="s1">&#39;labels&#39;</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">configuration</span><span class="p">)</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;labels&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">run_with_configuration</span><span class="p">(</span><span class="n">configuration</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.run_extract"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.run_extract">[docs]</a>    <span class="k">def</span> <span class="nf">run_extract</span><span class="p">(</span>  <span class="c1"># noqa</span>
            <span class="bp">self</span><span class="p">,</span>
            <span class="n">source_project_dataset_table</span><span class="p">,</span>
            <span class="n">destination_cloud_storage_uris</span><span class="p">,</span>
            <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;NONE&#39;</span><span class="p">,</span>
            <span class="n">export_format</span><span class="o">=</span><span class="s1">&#39;CSV&#39;</span><span class="p">,</span>
            <span class="n">field_delimiter</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">,</span>
            <span class="n">print_header</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
            <span class="n">labels</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
        <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Executes a BigQuery extract command to copy data from BigQuery to</span>
<span class="sd">        Google Cloud Storage. See here:</span>
<span class="sd">        https://cloud.google.com/bigquery/docs/reference/v2/jobs</span>
<span class="sd">        For more details about these parameters.</span>
<span class="sd">        :param source_project_dataset_table: The dotted</span>
<span class="sd">            ``(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt;`` BigQuery table to use as the</span>
<span class="sd">            source data. If ``&lt;project&gt;`` is not included, the hook&#39;s default</span>
<span class="sd">            project (``self.project_id``) is used.</span>
<span class="sd">        :type source_project_dataset_table: str</span>
<span class="sd">        :param destination_cloud_storage_uris: The destination Google Cloud</span>
<span class="sd">            Storage URI (e.g. gs://some-bucket/some-file.txt). Follows</span>
<span class="sd">            convention defined here:</span>
<span class="sd">            https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple</span>
<span class="sd">        :type destination_cloud_storage_uris: list</span>
<span class="sd">        :param compression: Type of compression to use.</span>
<span class="sd">        :type compression: str</span>
<span class="sd">        :param export_format: File format to export.</span>
<span class="sd">        :type export_format: str</span>
<span class="sd">        :param field_delimiter: The delimiter to use when extracting to a CSV.</span>
<span class="sd">        :type field_delimiter: str</span>
<span class="sd">        :param print_header: Whether to print a header for a CSV file extract.</span>
<span class="sd">        :type print_header: bool</span>
<span class="sd">        :param labels: a dictionary containing labels for the job/query,</span>
<span class="sd">            passed to BigQuery</span>
<span class="sd">        :type labels: dict</span>
<span class="sd">        &quot;&quot;&quot;</span>
        <span class="n">source_project</span><span class="p">,</span> <span class="n">source_dataset</span><span class="p">,</span> <span class="n">source_table</span> <span class="o">=</span> \
            <span class="n">_split_tablename</span><span class="p">(</span><span class="n">table_input</span><span class="o">=</span><span class="n">source_project_dataset_table</span><span class="p">,</span>
                             <span class="n">default_project_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
                             <span class="n">var_name</span><span class="o">=</span><span class="s1">&#39;source_project_dataset_table&#39;</span><span class="p">)</span>
        <span class="n">configuration</span> <span class="o">=</span> <span class="p">{</span>
            <span class="s1">&#39;extract&#39;</span><span class="p">:</span> <span class="p">{</span>
                <span class="s1">&#39;sourceTable&#39;</span><span class="p">:</span> <span class="p">{</span>
                    <span class="s1">&#39;projectId&#39;</span><span class="p">:</span> <span class="n">source_project</span><span class="p">,</span>
                    <span class="s1">&#39;datasetId&#39;</span><span class="p">:</span> <span class="n">source_dataset</span><span class="p">,</span>
                    <span class="s1">&#39;tableId&#39;</span><span class="p">:</span> <span class="n">source_table</span><span class="p">,</span>
                <span class="p">},</span>
                <span class="s1">&#39;compression&#39;</span><span class="p">:</span> <span class="n">compression</span><span class="p">,</span>
                <span class="s1">&#39;destinationUris&#39;</span><span class="p">:</span> <span class="n">destination_cloud_storage_uris</span><span class="p">,</span>
                <span class="s1">&#39;destinationFormat&#39;</span><span class="p">:</span> <span class="n">export_format</span><span class="p">,</span>
            <span class="p">}</span>
        <span class="p">}</span>
        <span class="k">if</span> <span class="n">labels</span><span class="p">:</span>
            <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;labels&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span>
        <span class="k">if</span> <span class="n">export_format</span> <span class="o">==</span> <span class="s1">&#39;CSV&#39;</span><span class="p">:</span>
            <span class="c1"># Only set fieldDelimiter and printHeader fields if using CSV.</span>
            <span class="c1"># Google does not like it if you set these fields for other export</span>
            <span class="c1"># formats.</span>
            <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;extract&#39;</span><span class="p">][</span><span class="s1">&#39;fieldDelimiter&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">field_delimiter</span>
            <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;extract&#39;</span><span class="p">][</span><span class="s1">&#39;printHeader&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">print_header</span>
        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">run_with_configuration</span><span class="p">(</span><span class="n">configuration</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.run_copy"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.run_copy">[docs]</a>    <span class="k">def</span> <span class="nf">run_copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
                 <span class="n">source_project_dataset_tables</span><span class="p">,</span>
                 <span class="n">destination_project_dataset_table</span><span class="p">,</span>
                 <span class="n">write_disposition</span><span class="o">=</span><span class="s1">&#39;WRITE_EMPTY&#39;</span><span class="p">,</span>
                 <span class="n">create_disposition</span><span class="o">=</span><span class="s1">&#39;CREATE_IF_NEEDED&#39;</span><span class="p">,</span>
                 <span class="n">labels</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
        <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Executes a BigQuery copy command to copy data from one BigQuery table</span>
<span class="sd">        to another. See here:</span>
<span class="sd">        https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy</span>
<span class="sd">        For more details about these parameters.</span>
<span class="sd">        :param source_project_dataset_tables: One or more dotted</span>
<span class="sd">            ``(project:|project.)&lt;dataset&gt;.&lt;table&gt;``</span>
<span class="sd">            BigQuery tables to use as the source data. Use a list if there are</span>
<span class="sd">            multiple source tables.</span>
<span class="sd">            If ``&lt;project&gt;`` is not included, project will be the project defined</span>
<span class="sd">            in the connection json.</span>
<span class="sd">        :type source_project_dataset_tables: list|str</span>
<span class="sd">        :param destination_project_dataset_table: The destination BigQuery</span>
<span class="sd">            table. Format is: ``(project:|project.)&lt;dataset&gt;.&lt;table&gt;``</span>
<span class="sd">        :type destination_project_dataset_table: str</span>
<span class="sd">        :param write_disposition: The write disposition if the table already exists.</span>
<span class="sd">        :type write_disposition: str</span>
<span class="sd">        :param create_disposition: The create disposition if the table doesn&#39;t exist.</span>
<span class="sd">        :type create_disposition: str</span>
<span class="sd">        :param labels: a dictionary containing labels for the job/query,</span>
<span class="sd">            passed to BigQuery</span>
<span class="sd">        :type labels: dict</span>
<span class="sd">        &quot;&quot;&quot;</span>
        <span class="n">source_project_dataset_tables</span> <span class="o">=</span> <span class="p">([</span>
            <span class="n">source_project_dataset_tables</span>
        <span class="p">]</span> <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">source_project_dataset_tables</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span> <span class="k">else</span>
            <span class="n">source_project_dataset_tables</span><span class="p">)</span>
        <span class="n">source_project_dataset_tables_fixup</span> <span class="o">=</span> <span class="p">[]</span>
        <span class="k">for</span> <span class="n">source_project_dataset_table</span> <span class="ow">in</span> <span class="n">source_project_dataset_tables</span><span class="p">:</span>
            <span class="n">source_project</span><span class="p">,</span> <span class="n">source_dataset</span><span class="p">,</span> <span class="n">source_table</span> <span class="o">=</span> \
                <span class="n">_split_tablename</span><span class="p">(</span><span class="n">table_input</span><span class="o">=</span><span class="n">source_project_dataset_table</span><span class="p">,</span>
                                 <span class="n">default_project_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
                                 <span class="n">var_name</span><span class="o">=</span><span class="s1">&#39;source_project_dataset_table&#39;</span><span class="p">)</span>
            <span class="n">source_project_dataset_tables_fixup</span><span class="o">.</span><span class="n">append</span><span class="p">({</span>
                <span class="s1">&#39;projectId&#39;</span><span class="p">:</span>
                    <span class="n">source_project</span><span class="p">,</span>
                <span class="s1">&#39;datasetId&#39;</span><span class="p">:</span>
                    <span class="n">source_dataset</span><span class="p">,</span>
                <span class="s1">&#39;tableId&#39;</span><span class="p">:</span>
                    <span class="n">source_table</span>
            <span class="p">})</span>
        <span class="n">destination_project</span><span class="p">,</span> <span class="n">destination_dataset</span><span class="p">,</span> <span class="n">destination_table</span> <span class="o">=</span> \
            <span class="n">_split_tablename</span><span class="p">(</span><span class="n">table_input</span><span class="o">=</span><span class="n">destination_project_dataset_table</span><span class="p">,</span>
                             <span class="n">default_project_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">)</span>
        <span class="n">configuration</span> <span class="o">=</span> <span class="p">{</span>
            <span class="s1">&#39;copy&#39;</span><span class="p">:</span> <span class="p">{</span>
                <span class="s1">&#39;createDisposition&#39;</span><span class="p">:</span> <span class="n">create_disposition</span><span class="p">,</span>
                <span class="s1">&#39;writeDisposition&#39;</span><span class="p">:</span> <span class="n">write_disposition</span><span class="p">,</span>
                <span class="s1">&#39;sourceTables&#39;</span><span class="p">:</span> <span class="n">source_project_dataset_tables_fixup</span><span class="p">,</span>
                <span class="s1">&#39;destinationTable&#39;</span><span class="p">:</span> <span class="p">{</span>
                    <span class="s1">&#39;projectId&#39;</span><span class="p">:</span> <span class="n">destination_project</span><span class="p">,</span>
                    <span class="s1">&#39;datasetId&#39;</span><span class="p">:</span> <span class="n">destination_dataset</span><span class="p">,</span>
                    <span class="s1">&#39;tableId&#39;</span><span class="p">:</span> <span class="n">destination_table</span>
                <span class="p">}</span>
            <span class="p">}</span>
        <span class="p">}</span>
        <span class="k">if</span> <span class="n">labels</span><span class="p">:</span>
            <span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;labels&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">labels</span>
        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">run_with_configuration</span><span class="p">(</span><span class="n">configuration</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.run_load"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.run_load">[docs]</a> <span class="k">def</span> <span class="nf">run_load</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">destination_project_dataset_table</span><span class="p">,</span>
<span class="n">source_uris</span><span class="p">,</span>
<span class="n">schema_fields</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">source_format</span><span class="o">=</span><span class="s1">&#39;CSV&#39;</span><span class="p">,</span>
<span class="n">create_disposition</span><span class="o">=</span><span class="s1">&#39;CREATE_IF_NEEDED&#39;</span><span class="p">,</span>
<span class="n">skip_leading_rows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
<span class="n">write_disposition</span><span class="o">=</span><span class="s1">&#39;WRITE_EMPTY&#39;</span><span class="p">,</span>
<span class="n">field_delimiter</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">,</span>
<span class="n">max_bad_records</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
<span class="n">quote_character</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">ignore_unknown_values</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">allow_quoted_newlines</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">allow_jagged_rows</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">schema_update_options</span><span class="o">=</span><span class="p">(),</span>
<span class="n">src_fmt_configs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">time_partitioning</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">cluster_fields</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">autodetect</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Executes a BigQuery load command to load data from Google Cloud Storage</span>
<span class="sd"> to BigQuery. See here:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/v2/jobs</span>
<span class="sd"> For more details about these parameters.</span>
<span class="sd"> :param destination_project_dataset_table:</span>
<span class="sd"> The dotted ``(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt;($&lt;partition&gt;)`` BigQuery</span>
<span class="sd"> table to load data into. If ``&lt;project&gt;`` is not included, project will be the</span>
<span class="sd"> project defined in the connection json. If a partition is specified the</span>
<span class="sd"> operator will automatically append the data, create a new partition or create</span>
<span class="sd"> a new DAY partitioned table.</span>
<span class="sd"> :type destination_project_dataset_table: str</span>
<span class="sd"> :param schema_fields: The schema field list as defined here:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load</span>
<span class="sd"> Required if autodetect=False; optional if autodetect=True.</span>
<span class="sd"> :type schema_fields: list</span>
<span class="sd"> :param autodetect: Attempt to autodetect the schema for CSV and JSON</span>
<span class="sd"> source files.</span>
<span class="sd"> :type autodetect: bool</span>
<span class="sd"> :param source_uris: The source Google Cloud</span>
<span class="sd"> Storage URI (e.g. gs://some-bucket/some-file.txt). A single wild</span>
<span class="sd"> per-object name can be used.</span>
<span class="sd"> :type source_uris: list</span>
<span class="sd">        :param source_format: File format of the source data.</span>
<span class="sd"> :type source_format: str</span>
<span class="sd"> :param create_disposition: The create disposition if the table doesn&#39;t exist.</span>
<span class="sd"> :type create_disposition: str</span>
<span class="sd"> :param skip_leading_rows: Number of rows to skip when loading from a CSV.</span>
<span class="sd"> :type skip_leading_rows: int</span>
<span class="sd"> :param write_disposition: The write disposition if the table already exists.</span>
<span class="sd"> :type write_disposition: str</span>
<span class="sd"> :param field_delimiter: The delimiter to use when loading from a CSV.</span>
<span class="sd"> :type field_delimiter: str</span>
<span class="sd"> :param max_bad_records: The maximum number of bad records that BigQuery can</span>
<span class="sd"> ignore when running the job.</span>
<span class="sd"> :type max_bad_records: int</span>
<span class="sd"> :param quote_character: The value that is used to quote data sections in a CSV</span>
<span class="sd"> file.</span>
<span class="sd"> :type quote_character: str</span>
<span class="sd"> :param ignore_unknown_values: [Optional] Indicates if BigQuery should allow</span>
<span class="sd"> extra values that are not represented in the table schema.</span>
<span class="sd"> If true, the extra values are ignored. If false, records with extra columns</span>
<span class="sd"> are treated as bad records, and if there are too many bad records, an</span>
<span class="sd"> invalid error is returned in the job result.</span>
<span class="sd"> :type ignore_unknown_values: bool</span>
<span class="sd"> :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not</span>
<span class="sd"> (false).</span>
<span class="sd"> :type allow_quoted_newlines: bool</span>
<span class="sd"> :param allow_jagged_rows: Accept rows that are missing trailing optional columns.</span>
<span class="sd"> The missing values are treated as nulls. If false, records with missing</span>
<span class="sd"> trailing columns are treated as bad records, and if there are too many bad</span>
<span class="sd"> records, an invalid error is returned in the job result. Only applicable when</span>
<span class="sd">            source_format is CSV.</span>
<span class="sd"> :type allow_jagged_rows: bool</span>
<span class="sd"> :param schema_update_options: Allows the schema of the destination</span>
<span class="sd"> table to be updated as a side effect of the load job.</span>
<span class="sd"> :type schema_update_options: tuple</span>
<span class="sd"> :param src_fmt_configs: configure optional fields specific to the source format</span>
<span class="sd"> :type src_fmt_configs: dict</span>
<span class="sd"> :param time_partitioning: configure optional time partitioning fields i.e.</span>
<span class="sd"> partition by field, type and expiration as per API specifications.</span>
<span class="sd"> :type time_partitioning: dict</span>
<span class="sd"> :param cluster_fields: Request that the result of this load be stored sorted</span>
<span class="sd"> by one or more columns. This is only available in combination with</span>
<span class="sd"> time_partitioning. The order of columns given determines the sort order.</span>
<span class="sd"> :type cluster_fields: list[str]</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># bigquery only allows certain source formats</span>
<span class="c1"># we check to make sure the passed source format is valid</span>
<span class="c1"># if it&#39;s not, we raise a ValueError</span>
<span class="c1"># Refer to this link for more details:</span>
<span class="c1"># https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat</span>
<span class="k">if</span> <span class="n">schema_fields</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">autodetect</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s1">&#39;You must either pass a schema or autodetect=True.&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">src_fmt_configs</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">src_fmt_configs</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">source_format</span> <span class="o">=</span> <span class="n">source_format</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
<span class="n">allowed_formats</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;CSV&quot;</span><span class="p">,</span> <span class="s2">&quot;NEWLINE_DELIMITED_JSON&quot;</span><span class="p">,</span> <span class="s2">&quot;AVRO&quot;</span><span class="p">,</span> <span class="s2">&quot;GOOGLE_SHEETS&quot;</span><span class="p">,</span>
<span class="s2">&quot;DATASTORE_BACKUP&quot;</span><span class="p">,</span> <span class="s2">&quot;PARQUET&quot;</span>
<span class="p">]</span>
<span class="k">if</span> <span class="n">source_format</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">allowed_formats</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{0}</span><span class="s2"> is not a valid source format. &quot;</span>
<span class="s2">&quot;Please use one of the following types: </span><span class="si">{1}</span><span class="s2">&quot;</span>
<span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">source_format</span><span class="p">,</span> <span class="n">allowed_formats</span><span class="p">))</span>
<span class="c1"># bigquery also allows you to define how you want a table&#39;s schema to change</span>
<span class="c1"># as a side effect of a load</span>
<span class="c1"># for more details:</span>
<span class="c1"># https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schemaUpdateOptions</span>
<span class="n">allowed_schema_update_options</span> <span class="o">=</span> <span class="p">[</span>
<span class="s1">&#39;ALLOW_FIELD_ADDITION&#39;</span><span class="p">,</span> <span class="s2">&quot;ALLOW_FIELD_RELAXATION&quot;</span>
<span class="p">]</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">set</span><span class="p">(</span><span class="n">allowed_schema_update_options</span><span class="p">)</span><span class="o">.</span><span class="n">issuperset</span><span class="p">(</span>
<span class="nb">set</span><span class="p">(</span><span class="n">schema_update_options</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;</span><span class="si">{0}</span><span class="s2"> contains invalid schema update options.&quot;</span>
<span class="s2">&quot;Please only use one or more of the following options: </span><span class="si">{1}</span><span class="s2">&quot;</span>
<span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">schema_update_options</span><span class="p">,</span> <span class="n">allowed_schema_update_options</span><span class="p">))</span>
<span class="n">destination_project</span><span class="p">,</span> <span class="n">destination_dataset</span><span class="p">,</span> <span class="n">destination_table</span> <span class="o">=</span> \
<span class="n">_split_tablename</span><span class="p">(</span><span class="n">table_input</span><span class="o">=</span><span class="n">destination_project_dataset_table</span><span class="p">,</span>
<span class="n">default_project_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">var_name</span><span class="o">=</span><span class="s1">&#39;destination_project_dataset_table&#39;</span><span class="p">)</span>
<span class="n">configuration</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;load&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;autodetect&#39;</span><span class="p">:</span> <span class="n">autodetect</span><span class="p">,</span>
<span class="s1">&#39;createDisposition&#39;</span><span class="p">:</span> <span class="n">create_disposition</span><span class="p">,</span>
<span class="s1">&#39;destinationTable&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;projectId&#39;</span><span class="p">:</span> <span class="n">destination_project</span><span class="p">,</span>
<span class="s1">&#39;datasetId&#39;</span><span class="p">:</span> <span class="n">destination_dataset</span><span class="p">,</span>
<span class="s1">&#39;tableId&#39;</span><span class="p">:</span> <span class="n">destination_table</span><span class="p">,</span>
<span class="p">},</span>
<span class="s1">&#39;sourceFormat&#39;</span><span class="p">:</span> <span class="n">source_format</span><span class="p">,</span>
<span class="s1">&#39;sourceUris&#39;</span><span class="p">:</span> <span class="n">source_uris</span><span class="p">,</span>
<span class="s1">&#39;writeDisposition&#39;</span><span class="p">:</span> <span class="n">write_disposition</span><span class="p">,</span>
<span class="s1">&#39;ignoreUnknownValues&#39;</span><span class="p">:</span> <span class="n">ignore_unknown_values</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="n">time_partitioning</span> <span class="o">=</span> <span class="n">_cleanse_time_partitioning</span><span class="p">(</span>
<span class="n">destination_project_dataset_table</span><span class="p">,</span>
<span class="n">time_partitioning</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">time_partitioning</span><span class="p">:</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;load&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">({</span>
<span class="s1">&#39;timePartitioning&#39;</span><span class="p">:</span> <span class="n">time_partitioning</span>
<span class="p">})</span>
<span class="k">if</span> <span class="n">cluster_fields</span><span class="p">:</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;load&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="s1">&#39;clustering&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;fields&#39;</span><span class="p">:</span> <span class="n">cluster_fields</span><span class="p">}})</span>
<span class="k">if</span> <span class="n">schema_fields</span><span class="p">:</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;load&#39;</span><span class="p">][</span><span class="s1">&#39;schema&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;fields&#39;</span><span class="p">:</span> <span class="n">schema_fields</span><span class="p">}</span>
<span class="k">if</span> <span class="n">schema_update_options</span><span class="p">:</span>
<span class="k">if</span> <span class="n">write_disposition</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;WRITE_APPEND&quot;</span><span class="p">,</span> <span class="s2">&quot;WRITE_TRUNCATE&quot;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;schema_update_options is only &quot;</span>
<span class="s2">&quot;allowed if write_disposition is &quot;</span>
<span class="s2">&quot;&#39;WRITE_APPEND&#39; or &#39;WRITE_TRUNCATE&#39;.&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s2">&quot;Adding experimental &#39;schemaUpdateOptions&#39;: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span>
<span class="n">schema_update_options</span>
<span class="p">)</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;load&#39;</span><span class="p">][</span>
<span class="s1">&#39;schemaUpdateOptions&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">schema_update_options</span>
<span class="k">if</span> <span class="n">max_bad_records</span><span class="p">:</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;load&#39;</span><span class="p">][</span><span class="s1">&#39;maxBadRecords&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">max_bad_records</span>
<span class="c1"># if the following fields are not specified in src_fmt_configs,</span>
<span class="c1"># honor the top-level params for backward-compatibility</span>
<span class="k">if</span> <span class="s1">&#39;skipLeadingRows&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">src_fmt_configs</span><span class="p">:</span>
<span class="n">src_fmt_configs</span><span class="p">[</span><span class="s1">&#39;skipLeadingRows&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">skip_leading_rows</span>
<span class="k">if</span> <span class="s1">&#39;fieldDelimiter&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">src_fmt_configs</span><span class="p">:</span>
<span class="n">src_fmt_configs</span><span class="p">[</span><span class="s1">&#39;fieldDelimiter&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">field_delimiter</span>
<span class="k">if</span> <span class="s1">&#39;ignoreUnknownValues&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">src_fmt_configs</span><span class="p">:</span>
<span class="n">src_fmt_configs</span><span class="p">[</span><span class="s1">&#39;ignoreUnknownValues&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">ignore_unknown_values</span>
<span class="k">if</span> <span class="n">quote_character</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">src_fmt_configs</span><span class="p">[</span><span class="s1">&#39;quote&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">quote_character</span>
<span class="k">if</span> <span class="n">allow_quoted_newlines</span><span class="p">:</span>
<span class="n">src_fmt_configs</span><span class="p">[</span><span class="s1">&#39;allowQuotedNewlines&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">allow_quoted_newlines</span>
<span class="n">src_fmt_to_configs_mapping</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;CSV&#39;</span><span class="p">:</span> <span class="p">[</span>
<span class="s1">&#39;allowJaggedRows&#39;</span><span class="p">,</span> <span class="s1">&#39;allowQuotedNewlines&#39;</span><span class="p">,</span> <span class="s1">&#39;autodetect&#39;</span><span class="p">,</span>
<span class="s1">&#39;fieldDelimiter&#39;</span><span class="p">,</span> <span class="s1">&#39;skipLeadingRows&#39;</span><span class="p">,</span> <span class="s1">&#39;ignoreUnknownValues&#39;</span><span class="p">,</span>
<span class="s1">&#39;nullMarker&#39;</span><span class="p">,</span> <span class="s1">&#39;quote&#39;</span>
<span class="p">],</span>
<span class="s1">&#39;DATASTORE_BACKUP&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;projectionFields&#39;</span><span class="p">],</span>
<span class="s1">&#39;NEWLINE_DELIMITED_JSON&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;autodetect&#39;</span><span class="p">,</span> <span class="s1">&#39;ignoreUnknownValues&#39;</span><span class="p">],</span>
<span class="s1">&#39;PARQUET&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;autodetect&#39;</span><span class="p">,</span> <span class="s1">&#39;ignoreUnknownValues&#39;</span><span class="p">],</span>
<span class="s1">&#39;AVRO&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;useAvroLogicalTypes&#39;</span><span class="p">],</span>
<span class="p">}</span>
<span class="n">valid_configs</span> <span class="o">=</span> <span class="n">src_fmt_to_configs_mapping</span><span class="p">[</span><span class="n">source_format</span><span class="p">]</span>
<span class="n">src_fmt_configs</span> <span class="o">=</span> <span class="p">{</span>
<span class="n">k</span><span class="p">:</span> <span class="n">v</span>
<span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">src_fmt_configs</span><span class="o">.</span><span class="n">items</span><span class="p">()</span> <span class="k">if</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">valid_configs</span>
<span class="p">}</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;load&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">src_fmt_configs</span><span class="p">)</span>
<span class="k">if</span> <span class="n">allow_jagged_rows</span><span class="p">:</span>
<span class="n">configuration</span><span class="p">[</span><span class="s1">&#39;load&#39;</span><span class="p">][</span><span class="s1">&#39;allowJaggedRows&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">allow_jagged_rows</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">run_with_configuration</span><span class="p">(</span><span class="n">configuration</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.run_with_configuration"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.run_with_configuration">[docs]</a> <span class="k">def</span> <span class="nf">run_with_configuration</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">configuration</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Submits a BigQuery job with the given configuration, waits for it to</span>
<span class="sd"> complete and returns the job ID. See</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/v2/jobs</span>
<span class="sd"> for more details about the configuration parameter.</span>
<span class="sd"> :param configuration: The configuration parameter maps directly to</span>
<span class="sd"> BigQuery&#39;s configuration field in the job object. See</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/v2/jobs for</span>
<span class="sd"> details.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">jobs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">jobs</span><span class="p">()</span>
<span class="n">job_data</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;configuration&#39;</span><span class="p">:</span> <span class="n">configuration</span><span class="p">}</span>
<span class="c1"># Send query and wait for reply.</span>
<span class="n">query_reply</span> <span class="o">=</span> <span class="n">jobs</span> \
<span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="n">projectId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span> <span class="n">body</span><span class="o">=</span><span class="n">job_data</span><span class="p">)</span> \
<span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span> <span class="o">=</span> <span class="n">query_reply</span><span class="p">[</span><span class="s1">&#39;jobReference&#39;</span><span class="p">][</span><span class="s1">&#39;jobId&#39;</span><span class="p">]</span>
<span class="k">if</span> <span class="s1">&#39;location&#39;</span> <span class="ow">in</span> <span class="n">query_reply</span><span class="p">[</span><span class="s1">&#39;jobReference&#39;</span><span class="p">]:</span>
<span class="n">location</span> <span class="o">=</span> <span class="n">query_reply</span><span class="p">[</span><span class="s1">&#39;jobReference&#39;</span><span class="p">][</span><span class="s1">&#39;location&#39;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">location</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">location</span>
<span class="c1"># Wait for query to finish.</span>
<span class="n">keep_polling_job</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">while</span> <span class="n">keep_polling_job</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">if</span> <span class="n">location</span><span class="p">:</span>
<span class="n">job</span> <span class="o">=</span> <span class="n">jobs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">jobId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">job</span> <span class="o">=</span> <span class="n">jobs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">jobId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">if</span> <span class="n">job</span><span class="p">[</span><span class="s1">&#39;status&#39;</span><span class="p">][</span><span class="s1">&#39;state&#39;</span><span class="p">]</span> <span class="o">==</span> <span class="s1">&#39;DONE&#39;</span><span class="p">:</span>
<span class="n">keep_polling_job</span> <span class="o">=</span> <span class="kc">False</span>
<span class="c1"># Check if job had errors.</span>
<span class="k">if</span> <span class="s1">&#39;errorResult&#39;</span> <span class="ow">in</span> <span class="n">job</span><span class="p">[</span><span class="s1">&#39;status&#39;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job failed. Final error was: </span><span class="si">{}</span><span class="s1">. The job was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span>
<span class="nb">format</span><span class="p">(</span><span class="n">job</span><span class="p">[</span><span class="s1">&#39;status&#39;</span><span class="p">][</span><span class="s1">&#39;errorResult&#39;</span><span class="p">],</span> <span class="n">job</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Waiting for job to complete : </span><span class="si">%s</span><span class="s1">, </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">)</span>
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
<span class="k">if</span> <span class="n">err</span><span class="o">.</span><span class="n">resp</span><span class="o">.</span><span class="n">status</span> <span class="ow">in</span> <span class="p">[</span><span class="mi">500</span><span class="p">,</span> <span class="mi">503</span><span class="p">]:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s1">&#39;</span><span class="si">%s</span><span class="s1">: Retryable error, waiting for job to complete: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">err</span><span class="o">.</span><span class="n">resp</span><span class="o">.</span><span class="n">status</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">)</span>
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job status check failed. Final error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span>
<span class="nb">format</span><span class="p">(</span><span class="n">err</span><span class="o">.</span><span class="n">resp</span><span class="o">.</span><span class="n">status</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.poll_job_complete"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.poll_job_complete">[docs]</a> <span class="k">def</span> <span class="nf">poll_job_complete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">job_id</span><span class="p">):</span>
<span class="n">jobs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">jobs</span><span class="p">()</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="p">:</span>
<span class="n">job</span> <span class="o">=</span> <span class="n">jobs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">projectId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">jobId</span><span class="o">=</span><span class="n">job_id</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">job</span> <span class="o">=</span> <span class="n">jobs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">projectId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">jobId</span><span class="o">=</span><span class="n">job_id</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">if</span> <span class="n">job</span><span class="p">[</span><span class="s1">&#39;status&#39;</span><span class="p">][</span><span class="s1">&#39;state&#39;</span><span class="p">]</span> <span class="o">==</span> <span class="s1">&#39;DONE&#39;</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
<span class="k">if</span> <span class="n">err</span><span class="o">.</span><span class="n">resp</span><span class="o">.</span><span class="n">status</span> <span class="ow">in</span> <span class="p">[</span><span class="mi">500</span><span class="p">,</span> <span class="mi">503</span><span class="p">]:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s1">&#39;</span><span class="si">%s</span><span class="s1">: Retryable error while polling job with id </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">err</span><span class="o">.</span><span class="n">resp</span><span class="o">.</span><span class="n">status</span><span class="p">,</span> <span class="n">job_id</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job status check failed. Final error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span>
<span class="nb">format</span><span class="p">(</span><span class="n">err</span><span class="o">.</span><span class="n">resp</span><span class="o">.</span><span class="n">status</span><span class="p">))</span>
<span class="k">return</span> <span class="kc">False</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.cancel_query"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.cancel_query">[docs]</a> <span class="k">def</span> <span class="nf">cancel_query</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Cancel the currently running job if it has not yet completed.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">jobs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">jobs</span><span class="p">()</span>
<span class="k">if</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span> <span class="ow">and</span>
<span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">poll_job_complete</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">)):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Attempting to cancel job : </span><span class="si">%s</span><span class="s1">, </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="p">:</span>
<span class="n">jobs</span><span class="o">.</span><span class="n">cancel</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">jobId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">location</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">jobs</span><span class="o">.</span><span class="n">cancel</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">jobId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;No running BigQuery jobs to cancel.&#39;</span><span class="p">)</span>
<span class="k">return</span>
<span class="c1"># Wait for all the calls to cancel to finish</span>
<span class="n">max_polling_attempts</span> <span class="o">=</span> <span class="mi">12</span>
<span class="n">polling_attempts</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">job_complete</span> <span class="o">=</span> <span class="kc">False</span>
<span class="k">while</span> <span class="n">polling_attempts</span> <span class="o">&lt;</span> <span class="n">max_polling_attempts</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">job_complete</span><span class="p">:</span>
<span class="n">polling_attempts</span> <span class="o">=</span> <span class="n">polling_attempts</span> <span class="o">+</span> <span class="mi">1</span>
<span class="n">job_complete</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">poll_job_complete</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">)</span>
<span class="k">if</span> <span class="n">job_complete</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Job successfully canceled: </span><span class="si">%s</span><span class="s1">, </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">polling_attempts</span> <span class="o">==</span> <span class="n">max_polling_attempts</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s2">&quot;Stopping polling due to timeout. Job with id </span><span class="si">%s</span><span class="s2"> &quot;</span>
<span class="s2">&quot;has not completed cancel and may or may not finish.&quot;</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Waiting for canceled job with id </span><span class="si">%s</span><span class="s1"> to finish.&#39;</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">running_job_id</span><span class="p">)</span>
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.get_schema"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.get_schema">[docs]</a> <span class="k">def</span> <span class="nf">get_schema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Get the schema for a given dataset.table.</span>
<span class="sd"> See https://cloud.google.com/bigquery/docs/reference/v2/tables#resource</span>
<span class="sd"> :param dataset_id: the dataset ID of the requested table</span>
<span class="sd"> :param table_id: the table ID of the requested table</span>
<span class="sd"> :return: a table schema</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">tables_resource</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span> \
<span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">projectId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span> <span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span> <span class="n">tableId</span><span class="o">=</span><span class="n">table_id</span><span class="p">)</span> \
<span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">return</span> <span class="n">tables_resource</span><span class="p">[</span><span class="s1">&#39;schema&#39;</span><span class="p">]</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.get_tabledata"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.get_tabledata">[docs]</a> <span class="k">def</span> <span class="nf">get_tabledata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">,</span>
<span class="n">max_results</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">selected_fields</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">page_token</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">start_index</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Get the data of a given dataset.table, optionally restricted to selected columns.</span>
<span class="sd"> See https://cloud.google.com/bigquery/docs/reference/v2/tabledata/list</span>
<span class="sd"> :param dataset_id: the dataset ID of the requested table.</span>
<span class="sd"> :param table_id: the table ID of the requested table.</span>
<span class="sd"> :param max_results: the maximum results to return.</span>
<span class="sd"> :param selected_fields: List of fields to return (comma-separated). If</span>
<span class="sd"> unspecified, all fields are returned.</span>
<span class="sd"> :param page_token: page token, returned from a previous call,</span>
<span class="sd"> identifying the result set.</span>
<span class="sd"> :param start_index: zero based index of the starting row to read.</span>
<span class="sd"> :return: map containing the requested rows.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">optional_params</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">if</span> <span class="n">max_results</span><span class="p">:</span>
<span class="n">optional_params</span><span class="p">[</span><span class="s1">&#39;maxResults&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">max_results</span>
<span class="k">if</span> <span class="n">selected_fields</span><span class="p">:</span>
<span class="n">optional_params</span><span class="p">[</span><span class="s1">&#39;selectedFields&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">selected_fields</span>
<span class="k">if</span> <span class="n">page_token</span><span class="p">:</span>
<span class="n">optional_params</span><span class="p">[</span><span class="s1">&#39;pageToken&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">page_token</span>
<span class="k">if</span> <span class="n">start_index</span><span class="p">:</span>
<span class="n">optional_params</span><span class="p">[</span><span class="s1">&#39;startIndex&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">start_index</span>
<span class="k">return</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tabledata</span><span class="p">()</span><span class="o">.</span><span class="n">list</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span>
<span class="n">tableId</span><span class="o">=</span><span class="n">table_id</span><span class="p">,</span>
<span class="o">**</span><span class="n">optional_params</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">))</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.run_table_delete"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.run_table_delete">[docs]</a> <span class="k">def</span> <span class="nf">run_table_delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deletion_dataset_table</span><span class="p">,</span>
<span class="n">ignore_if_missing</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Delete an existing table from the dataset;</span>
<span class="sd"> If the table does not exist, return an error unless ignore_if_missing</span>
<span class="sd"> is set to True.</span>
<span class="sd"> :param deletion_dataset_table: A dotted</span>
<span class="sd"> ``(&lt;project&gt;.|&lt;project&gt;:)&lt;dataset&gt;.&lt;table&gt;`` that indicates which table</span>
<span class="sd"> will be deleted.</span>
<span class="sd"> :type deletion_dataset_table: str</span>
<span class="sd"> :param ignore_if_missing: if True, then return success even if the</span>
<span class="sd"> requested table does not exist.</span>
<span class="sd"> :type ignore_if_missing: bool</span>
<span class="sd"> :return:</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">deletion_project</span><span class="p">,</span> <span class="n">deletion_dataset</span><span class="p">,</span> <span class="n">deletion_table</span> <span class="o">=</span> \
<span class="n">_split_tablename</span><span class="p">(</span><span class="n">table_input</span><span class="o">=</span><span class="n">deletion_dataset_table</span><span class="p">,</span>
<span class="n">default_project_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span> \
<span class="o">.</span><span class="n">delete</span><span class="p">(</span><span class="n">projectId</span><span class="o">=</span><span class="n">deletion_project</span><span class="p">,</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">deletion_dataset</span><span class="p">,</span>
<span class="n">tableId</span><span class="o">=</span><span class="n">deletion_table</span><span class="p">)</span> \
<span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Deleted table </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1">.&#39;</span><span class="p">,</span> <span class="n">deletion_project</span><span class="p">,</span>
<span class="n">deletion_dataset</span><span class="p">,</span> <span class="n">deletion_table</span><span class="p">)</span>
<span class="k">except</span> <span class="n">HttpError</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">ignore_if_missing</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s1">&#39;Table deletion failed. Table does not exist.&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Table does not exist. Skipping.&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.run_table_upsert"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.run_table_upsert">[docs]</a> <span class="k">def</span> <span class="nf">run_table_upsert</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_resource</span><span class="p">,</span> <span class="n">project_id</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> creates a new, empty table in the dataset;</span>
<span class="sd"> If the table already exists, update the existing table.</span>
<span class="sd"> Since BigQuery does not natively allow table upserts, this is not an</span>
<span class="sd"> atomic operation.</span>
<span class="sd"> :param dataset_id: the dataset to upsert the table into.</span>
<span class="sd"> :type dataset_id: str</span>
<span class="sd"> :param table_resource: a table resource. see</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/v2/tables#resource</span>
<span class="sd"> :type table_resource: dict</span>
<span class="sd"> :param project_id: the project to upsert the table into. If None,</span>
<span class="sd"> project will be self.project_id.</span>
<span class="sd"> :return:</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># check to see if the table exists</span>
<span class="n">table_id</span> <span class="o">=</span> <span class="n">table_resource</span><span class="p">[</span><span class="s1">&#39;tableReference&#39;</span><span class="p">][</span><span class="s1">&#39;tableId&#39;</span><span class="p">]</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="n">project_id</span> <span class="k">if</span> <span class="n">project_id</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span>
<span class="n">tables_list_resp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span><span class="o">.</span><span class="n">list</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span> <span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="k">for</span> <span class="n">table</span> <span class="ow">in</span> <span class="n">tables_list_resp</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;tables&#39;</span><span class="p">,</span> <span class="p">[]):</span>
<span class="k">if</span> <span class="n">table</span><span class="p">[</span><span class="s1">&#39;tableReference&#39;</span><span class="p">][</span><span class="s1">&#39;tableId&#39;</span><span class="p">]</span> <span class="o">==</span> <span class="n">table_id</span><span class="p">:</span>
<span class="c1"># found the table, do update</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Table </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1"> exists, updating.&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span><span class="o">.</span><span class="n">update</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span>
<span class="n">tableId</span><span class="o">=</span><span class="n">table_id</span><span class="p">,</span>
<span class="n">body</span><span class="o">=</span><span class="n">table_resource</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="c1"># If there is a next page, we need to check the next page.</span>
<span class="k">if</span> <span class="s1">&#39;nextPageToken&#39;</span> <span class="ow">in</span> <span class="n">tables_list_resp</span><span class="p">:</span>
<span class="n">tables_list_resp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span>\
<span class="o">.</span><span class="n">list</span><span class="p">(</span><span class="n">projectId</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span>
<span class="n">pageToken</span><span class="o">=</span><span class="n">tables_list_resp</span><span class="p">[</span><span class="s1">&#39;nextPageToken&#39;</span><span class="p">])</span>\
<span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="c1"># If there is no next page, then the table doesn&#39;t exist.</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># do insert</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Table </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1"> does not exist. creating.&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tables</span><span class="p">()</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span>
<span class="n">body</span><span class="o">=</span><span class="n">table_resource</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.run_grant_dataset_view_access"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.run_grant_dataset_view_access">[docs]</a> <span class="k">def</span> <span class="nf">run_grant_dataset_view_access</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">source_dataset</span><span class="p">,</span>
<span class="n">view_dataset</span><span class="p">,</span>
<span class="n">view_table</span><span class="p">,</span>
<span class="n">source_project</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">view_project</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Grant authorized view access of a dataset to a view table.</span>
<span class="sd"> If this view has already been granted access to the dataset, do nothing.</span>
<span class="sd"> This method is not atomic. Running it may clobber a simultaneous update.</span>
<span class="sd"> :param source_dataset: the source dataset</span>
<span class="sd"> :type source_dataset: str</span>
<span class="sd"> :param view_dataset: the dataset that the view is in</span>
<span class="sd"> :type view_dataset: str</span>
<span class="sd"> :param view_table: the table of the view</span>
<span class="sd"> :type view_table: str</span>
<span class="sd"> :param source_project: the project of the source dataset. If None,</span>
<span class="sd"> self.project_id will be used.</span>
<span class="sd"> :type source_project: str</span>
<span class="sd"> :param view_project: the project that the view is in. If None,</span>
<span class="sd"> self.project_id will be used.</span>
<span class="sd"> :type view_project: str</span>
<span class="sd"> :return: the datasets resource of the source dataset.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Apply default values to projects</span>
<span class="n">source_project</span> <span class="o">=</span> <span class="n">source_project</span> <span class="k">if</span> <span class="n">source_project</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span>
<span class="n">view_project</span> <span class="o">=</span> <span class="n">view_project</span> <span class="k">if</span> <span class="n">view_project</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span>
<span class="c1"># we don&#39;t want to clobber any existing accesses, so we have to get</span>
<span class="c1"># info on the dataset before we can add view access</span>
<span class="n">source_dataset_resource</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">datasets</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">source_project</span><span class="p">,</span> <span class="n">datasetId</span><span class="o">=</span><span class="n">source_dataset</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="n">access</span> <span class="o">=</span> <span class="n">source_dataset_resource</span><span class="p">[</span>
<span class="s1">&#39;access&#39;</span><span class="p">]</span> <span class="k">if</span> <span class="s1">&#39;access&#39;</span> <span class="ow">in</span> <span class="n">source_dataset_resource</span> <span class="k">else</span> <span class="p">[]</span>
<span class="n">view_access</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;view&#39;</span><span class="p">:</span> <span class="p">{</span>
<span class="s1">&#39;projectId&#39;</span><span class="p">:</span> <span class="n">view_project</span><span class="p">,</span>
<span class="s1">&#39;datasetId&#39;</span><span class="p">:</span> <span class="n">view_dataset</span><span class="p">,</span>
<span class="s1">&#39;tableId&#39;</span><span class="p">:</span> <span class="n">view_table</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="c1"># check to see if the view we want to add already exists.</span>
<span class="k">if</span> <span class="n">view_access</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">access</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s1">&#39;Granting table </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1"> authorized view access to </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1"> dataset.&#39;</span><span class="p">,</span>
<span class="n">view_project</span><span class="p">,</span> <span class="n">view_dataset</span><span class="p">,</span> <span class="n">view_table</span><span class="p">,</span> <span class="n">source_project</span><span class="p">,</span>
<span class="n">source_dataset</span><span class="p">)</span>
<span class="n">access</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">view_access</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">datasets</span><span class="p">()</span><span class="o">.</span><span class="n">patch</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">source_project</span><span class="p">,</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">source_dataset</span><span class="p">,</span>
<span class="n">body</span><span class="o">=</span><span class="p">{</span>
<span class="s1">&#39;access&#39;</span><span class="p">:</span> <span class="n">access</span>
<span class="p">})</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># if view is already in access, do nothing.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s1">&#39;Table </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1"> already has authorized view access to </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1"> dataset.&#39;</span><span class="p">,</span>
<span class="n">view_project</span><span class="p">,</span> <span class="n">view_dataset</span><span class="p">,</span> <span class="n">view_table</span><span class="p">,</span> <span class="n">source_project</span><span class="p">,</span> <span class="n">source_dataset</span><span class="p">)</span>
<span class="k">return</span> <span class="n">source_dataset_resource</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.create_empty_dataset"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.create_empty_dataset">[docs]</a> <span class="k">def</span> <span class="nf">create_empty_dataset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset_id</span><span class="o">=</span><span class="s2">&quot;&quot;</span><span class="p">,</span> <span class="n">project_id</span><span class="o">=</span><span class="s2">&quot;&quot;</span><span class="p">,</span>
<span class="n">dataset_reference</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Create a new empty dataset:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert</span>
<span class="sd"> :param project_id: The name of the project where we want to create</span>
<span class="sd"> an empty dataset. Don&#39;t need to provide, if projectId in dataset_reference.</span>
<span class="sd"> :type project_id: str</span>
<span class="sd"> :param dataset_id: The id of dataset. Don&#39;t need to provide,</span>
<span class="sd"> if datasetId in dataset_reference.</span>
<span class="sd"> :type dataset_id: str</span>
<span class="sd"> :param dataset_reference: Dataset reference that could be provided</span>
<span class="sd"> with request body. More info:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource</span>
<span class="sd"> :type dataset_reference: dict</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">dataset_reference</span><span class="p">:</span>
<span class="n">_validate_value</span><span class="p">(</span><span class="s1">&#39;dataset_reference&#39;</span><span class="p">,</span> <span class="n">dataset_reference</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">dataset_reference</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">if</span> <span class="s2">&quot;datasetReference&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">dataset_reference</span><span class="p">:</span>
<span class="n">dataset_reference</span><span class="p">[</span><span class="s2">&quot;datasetReference&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">dataset_reference</span><span class="p">[</span><span class="s2">&quot;datasetReference&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;datasetId&quot;</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">dataset_id</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> not provided datasetId. Impossible to create dataset&quot;</span><span class="p">)</span>
<span class="n">dataset_required_params</span> <span class="o">=</span> <span class="p">[(</span><span class="n">dataset_id</span><span class="p">,</span> <span class="s2">&quot;datasetId&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">),</span>
<span class="p">(</span><span class="n">project_id</span><span class="p">,</span> <span class="s2">&quot;projectId&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">)]</span>
<span class="k">for</span> <span class="n">param_tuple</span> <span class="ow">in</span> <span class="n">dataset_required_params</span><span class="p">:</span>
<span class="n">param</span><span class="p">,</span> <span class="n">param_name</span><span class="p">,</span> <span class="n">param_default</span> <span class="o">=</span> <span class="n">param_tuple</span>
<span class="k">if</span> <span class="n">param_name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">dataset_reference</span><span class="p">[</span><span class="s1">&#39;datasetReference&#39;</span><span class="p">]:</span>
<span class="k">if</span> <span class="n">param_default</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">param</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s2">&quot;</span><span class="si">%s</span><span class="s2"> was not specified. Will be used default value </span><span class="si">%s</span><span class="s2">.&quot;</span><span class="p">,</span>
<span class="n">param_name</span><span class="p">,</span> <span class="n">param_default</span>
<span class="p">)</span>
<span class="n">param</span> <span class="o">=</span> <span class="n">param_default</span>
<span class="n">dataset_reference</span><span class="p">[</span><span class="s1">&#39;datasetReference&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span>
<span class="p">{</span><span class="n">param_name</span><span class="p">:</span> <span class="n">param</span><span class="p">})</span>
<span class="k">elif</span> <span class="n">param</span><span class="p">:</span>
<span class="n">_api_resource_configs_duplication_check</span><span class="p">(</span>
<span class="n">param_name</span><span class="p">,</span> <span class="n">param</span><span class="p">,</span>
<span class="n">dataset_reference</span><span class="p">[</span><span class="s1">&#39;datasetReference&#39;</span><span class="p">],</span> <span class="s1">&#39;dataset_reference&#39;</span><span class="p">)</span>
<span class="n">dataset_id</span> <span class="o">=</span> <span class="n">dataset_reference</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;datasetReference&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;datasetId&quot;</span><span class="p">)</span>
<span class="n">dataset_project_id</span> <span class="o">=</span> <span class="n">dataset_reference</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;datasetReference&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">get</span><span class="p">(</span>
<span class="s2">&quot;projectId&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Creating Dataset: </span><span class="si">%s</span><span class="s1"> in project: </span><span class="si">%s</span><span class="s1"> &#39;</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span>
<span class="n">dataset_project_id</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">datasets</span><span class="p">()</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">dataset_project_id</span><span class="p">,</span>
<span class="n">body</span><span class="o">=</span><span class="n">dataset_reference</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Dataset created successfully: In project </span><span class="si">%s</span><span class="s1"> &#39;</span>
<span class="s1">&#39;Dataset </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">dataset_project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">)</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job failed. Error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="o">.</span><span class="n">content</span><span class="p">)</span></div>
<span class="p">)</span>
<div class="viewcode-block" id="BigQueryBaseCursor.delete_dataset"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.delete_dataset">[docs]</a> <span class="k">def</span> <span class="nf">delete_dataset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Delete a BigQuery dataset in your project.</span>
<span class="sd"> :param project_id: The name of the project where we have the dataset.</span>
<span class="sd"> :type project_id: str</span>
<span class="sd"> :param dataset_id: The dataset to be deleted.</span>
<span class="sd"> :type dataset_id: str</span>
<span class="sd"> :return:</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="n">project_id</span> <span class="k">if</span> <span class="n">project_id</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Deleting from project: </span><span class="si">%s</span><span class="s1"> Dataset:</span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">datasets</span><span class="p">()</span><span class="o">.</span><span class="n">delete</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;Dataset deleted successfully: In project </span><span class="si">%s</span><span class="s1"> &#39;</span>
<span class="s1">&#39;Dataset </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">)</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job failed. Error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="o">.</span><span class="n">content</span><span class="p">)</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.get_dataset"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.get_dataset">[docs]</a> <span class="k">def</span> <span class="nf">get_dataset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">project_id</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Method returns dataset_resource if the dataset exists</span>
<span class="sd"> and raises a 404 error if the dataset does not exist</span>
<span class="sd"> :param dataset_id: The BigQuery Dataset ID</span>
<span class="sd"> :type dataset_id: str</span>
<span class="sd"> :param project_id: The GCP Project ID</span>
<span class="sd"> :type project_id: str</span>
<span class="sd"> :return: dataset_resource</span>
<span class="sd"> .. seealso::</span>
<span class="sd"> For more information, see Dataset Resource content:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">dataset_id</span> <span class="ow">or</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">dataset_id</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;dataset_id argument must be provided and has &quot;</span>
<span class="s2">&quot;a type &#39;str&#39;. You provided: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">dataset_id</span><span class="p">))</span>
<span class="n">dataset_project_id</span> <span class="o">=</span> <span class="n">project_id</span> <span class="k">if</span> <span class="n">project_id</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">dataset_resource</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">datasets</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span>
<span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span> <span class="n">projectId</span><span class="o">=</span><span class="n">dataset_project_id</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Dataset Resource: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">dataset_resource</span><span class="p">)</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job failed. Error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="o">.</span><span class="n">content</span><span class="p">))</span>
<span class="k">return</span> <span class="n">dataset_resource</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.get_datasets_list"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.get_datasets_list">[docs]</a> <span class="k">def</span> <span class="nf">get_datasets_list</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">project_id</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Method returns full list of BigQuery datasets in the current project</span>
<span class="sd"> .. seealso::</span>
<span class="sd"> For more information, see:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list</span>
<span class="sd"> :param project_id: Google Cloud Project for which you</span>
<span class="sd"> try to get all datasets</span>
<span class="sd"> :type project_id: str</span>
<span class="sd"> :return: datasets_list</span>
<span class="sd"> Example of returned datasets_list: ::</span>
<span class="sd"> [</span>
<span class="sd"> {</span>
<span class="sd"> &quot;kind&quot;:&quot;bigquery#dataset&quot;,</span>
<span class="sd"> &quot;location&quot;:&quot;US&quot;,</span>
<span class="sd"> &quot;id&quot;:&quot;your-project:dataset_2_test&quot;,</span>
<span class="sd"> &quot;datasetReference&quot;:{</span>
<span class="sd"> &quot;projectId&quot;:&quot;your-project&quot;,</span>
<span class="sd"> &quot;datasetId&quot;:&quot;dataset_2_test&quot;</span>
<span class="sd"> }</span>
<span class="sd"> },</span>
<span class="sd"> {</span>
<span class="sd"> &quot;kind&quot;:&quot;bigquery#dataset&quot;,</span>
<span class="sd"> &quot;location&quot;:&quot;US&quot;,</span>
<span class="sd"> &quot;id&quot;:&quot;your-project:dataset_1_test&quot;,</span>
<span class="sd"> &quot;datasetReference&quot;:{</span>
<span class="sd"> &quot;projectId&quot;:&quot;your-project&quot;,</span>
<span class="sd"> &quot;datasetId&quot;:&quot;dataset_1_test&quot;</span>
<span class="sd"> }</span>
<span class="sd"> }</span>
<span class="sd"> ]</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">dataset_project_id</span> <span class="o">=</span> <span class="n">project_id</span> <span class="k">if</span> <span class="n">project_id</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">datasets_list</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">datasets</span><span class="p">()</span><span class="o">.</span><span class="n">list</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">dataset_project_id</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)[</span><span class="s1">&#39;datasets&#39;</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Datasets List: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">datasets_list</span><span class="p">)</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job failed. Error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="o">.</span><span class="n">content</span><span class="p">))</span>
<span class="k">return</span> <span class="n">datasets_list</span></div>
<div class="viewcode-block" id="BigQueryBaseCursor.insert_all"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryBaseCursor.insert_all">[docs]</a> <span class="k">def</span> <span class="nf">insert_all</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">,</span>
<span class="n">rows</span><span class="p">,</span> <span class="n">ignore_unknown_values</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">skip_invalid_rows</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">fail_on_error</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Method to stream data into BigQuery one record at a time without needing</span>
<span class="sd"> to run a load job</span>
<span class="sd"> .. seealso::</span>
<span class="sd"> For more information, see:</span>
<span class="sd"> https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll</span>
<span class="sd"> :param project_id: The name of the project where we have the table</span>
<span class="sd"> :type project_id: str</span>
<span class="sd"> :param dataset_id: The name of the dataset where we have the table</span>
<span class="sd"> :type dataset_id: str</span>
<span class="sd"> :param table_id: The name of the table</span>
<span class="sd"> :type table_id: str</span>
<span class="sd"> :param rows: the rows to insert</span>
<span class="sd"> :type rows: list</span>
<span class="sd"> **Example of rows**:</span>
<span class="sd"> rows=[{&quot;json&quot;: {&quot;a_key&quot;: &quot;a_value_0&quot;}}, {&quot;json&quot;: {&quot;a_key&quot;: &quot;a_value_1&quot;}}]</span>
<span class="sd"> :param ignore_unknown_values: [Optional] Accept rows that contain values</span>
<span class="sd"> that do not match the schema. The unknown values are ignored.</span>
<span class="sd"> The default value is false, which treats unknown values as errors.</span>
<span class="sd"> :type ignore_unknown_values: bool</span>
<span class="sd"> :param skip_invalid_rows: [Optional] Insert all valid rows of a request,</span>
<span class="sd"> even if invalid rows exist. The default value is false, which causes</span>
<span class="sd"> the entire request to fail if any invalid rows exist.</span>
<span class="sd"> :type skip_invalid_rows: bool</span>
<span class="sd"> :param fail_on_error: [Optional] Force the task to fail if any errors occur.</span>
<span class="sd"> The default value is false, which indicates the task should not fail</span>
<span class="sd"> even if any insertion errors occur.</span>
<span class="sd"> :type fail_on_error: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">dataset_project_id</span> <span class="o">=</span> <span class="n">project_id</span> <span class="k">if</span> <span class="n">project_id</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">project_id</span>
<span class="n">body</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;rows&quot;</span><span class="p">:</span> <span class="n">rows</span><span class="p">,</span>
<span class="s2">&quot;ignoreUnknownValues&quot;</span><span class="p">:</span> <span class="n">ignore_unknown_values</span><span class="p">,</span>
<span class="s2">&quot;kind&quot;</span><span class="p">:</span> <span class="s2">&quot;bigquery#tableDataInsertAllRequest&quot;</span><span class="p">,</span>
<span class="s2">&quot;skipInvalidRows&quot;</span><span class="p">:</span> <span class="n">skip_invalid_rows</span><span class="p">,</span>
<span class="p">}</span>
<span class="k">try</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s1">&#39;Inserting </span><span class="si">%s</span><span class="s1"> row(s) into Table </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="nb">len</span><span class="p">(</span><span class="n">rows</span><span class="p">),</span> <span class="n">dataset_project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span>
<span class="p">)</span>
<span class="n">resp</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">tabledata</span><span class="p">()</span><span class="o">.</span><span class="n">insertAll</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="n">dataset_project_id</span><span class="p">,</span> <span class="n">datasetId</span><span class="o">=</span><span class="n">dataset_id</span><span class="p">,</span>
<span class="n">tableId</span><span class="o">=</span><span class="n">table_id</span><span class="p">,</span> <span class="n">body</span><span class="o">=</span><span class="n">body</span>
<span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">)</span>
<span class="k">if</span> <span class="s1">&#39;insertErrors&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">resp</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s1">&#39;All row(s) inserted successfully: </span><span class="si">%s</span><span class="s1">:</span><span class="si">%s</span><span class="s1">.</span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">dataset_project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">error_msg</span> <span class="o">=</span> <span class="s1">&#39;</span><span class="si">{}</span><span class="s1"> insert error(s) occured: </span><span class="si">{}</span><span class="s1">:</span><span class="si">{}</span><span class="s1">.</span><span class="si">{}</span><span class="s1">. Details: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="nb">len</span><span class="p">(</span><span class="n">resp</span><span class="p">[</span><span class="s1">&#39;insertErrors&#39;</span><span class="p">]),</span>
<span class="n">dataset_project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span><span class="p">,</span> <span class="n">resp</span><span class="p">[</span><span class="s1">&#39;insertErrors&#39;</span><span class="p">])</span>
<span class="k">if</span> <span class="n">fail_on_error</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job failed. Error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">error_msg</span><span class="p">)</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">error_msg</span><span class="p">)</span>
<span class="k">except</span> <span class="n">HttpError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span>
<span class="s1">&#39;BigQuery job failed. Error was: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="o">.</span><span class="n">content</span><span class="p">)</span>
<span class="p">)</span></div></div>
<div class="viewcode-block" id="BigQueryCursor"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor">[docs]</a><span class="k">class</span> <span class="nc">BigQueryCursor</span><span class="p">(</span><span class="n">BigQueryBaseCursor</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A very basic BigQuery PEP 249 cursor implementation. The PyHive PEP 249</span>
<span class="sd"> implementation was used as a reference:</span>
<span class="sd"> https://github.com/dropbox/PyHive/blob/master/pyhive/presto.py</span>
<span class="sd"> https://github.com/dropbox/PyHive/blob/master/pyhive/common.py</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">service</span><span class="p">,</span> <span class="n">project_id</span><span class="p">,</span> <span class="n">use_legacy_sql</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">location</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">num_retries</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">BigQueryCursor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span>
<span class="n">service</span><span class="o">=</span><span class="n">service</span><span class="p">,</span>
<span class="n">project_id</span><span class="o">=</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">use_legacy_sql</span><span class="o">=</span><span class="n">use_legacy_sql</span><span class="p">,</span>
<span class="n">location</span><span class="o">=</span><span class="n">location</span><span class="p">,</span>
<span class="n">num_retries</span><span class="o">=</span><span class="n">num_retries</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">buffersize</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">page_token</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">job_id</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">buffer</span> <span class="o">=</span> <span class="p">[]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">all_pages_loaded</span> <span class="o">=</span> <span class="kc">False</span>
<span class="nd">@property</span>
<div class="viewcode-block" id="BigQueryCursor.description"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.description">[docs]</a> <span class="k">def</span> <span class="nf">description</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; The schema description method is not currently implemented. &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<div class="viewcode-block" id="BigQueryCursor.close"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.close">[docs]</a> <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; By default, do nothing &quot;&quot;&quot;</span>
<span class="k">pass</span></div>
<span class="nd">@property</span>
<div class="viewcode-block" id="BigQueryCursor.rowcount"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.rowcount">[docs]</a> <span class="k">def</span> <span class="nf">rowcount</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; By default, return -1 to indicate that this is not supported. &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="o">-</span><span class="mi">1</span></div>
<div class="viewcode-block" id="BigQueryCursor.execute"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.execute">[docs]</a> <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">operation</span><span class="p">,</span> <span class="n">parameters</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Executes a BigQuery query, and returns the job ID.</span>
<span class="sd"> :param operation: The query to execute.</span>
<span class="sd"> :type operation: str</span>
<span class="sd"> :param parameters: Parameters to substitute into the query.</span>
<span class="sd"> :type parameters: dict</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">sql</span> <span class="o">=</span> <span class="n">_bind_parameters</span><span class="p">(</span><span class="n">operation</span><span class="p">,</span>
<span class="n">parameters</span><span class="p">)</span> <span class="k">if</span> <span class="n">parameters</span> <span class="k">else</span> <span class="n">operation</span>
<span class="bp">self</span><span class="o">.</span><span class="n">job_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">run_query</span><span class="p">(</span><span class="n">sql</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryCursor.executemany"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.executemany">[docs]</a> <span class="k">def</span> <span class="nf">executemany</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">operation</span><span class="p">,</span> <span class="n">seq_of_parameters</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Execute a BigQuery query multiple times with different parameters.</span>
<span class="sd"> :param operation: The query to execute.</span>
<span class="sd"> :type operation: str</span>
<span class="sd"> :param seq_of_parameters: List of dictionary parameters to substitute into the</span>
<span class="sd"> query.</span>
<span class="sd"> :type seq_of_parameters: list</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">for</span> <span class="n">parameters</span> <span class="ow">in</span> <span class="n">seq_of_parameters</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">operation</span><span class="p">,</span> <span class="n">parameters</span><span class="p">)</span></div>
<div class="viewcode-block" id="BigQueryCursor.fetchone"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.fetchone">[docs]</a> <span class="k">def</span> <span class="nf">fetchone</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; Fetch the next row of a query result set. &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">next</span><span class="p">()</span></div>
<div class="viewcode-block" id="BigQueryCursor.next"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.next">[docs]</a> <span class="k">def</span> <span class="nf">next</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Helper method for fetchone, which returns the next row from a buffer.</span>
<span class="sd"> If the buffer is empty, attempts to paginate through the result set for</span>
<span class="sd"> the next page, and load it into the buffer.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">job_id</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">buffer</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">all_pages_loaded</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="n">query_results</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">service</span><span class="o">.</span><span class="n">jobs</span><span class="p">()</span><span class="o">.</span><span class="n">getQueryResults</span><span class="p">(</span>
<span class="n">projectId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">project_id</span><span class="p">,</span>
<span class="n">jobId</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">job_id</span><span class="p">,</span>
<span class="n">pageToken</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">page_token</span><span class="p">)</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">num_retries</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">num_retries</span><span class="p">))</span>
<span class="k">if</span> <span class="s1">&#39;rows&#39;</span> <span class="ow">in</span> <span class="n">query_results</span> <span class="ow">and</span> <span class="n">query_results</span><span class="p">[</span><span class="s1">&#39;rows&#39;</span><span class="p">]:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">page_token</span> <span class="o">=</span> <span class="n">query_results</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;pageToken&#39;</span><span class="p">)</span>
<span class="n">fields</span> <span class="o">=</span> <span class="n">query_results</span><span class="p">[</span><span class="s1">&#39;schema&#39;</span><span class="p">][</span><span class="s1">&#39;fields&#39;</span><span class="p">]</span>
<span class="n">col_types</span> <span class="o">=</span> <span class="p">[</span><span class="n">field</span><span class="p">[</span><span class="s1">&#39;type&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">field</span> <span class="ow">in</span> <span class="n">fields</span><span class="p">]</span>
<span class="n">rows</span> <span class="o">=</span> <span class="n">query_results</span><span class="p">[</span><span class="s1">&#39;rows&#39;</span><span class="p">]</span>
<span class="k">for</span> <span class="n">dict_row</span> <span class="ow">in</span> <span class="n">rows</span><span class="p">:</span>
<span class="n">typed_row</span> <span class="o">=</span> <span class="p">([</span>
<span class="n">_bq_cast</span><span class="p">(</span><span class="n">vs</span><span class="p">[</span><span class="s1">&#39;v&#39;</span><span class="p">],</span> <span class="n">col_types</span><span class="p">[</span><span class="n">idx</span><span class="p">])</span>
<span class="k">for</span> <span class="n">idx</span><span class="p">,</span> <span class="n">vs</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">dict_row</span><span class="p">[</span><span class="s1">&#39;f&#39;</span><span class="p">])</span>
<span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">buffer</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">typed_row</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">page_token</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">all_pages_loaded</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Reset all state since we&#39;ve exhausted the results.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">page_token</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">job_id</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">page_token</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">buffer</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span></div>
<!-- Highlighted listing of BigQueryCursor.fetchmany: size falls back to self.arraysize when None, then fetchone() is called at most size times and collection stops at the first None. NOTE(review): the docstring cites pyhive.exc.Error, which looks copied from PyHive; confirm against bigquery_hook.py and regenerate this page rather than editing the listing. --><div class="viewcode-block" id="BigQueryCursor.fetchmany"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.fetchmany">[docs]</a> <span class="k">def</span> <span class="nf">fetchmany</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fetch the next set of rows of a query result, returning a sequence of sequences</span>
<span class="sd"> (e.g. a list of tuples). An empty sequence is returned when no more rows are</span>
<span class="sd"> available. The number of rows to fetch per call is specified by the parameter.</span>
<span class="sd"> If it is not given, the cursor&#39;s arraysize determines the number of rows to be</span>
<span class="sd"> fetched. The method should try to fetch as many rows as indicated by the size</span>
<span class="sd"> parameter. If this is not possible due to the specified number of rows not being</span>
<span class="sd"> available, fewer rows may be returned. An :py:class:`~pyhive.exc.Error`</span>
<span class="sd"> (or subclass) exception is raised if the previous call to</span>
<span class="sd"> :py:meth:`execute` did not produce any result set or no call was issued yet.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">size</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">size</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">arraysize</span>
<span class="n">result</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">size</span><span class="p">):</span>
<span class="n">one</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()</span>
<span class="k">if</span> <span class="n">one</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">break</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">one</span><span class="p">)</span>
<span class="k">return</span> <span class="n">result</span></div>
<!-- Highlighted listing of BigQueryCursor.fetchall: loops on fetchone() until it returns None and returns the rows collected so far. --><div class="viewcode-block" id="BigQueryCursor.fetchall"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.fetchall">[docs]</a> <span class="k">def</span> <span class="nf">fetchall</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fetch all (remaining) rows of a query result, returning them as a sequence of</span>
<span class="sd"> sequences (e.g. a list of tuples).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">result</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="n">one</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fetchone</span><span class="p">()</span>
<span class="k">if</span> <span class="n">one</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">break</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">one</span><span class="p">)</span>
<span class="k">return</span> <span class="n">result</span></div>
<!-- Highlighted listing of BigQueryCursor.get_arraysize. NOTE(review): the return line reads self._buffersize but tests self.buffersize, while set_arraysize assigns self.buffersize; this looks like an attribute name mismatch in the module. Confirm in bigquery_hook.py and regenerate; do not patch the listing here. --><div class="viewcode-block" id="BigQueryCursor.get_arraysize"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.get_arraysize">[docs]</a> <span class="k">def</span> <span class="nf">get_arraysize</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; Specifies the number of rows to fetch at a time with .fetchmany() &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_buffersize</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">buffersize</span> <span class="k">else</span> <span class="mi">1</span></div>
<!-- Highlighted listing of BigQueryCursor.set_arraysize: stores the given arraysize on self.buffersize. --><div class="viewcode-block" id="BigQueryCursor.set_arraysize"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.set_arraysize">[docs]</a> <span class="k">def</span> <span class="nf">set_arraysize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">arraysize</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; Specifies the number of rows to fetch at a time with .fetchmany() &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">buffersize</span> <span class="o">=</span> <span class="n">arraysize</span></div>
<!-- Highlighted listing of the BigQueryCursor.arraysize property, built from get_arraysize and set_arraysize. --><div class="viewcode-block" id="BigQueryCursor.arraysize"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.arraysize">[docs]</a> <span class="n">arraysize</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">get_arraysize</span><span class="p">,</span> <span class="n">set_arraysize</span><span class="p">)</span></div>
<!-- Highlighted listing of BigQueryCursor.setinputsizes: the body is a bare pass, so the sizes argument is ignored. --><div class="viewcode-block" id="BigQueryCursor.setinputsizes"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.setinputsizes">[docs]</a> <span class="k">def</span> <span class="nf">setinputsizes</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sizes</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; Does nothing by default &quot;&quot;&quot;</span>
<span class="k">pass</span></div>
<!-- Highlighted listing of BigQueryCursor.setoutputsize: the body is a bare pass, so size and column are ignored. The second closing div on the last line ends an enclosing block opened earlier in the page (presumably the BigQueryCursor class listing; confirm before moving it). --><div class="viewcode-block" id="BigQueryCursor.setoutputsize"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook.BigQueryCursor.setoutputsize">[docs]</a> <span class="k">def</span> <span class="nf">setoutputsize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">column</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; Does nothing by default &quot;&quot;&quot;</span>
<span class="k">pass</span></div></div>
<!-- Highlighted listing of _bind_parameters: None becomes the text NULL, basestring values are wrapped in single quotes after _escape(), every other value goes through str(), and the mapping is interpolated into operation with the % operator. NOTE(review): iteritems and basestring are not defined in this listing; presumably Python 2 compatibility imports at the top of the module. The query is string-built, so its safety rests entirely on _escape(). --><div class="viewcode-block" id="_bind_parameters"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook._bind_parameters">[docs]</a><span class="k">def</span> <span class="nf">_bind_parameters</span><span class="p">(</span><span class="n">operation</span><span class="p">,</span> <span class="n">parameters</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; Helper method that binds parameters to a SQL query. &quot;&quot;&quot;</span>
<span class="c1"># inspired by MySQL Python Connector (conversion.py)</span>
<span class="n">string_parameters</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="ow">in</span> <span class="n">iteritems</span><span class="p">(</span><span class="n">parameters</span><span class="p">):</span>
<span class="k">if</span> <span class="n">value</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">string_parameters</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;NULL&#39;</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">basestring</span><span class="p">):</span>
<span class="n">string_parameters</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;&#39;&quot;</span> <span class="o">+</span> <span class="n">_escape</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="o">+</span> <span class="s2">&quot;&#39;&quot;</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">string_parameters</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">return</span> <span class="n">operation</span> <span class="o">%</span> <span class="n">string_parameters</span></div>
<!-- Highlighted listing of _escape: the backslash is doubled first, then newline and carriage return are replaced by the two-character sequences backslash n and backslash r, and single and double quotes each get a leading backslash. The backslash replacement has to stay first so later insertions are not doubled again. --><div class="viewcode-block" id="_escape"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook._escape">[docs]</a><span class="k">def</span> <span class="nf">_escape</span><span class="p">(</span><span class="n">s</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; Helper method that escapes parameters to a SQL query. &quot;&quot;&quot;</span>
<span class="n">e</span> <span class="o">=</span> <span class="n">s</span>
<span class="n">e</span> <span class="o">=</span> <span class="n">e</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;</span><span class="se">\\</span><span class="s1">&#39;</span><span class="p">,</span> <span class="s1">&#39;</span><span class="se">\\\\</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">e</span> <span class="o">=</span> <span class="n">e</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;</span><span class="se">\n</span><span class="s1">&#39;</span><span class="p">,</span> <span class="s1">&#39;</span><span class="se">\\</span><span class="s1">n&#39;</span><span class="p">)</span>
<span class="n">e</span> <span class="o">=</span> <span class="n">e</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;</span><span class="se">\r</span><span class="s1">&#39;</span><span class="p">,</span> <span class="s1">&#39;</span><span class="se">\\</span><span class="s1">r&#39;</span><span class="p">)</span>
<span class="n">e</span> <span class="o">=</span> <span class="n">e</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;</span><span class="se">\\</span><span class="s2">&#39;&quot;</span><span class="p">)</span>
<span class="n">e</span> <span class="o">=</span> <span class="n">e</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;&quot;&#39;</span><span class="p">,</span> <span class="s1">&#39;</span><span class="se">\\</span><span class="s1">&quot;&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">e</span></div>
<!-- Highlighted listing of _bq_cast: None passes through unchanged, INTEGER goes to int(), FLOAT and TIMESTAMP both go to float(), BOOLEAN accepts only the strings true and false (anything else raises ValueError), and every other bq_type returns the input as is. --><div class="viewcode-block" id="_bq_cast"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook._bq_cast">[docs]</a><span class="k">def</span> <span class="nf">_bq_cast</span><span class="p">(</span><span class="n">string_field</span><span class="p">,</span> <span class="n">bq_type</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Helper method that casts a BigQuery row to the appropriate data types.</span>
<span class="sd"> This is useful because BigQuery returns all fields as strings.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">string_field</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">elif</span> <span class="n">bq_type</span> <span class="o">==</span> <span class="s1">&#39;INTEGER&#39;</span><span class="p">:</span>
<span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">string_field</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">bq_type</span> <span class="o">==</span> <span class="s1">&#39;FLOAT&#39;</span> <span class="ow">or</span> <span class="n">bq_type</span> <span class="o">==</span> <span class="s1">&#39;TIMESTAMP&#39;</span><span class="p">:</span>
<span class="k">return</span> <span class="nb">float</span><span class="p">(</span><span class="n">string_field</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">bq_type</span> <span class="o">==</span> <span class="s1">&#39;BOOLEAN&#39;</span><span class="p">:</span>
<span class="k">if</span> <span class="n">string_field</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;true&#39;</span><span class="p">,</span> <span class="s1">&#39;false&#39;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> must have value &#39;true&#39; or &#39;false&#39;&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">string_field</span><span class="p">))</span>
<span class="k">return</span> <span class="n">string_field</span> <span class="o">==</span> <span class="s1">&#39;true&#39;</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">string_field</span></div>
<!-- Highlighted listing of _split_tablename: returns (project_id, dataset_id, table_id) parsed from table_input. Input without a dot, or a falsy default_project_id, raises ValueError. The project may be given before the last colon or as the first of three dot-separated parts; giving it both ways raises. When no project is present, default_project_id is used and, if var_name is set, an info message is logged through LoggingMixin. Malformed input otherwise raises a plain Exception whose message is prefixed via var_print when var_name is given. --><div class="viewcode-block" id="_split_tablename"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook._split_tablename">[docs]</a><span class="k">def</span> <span class="nf">_split_tablename</span><span class="p">(</span><span class="n">table_input</span><span class="p">,</span> <span class="n">default_project_id</span><span class="p">,</span> <span class="n">var_name</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="k">if</span> <span class="s1">&#39;.&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">table_input</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s1">&#39;Expected target table name in the format of &#39;</span>
<span class="s1">&#39;&lt;dataset&gt;.&lt;table&gt;. Got: </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">table_input</span><span class="p">))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">default_project_id</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;INTERNAL: No default project is specified&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">var_print</span><span class="p">(</span><span class="n">var_name</span><span class="p">):</span>
<span class="k">if</span> <span class="n">var_name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="s2">&quot;&quot;</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="s2">&quot;Format exception for </span><span class="si">{var}</span><span class="s2">: &quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">var</span><span class="o">=</span><span class="n">var_name</span><span class="p">)</span>
<span class="k">if</span> <span class="n">table_input</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s1">&#39;.&#39;</span><span class="p">)</span> <span class="o">+</span> <span class="n">table_input</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s1">&#39;:&#39;</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">3</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">((</span><span class="s1">&#39;</span><span class="si">{var}</span><span class="s1">Use either : or . to specify project &#39;</span>
<span class="s1">&#39;got </span><span class="si">{input}</span><span class="s1">&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">var</span><span class="o">=</span><span class="n">var_print</span><span class="p">(</span><span class="n">var_name</span><span class="p">),</span> <span class="nb">input</span><span class="o">=</span><span class="n">table_input</span><span class="p">))</span>
<span class="n">cmpt</span> <span class="o">=</span> <span class="n">table_input</span><span class="o">.</span><span class="n">rsplit</span><span class="p">(</span><span class="s1">&#39;:&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">rest</span> <span class="o">=</span> <span class="n">table_input</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">cmpt</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">rest</span> <span class="o">=</span> <span class="n">cmpt</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">cmpt</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">and</span> <span class="n">cmpt</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s1">&#39;:&#39;</span><span class="p">)</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">if</span> <span class="n">cmpt</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s1">&#39;.&#39;</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="n">cmpt</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">rest</span> <span class="o">=</span> <span class="n">cmpt</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">((</span><span class="s1">&#39;</span><span class="si">{var}</span><span class="s1">Expect format of (&lt;project:)&lt;dataset&gt;.&lt;table&gt;, &#39;</span>
<span class="s1">&#39;got </span><span class="si">{input}</span><span class="s1">&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">var</span><span class="o">=</span><span class="n">var_print</span><span class="p">(</span><span class="n">var_name</span><span class="p">),</span> <span class="nb">input</span><span class="o">=</span><span class="n">table_input</span><span class="p">))</span>
<span class="n">cmpt</span> <span class="o">=</span> <span class="n">rest</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;.&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">cmpt</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
<span class="k">if</span> <span class="n">project_id</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;</span><span class="si">{var}</span><span class="s2">Use either : or . to specify project&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">var</span><span class="o">=</span><span class="n">var_print</span><span class="p">(</span><span class="n">var_name</span><span class="p">)))</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="n">cmpt</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">dataset_id</span> <span class="o">=</span> <span class="n">cmpt</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">table_id</span> <span class="o">=</span> <span class="n">cmpt</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
<span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">cmpt</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
<span class="n">dataset_id</span> <span class="o">=</span> <span class="n">cmpt</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">table_id</span> <span class="o">=</span> <span class="n">cmpt</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span>
<span class="p">(</span><span class="s1">&#39;</span><span class="si">{var}</span><span class="s1">Expect format of (&lt;project.|&lt;project:)&lt;dataset&gt;.&lt;table&gt;, &#39;</span>
<span class="s1">&#39;got </span><span class="si">{input}</span><span class="s1">&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">var</span><span class="o">=</span><span class="n">var_print</span><span class="p">(</span><span class="n">var_name</span><span class="p">),</span> <span class="nb">input</span><span class="o">=</span><span class="n">table_input</span><span class="p">))</span>
<span class="k">if</span> <span class="n">project_id</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="n">var_name</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">log</span> <span class="o">=</span> <span class="n">LoggingMixin</span><span class="p">()</span><span class="o">.</span><span class="n">log</span>
<span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="s1">&#39;Project not included in </span><span class="si">%s</span><span class="s1">: </span><span class="si">%s</span><span class="s1">; using project &quot;</span><span class="si">%s</span><span class="s1">&quot;&#39;</span><span class="p">,</span>
<span class="n">var_name</span><span class="p">,</span> <span class="n">table_input</span><span class="p">,</span> <span class="n">default_project_id</span>
<span class="p">)</span>
<span class="n">project_id</span> <span class="o">=</span> <span class="n">default_project_id</span>
<span class="k">return</span> <span class="n">project_id</span><span class="p">,</span> <span class="n">dataset_id</span><span class="p">,</span> <span class="n">table_id</span></div>
<!-- Highlighted listing of _cleanse_time_partitioning: a None input is treated as an empty dict; when the destination table name contains a dollar sign the output starts with type DAY; the caller's dict is then merged on top, so a caller-supplied type overrides DAY. A new dict is returned and the input is not mutated. --><div class="viewcode-block" id="_cleanse_time_partitioning"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook._cleanse_time_partitioning">[docs]</a><span class="k">def</span> <span class="nf">_cleanse_time_partitioning</span><span class="p">(</span><span class="n">destination_dataset_table</span><span class="p">,</span> <span class="n">time_partitioning_in</span><span class="p">):</span>
<span class="c1"># if it is a partitioned table ($ is in the table name) add partition load option</span>
<span class="k">if</span> <span class="n">time_partitioning_in</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">time_partitioning_in</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">time_partitioning_out</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">if</span> <span class="n">destination_dataset_table</span> <span class="ow">and</span> <span class="s1">&#39;$&#39;</span> <span class="ow">in</span> <span class="n">destination_dataset_table</span><span class="p">:</span>
<span class="n">time_partitioning_out</span><span class="p">[</span><span class="s1">&#39;type&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;DAY&#39;</span>
<span class="n">time_partitioning_out</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">time_partitioning_in</span><span class="p">)</span>
<span class="k">return</span> <span class="n">time_partitioning_out</span></div>
<!-- Highlighted listing of _validate_value: raises TypeError naming key, expected_type and the actual type when value fails the isinstance check; otherwise falls through with no explicit return. --><div class="viewcode-block" id="_validate_value"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook._validate_value">[docs]</a><span class="k">def</span> <span class="nf">_validate_value</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">expected_type</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot; function to check expected type and raise</span>
<span class="sd"> error if type is not correct &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">expected_type</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> argument must have a type </span><span class="si">{}</span><span class="s2"> not </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">key</span><span class="p">,</span> <span class="n">expected_type</span><span class="p">,</span> <span class="nb">type</span><span class="p">(</span><span class="n">value</span><span class="p">)))</span></div>
<!-- Highlighted listing of _api_resource_configs_duplication_check: raises ValueError only when key is already present in config_dict with a value different from the one passed in; an equal value or an absent key passes silently. config_dict_name is used only in the error text. --><div class="viewcode-block" id="_api_resource_configs_duplication_check"><a class="viewcode-back" href="../../../../_api/airflow/contrib/hooks/bigquery_hook/index.html#airflow.contrib.hooks.bigquery_hook._api_resource_configs_duplication_check">[docs]</a><span class="k">def</span> <span class="nf">_api_resource_configs_duplication_check</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">config_dict</span><span class="p">,</span>
<span class="n">config_dict_name</span><span class="o">=</span><span class="s1">&#39;api_resource_configs&#39;</span><span class="p">):</span>
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">config_dict</span> <span class="ow">and</span> <span class="n">value</span> <span class="o">!=</span> <span class="n">config_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Values of </span><span class="si">{param_name}</span><span class="s2"> param are duplicated. &quot;</span>
<span class="s2">&quot;</span><span class="si">{dict_name}</span><span class="s2"> contained </span><span class="si">{param_name}</span><span class="s2"> param &quot;</span>
<span class="s2">&quot;in `query` config and </span><span class="si">{param_name}</span><span class="s2"> was also provided &quot;</span>
<span class="s2">&quot;with arg to run_query() method. Please remove duplicates.&quot;</span>
<span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">param_name</span><span class="o">=</span><span class="n">key</span><span class="p">,</span> <span class="n">dict_name</span><span class="o">=</span><span class="n">config_dict_name</span><span class="p">))</span></div>
</pre></div>
</div>
</div>
<footer>
<!-- Page footer: a rule, the contentinfo region, and the generator/theme credits. -->
<hr>
<div role="contentinfo">
<!-- NOTE(review): this paragraph is empty; presumably the theme's copyright slot rendered with no value. Confirm in the Sphinx configuration. -->
<p>
</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
// Calls SphinxRtdTheme.Navigation.enable(true) once the DOM is ready
// (a function passed to jQuery() is registered as a ready handler).
jQuery(function () {
  SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>