blob: e95d4f85e5b264123f96d873da99ab3ecbe6f916 [file] [log] [blame]
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>apache_beam.io.gcp.bigquery_file_loads module &mdash; Apache Beam documentation</title>
<script type="text/javascript" src="_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/language_data.js"></script>
<script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="apache_beam.io.gcp.bigquery_io_metadata module" href="apache_beam.io.gcp.bigquery_io_metadata.html" />
<link rel="prev" title="apache_beam.io.gcp.bigquery_avro_tools module" href="apache_beam.io.gcp.bigquery_avro_tools.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> Apache Beam
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="apache_beam.io.html#subpackages">Subpackages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="apache_beam.io.aws.html">apache_beam.io.aws package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.io.azure.html">apache_beam.io.azure package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.io.external.html">apache_beam.io.external package</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.io.flink.html">apache_beam.io.flink package</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="apache_beam.io.gcp.html">apache_beam.io.gcp package</a><ul class="current">
<li class="toctree-l4"><a class="reference internal" href="apache_beam.io.gcp.html#subpackages">Subpackages</a></li>
<li class="toctree-l4 current"><a class="reference internal" href="apache_beam.io.gcp.html#submodules">Submodules</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="apache_beam.io.html#submodules">Submodules</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Apache Beam</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> &raquo;</li>
<li><a href="apache_beam.io.html">apache_beam.io package</a> &raquo;</li>
<li><a href="apache_beam.io.gcp.html">apache_beam.io.gcp package</a> &raquo;</li>
<li>apache_beam.io.gcp.bigquery_file_loads module</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/apache_beam.io.gcp.bigquery_file_loads.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="module-apache_beam.io.gcp.bigquery_file_loads">
<span id="apache-beam-io-gcp-bigquery-file-loads-module"></span><h1>apache_beam.io.gcp.bigquery_file_loads module<a class="headerlink" href="#module-apache_beam.io.gcp.bigquery_file_loads" title="Permalink to this headline"></a></h1>
<p>Functionality to perform file loads into BigQuery for Batch and Streaming
pipelines.</p>
<p>This source is able to work around BigQuery load quotas and limitations. When
destinations are dynamic, or when data for a single job is too large, the data
will be split into multiple jobs.</p>
<p>NOTHING IN THIS FILE HAS BACKWARDS COMPATIBILITY GUARANTEES.</p>
<dl class="function">
<dt id="apache_beam.io.gcp.bigquery_file_loads.file_prefix_generator">
<code class="descclassname">apache_beam.io.gcp.bigquery_file_loads.</code><code class="descname">file_prefix_generator</code><span class="sig-paren">(</span><em>with_validation=True</em>, <em>pipeline_gcs_location=None</em>, <em>temp_location=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#file_prefix_generator"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.file_prefix_generator" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery_file_loads.</code><code class="descname">WriteRecordsToFile</code><span class="sig-paren">(</span><em>schema</em>, <em>max_files_per_bundle=20</em>, <em>max_file_size=4398046511104</em>, <em>file_format=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#WriteRecordsToFile"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn" title="apache_beam.transforms.core.DoFn"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.core.DoFn</span></code></a></p>
<p>Write input records to files before triggering a load job.</p>
<p>This transform keeps up to <code class="docutils literal notranslate"><span class="pre">max_files_per_bundle</span></code> files open to write to. It
receives (destination, record) tuples, and it writes the records to different
files for each destination.</p>
<p>If there are more than <code class="docutils literal notranslate"><span class="pre">max_files_per_bundle</span></code> destinations that we need to
write to, then those records are grouped by their destination, and later
written to files by <code class="docutils literal notranslate"><span class="pre">WriteGroupedRecordsToFile</span></code>.</p>
<p>It outputs two PCollections.</p>
<p>Initialize a <a class="reference internal" href="#apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile" title="apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile"><code class="xref py py-class docutils literal notranslate"><span class="pre">WriteRecordsToFile</span></code></a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>max_files_per_bundle</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – The maximum number of files that can be kept
open during execution of this step in a worker. This is to avoid
overwhelming the worker memory.</li>
<li><strong>max_file_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – The maximum size in bytes for a file to be used in
an export job.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="attribute">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.UNWRITTEN_RECORD_TAG">
<code class="descname">UNWRITTEN_RECORD_TAG</code><em class="property"> = 'UnwrittenRecords'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.UNWRITTEN_RECORD_TAG" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="attribute">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.WRITTEN_FILE_TAG">
<code class="descname">WRITTEN_FILE_TAG</code><em class="property"> = 'WrittenFiles'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.WRITTEN_FILE_TAG" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.display_data">
<code class="descname">display_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#WriteRecordsToFile.display_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.display_data" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.start_bundle">
<code class="descname">start_bundle</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#WriteRecordsToFile.start_bundle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.start_bundle" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.process">
<code class="descname">process</code><span class="sig-paren">(</span><em>element</em>, <em>file_prefix</em>, <em>*schema_side_inputs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#WriteRecordsToFile.process"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.process" title="Permalink to this definition"></a></dt>
<dd><p>Take a tuple with (destination, row) and write to file or spill out.</p>
<p>Destination may be a <code class="docutils literal notranslate"><span class="pre">TableReference</span></code> or a string, and row is a
Python dictionary for a row to be inserted to BigQuery.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.finish_bundle">
<code class="descname">finish_bundle</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#WriteRecordsToFile.finish_bundle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WriteRecordsToFile.finish_bundle" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WriteGroupedRecordsToFile">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery_file_loads.</code><code class="descname">WriteGroupedRecordsToFile</code><span class="sig-paren">(</span><em>schema</em>, <em>max_file_size=4398046511104</em>, <em>file_format=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#WriteGroupedRecordsToFile"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WriteGroupedRecordsToFile" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn" title="apache_beam.transforms.core.DoFn"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.core.DoFn</span></code></a></p>
<p>Receives a collection of (destination, iterable of records) pairs and writes the records to files.</p>
<p>This is different from <code class="docutils literal notranslate"><span class="pre">WriteRecordsToFile</span></code> because it receives records
grouped by destination. This means that it’s not necessary to keep multiple
file descriptors open, because we know for sure when records for a single
destination have been written out.</p>
<p>Experimental; no backwards compatibility guarantees.</p>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WriteGroupedRecordsToFile.process">
<code class="descname">process</code><span class="sig-paren">(</span><em>element</em>, <em>file_prefix</em>, <em>*schema_side_inputs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#WriteGroupedRecordsToFile.process"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WriteGroupedRecordsToFile.process" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.bigquery_file_loads.UpdateDestinationSchema">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery_file_loads.</code><code class="descname">UpdateDestinationSchema</code><span class="sig-paren">(</span><em>write_disposition=None</em>, <em>test_client=None</em>, <em>additional_bq_parameters=None</em>, <em>step_name=None</em>, <em>source_format=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#UpdateDestinationSchema"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.UpdateDestinationSchema" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn" title="apache_beam.transforms.core.DoFn"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.core.DoFn</span></code></a></p>
<p>Update destination schema based on data that is about to be copied into it.</p>
<p>Unlike load and query jobs, BigQuery copy jobs do not support schema field
addition or relaxation on the destination table. This DoFn fills that gap by
updating the destination table schemas to be compatible with the data coming
from the source table so that schema field modification options are respected
regardless of whether data is loaded directly to the destination table or
loaded into temporary tables before being copied into the destination.</p>
<p>This transform takes as input a (destination, job_reference) pair where the
job_reference refers to a completed load job into a temporary table.</p>
<p>This transform emits (destination, job_reference) pairs where the
job_reference refers to a submitted load job for performing the schema
modification. Note that the input and output job references are not the same.</p>
<p>Experimental; no backwards compatibility guarantees.</p>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.UpdateDestinationSchema.setup">
<code class="descname">setup</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#UpdateDestinationSchema.setup"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.UpdateDestinationSchema.setup" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.UpdateDestinationSchema.display_data">
<code class="descname">display_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#UpdateDestinationSchema.display_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.UpdateDestinationSchema.display_data" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.UpdateDestinationSchema.process">
<code class="descname">process</code><span class="sig-paren">(</span><em>element</em>, <em>schema_mod_job_name_prefix</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#UpdateDestinationSchema.process"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.UpdateDestinationSchema.process" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.bigquery_file_loads.TriggerCopyJobs">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery_file_loads.</code><code class="descname">TriggerCopyJobs</code><span class="sig-paren">(</span><em>create_disposition=None</em>, <em>write_disposition=None</em>, <em>test_client=None</em>, <em>step_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#TriggerCopyJobs"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.TriggerCopyJobs" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn" title="apache_beam.transforms.core.DoFn"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.core.DoFn</span></code></a></p>
<p>Launches jobs to copy from temporary tables into the main target table.</p>
<p>When a job needs to write to multiple destination tables, or when a single
destination table needs to have multiple load jobs to write to it, files are
loaded into temporary tables, and those tables are later copied to the
destination tables.</p>
<p>This transform emits (destination, job_reference) pairs.</p>
<dl class="docutils">
<dt>TODO(BEAM-7822): In file loads method of writing to BigQuery,</dt>
<dd>copying from temp_tables to destination_table is not atomic.
See: <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-7822">https://issues.apache.org/jira/browse/BEAM-7822</a></dd>
</dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.TriggerCopyJobs.display_data">
<code class="descname">display_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#TriggerCopyJobs.display_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.TriggerCopyJobs.display_data" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.TriggerCopyJobs.start_bundle">
<code class="descname">start_bundle</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#TriggerCopyJobs.start_bundle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.TriggerCopyJobs.start_bundle" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.TriggerCopyJobs.process">
<code class="descname">process</code><span class="sig-paren">(</span><em>element</em>, <em>job_name_prefix=None</em>, <em>unused_schema_mod_jobs=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#TriggerCopyJobs.process"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.TriggerCopyJobs.process" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.bigquery_file_loads.TriggerLoadJobs">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery_file_loads.</code><code class="descname">TriggerLoadJobs</code><span class="sig-paren">(</span><em>schema=None</em>, <em>create_disposition=None</em>, <em>write_disposition=None</em>, <em>test_client=None</em>, <em>temporary_tables=False</em>, <em>additional_bq_parameters=None</em>, <em>source_format=None</em>, <em>step_name=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#TriggerLoadJobs"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.TriggerLoadJobs" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn" title="apache_beam.transforms.core.DoFn"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.core.DoFn</span></code></a></p>
<p>Triggers the import jobs to BQ.</p>
<p>Experimental; no backwards compatibility guarantees.</p>
<dl class="attribute">
<dt id="apache_beam.io.gcp.bigquery_file_loads.TriggerLoadJobs.TEMP_TABLES">
<code class="descname">TEMP_TABLES</code><em class="property"> = 'TemporaryTables'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.TriggerLoadJobs.TEMP_TABLES" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.TriggerLoadJobs.display_data">
<code class="descname">display_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#TriggerLoadJobs.display_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.TriggerLoadJobs.display_data" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.TriggerLoadJobs.start_bundle">
<code class="descname">start_bundle</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#TriggerLoadJobs.start_bundle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.TriggerLoadJobs.start_bundle" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.TriggerLoadJobs.process">
<code class="descname">process</code><span class="sig-paren">(</span><em>element</em>, <em>load_job_name_prefix</em>, <em>*schema_side_inputs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#TriggerLoadJobs.process"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.TriggerLoadJobs.process" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.bigquery_file_loads.PartitionFiles">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery_file_loads.</code><code class="descname">PartitionFiles</code><span class="sig-paren">(</span><em>max_partition_size</em>, <em>max_files_per_partition</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#PartitionFiles"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.PartitionFiles" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn" title="apache_beam.transforms.core.DoFn"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.core.DoFn</span></code></a></p>
<dl class="attribute">
<dt id="apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.MULTIPLE_PARTITIONS_TAG">
<code class="descname">MULTIPLE_PARTITIONS_TAG</code><em class="property"> = 'MULTIPLE_PARTITIONS'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.MULTIPLE_PARTITIONS_TAG" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="attribute">
<dt id="apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.SINGLE_PARTITION_TAG">
<code class="descname">SINGLE_PARTITION_TAG</code><em class="property"> = 'SINGLE_PARTITION'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.SINGLE_PARTITION_TAG" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.Partition">
<em class="property">class </em><code class="descname">Partition</code><span class="sig-paren">(</span><em>max_size</em>, <em>max_files</em>, <em>files=None</em>, <em>size=0</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#PartitionFiles.Partition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.Partition" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.Partition.can_accept">
<code class="descname">can_accept</code><span class="sig-paren">(</span><em>file_size</em>, <em>no_of_files=1</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#PartitionFiles.Partition.can_accept"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.Partition.can_accept" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.Partition.add">
<code class="descname">add</code><span class="sig-paren">(</span><em>file_path</em>, <em>file_size</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#PartitionFiles.Partition.add"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.Partition.add" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.process">
<code class="descname">process</code><span class="sig-paren">(</span><em>element</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#PartitionFiles.process"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.PartitionFiles.process" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WaitForBQJobs">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery_file_loads.</code><code class="descname">WaitForBQJobs</code><span class="sig-paren">(</span><em>test_client=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#WaitForBQJobs"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WaitForBQJobs" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn" title="apache_beam.transforms.core.DoFn"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.core.DoFn</span></code></a></p>
<p>Takes in a series of BQ job names as side input, and waits for all of them.</p>
<p>If any job fails, it will fail. If all jobs succeed, it will succeed.</p>
<p>Experimental; no backwards compatibility guarantees.</p>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WaitForBQJobs.start_bundle">
<code class="descname">start_bundle</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#WaitForBQJobs.start_bundle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WaitForBQJobs.start_bundle" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<!-- Method entry: WaitForBQJobs.process(element, dest_ids_list). Shows the signature, a [source] link into the highlighted module source, and a permalink anchor. The description (dd) is empty. -->
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.WaitForBQJobs.process">
<code class="descname">process</code><span class="sig-paren">(</span><em>element</em>, <em>dest_ids_list</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#WaitForBQJobs.process"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.WaitForBQJobs.process" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<!-- Class entry: apache_beam.io.gcp.bigquery_file_loads.DeleteTablesFn(test_client=None). Lists its base class (DoFn) and two methods. The dt id is the target of the permalink anchor at the end of the signature line. -->
<dl class="class">
<dt id="apache_beam.io.gcp.bigquery_file_loads.DeleteTablesFn">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery_file_loads.</code><code class="descname">DeleteTablesFn</code><span class="sig-paren">(</span><em>test_client=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#DeleteTablesFn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.DeleteTablesFn" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn" title="apache_beam.transforms.core.DoFn"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.core.DoFn</span></code></a></p>
<!-- Method entry: start_bundle(), no parameters; its description (dd) is empty. -->
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.DeleteTablesFn.start_bundle">
<code class="descname">start_bundle</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#DeleteTablesFn.start_bundle"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.DeleteTablesFn.start_bundle" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<!-- Method entry: process(table_reference); its description (dd) is empty. -->
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.DeleteTablesFn.process">
<code class="descname">process</code><span class="sig-paren">(</span><em>table_reference</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#DeleteTablesFn.process"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.DeleteTablesFn.process" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<!-- Class entry: apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads. Shows the full constructor signature, its base class (PTransform), a one-line summary, four class attributes with their values, and two methods. Every signature line ends with a permalink anchor targeting the enclosing dt id. -->
<dl class="class">
<dt id="apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads">
<em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery_file_loads.</code><code class="descname">BigQueryBatchFileLoads</code><span class="sig-paren">(</span><em>destination</em>, <em>schema=None</em>, <em>custom_gcs_temp_location=None</em>, <em>create_disposition=None</em>, <em>write_disposition=None</em>, <em>triggering_frequency=None</em>, <em>with_auto_sharding=False</em>, <em>temp_file_format=None</em>, <em>max_file_size=None</em>, <em>max_files_per_bundle=None</em>, <em>max_partition_size=None</em>, <em>max_files_per_partition=None</em>, <em>additional_bq_parameters=None</em>, <em>table_side_inputs=None</em>, <em>schema_side_inputs=None</em>, <em>test_client=None</em>, <em>validate=True</em>, <em>is_streaming_pipeline=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#BigQueryBatchFileLoads"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>Takes in a set of elements, and inserts them to BigQuery via batch loads.</p>
<!-- Class attributes: three string constants and one integer, each rendered as "NAME = value" with an empty description. -->
<dl class="attribute">
<dt id="apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.DESTINATION_JOBID_PAIRS">
<code class="descname">DESTINATION_JOBID_PAIRS</code><em class="property"> = 'destination_load_jobid_pairs'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.DESTINATION_JOBID_PAIRS" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="attribute">
<dt id="apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.DESTINATION_FILE_PAIRS">
<code class="descname">DESTINATION_FILE_PAIRS</code><em class="property"> = 'destination_file_pairs'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.DESTINATION_FILE_PAIRS" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="attribute">
<dt id="apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.DESTINATION_COPY_JOBID_PAIRS">
<code class="descname">DESTINATION_COPY_JOBID_PAIRS</code><em class="property"> = 'destination_copy_jobid_pairs'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.DESTINATION_COPY_JOBID_PAIRS" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="attribute">
<dt id="apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.COUNT">
<code class="descname">COUNT</code><em class="property"> = 0</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.COUNT" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<!-- Methods: verify() and expand(pcoll); each has a [source] link and an empty description. -->
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.verify">
<code class="descname">verify</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#BigQueryBatchFileLoads.verify"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.verify" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery_file_loads.html#BigQueryBatchFileLoads.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery_file_loads.BigQueryBatchFileLoads.expand" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</div>
</div>
</div>
<footer>
<!-- Page footer: next/previous module navigation, the copyright notice, and build credits. -->
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<!-- The arrow icons are decorative (the link text already says Next/Previous), so they are hidden from assistive technology. -->
<a href="apache_beam.io.gcp.bigquery_io_metadata.html" class="btn btn-neutral float-right" title="apache_beam.io.gcp.bigquery_io_metadata module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
<a href="apache_beam.io.gcp.bigquery_avro_tools.html" class="btn btn-neutral float-left" title="apache_beam.io.gcp.bigquery_avro_tools module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
</div>
<hr>
<div role="contentinfo">
<p>
&copy; Copyright
</p>
</div>
<!-- Build credits; the Sphinx link uses HTTPS rather than plain HTTP. -->
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
  // Once the DOM is ready, switch on the Read the Docs theme's sidebar navigation.
  // NOTE(review): the `true` argument is presumably the theme's sticky-navigation flag; confirm against _static/js/theme.js.
  jQuery(document).ready(function () {
    SphinxRtdTheme.Navigation.enable(true);
  });
</script>
</body>
</html>