| <section id="module-apache_beam.io.gcp.bigquery"> |
| <span id="apache-beam-io-gcp-bigquery-module"></span><h1>apache_beam.io.gcp.bigquery module<a class="headerlink" href="#module-apache_beam.io.gcp.bigquery" title="Link to this heading"></a></h1> |
| <p>BigQuery sources and sinks.</p> |
| <p>This module implements reading from and writing to BigQuery tables. It relies |
| on several classes exposed by the BigQuery API: TableSchema, TableFieldSchema, |
| TableRow, and TableCell. The default mode is to return table rows read from a |
| BigQuery source as dictionaries. Similarly a Write transform to a BigQuerySink |
| accepts PCollections of dictionaries. This is done for more convenient |
| programming. If desired, the native TableRow objects can be used throughout to |
| represent rows (use an instance of TableRowJsonCoder as a coder argument when |
| creating the sources or sinks respectively).</p> |
<p>Also, for programming convenience, instances of TableReference and TableSchema
have a string representation that can be used for the corresponding arguments:</p>
<blockquote>
<div><ul class="simple">
<li><p>TableReference can be a PROJECT:DATASET.TABLE or DATASET.TABLE string.</p></li>
<li><p>TableSchema can be a NAME:TYPE{,NAME:TYPE}* string
(e.g. ‘month:STRING,event_count:INTEGER’).</p></li>
</ul>
</div></blockquote>
<p>The syntax supported is described here:
<a class="reference external" href="https://cloud.google.com/bigquery/bq-command-line-tool-quickstart">https://cloud.google.com/bigquery/bq-command-line-tool-quickstart</a></p>
<p>BigQuery sources can be used as main inputs or side inputs. A main input
(common case) is expected to be massive and will be split into manageable chunks
and processed in parallel. Side inputs are expected to be small and will be read
completely every time a ParDo DoFn gets executed. In the example below, the
lambda function implementing the DoFn for the Map transform will get on each
call <em>one</em> row of the main table and <em>all</em> rows of the side table. The runner
may use some caching techniques to share the side inputs between calls in order
to avoid excessive reading:</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">main_table</span> <span class="o">=</span> <span class="n">pipeline</span> <span class="o">|</span> <span class="s1">'VeryBig'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">ReadFromBigQuery</span><span class="p">(</span><span class="o">...</span><span class="p">)</span> |
| <span class="n">side_table</span> <span class="o">=</span> <span class="n">pipeline</span> <span class="o">|</span> <span class="s1">'NotBig'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">ReadFromBigQuery</span><span class="p">(</span><span class="o">...</span><span class="p">)</span> |
| <span class="n">results</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">main_table</span> |
| <span class="o">|</span> <span class="s1">'ProcessData'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span> |
| <span class="k">lambda</span> <span class="n">element</span><span class="p">,</span> <span class="n">side_input</span><span class="p">:</span> <span class="o">...</span><span class="p">,</span> <span class="n">AsList</span><span class="p">(</span><span class="n">side_table</span><span class="p">)))</span> |
| </pre></div> |
| </div> |
<p>There is no difference in how main and side inputs are read. What makes the
side_table a ‘side input’ is the AsList wrapper used when passing the table
as a parameter to the Map transform. AsList signals to the execution framework
that its input should be made available whole.</p>
<p>The main and side inputs are implemented differently. Reading a BigQuery table
as a main input entails exporting the table to a set of GCS files (in AVRO or in
JSON format) and then processing those files.</p>
<p>Users may provide a query to read from rather than reading all of a BigQuery
table. If specified, the result obtained by executing the specified query will
be used as the data of the input transform:</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">query_results</span> <span class="o">=</span> <span class="n">pipeline</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">gcp</span><span class="o">.</span><span class="n">bigquery</span><span class="o">.</span><span class="n">ReadFromBigQuery</span><span class="p">(</span> |
| <span class="n">query</span><span class="o">=</span><span class="s1">'SELECT year, mean_temp FROM samples.weather_stations'</span><span class="p">)</span> |
| </pre></div> |
| </div> |
<p>When creating a BigQuery input transform, users should provide either a query
or a table. Pipeline construction will fail with a validation error if neither
or both are specified.</p>
<p>When reading via <cite>ReadFromBigQuery</cite> using <cite>EXPORT</cite>, values of BYTES fields are
returned as raw (already decoded) bytes, because ReadFromBigQuery uses Avro
exports by default. When reading from BigQuery using
<cite>apache_beam.io.BigQuerySource</cite>, bytes are returned as base64-encoded bytes.
To get base64-encoded bytes using <cite>ReadFromBigQuery</cite>, you can use the flag
<cite>use_json_exports</cite> to export data as JSON, and receive base64-encoded bytes.</p>
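<p>A minimal sketch of the two read modes described above (the table name is
hypothetical):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>import apache_beam as beam

with beam.Pipeline() as p:
  # Default Avro export: BYTES values arrive as raw (decoded) bytes.
  avro_rows = p | 'ReadAvro' >> beam.io.ReadFromBigQuery(
      table='my_project:my_dataset.my_table')
  # JSON export: BYTES values arrive as base64-encoded bytes.
  json_rows = p | 'ReadJson' >> beam.io.ReadFromBigQuery(
      table='my_project:my_dataset.my_table',
      use_json_exports=True)
</pre></div>
</div>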
| <section id="readallfrombigquery"> |
| <h2>ReadAllFromBigQuery<a class="headerlink" href="#readallfrombigquery" title="Link to this heading"></a></h2> |
| <p>Beam 2.27.0 introduces a new transform called <cite>ReadAllFromBigQuery</cite> which |
| allows you to define table and query reads from BigQuery at pipeline |
| runtime.::</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">read_requests</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span> |
| <span class="n">ReadFromBigQueryRequest</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="s1">'SELECT * FROM mydataset.mytable'</span><span class="p">),</span> |
| <span class="n">ReadFromBigQueryRequest</span><span class="p">(</span><span class="n">table</span><span class="o">=</span><span class="s1">'myproject.mydataset.mytable'</span><span class="p">)])</span> |
| <span class="n">results</span> <span class="o">=</span> <span class="n">read_requests</span> <span class="o">|</span> <span class="n">ReadAllFromBigQuery</span><span class="p">()</span> |
| </pre></div> |
| </div> |
<p>A good application for this transform is in streaming pipelines, to
refresh a side input coming from BigQuery. This would work like so:</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">side_input</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">p</span> |
| <span class="o">|</span> <span class="s1">'PeriodicImpulse'</span> <span class="o">>></span> <span class="n">PeriodicImpulse</span><span class="p">(</span> |
| <span class="n">first_timestamp</span><span class="p">,</span> <span class="n">last_timestamp</span><span class="p">,</span> <span class="n">interval</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span> |
| <span class="o">|</span> <span class="s1">'MapToReadRequest'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span> |
| <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">ReadFromBigQueryRequest</span><span class="p">(</span><span class="n">table</span><span class="o">=</span><span class="s1">'dataset.table'</span><span class="p">))</span> |
| <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">ReadAllFromBigQuery</span><span class="p">())</span> |
| <span class="n">main_input</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">p</span> |
| <span class="o">|</span> <span class="s1">'MpImpulse'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span><span class="n">sample_main_input_elements</span><span class="p">)</span> |
| <span class="o">|</span> |
| <span class="s1">'MapMpToTimestamped'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">src</span><span class="p">:</span> <span class="n">TimestampedValue</span><span class="p">(</span><span class="n">src</span><span class="p">,</span> <span class="n">src</span><span class="p">))</span> |
| <span class="o">|</span> <span class="s1">'WindowMpInto'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">WindowInto</span><span class="p">(</span> |
| <span class="n">window</span><span class="o">.</span><span class="n">FixedWindows</span><span class="p">(</span><span class="n">main_input_windowing_interval</span><span class="p">)))</span> |
| <span class="n">result</span> <span class="o">=</span> <span class="p">(</span> |
| <span class="n">main_input</span> |
| <span class="o">|</span> <span class="s1">'ApplyCrossJoin'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">FlatMap</span><span class="p">(</span> |
| <span class="n">cross_join</span><span class="p">,</span> <span class="n">rights</span><span class="o">=</span><span class="n">beam</span><span class="o">.</span><span class="n">pvalue</span><span class="o">.</span><span class="n">AsIter</span><span class="p">(</span><span class="n">side_input</span><span class="p">)))</span> |
| </pre></div> |
| </div> |
<p><strong>Note</strong>: This transform is supported on Portable and Dataflow v2 runners.</p>
<p><strong>Note</strong>: This transform does not currently clean up temporary datasets
created for its execution. (BEAM-11359)</p>
| <section id="writing-data-to-bigquery"> |
| <h3>Writing Data to BigQuery<a class="headerlink" href="#writing-data-to-bigquery" title="Link to this heading"></a></h3> |
| <p>The <cite>WriteToBigQuery</cite> transform is the recommended way of writing data to |
| BigQuery. It supports a large set of parameters to customize how you’d like to |
| write to BigQuery.</p> |
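<p>A minimal sketch of such a write (the table name and schema are hypothetical;
the individual parameters are described in the sections below):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>import apache_beam as beam

with beam.Pipeline() as p:
  quotes = p | beam.Create([
      {'source': 'Mahatma Gandhi', 'quote': 'My life is my message'}])
  # With the default file-loads method in batch, a GCS temp location must be
  # available (e.g. via --temp_location or custom_gcs_temp_location).
  _ = quotes | beam.io.WriteToBigQuery(
      table='my_project:my_dataset.quotes',
      schema='source:STRING,quote:STRING',
      create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)
</pre></div>
</div>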
</section>
</section>
| <section id="table-references"> |
| <h2>Table References<a class="headerlink" href="#table-references" title="Link to this heading"></a></h2> |
| <p>This transform allows you to provide static <cite>project</cite>, <cite>dataset</cite> and <cite>table</cite> |
| parameters which point to a specific BigQuery table to be created. The <cite>table</cite> |
| parameter can also be a dynamic parameter (i.e. a callable), which receives an |
| element to be written to BigQuery, and returns the table that that element |
| should be sent to.</p> |
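<p>For instance, a sketch of a purely element-based table callable (the routing
rule and table names are hypothetical):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>import apache_beam as beam

def route_to_table(element):
  # Hypothetical rule: send error events to a dedicated table.
  if element['type'] == 'error':
    return 'my_project:my_dataset.error_events'
  return 'my_project:my_dataset.user_events'

with beam.Pipeline() as p:
  events = p | beam.Create([
      {'type': 'error', 'message': 'bad'},
      {'type': 'user_log', 'query': 'flu symptom'}])
  _ = events | beam.io.WriteToBigQuery(
      table=route_to_table,
      schema='type:STRING,message:STRING,query:STRING')
</pre></div>
</div>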
<p>You may also provide a tuple of PCollectionView elements to be passed as side
inputs to your callable. For example, suppose that one wishes to send
events of different types to different tables, and the table names are
computed at pipeline runtime; one may do something like the following:</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">Pipeline</span><span class="p">()</span> <span class="k">as</span> <span class="n">p</span><span class="p">:</span> |
| <span class="n">elements</span> <span class="o">=</span> <span class="p">(</span><span class="n">p</span> <span class="o">|</span> <span class="s1">'Create elements'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span> |
| <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="s1">'error'</span><span class="p">,</span> <span class="s1">'timestamp'</span><span class="p">:</span> <span class="s1">'12:34:56'</span><span class="p">,</span> <span class="s1">'message'</span><span class="p">:</span> <span class="s1">'bad'</span><span class="p">},</span> |
| <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="s1">'user_log'</span><span class="p">,</span> <span class="s1">'timestamp'</span><span class="p">:</span> <span class="s1">'12:34:59'</span><span class="p">,</span> <span class="s1">'query'</span><span class="p">:</span> <span class="s1">'flu symptom'</span><span class="p">},</span> |
| <span class="p">]))</span> |
| |
| <span class="n">table_names</span> <span class="o">=</span> <span class="p">(</span><span class="n">p</span> <span class="o">|</span> <span class="s1">'Create table_names'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span> |
| <span class="p">(</span><span class="s1">'error'</span><span class="p">,</span> <span class="s1">'my_project:dataset1.error_table_for_today'</span><span class="p">),</span> |
| <span class="p">(</span><span class="s1">'user_log'</span><span class="p">,</span> <span class="s1">'my_project:dataset1.query_table_for_today'</span><span class="p">),</span> |
| <span class="p">]))</span> |
| |
| <span class="n">table_names_dict</span> <span class="o">=</span> <span class="n">beam</span><span class="o">.</span><span class="n">pvalue</span><span class="o">.</span><span class="n">AsDict</span><span class="p">(</span><span class="n">table_names</span><span class="p">)</span> |
| |
| <span class="n">elements</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">gcp</span><span class="o">.</span><span class="n">bigquery</span><span class="o">.</span><span class="n">WriteToBigQuery</span><span class="p">(</span> |
| <span class="n">table</span><span class="o">=</span><span class="k">lambda</span> <span class="n">row</span><span class="p">,</span> <span class="n">table_dict</span><span class="p">:</span> <span class="n">table_dict</span><span class="p">[</span><span class="n">row</span><span class="p">[</span><span class="s1">'type'</span><span class="p">]],</span> |
| <span class="n">table_side_inputs</span><span class="o">=</span><span class="p">(</span><span class="n">table_names_dict</span><span class="p">,))</span> |
| </pre></div> |
| </div> |
<p>In the example above, the <cite>table_dict</cite> argument passed to the lambda
function is the side input coming from <cite>table_names_dict</cite>, which is passed
as part of the <cite>table_side_inputs</cite> argument.</p>
</section>
| <section id="schemas"> |
| <h2>Schemas<a class="headerlink" href="#schemas" title="Link to this heading"></a></h2> |
| <p>This transform also allows you to provide a static or dynamic <cite>schema</cite> |
| parameter (i.e. a callable).</p> |
| <p>If providing a callable, this should take in a table reference (as returned by |
| the <cite>table</cite> parameter), and return the corresponding schema for that table. |
| This allows to provide different schemas for different tables:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">compute_table_name</span><span class="p">(</span><span class="n">row</span><span class="p">):</span> |
| <span class="o">...</span> |
| |
| <span class="n">errors_schema</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'fields'</span><span class="p">:</span> <span class="p">[</span> |
| <span class="p">{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'type'</span><span class="p">,</span> <span class="s1">'type'</span><span class="p">:</span> <span class="s1">'STRING'</span><span class="p">,</span> <span class="s1">'mode'</span><span class="p">:</span> <span class="s1">'NULLABLE'</span><span class="p">},</span> |
| <span class="p">{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'message'</span><span class="p">,</span> <span class="s1">'type'</span><span class="p">:</span> <span class="s1">'STRING'</span><span class="p">,</span> <span class="s1">'mode'</span><span class="p">:</span> <span class="s1">'NULLABLE'</span><span class="p">}]}</span> |
| <span class="n">queries_schema</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'fields'</span><span class="p">:</span> <span class="p">[</span> |
| <span class="p">{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'type'</span><span class="p">,</span> <span class="s1">'type'</span><span class="p">:</span> <span class="s1">'STRING'</span><span class="p">,</span> <span class="s1">'mode'</span><span class="p">:</span> <span class="s1">'NULLABLE'</span><span class="p">},</span> |
| <span class="p">{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'query'</span><span class="p">,</span> <span class="s1">'type'</span><span class="p">:</span> <span class="s1">'STRING'</span><span class="p">,</span> <span class="s1">'mode'</span><span class="p">:</span> <span class="s1">'NULLABLE'</span><span class="p">}]}</span> |
| |
| <span class="k">with</span> <span class="n">Pipeline</span><span class="p">()</span> <span class="k">as</span> <span class="n">p</span><span class="p">:</span> |
| <span class="n">elements</span> <span class="o">=</span> <span class="p">(</span><span class="n">p</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span> |
| <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="s1">'error'</span><span class="p">,</span> <span class="s1">'timestamp'</span><span class="p">:</span> <span class="s1">'12:34:56'</span><span class="p">,</span> <span class="s1">'message'</span><span class="p">:</span> <span class="s1">'bad'</span><span class="p">},</span> |
| <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="s1">'user_log'</span><span class="p">,</span> <span class="s1">'timestamp'</span><span class="p">:</span> <span class="s1">'12:34:59'</span><span class="p">,</span> <span class="s1">'query'</span><span class="p">:</span> <span class="s1">'flu symptom'</span><span class="p">},</span> |
| <span class="p">]))</span> |
| |
| <span class="n">elements</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">gcp</span><span class="o">.</span><span class="n">bigquery</span><span class="o">.</span><span class="n">WriteToBigQuery</span><span class="p">(</span> |
| <span class="n">table</span><span class="o">=</span><span class="n">compute_table_name</span><span class="p">,</span> |
| <span class="n">schema</span><span class="o">=</span><span class="k">lambda</span> <span class="n">table</span><span class="p">:</span> <span class="p">(</span><span class="n">errors_schema</span> |
| <span class="k">if</span> <span class="s1">'errors'</span> <span class="ow">in</span> <span class="n">table</span> |
| <span class="k">else</span> <span class="n">queries_schema</span><span class="p">))</span> |
| </pre></div> |
| </div> |
<p>It may be the case that schemas are computed at pipeline runtime. In cases
like these, one can also provide a <cite>schema_side_inputs</cite> parameter, which is
a tuple of PCollectionViews to be passed to the schema callable (much like
the <cite>table_side_inputs</cite> parameter).</p>
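<p>For example, a sketch in which the schema is looked up in a side input
computed at runtime (the schema names and contents are hypothetical):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>import apache_beam as beam

with beam.Pipeline() as p:
  # Hypothetical (name, schema) pairs computed at pipeline runtime.
  schema_map = p | 'MakeSchemas' >> beam.Create([
      ('errors', {'fields': [
          {'name': 'type', 'type': 'STRING', 'mode': 'NULLABLE'},
          {'name': 'message', 'type': 'STRING', 'mode': 'NULLABLE'}]}),
  ])
  schema_map_view = beam.pvalue.AsDict(schema_map)

  elements = p | beam.Create([{'type': 'error', 'message': 'bad'}])
  _ = elements | beam.io.WriteToBigQuery(
      table='my_project:my_dataset.error_events',
      # The callable receives the destination plus the declared side inputs.
      schema=lambda table, schemas: schemas['errors'],
      schema_side_inputs=(schema_map_view,))
</pre></div>
</div>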
</section>
| <section id="additional-parameters-for-bigquery-tables"> |
| <h2>Additional Parameters for BigQuery Tables<a class="headerlink" href="#additional-parameters-for-bigquery-tables" title="Link to this heading"></a></h2> |
| <p>This sink is able to create tables in BigQuery if they don’t already exist. It |
| also relies on creating temporary tables when performing file loads.</p> |
| <p>The WriteToBigQuery transform creates tables using the BigQuery API by |
| inserting a load job (see the API reference [1]), or by inserting a new table |
| (see the API reference for that [2][3]).</p> |
| <p>When creating a new BigQuery table, there are a number of extra parameters |
| that one may need to specify. For example, clustering, partitioning, data |
| encoding, etc. It is possible to provide these additional parameters by |
| passing a Python dictionary as <cite>additional_bq_parameters</cite> to the transform. |
| As an example, to create a table that has specific partitioning, and |
| clustering properties, one would do the following:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">additional_bq_parameters</span> <span class="o">=</span> <span class="p">{</span> |
| <span class="s1">'timePartitioning'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="s1">'DAY'</span><span class="p">},</span> |
| <span class="s1">'clustering'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'fields'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'country'</span><span class="p">]}}</span> |
| <span class="k">with</span> <span class="n">Pipeline</span><span class="p">()</span> <span class="k">as</span> <span class="n">p</span><span class="p">:</span> |
| <span class="n">elements</span> <span class="o">=</span> <span class="p">(</span><span class="n">p</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span> |
| <span class="p">{</span><span class="s1">'country'</span><span class="p">:</span> <span class="s1">'mexico'</span><span class="p">,</span> <span class="s1">'timestamp'</span><span class="p">:</span> <span class="s1">'12:34:56'</span><span class="p">,</span> <span class="s1">'query'</span><span class="p">:</span> <span class="s1">'acapulco'</span><span class="p">},</span> |
| <span class="p">{</span><span class="s1">'country'</span><span class="p">:</span> <span class="s1">'canada'</span><span class="p">,</span> <span class="s1">'timestamp'</span><span class="p">:</span> <span class="s1">'12:34:59'</span><span class="p">,</span> <span class="s1">'query'</span><span class="p">:</span> <span class="s1">'influenza'</span><span class="p">},</span> |
| <span class="p">]))</span> |
| |
| <span class="n">elements</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">gcp</span><span class="o">.</span><span class="n">bigquery</span><span class="o">.</span><span class="n">WriteToBigQuery</span><span class="p">(</span> |
| <span class="n">table</span><span class="o">=</span><span class="s1">'project_name1:dataset_2.query_events_table'</span><span class="p">,</span> |
| <span class="n">additional_bq_parameters</span><span class="o">=</span><span class="n">additional_bq_parameters</span><span class="p">)</span> |
| </pre></div> |
| </div> |
<p>Much like the schema case, the <cite>additional_bq_parameters</cite> parameter can
also take a callable that receives a table reference.</p>
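<p>For instance, a sketch of a callable that only adds time partitioning for
destinations whose name ends with a hypothetical <cite>_daily</cite> suffix:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>import apache_beam as beam

def bq_parameters_for(table):
  # 'table' is the destination the current batch of rows is written to.
  if str(table).endswith('_daily'):
    return {'timePartitioning': {'type': 'DAY'}}
  return {}

with beam.Pipeline() as p:
  elements = p | beam.Create([
      {'country': 'mexico', 'timestamp': '12:34:56', 'query': 'acapulco'}])
  _ = elements | beam.io.gcp.bigquery.WriteToBigQuery(
      table='project_name1:dataset_2.query_events_daily',
      schema='country:STRING,timestamp:STRING,query:STRING',
      additional_bq_parameters=bq_parameters_for)
</pre></div>
</div>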
| <p>[1] <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/Job">https://cloud.google.com/bigquery/docs/reference/rest/v2/Job</a> #jobconfigurationload |
| [2] <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert">https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert</a> |
| [3] <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource">https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource</a></p> |
| </section> |
| <section id="chaining-of-operations-after-writetobigquery"> |
| <h2>Chaining of operations after WriteToBigQuery<a class="headerlink" href="#chaining-of-operations-after-writetobigquery" title="Link to this heading"></a></h2> |
| <p>WriteToBigQuery returns an object with several PCollections that consist of |
| metadata about the write operations. These are useful to inspect the write |
| operation and follow with the results:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">schema</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'fields'</span><span class="p">:</span> <span class="p">[</span> |
| <span class="p">{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'column'</span><span class="p">,</span> <span class="s1">'type'</span><span class="p">:</span> <span class="s1">'STRING'</span><span class="p">,</span> <span class="s1">'mode'</span><span class="p">:</span> <span class="s1">'NULLABLE'</span><span class="p">}]}</span> |
| |
| <span class="n">error_schema</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'fields'</span><span class="p">:</span> <span class="p">[</span> |
| <span class="p">{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'destination'</span><span class="p">,</span> <span class="s1">'type'</span><span class="p">:</span> <span class="s1">'STRING'</span><span class="p">,</span> <span class="s1">'mode'</span><span class="p">:</span> <span class="s1">'NULLABLE'</span><span class="p">},</span> |
| <span class="p">{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'row'</span><span class="p">,</span> <span class="s1">'type'</span><span class="p">:</span> <span class="s1">'STRING'</span><span class="p">,</span> <span class="s1">'mode'</span><span class="p">:</span> <span class="s1">'NULLABLE'</span><span class="p">},</span> |
| <span class="p">{</span><span class="s1">'name'</span><span class="p">:</span> <span class="s1">'error_message'</span><span class="p">,</span> <span class="s1">'type'</span><span class="p">:</span> <span class="s1">'STRING'</span><span class="p">,</span> <span class="s1">'mode'</span><span class="p">:</span> <span class="s1">'NULLABLE'</span><span class="p">}]}</span> |
| |
| <span class="k">with</span> <span class="n">Pipeline</span><span class="p">()</span> <span class="k">as</span> <span class="n">p</span><span class="p">:</span> |
| <span class="n">result</span> <span class="o">=</span> <span class="p">(</span><span class="n">p</span> |
| <span class="o">|</span> <span class="s1">'Create Columns'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span> |
| <span class="p">{</span><span class="s1">'column'</span><span class="p">:</span> <span class="s1">'value'</span><span class="p">},</span> |
| <span class="p">{</span><span class="s1">'bad_column'</span><span class="p">:</span> <span class="s1">'bad_value'</span><span class="p">}</span> |
| <span class="p">])</span> |
| <span class="o">|</span> <span class="s1">'Write Data'</span> <span class="o">>></span> <span class="n">WriteToBigQuery</span><span class="p">(</span> |
| <span class="n">method</span><span class="o">=</span><span class="n">WriteToBigQuery</span><span class="o">.</span><span class="n">Method</span><span class="o">.</span><span class="n">STREAMING_INSERTS</span><span class="p">,</span> |
| <span class="n">table</span><span class="o">=</span><span class="n">my_table</span><span class="p">,</span> |
| <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> |
| <span class="n">insert_retry_strategy</span><span class="o">=</span><span class="n">RetryStrategy</span><span class="o">.</span><span class="n">RETRY_NEVER</span> |
| <span class="p">))</span> |
| |
| <span class="n">_</span> <span class="o">=</span> <span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">failed_rows_with_errors</span> |
| <span class="o">|</span> <span class="s1">'Get Errors'</span> <span class="o">>></span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">e</span><span class="p">:</span> <span class="p">{</span> |
| <span class="s2">"destination"</span><span class="p">:</span> <span class="n">e</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> |
| <span class="s2">"row"</span><span class="p">:</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">e</span><span class="p">[</span><span class="mi">1</span><span class="p">]),</span> |
| <span class="s2">"error_message"</span><span class="p">:</span> <span class="n">e</span><span class="p">[</span><span class="mi">2</span><span class="p">][</span><span class="mi">0</span><span class="p">][</span><span class="s1">'message'</span><span class="p">]</span> |
| <span class="p">})</span> |
| <span class="o">|</span> <span class="s1">'Write Errors'</span> <span class="o">>></span> <span class="n">WriteToBigQuery</span><span class="p">(</span> |
| <span class="n">method</span><span class="o">=</span><span class="n">WriteToBigQuery</span><span class="o">.</span><span class="n">Method</span><span class="o">.</span><span class="n">STREAMING_INSERTS</span><span class="p">,</span> |
| <span class="n">table</span><span class="o">=</span><span class="n">error_log_table</span><span class="p">,</span> |
| <span class="n">schema</span><span class="o">=</span><span class="n">error_schema</span><span class="p">,</span> |
| <span class="p">))</span> |
| </pre></div> |
| </div> |
<p>Often, the simplest use case is to chain an operation after writing data to
BigQuery. To do this, one can chain the operation after one of the output
PCollections. A generic way to do this (independent of the write method)
looks like the following:</p>
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">chain_after</span><span class="p">(</span><span class="n">result</span><span class="p">):</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="c1"># This works for FILE_LOADS, where we run load and possibly copy jobs.</span> |
| <span class="k">return</span> <span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">destination_load_jobid_pairs</span><span class="p">,</span> |
| <span class="n">result</span><span class="o">.</span><span class="n">destination_copy_jobid_pairs</span><span class="p">)</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Flatten</span><span class="p">()</span> |
| <span class="k">except</span> <span class="ne">AttributeError</span><span class="p">:</span> |
| <span class="c1"># Works for STREAMING_INSERTS, where we return the rows BigQuery rejected</span> |
| <span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">failed_rows</span> |
| |
| <span class="n">result</span> <span class="o">=</span> <span class="p">(</span><span class="n">pcoll</span> <span class="o">|</span> <span class="n">WriteToBigQuery</span><span class="p">(</span><span class="o">...</span><span class="p">))</span> |
| |
| <span class="n">_</span> <span class="o">=</span> <span class="p">(</span><span class="n">chain_after</span><span class="p">(</span><span class="n">result</span><span class="p">)</span> |
| <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Reshuffle</span><span class="p">()</span> <span class="c1"># Force a 'commit' of the intermediate date</span> |
| <span class="o">|</span> <span class="n">MyOperationAfterWriteToBQ</span><span class="p">())</span> |
| </pre></div> |
| </div> |
<p>Attributes can be accessed using dot notation or bracket notation:</p>
<ul class="simple">
<li><p>result.failed_rows ↔ result[‘FailedRows’]</p></li>
<li><p>result.failed_rows_with_errors ↔ result[‘FailedRowsWithErrors’]</p></li>
<li><p>result.destination_load_jobid_pairs ↔ result[‘destination_load_jobid_pairs’]</p></li>
<li><p>result.destination_file_pairs ↔ result[‘destination_file_pairs’]</p></li>
<li><p>result.destination_copy_jobid_pairs ↔ result[‘destination_copy_jobid_pairs’]</p></li>
</ul>
</section>
| <section id="writing-with-storage-write-api-using-cross-language"> |
| <h2>Writing with Storage Write API using Cross Language<a class="headerlink" href="#writing-with-storage-write-api-using-cross-language" title="Link to this heading"></a></h2> |
| <p>This sink is able to write with BigQuery’s Storage Write API. To do so, specify |
| the method <cite>WriteToBigQuery.Method.STORAGE_WRITE_API</cite>. This will use the |
| StorageWriteToBigQuery() transform to discover and use the Java implementation. |
| Using this transform directly will require the use of beam.Row() elements.</p> |
<p>Similar to streaming inserts, it returns two dead-letter queue PCollections:
one containing just the failed rows and the other containing failed rows and
errors. They can be accessed with <cite>failed_rows</cite> and <cite>failed_rows_with_errors</cite>,
respectively. See the examples above for how to do this.</p>
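<p>A minimal sketch of a Storage Write API write with dead-letter handling (the
table name and schema are hypothetical; as a cross-language transform, this
needs a Java expansion environment available to the pipeline):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>import apache_beam as beam

with beam.Pipeline() as p:
  rows = p | beam.Create([{'column': 'value'}])
  result = rows | beam.io.WriteToBigQuery(
      table='my_project:my_dataset.my_table',
      schema='column:STRING',
      method=beam.io.WriteToBigQuery.Method.STORAGE_WRITE_API)

  # Dead-letter handling, as with streaming inserts.
  _ = (result.failed_rows_with_errors
       | 'LogErrors' >> beam.Map(print))
</pre></div>
</div>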
<p><strong>Short introduction to BigQuery concepts</strong>
Tables have rows (TableRow) and each row has cells (TableCell).
A table has a schema (TableSchema), which in turn describes the schema of each
cell (TableFieldSchema). The terms field and cell are used interchangeably.</p>
<dl class="simple">
<dt>TableSchema: Describes the schema (types and order) for values in each row.</dt><dd><p>Has one attribute, ‘fields’, which is a list of TableFieldSchema objects.</p>
</dd>
<dt>TableFieldSchema: Describes the schema (type, name) for one field.</dt><dd><p>Has several attributes, including ‘name’ and ‘type’. Common values for
the type attribute are: ‘STRING’, ‘INTEGER’, ‘FLOAT’, ‘BOOLEAN’, ‘NUMERIC’,
‘GEOGRAPHY’.
All possible values are described at:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types">https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types</a></p>
</dd>
<dt>TableRow: Holds all values in a table row. Has one attribute, ‘f’, which is a</dt><dd><p>list of TableCell instances.</p>
</dd>
<dt>TableCell: Holds the value for one cell (or field). Has one attribute,</dt><dd><p>‘v’, which is a JsonValue instance. This class is defined in the
apitools.base.py.extra_types module.</p>
</dd>
</dl>
<p>As of Beam 2.7.0, the NUMERIC data type is supported. This data type supports
high-precision decimal numbers (precision of 38 digits, scale of 9 digits).
The GEOGRAPHY data type works with Well-Known Text (see
<a class="reference external" href="https://en.wikipedia.org/wiki/Well-known_text">https://en.wikipedia.org/wiki/Well-known_text</a>) format for reading and writing
to BigQuery.
BigQuery IO requires values of the BYTES datatype to be encoded using base64
encoding when writing to BigQuery.</p>
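<p>For example, a sketch of writing a BYTES value (the table name is
hypothetical; note the explicit base64 encoding):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>import base64
import apache_beam as beam

with beam.Pipeline() as p:
  rows = p | beam.Create([
      # BYTES values must be base64-encoded before being written.
      {'label': 'example',
       'payload': base64.b64encode(b'\x01\x02\x03').decode('utf-8')}])
  _ = rows | beam.io.WriteToBigQuery(
      table='my_project:my_dataset.binary_data',
      schema='label:STRING,payload:BYTES')
</pre></div>
</div>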
<p><strong>Updates to the I/O connector code</strong></p>
<p>For any significant updates to this I/O connector, please consider involving
corresponding code reviewers mentioned in
<a class="reference external" href="https://github.com/apache/beam/blob/master/sdks/python/OWNERS">https://github.com/apache/beam/blob/master/sdks/python/OWNERS</a>.</p>
</section>
| <dl class="py class"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.TableRowJsonCoder"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.io.gcp.bigquery.</span></span><span class="sig-name descname"><span class="pre">TableRowJsonCoder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">table_schema</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#TableRowJsonCoder"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.TableRowJsonCoder" title="Link to this definition"></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="apache_beam.coders.coders.html#apache_beam.coders.coders.Coder" title="apache_beam.coders.coders.Coder"><code class="xref py py-class docutils literal notranslate"><span class="pre">Coder</span></code></a></p> |
| <p>A coder for a TableRow instance to/from a JSON string.</p> |
| <p>Note that the encoding operation (used when writing to sinks) requires the |
| table schema in order to obtain the ordered list of field names. Reading from |
| sources on the other hand does not need the table schema.</p> |
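<p>A small sketch of constructing the coder with a schema for encoding (the
single-field schema here is hypothetical):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>from apache_beam.io.gcp.bigquery import TableRowJsonCoder
from apache_beam.io.gcp.internal.clients import bigquery

table_schema = bigquery.TableSchema(fields=[
    bigquery.TableFieldSchema(name='month', type='STRING', mode='NULLABLE')])

# Encoding (writing) needs the schema; decoding alone would not.
coder = TableRowJsonCoder(table_schema=table_schema)
</pre></div>
</div>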
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.TableRowJsonCoder.encode"> |
| <span class="sig-name descname"><span class="pre">encode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">table_row</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#TableRowJsonCoder.encode"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.TableRowJsonCoder.encode" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.TableRowJsonCoder.decode"> |
| <span class="sig-name descname"><span class="pre">decode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoded_table_row</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#TableRowJsonCoder.decode"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.TableRowJsonCoder.decode" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
<dl class="py class">
<dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryDisposition">
<em class="property">class </em><span class="sig-prename descclassname">apache_beam.io.gcp.bigquery.</span><span class="sig-name descname">BigQueryDisposition</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQueryDisposition"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.13)"><code class="xref py py-class docutils literal notranslate">object</code></a></p>
<p>Class holding standard strings used for create and write dispositions.</p>
<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_NEVER">
<span class="sig-name descname">CREATE_NEVER</span><em class="property"> = 'CREATE_NEVER'</em></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_IF_NEEDED">
<span class="sig-name descname">CREATE_IF_NEEDED</span><em class="property"> = 'CREATE_IF_NEEDED'</em></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_TRUNCATE">
<span class="sig-name descname">WRITE_TRUNCATE</span><em class="property"> = 'WRITE_TRUNCATE'</em></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_APPEND">
<span class="sig-name descname">WRITE_APPEND</span><em class="property"> = 'WRITE_APPEND'</em></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_EMPTY">
<span class="sig-name descname">WRITE_EMPTY</span><em class="property"> = 'WRITE_EMPTY'</em></dt>
<dd></dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryDisposition.validate_create">
<em class="property">static </em><span class="sig-name descname">validate_create</span>(<em class="sig-param">disposition</em>)<a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQueryDisposition.validate_create"><span class="viewcode-link">[source]</span></a></dt>
<dd></dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryDisposition.validate_write">
<em class="property">static </em><span class="sig-name descname">validate_write</span>(<em class="sig-param">disposition</em>)<a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQueryDisposition.validate_write"><span class="viewcode-link">[source]</span></a></dt>
<dd></dd></dl>

</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQuerySource">
<span class="sig-prename descclassname">apache_beam.io.gcp.bigquery.</span><span class="sig-name descname">BigQuerySource</span>(<em class="sig-param">table=None</em>, <em class="sig-param">dataset=None</em>, <em class="sig-param">project=None</em>, <em class="sig-param">query=None</em>, <em class="sig-param">validate=False</em>, <em class="sig-param">coder=None</em>, <em class="sig-param">use_standard_sql=False</em>, <em class="sig-param">flatten_results=True</em>, <em class="sig-param">kms_key=None</em>, <em class="sig-param">use_dataflow_native_source=False</em>)<a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQuerySource"><span class="viewcode-link">[source]</span></a></dt>
<dd><div class="deprecated">
<p><span class="versionmodified deprecated">Deprecated: </span>BigQuerySource is deprecated since 2.25.0. Use ReadFromBigQuery instead.</p>
</div>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQuerySink">
<span class="sig-prename descclassname">apache_beam.io.gcp.bigquery.</span><span class="sig-name descname">BigQuerySink</span>(<em class="sig-param">*args</em>, <em class="sig-param">validate=False</em>, <em class="sig-param">**kwargs</em>)<a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQuerySink"><span class="viewcode-link">[source]</span></a></dt>
<dd><p>A deprecated alias for WriteToBigQuery.</p>
<div class="deprecated">
<p><span class="versionmodified deprecated">Deprecated: </span>BigQuerySink is deprecated since 2.11.0. Use WriteToBigQuery instead.</p>
</div>
</dd></dl>
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryQueryPriority"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.io.gcp.bigquery.</span></span><span class="sig-name descname"><span class="pre">BigQueryQueryPriority</span></span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQueryQueryPriority"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryQueryPriority" title="Link to this definition"></a></dt> |
| <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.13)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p> |
| <p>Class holding standard strings used for query priority.</p> |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryQueryPriority.INTERACTIVE"> |
| <span class="sig-name descname"><span class="pre">INTERACTIVE</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'INTERACTIVE'</span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryQueryPriority.INTERACTIVE" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.BigQueryQueryPriority.BATCH"> |
| <span class="sig-name descname"><span class="pre">BATCH</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'BATCH'</span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryQueryPriority.BATCH" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
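| <p>These attributes are plain strings, so for illustration they can be compared directly with the corresponding literals (a minimal sketch):</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from apache_beam.io.gcp.bigquery import BigQueryQueryPriority
# The class simply exposes the standard BigQuery priority strings.
assert BigQueryQueryPriority.INTERACTIVE == 'INTERACTIVE'
assert BigQueryQueryPriority.BATCH == 'BATCH'
</pre></div></div> |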
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.io.gcp.bigquery.</span></span><span class="sig-name descname"><span class="pre">WriteToBigQuery</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">table</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dataset</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">project</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">schema</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">create_disposition</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'CREATE_IF_NEEDED'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">write_disposition</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'WRITE_APPEND'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kms_key</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_file_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_partition_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_files_per_bundle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">test_client</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">custom_gcs_temp_location</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">insert_retry_strategy</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">additional_bq_parameters</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">table_side_inputs</span></span><span 
class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">schema_side_inputs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">triggering_frequency</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_at_least_once</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">validate</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">temp_file_format</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ignore_insert_ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">with_auto_sharding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_storage_api_streams</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ignore_unknown_columns</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">load_job_project_id</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_retries</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10000</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_insert_payload_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">9437184</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">num_streaming_keys</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">500</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_cdc_writes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><span class="pre">bool</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">primary_key</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" 
href="https://docs.python.org/3/library/typing.html#typing.List" title="(in Python v3.13)"><span class="pre">List</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">expansion_service</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery" title="Link to this definition"></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">PTransform</span></code></a></p> |
| <p>Write data to BigQuery.</p> |
| <p>This transform receives a PCollection of elements to be inserted into BigQuery |
tables. Elements may be supplied as Python dictionaries or as <cite>TableRow</cite>
instances.</p> |
| <p>Initialize a WriteToBigQuery transform.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>table</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>callable</em><em>, </em><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><em>ValueProvider</em></a>) – The ID of the table, or a callable |
| that returns it. The ID must contain only letters <code class="docutils literal notranslate"><span class="pre">a-z</span></code>, <code class="docutils literal notranslate"><span class="pre">A-Z</span></code>, |
| numbers <code class="docutils literal notranslate"><span class="pre">0-9</span></code>, or connectors <code class="docutils literal notranslate"><span class="pre">-_</span></code>. If dataset argument is |
| <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a> then the table argument must contain the entire table |
| reference specified as: <code class="docutils literal notranslate"><span class="pre">'DATASET.TABLE'</span></code> |
| or <code class="docutils literal notranslate"><span class="pre">'PROJECT:DATASET.TABLE'</span></code>. If it’s a callable, it must receive one |
| argument representing an element to be written to BigQuery, and return |
| a TableReference, or a string table name as specified above.</p></li> |
| <li><p><strong>dataset</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – The ID of the dataset containing this table or |
| <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a> if the table reference is specified entirely by the table |
| argument.</p></li> |
| <li><p><strong>project</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – The ID of the project containing this table or |
| <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a> if the table reference is specified entirely by the table |
| argument.</p></li> |
| <li><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><em>ValueProvider</em></a><em>, </em><em>callable</em>) – The schema to be used if the
BigQuery table to write has to be created. It can be specified as a
<code class="xref py py-class docutils literal notranslate"><span class="pre">TableSchema</span></code> object, a <cite>ValueProvider</cite> that holds a JSON string or a
python dictionary, the string or dictionary itself, or a single string
of the form <code class="docutils literal notranslate"><span class="pre">'field1:type1,field2:type2,field3:type3'</span></code> that defines a comma
separated list of fields. Here <code class="docutils literal notranslate"><span class="pre">'type'</span></code> should specify the BigQuery
type of the field. Single string based schemas do not support nested
fields, repeated fields, or specifying a BigQuery mode for fields
(mode will always be set to <code class="docutils literal notranslate"><span class="pre">'NULLABLE'</span></code>).
If a callable, it should receive a destination (as a str) and return a
str, dict or TableSchema.
One may also pass <code class="docutils literal notranslate"><span class="pre">SCHEMA_AUTODETECT</span></code> here when using JSON-based
file loads, and BigQuery will try to infer the schema for the files
that are being loaded.</p></li> |
| <li><p><strong>create_disposition</strong> (<a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition" title="apache_beam.io.gcp.bigquery.BigQueryDisposition"><em>BigQueryDisposition</em></a>) – <p>A string describing what |
| happens if the table does not exist. Possible values are:</p> |
| <ul> |
| <li><p><a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_IF_NEEDED" title="apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_IF_NEEDED"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryDisposition.CREATE_IF_NEEDED</span></code></a>: create if does not |
| exist.</p></li> |
| <li><p><a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_NEVER" title="apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_NEVER"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryDisposition.CREATE_NEVER</span></code></a>: fail the write if does not |
| exist.</p></li> |
| </ul> |
| </p></li> |
| <li><p><strong>write_disposition</strong> (<a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition" title="apache_beam.io.gcp.bigquery.BigQueryDisposition"><em>BigQueryDisposition</em></a>) – <p>A string describing what happens |
if the table already contains data. Possible values are:</p> |
| <ul> |
| <li><p><a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_TRUNCATE" title="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_TRUNCATE"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryDisposition.WRITE_TRUNCATE</span></code></a>: delete existing rows.</p></li> |
| <li><p><a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_APPEND" title="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_APPEND"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryDisposition.WRITE_APPEND</span></code></a>: add to existing rows.</p></li> |
| <li><p><a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_EMPTY" title="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_EMPTY"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryDisposition.WRITE_EMPTY</span></code></a>: fail the write if table not |
| empty.</p></li> |
| </ul> |
| <p><code class="docutils literal notranslate"><span class="pre">WRITE_TRUNCATE</span></code> cannot be used for streaming pipelines.</p> |
| </p></li> |
| <li><p><strong>kms_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – Optional Cloud KMS key name for use when creating new |
| tables.</p></li> |
| <li><p><strong>batch_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a>) – Number of rows to be written to BQ per streaming API |
| insert. The default is 500.</p></li> |
| <li><p><strong>test_client</strong> – Override the default bigquery client used for testing.</p></li> |
| <li><p><strong>max_file_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a>) – The maximum size for a file to be written and then
loaded into BigQuery. The default value is 4TB, which is 80% of
BigQuery’s 5TB per-file load limit.</p></li> |
| <li><p><strong>max_partition_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a>) – Maximum byte size for each load job to |
| BigQuery. Defaults to 15TB. Applicable to FILE_LOADS only.</p></li> |
| <li><p><strong>max_files_per_bundle</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a>) – The maximum number of files to be concurrently |
| written by a worker. The default here is 20. Larger values will allow |
| writing to multiple destinations without having to reshard - but they |
| increase the memory burden on the workers.</p></li> |
| <li><p><strong>custom_gcs_temp_location</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – A GCS location to store files to be used |
| for file loads into BigQuery. By default, this will use the pipeline’s |
| temp_location, but for pipelines whose temp_location is not appropriate |
| for BQ File Loads, users should pass a specific one.</p></li> |
| <li><p><strong>method</strong> – The method to use to write to BigQuery. It may be
STREAMING_INSERTS, FILE_LOADS, STORAGE_WRITE_API or DEFAULT. For an
introduction to loading data into BigQuery, see
<a class="reference external" href="https://cloud.google.com/bigquery/docs/loading-data">https://cloud.google.com/bigquery/docs/loading-data</a>.
DEFAULT uses STREAMING_INSERTS on streaming pipelines and
FILE_LOADS on batch pipelines.
Note: FILE_LOADS currently does not support BigQuery’s JSON data type:
<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#json_type">https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#json_type</a>.</p></li> |
| <li><p><strong>insert_retry_strategy</strong> – <p>The strategy to use when retrying streaming inserts |
| into BigQuery. Options are shown in bigquery_tools.RetryStrategy attrs. |
| Default is to retry always. This means that whenever there are rows |
| that fail to be inserted to BigQuery, they will be retried indefinitely. |
| Other retry strategy settings will produce a deadletter PCollection |
| as output. Appropriate values are:</p> |
| <ul> |
| <li><p><cite>RetryStrategy.RETRY_ALWAYS</cite>: retry all rows if
there is any kind of error. Note that this will hold your pipeline
| back if there are errors until you cancel or update it.</p></li> |
| <li><p><cite>RetryStrategy.RETRY_NEVER</cite>: rows with errors |
| will not be retried. Instead they will be output to a dead letter |
| queue under the <cite>‘FailedRows’</cite> tag.</p></li> |
| <li><p><cite>RetryStrategy.RETRY_ON_TRANSIENT_ERROR</cite>: retry |
| rows with transient errors (e.g. timeouts). Rows with permanent errors |
| will be output to dead letter queue under <cite>‘FailedRows’</cite> tag.</p></li> |
| </ul> |
| </p></li> |
| <li><p><strong>additional_bq_parameters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>callable</em>) – Additional parameters to pass |
| to BQ when creating / loading data into a table. If a callable, it |
| should be a function that receives a table reference indicating |
| the destination and returns a dictionary. |
| These can be ‘timePartitioning’, ‘clustering’, etc. They are passed |
| directly to the job load configuration. See |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload">https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload</a></p></li> |
| <li><p><strong>table_side_inputs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.13)"><em>tuple</em></a>) – A tuple with <code class="docutils literal notranslate"><span class="pre">AsSideInput</span></code> PCollections to be |
| passed to the table callable (if one is provided).</p></li> |
| <li><p><strong>schema_side_inputs</strong> – A tuple with <code class="docutils literal notranslate"><span class="pre">AsSideInput</span></code> PCollections to be |
| passed to the schema callable (if one is provided).</p></li> |
| <li><p><strong>triggering_frequency</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.13)"><em>float</em></a>) – <p>When method is FILE_LOADS: |
| Value will be converted to int. Every triggering_frequency seconds, a |
| BigQuery load job will be triggered for all the data written since the |
| last load job. BigQuery has limits on how many load jobs can be |
| triggered per day, so be careful not to set this duration too low, or |
| you may exceed daily quota. Often this is set to 5 or 10 minutes to |
| ensure that the project stays well under the BigQuery quota. See |
| <a class="reference external" href="https://cloud.google.com/bigquery/quota-policy">https://cloud.google.com/bigquery/quota-policy</a> for more information |
| about BigQuery quotas.</p> |
| <p>When method is STREAMING_INSERTS and with_auto_sharding=True: |
| A streaming inserts batch will be submitted at least every |
| triggering_frequency seconds when data is waiting. The batch can be |
| sent earlier if it reaches the maximum batch size set by batch_size. |
| Default value is 0.2 seconds.</p> |
| <p>When method is STORAGE_WRITE_API: |
| A stream of rows will be committed every triggering_frequency seconds. |
| By default, this will be 5 seconds to ensure exactly-once semantics.</p> |
| </p></li> |
| <li><p><strong>use_at_least_once</strong> – Intended only for STORAGE_WRITE_API. When True, will |
| use at-least-once semantics. This is cheaper and provides lower |
| latency, but will potentially duplicate records.</p></li> |
| <li><p><strong>validate</strong> – Indicates whether to perform validation checks on |
| inputs. This parameter is primarily used for testing.</p></li> |
| <li><p><strong>temp_file_format</strong> – The format to use for file loads into BigQuery. The |
| options are NEWLINE_DELIMITED_JSON or AVRO, with NEWLINE_DELIMITED_JSON |
| being used by default. For advantages and limitations of the two |
| formats, see |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro">https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro</a> |
| and |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-json">https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-json</a>.</p></li> |
| <li><p><strong>ignore_insert_ids</strong> – When using the STREAMING_INSERTS method to write data
to BigQuery, <cite>insert_ids</cite> are a BigQuery feature that supports
| deduplication of events. If your use case is not sensitive to |
| duplication of data inserted to BigQuery, set <cite>ignore_insert_ids</cite> |
| to True to increase the throughput for BQ writing. See: |
| <a class="reference external" href="https://cloud.google.com/bigquery/streaming-data-into-bigquery#disabling_best_effort_de-duplication">https://cloud.google.com/bigquery/streaming-data-into-bigquery#disabling_best_effort_de-duplication</a></p></li> |
| <li><p><strong>with_auto_sharding</strong> – Experimental. If true, enables using a dynamically |
| determined number of shards to write to BigQuery. This can be used for |
| all of FILE_LOADS, STREAMING_INSERTS, and STORAGE_WRITE_API. Only |
| applicable to unbounded input.</p></li> |
| <li><p><strong>num_storage_api_streams</strong> – Specifies the number of write streams that the |
| Storage API sink will use. This parameter is only applicable when |
| writing unbounded data.</p></li> |
| <li><p><strong>ignore_unknown_columns</strong> – Accept rows that contain values that do not match |
| the schema. The unknown values are ignored. Default is False, |
| which treats unknown values as errors. This option is only valid for |
| method=STREAMING_INSERTS. See reference: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll">https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll</a></p></li> |
| <li><p><strong>load_job_project_id</strong> – Specifies an alternate GCP project id to use for |
billing batch file loads. By default, the project id of the table is
| used.</p></li> |
| <li><p><strong>num_streaming_keys</strong> – The number of shards per destination when writing via |
| streaming inserts.</p></li> |
| <li><p><strong>expansion_service</strong> – The address (host:port) of the expansion service. |
| If no expansion service is provided, will attempt to run the default |
| GCP expansion service. Used for STORAGE_WRITE_API method.</p></li> |
| <li><p><strong>max_retries</strong> – The number of times that we will retry inserting a group of |
| rows into BigQuery. By default, we retry 10000 times with exponential |
| backoffs (effectively retry forever).</p></li> |
| <li><p><strong>max_insert_payload_size</strong> – The maximum byte size for a BigQuery legacy |
| streaming insert payload.</p></li> |
| <li><p><strong>use_cdc_writes</strong> – Configures the usage of CDC writes to BigQuery.
When set to True, Beam Rows are sent as-is to the BigQuery sink,
which expects ‘record’ and ‘row_mutation_info’ properties.
Used for STORAGE_WRITE_API in ‘at least once’ mode.</p></li> |
| <li><p><strong>primary_key</strong> – When using CDC writes to BigQuery with
CREATE_IF_NEEDED mode for the underlying tables, a list of column names
must be configured as the primary key. Used for
STORAGE_WRITE_API in ‘at least once’ mode.</p></li> |
| </ul> |
| </dd> |
| </dl> |
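| <p>The following is a minimal, hedged usage sketch. The project, dataset, table and field names are hypothetical placeholders, and an actual run additionally requires GCP credentials and suitable pipeline options:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>import apache_beam as beam
# A hedged sketch of writing dictionaries to a (hypothetical) table.
with beam.Pipeline() as p:
    _ = (
        p
        | beam.Create([{'name': 'Ada', 'age': 36}])  # hypothetical rows
        | beam.io.WriteToBigQuery(
            table='my-project:my_dataset.my_table',  # hypothetical table
            schema='name:STRING,age:INTEGER',
            create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
            write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND))
</pre></div></div> |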
| <dl class="py class"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.Method"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">Method</span></span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery.Method"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.Method" title="Link to this definition"></a></dt> |
| <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.13)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p> |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.DEFAULT"> |
| <span class="sig-name descname"><span class="pre">DEFAULT</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'DEFAULT'</span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.DEFAULT" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.STREAMING_INSERTS"> |
| <span class="sig-name descname"><span class="pre">STREAMING_INSERTS</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'STREAMING_INSERTS'</span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.STREAMING_INSERTS" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.FILE_LOADS"> |
| <span class="sig-name descname"><span class="pre">FILE_LOADS</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'FILE_LOADS'</span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.FILE_LOADS" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.STORAGE_WRITE_API"> |
| <span class="sig-name descname"><span class="pre">STORAGE_WRITE_API</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'STORAGE_WRITE_API'</span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.STORAGE_WRITE_API" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.get_table_schema_from_string"> |
| <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">get_table_schema_from_string</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">schema</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.get_table_schema_from_string" title="Link to this definition"></a></dt> |
| <dd><p>Transform the string table schema into a |
| <code class="xref py py-class docutils literal notranslate"><span class="pre">TableSchema</span></code> instance.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – The string schema to be used if the BigQuery table to write |
| has to be created.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>The schema to be used if the BigQuery table to write has to be created |
| but in the <code class="xref py py-class docutils literal notranslate"><span class="pre">TableSchema</span></code> format.</p> |
| </dd> |
| <dt class="field-odd">Return type<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><em>TableSchema</em></p> |
| </dd> |
| </dl> |
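| <p>For illustration, a hedged sketch of converting a comma separated string schema (the field names are hypothetical):</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from apache_beam.io.gcp.bigquery import WriteToBigQuery
# Build a TableSchema from a 'field:type' string; the fields are hypothetical.
table_schema = WriteToBigQuery.get_table_schema_from_string('name:STRING,age:INTEGER')
</pre></div></div> |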
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.table_schema_to_dict"> |
| <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">table_schema_to_dict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">table_schema</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.table_schema_to_dict" title="Link to this definition"></a></dt> |
| <dd><p>Create a dictionary representation of a table schema for serialization.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.get_dict_table_schema"> |
| <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">get_dict_table_schema</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">schema</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.get_dict_table_schema" title="Link to this definition"></a></dt> |
| <dd><p>Transform the table schema into a dictionary instance.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a><em>, </em><em>TableSchema</em>) – The schema to be used if the BigQuery table to write has to be created. |
This can be a dict, a string, or a TableSchema.</p> |
| </dd> |
| <dt class="field-even">Returns<span class="colon">:</span></dt> |
| <dd class="field-even"><p>The schema to be used if the BigQuery table to write has |
| to be created but in the dictionary format.</p> |
| </dd> |
| <dt class="field-odd">Return type<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>Dict[<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)">str</a>, Any]</p> |
| </dd> |
| </dl> |
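| <p>A hedged sketch of the dictionary conversion, using the same hypothetical fields as above:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>from apache_beam.io.gcp.bigquery import WriteToBigQuery
# Convert a string schema into its serializable dictionary representation.
schema_dict = WriteToBigQuery.get_dict_table_schema('name:STRING,age:INTEGER')
</pre></div></div> |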
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.expand"> |
| <span class="sig-name descname"><span class="pre">expand</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">pcoll</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery.expand"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.expand" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.display_data"> |
| <span class="sig-name descname"><span class="pre">display_data</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery.display_data"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.display_data" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.to_runner_api_parameter"> |
| <span class="sig-name descname"><span class="pre">to_runner_api_parameter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">context</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery.to_runner_api_parameter"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.to_runner_api_parameter" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteToBigQuery.from_runner_api"> |
| <span class="sig-name descname"><span class="pre">from_runner_api</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">payload</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">context</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery.from_runner_api"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.from_runner_api" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteResult"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.io.gcp.bigquery.</span></span><span class="sig-name descname"><span class="pre">WriteResult</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">method</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">destination_load_jobid_pairs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><span class="pre">PCollection</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">JobReference</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">destination_file_pairs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><span class="pre">PCollection</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python 
v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><span class="pre">int</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">destination_copy_jobid_pairs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><span class="pre">PCollection</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">JobReference</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">failed_rows</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><span class="pre">PCollection</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><span class="pre">dict</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> 
</span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">failed_rows_with_errors</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><span class="pre">PCollection</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><span class="pre">dict</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><span class="pre">list</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteResult"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteResult" title="Link to this definition"></a></dt> |
| <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.13)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p> |
| <p>The result of a WriteToBigQuery transform.</p> |
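| <p>A hedged sketch of inspecting a WriteResult from a streaming-inserts write is shown below; the rows, table and schema are hypothetical placeholders:</p> |
| <div class="highlight-default notranslate"><div class="highlight"><pre><span></span>import apache_beam as beam
from apache_beam.io.gcp.bigquery_tools import RetryStrategy
# A hedged sketch: the rows, table and schema below are hypothetical.
with beam.Pipeline() as p:
    result = (
        p
        | beam.Create([{'name': 'Ada'}])
        | beam.io.WriteToBigQuery(
            table='my-project:my_dataset.my_table',
            schema='name:STRING',
            method=beam.io.WriteToBigQuery.Method.STREAMING_INSERTS,
            insert_retry_strategy=RetryStrategy.RETRY_NEVER))
    # Rows that could not be inserted are exposed on the WriteResult.
    _ = result.failed_rows | beam.Map(print)
</pre></div></div> |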
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteResult.validate"> |
| <span class="sig-name descname"><span class="pre">validate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">valid_methods</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">attribute</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteResult.validate"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteResult.validate" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py property"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteResult.destination_load_jobid_pairs"> |
| <em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">destination_load_jobid_pairs</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><span class="pre">PCollection</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">JobReference</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteResult.destination_load_jobid_pairs" title="Link to this definition"></a></dt> |
| <dd><p>A <code class="docutils literal notranslate"><span class="pre">FILE_LOADS</span></code> method attribute</p> |
| <dl class="simple"> |
| <dt>Returns: A PCollection of the table destinations that were successfully</dt><dd><p>loaded to using the batch load API, along with the load job IDs.</p> |
| </dd> |
| </dl> |
| <p>Raises: AttributeError: if accessed with a write method |
| besides <code class="docutils literal notranslate"><span class="pre">FILE_LOADS</span></code>.</p> |
| </dd></dl> |
| |
| <dl class="py property"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteResult.destination_file_pairs"> |
| <em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">destination_file_pairs</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><span class="pre">PCollection</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><span class="pre">int</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteResult.destination_file_pairs" title="Link to this definition"></a></dt> |
| <dd><p>A <code class="docutils literal notranslate"><span class="pre">FILE_LOADS</span></code> method attribute</p> |
| <dl class="simple"> |
| <dt>Returns: A PCollection of the table destinations along with the</dt><dd><p>temp files used as sources to load from.</p> |
| </dd> |
| </dl> |
| <p>Raises: AttributeError: if accessed with a write method |
| besides <code class="docutils literal notranslate"><span class="pre">FILE_LOADS</span></code>.</p> |
| </dd></dl> |
| |
| <dl class="py property"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteResult.destination_copy_jobid_pairs"> |
| <em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">destination_copy_jobid_pairs</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><span class="pre">PCollection</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">JobReference</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteResult.destination_copy_jobid_pairs" title="Link to this definition"></a></dt> |
| <dd><p>A <code class="docutils literal notranslate"><span class="pre">FILE_LOADS</span></code> method attribute</p> |
| <dl class="simple"> |
| <dt>Returns: A PCollection of the table destinations that were successfully</dt><dd><p>copied to, along with the copy job ID.</p> |
| </dd> |
| </dl> |
| <p>Raises: AttributeError: if accessed with a write method |
| besides <code class="docutils literal notranslate"><span class="pre">FILE_LOADS</span></code>.</p> |
| </dd></dl> |
| |
| <dl class="py property"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteResult.failed_rows"> |
| <em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">failed_rows</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><span class="pre">PCollection</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><span class="pre">dict</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteResult.failed_rows" title="Link to this definition"></a></dt> |
| <dd><p>A <code class="docutils literal notranslate"><span class="pre">[STREAMING_INSERTS,</span> <span class="pre">STORAGE_WRITE_API]</span></code> method attribute</p> |
| <p>Returns: A PCollection of rows that failed when inserting to BigQuery.</p> |
| <p>Raises: AttributeError: if accessed with a write method |
| besides <code class="docutils literal notranslate"><span class="pre">[STREAMING_INSERTS,</span> <span class="pre">STORAGE_WRITE_API]</span></code>.</p> |
| </dd></dl> |
| |
| <dl class="py property"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.WriteResult.failed_rows_with_errors"> |
| <em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">failed_rows_with_errors</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><span class="pre">PCollection</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Tuple" title="(in Python v3.13)"><span class="pre">Tuple</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><span class="pre">dict</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.13)"><span class="pre">list</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteResult.failed_rows_with_errors" title="Link to this definition"></a></dt> |
| <dd><p>A <code class="docutils literal notranslate"><span class="pre">[STREAMING_INSERTS,</span> <span class="pre">STORAGE_WRITE_API]</span></code> method attribute</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Returns<span class="colon">:</span></dt> |
| <dd class="field-odd"><p>A PCollection of rows that failed when inserting to BigQuery, |
| along with their errors.</p> |
| </dd> |
| <dt class="field-even">Raises<span class="colon">:</span></dt> |
| <dd class="field-even"><ul class="simple"> |
| <li><p><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#AttributeError" title="(in Python v3.13)"><strong>AttributeError</strong></a> – if accessed with a write method</p></li> |
| <li><p><strong>besides</strong><strong> [</strong><strong>STREAMING_INSERTS</strong><strong>, </strong><strong>STORAGE_WRITE_API</strong><strong>]</strong><strong>.</strong> – </p></li> |
| </ul> |
| </dd> |
| </dl> |
| </dd></dl> |
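<p>The snippet below is a minimal sketch (using a hypothetical project, dataset, table, and schema) of how these two outputs can be consumed from the <code class="docutils literal notranslate"><span class="pre">WriteResult</span></code> returned by <code class="docutils literal notranslate"><span class="pre">WriteToBigQuery</span></code> when writing with <code class="docutils literal notranslate"><span class="pre">STREAMING_INSERTS</span></code>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre># A minimal sketch, assuming the STREAMING_INSERTS write method and a
# hypothetical project/dataset/table; both failed-row outputs are read from
# the WriteResult returned by WriteToBigQuery.
import apache_beam as beam
from apache_beam.io.gcp.bigquery import WriteToBigQuery

with beam.Pipeline() as p:
    result = (
        p
        | beam.Create([{'name': 'example', 'value': 1}])
        | WriteToBigQuery(
            table='my-project:my_dataset.my_table',
            schema='name:STRING, value:INTEGER',
            method=WriteToBigQuery.Method.STREAMING_INSERTS))

    # Rows that BigQuery rejected, as (destination table, row) tuples.
    _ = result.failed_rows | 'LogFailedRows' >> beam.Map(print)

    # The same rows, together with the error returned for each insert attempt.
    _ = result.failed_rows_with_errors | 'LogFailedRowsWithErrors' >> beam.Map(print)
</pre></div></div>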
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadFromBigQuery"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.io.gcp.bigquery.</span></span><span class="sig-name descname"><span class="pre">ReadFromBigQuery</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">gcs_location</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_native_datetime</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">timeout</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadFromBigQuery"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery" title="Link to this definition"></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">PTransform</span></code></a></p> |
| <p>Read data from BigQuery.</p> |
| <blockquote> |
| <div><p>This PTransform uses a BigQuery export job to take a snapshot of the table |
| on GCS, and then reads from each produced file. File format is Avro by |
| default.</p> |
| </div></blockquote> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>method</strong> – The method to use to read from BigQuery. It may be EXPORT or |
| DIRECT_READ. EXPORT invokes a BigQuery export request |
| (<a class="reference external" href="https://cloud.google.com/bigquery/docs/exporting-data">https://cloud.google.com/bigquery/docs/exporting-data</a>). DIRECT_READ reads |
| directly from BigQuery storage using the BigQuery Read API |
| (<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/storage">https://cloud.google.com/bigquery/docs/reference/storage</a>). If |
| unspecified, the default is currently EXPORT.</p></li> |
| <li><p><strong>timeout</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.13)"><em>float</em></a>) – The timeout for the read operation in seconds. This only |
| impacts DIRECT_READ. If None, the client default will be used.</p></li> |
| <li><p><strong>use_native_datetime</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – By default this transform exports BigQuery |
| DATETIME fields as formatted strings (for example: |
| 2021-01-01T12:59:59). If <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>, BigQuery DATETIME fields will |
| be returned as native Python datetime objects. This can only be used when |
| ‘method’ is ‘DIRECT_READ’.</p></li> |
| <li><p><strong>table</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><em>callable</em><em>, </em><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><em>ValueProvider</em></a>) – The ID of the table, or a callable |
| that returns it. If dataset argument is <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a> then the table |
| argument must contain the entire table reference specified as: |
| <code class="docutils literal notranslate"><span class="pre">'DATASET.TABLE'</span></code> or <code class="docutils literal notranslate"><span class="pre">'PROJECT:DATASET.TABLE'</span></code>. If it’s a callable, |
| it must receive one argument representing an element to be written to |
| BigQuery, and return a TableReference, or a string table name as specified |
| above.</p></li> |
| <li><p><strong>dataset</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – The ID of the dataset containing this table or |
| <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a> if the table reference is specified entirely by the table |
| argument.</p></li> |
| <li><p><strong>project</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – The ID of the project containing this table.</p></li> |
| <li><p><strong>query</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><em>ValueProvider</em></a>) – A query to be used instead of arguments |
| table, dataset, and project.</p></li> |
| <li><p><strong>validate</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – If <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>, various checks will be done when source |
| gets initialized (e.g., is table present?). This should be |
| <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a> for most scenarios in order to catch errors as early as |
| possible (pipeline construction instead of pipeline execution). It |
| should be <a class="reference external" href="https://docs.python.org/3/library/constants.html#False" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">False</span></code></a> if the table is created during pipeline |
| execution by a previous step. Set this to <a class="reference external" href="https://docs.python.org/3/library/constants.html#False" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">False</span></code></a> |
| if the BigQuery export method is slow due to checking file existence.</p></li> |
| <li><p><strong>coder</strong> (<a class="reference internal" href="apache_beam.coders.coders.html#apache_beam.coders.coders.Coder" title="apache_beam.coders.coders.Coder"><em>Coder</em></a>) – The coder for the table |
| rows. If <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a>, then the default coder is |
| _JsonToDictCoder, which will interpret every row as a JSON |
| serialized dictionary.</p></li> |
| <li><p><strong>use_standard_sql</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – Specifies whether to use BigQuery’s standard SQL |
| dialect for this query. The default value is <a class="reference external" href="https://docs.python.org/3/library/constants.html#False" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">False</span></code></a>. |
| If set to <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>, the query will use BigQuery’s updated SQL |
| dialect with improved standards compliance. |
| This parameter is ignored for table inputs.</p></li> |
| <li><p><strong>flatten_results</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – Flattens all nested and repeated fields in the |
| query results. The default value is <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>.</p></li> |
| <li><p><strong>kms_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – Optional Cloud KMS key name for use when creating new |
| temporary tables.</p></li> |
| <li><p><strong>gcs_location</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a><em>, </em><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><em>ValueProvider</em></a>) – The name of the Google Cloud Storage |
| bucket where the extracted table should be written as a string or |
| a <a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><code class="xref py py-class docutils literal notranslate"><span class="pre">ValueProvider</span></code></a>. If |
| <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a>, then the temp_location parameter is used.</p></li> |
| <li><p><strong>bigquery_job_labels</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><em>dict</em></a>) – A dictionary with string labels to be passed |
| to BigQuery export and query jobs created by this transform. See: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration">https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration</a></p></li> |
| <li><p><strong>use_json_exports</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – By default, this transform works by exporting |
| BigQuery data into Avro files, and reading those files. With this |
| parameter, the transform will instead export to JSON files. JSON files |
| are slower to read due to their larger size. |
| When using JSON exports, the BigQuery types for DATE, DATETIME, TIME, and |
| TIMESTAMP will be exported as strings. This behavior is consistent with |
| BigQuerySource. |
| When using Avro exports, these fields will be exported as native Python |
types (datetime.date, datetime.datetime, datetime.time, 
and datetime.datetime respectively). Avro exports are recommended. 
| To learn more about BigQuery types, and Time-related type |
| representations, |
| see: <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types">https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types</a> |
| To learn more about type conversions between BigQuery and Avro, see: |
| <a class="reference external" href="https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#avro_conversions">https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#avro_conversions</a></p></li> |
| <li><p><strong>temp_dataset</strong> (<code class="docutils literal notranslate"><span class="pre">apache_beam.io.gcp.internal.clients.bigquery.DatasetReference</span></code>) – Temporary dataset reference to use when reading from BigQuery using a |
| query. When reading using a query, BigQuery source will create a |
| temporary dataset and a temporary table to store the results of the |
| query. With this option, you can set an existing dataset to create the |
| temporary table in. BigQuery source will create a temporary table in |
| that dataset, and will remove it once it is not needed. Job needs access |
| to create and delete tables within the given dataset. Dataset name |
| should <em>not</em> start with the reserved prefix <cite>beam_temp_dataset_</cite>.</p></li> |
| <li><p><strong>query_priority</strong> (<a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryQueryPriority" title="apache_beam.io.gcp.bigquery.BigQueryQueryPriority"><em>BigQueryQueryPriority</em></a>) – By default, this transform runs |
| queries with BATCH priority. Use <a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryQueryPriority.INTERACTIVE" title="apache_beam.io.gcp.bigquery.BigQueryQueryPriority.INTERACTIVE"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryQueryPriority.INTERACTIVE</span></code></a> |
| to run queries with INTERACTIVE priority. This option is ignored when |
| reading from a table rather than a query. To learn more about query |
| priority, see: <a class="reference external" href="https://cloud.google.com/bigquery/docs/running-queries">https://cloud.google.com/bigquery/docs/running-queries</a></p></li> |
| <li><p><strong>output_type</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – By default, this source yields Python dictionaries |
| (<cite>PYTHON_DICT</cite>). There is experimental support for producing a |
| PCollection with a schema and yielding Beam Rows via the option |
| <cite>BEAM_ROW</cite>. For more information on schemas, see |
| <a class="reference external" href="https://beam.apache.org/documentation/programming-guide/#what-is-a-schema">https://beam.apache.org/documentation/programming-guide/#what-is-a-schema</a>)</p></li> |
| </ul> |
| </dd> |
| </dl> |
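<p>The snippet below is a minimal sketch (with hypothetical project, dataset, table, and bucket names) of reading a whole table with the <code class="docutils literal notranslate"><span class="pre">DIRECT_READ</span></code> method and reading query results with the default <code class="docutils literal notranslate"><span class="pre">EXPORT</span></code> method:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre># A minimal sketch with hypothetical project, dataset, table, and bucket names,
# showing a table read via the BigQuery Read API (DIRECT_READ) and a query read
# via an export job (the default EXPORT method).
import apache_beam as beam
from apache_beam.io.gcp.bigquery import ReadFromBigQuery

with beam.Pipeline() as p:
    # Read a whole table directly from BigQuery storage.
    table_rows = (
        p
        | 'ReadTable' >> ReadFromBigQuery(
            table='my-project:my_dataset.my_table',
            method=ReadFromBigQuery.Method.DIRECT_READ))

    # Read query results; export files are staged under gcs_location
    # (temp_location is used when gcs_location is None).
    query_rows = (
        p
        | 'ReadQuery' >> ReadFromBigQuery(
            query='SELECT name, value FROM `my-project.my_dataset.my_table`',
            use_standard_sql=True,
            gcs_location='gs://my-bucket/bq_export'))

    # By default each element is a Python dict keyed by column name.
    _ = table_rows | 'PrintTable' >> beam.Map(print)
    _ = query_rows | 'PrintQuery' >> beam.Map(print)
</pre></div></div>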
| <dl class="py class"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">Method</span></span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadFromBigQuery.Method"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method" title="Link to this definition"></a></dt> |
| <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.13)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p> |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method.EXPORT"> |
| <span class="sig-name descname"><span class="pre">EXPORT</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'EXPORT'</span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method.EXPORT" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method.DIRECT_READ"> |
| <span class="sig-name descname"><span class="pre">DIRECT_READ</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'DIRECT_READ'</span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method.DIRECT_READ" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadFromBigQuery.COUNTER"> |
| <span class="sig-name descname"><span class="pre">COUNTER</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery.COUNTER" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadFromBigQuery.expand"> |
| <span class="sig-name descname"><span class="pre">expand</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">pcoll</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadFromBigQuery.expand"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery.expand" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadFromBigQueryRequest"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.io.gcp.bigquery.</span></span><span class="sig-name descname"><span class="pre">ReadFromBigQueryRequest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">query</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_standard_sql</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><span class="pre">bool</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">table</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TableReference</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">flatten_results</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><span class="pre">bool</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadFromBigQueryRequest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQueryRequest" title="Link to this definition"></a></dt> |
| <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.13)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p> |
| <p>Class that defines data to read from BQ.</p> |
| <p>Only one of query or table should be specified.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>query</strong> – SQL query to fetch data.</p></li> |
| <li><p><strong>use_standard_sql</strong> – Specifies whether to use BigQuery’s standard SQL dialect for this query. |
| The default value is <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>. If set to <a class="reference external" href="https://docs.python.org/3/library/constants.html#False" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">False</span></code></a>, |
| the query will use BigQuery’s legacy SQL dialect. |
| This parameter is ignored for table inputs.</p></li> |
| <li><p><strong>table</strong> – The ID of the table to read. Table should define project and dataset |
| (ex.: <code class="docutils literal notranslate"><span class="pre">'PROJECT:DATASET.TABLE'</span></code>).</p></li> |
| <li><p><strong>flatten_results</strong> – Flattens all nested and repeated fields in the query results. |
| The default value is <a class="reference external" href="https://docs.python.org/3/library/constants.html#False" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">False</span></code></a>.</p></li> |
| </ul> |
| </dd> |
| </dl> |
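<p>A minimal sketch of constructing requests (the table and query names are hypothetical); only one of <code class="docutils literal notranslate"><span class="pre">query</span></code> or <code class="docutils literal notranslate"><span class="pre">table</span></code> is given per request:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre># A minimal sketch of building ReadFromBigQueryRequest objects; the table and
# query below are hypothetical. Only one of `query` or `table` is set on each
# request.
from apache_beam.io.gcp.bigquery import ReadFromBigQueryRequest

table_request = ReadFromBigQueryRequest(
    table='my-project:my_dataset.my_table')
query_request = ReadFromBigQueryRequest(
    query='SELECT name, value FROM `my-project.my_dataset.my_table`',
    use_standard_sql=True)
</pre></div></div>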
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadFromBigQueryRequest.validate"> |
| <span class="sig-name descname"><span class="pre">validate</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadFromBigQueryRequest.validate"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQueryRequest.validate" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadAllFromBigQuery"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">apache_beam.io.gcp.bigquery.</span></span><span class="sig-name descname"><span class="pre">ReadAllFromBigQuery</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">gcs_location</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><span class="pre">ValueProvider</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">validate</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><span class="pre">bool</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kms_key</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">temp_dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">DatasetReference</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">bigquery_job_labels</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Dict" title="(in Python v3.13)"><span class="pre">Dict</span></a><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">query_priority</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'BATCH'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadAllFromBigQuery"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadAllFromBigQuery" title="Link to this definition"></a></dt> |
| <dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">PTransform</span></code></a></p> |
| <p>Read data from BigQuery.</p> |
| <blockquote> |
| <div><p>PTransform:ReadFromBigQueryRequest->Rows</p> |
| <p>This PTransform uses a BigQuery export job to take a snapshot of the table |
| on GCS, and then reads from each produced file. Data is exported into |
| a new subdirectory for each export using UUIDs generated in |
| <cite>ReadFromBigQueryRequest</cite> objects.</p> |
<p>It is recommended not to use this PTransform for streaming jobs on 
the GlobalWindow, since it will not be able to clean up snapshots.</p> 
| </div></blockquote> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><ul class="simple"> |
| <li><p><strong>gcs_location</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – The name of the Google Cloud Storage |
| bucket where the extracted table should be written as a string. If |
| <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a>, then the temp_location parameter is used.</p></li> |
| <li><p><strong>validate</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – If <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>, various checks will be done when source |
| gets initialized (e.g., is table present?). Set this to <a class="reference external" href="https://docs.python.org/3/library/constants.html#False" title="(in Python v3.13)"><code class="xref py py-data docutils literal notranslate"><span class="pre">False</span></code></a> |
| if the BigQuery export method is slow due to checking file existence.</p></li> |
| <li><p><strong>kms_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – Experimental. Optional Cloud KMS key name for use when |
| creating new temporary tables.</p></li> |
| </ul> |
| </dd> |
| </dl> |
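<p>The snippet below is a minimal sketch (hypothetical tables, query, and bucket) of feeding a PCollection of <code class="docutils literal notranslate"><span class="pre">ReadFromBigQueryRequest</span></code> elements into this transform:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre># A minimal sketch, with hypothetical table, query, and bucket names, of
# reading several sources in one pipeline by feeding ReadFromBigQueryRequest
# elements into ReadAllFromBigQuery.
import apache_beam as beam
from apache_beam.io.gcp.bigquery import (
    ReadAllFromBigQuery, ReadFromBigQueryRequest)

with beam.Pipeline() as p:
    rows = (
        p
        | beam.Create([
            ReadFromBigQueryRequest(table='my-project:my_dataset.table_a'),
            ReadFromBigQueryRequest(
                query='SELECT name FROM `my-project.my_dataset.table_b`'),
        ])
        | ReadAllFromBigQuery(gcs_location='gs://my-bucket/bq_export'))

    _ = rows | beam.Map(print)
</pre></div></div>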
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadAllFromBigQuery.COUNTER"> |
| <span class="sig-name descname"><span class="pre">COUNTER</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadAllFromBigQuery.COUNTER" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="apache_beam.io.gcp.bigquery.ReadAllFromBigQuery.expand"> |
| <span class="sig-name descname"><span class="pre">expand</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">pcoll</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadAllFromBigQuery.expand"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadAllFromBigQuery.expand" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| </section> |
| |
| |
| </div> |
| </div> |
| <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer"> |
| <a href="apache_beam.io.gcp.big_query_query_to_table_pipeline.html" class="btn btn-neutral float-left" title="apache_beam.io.gcp.big_query_query_to_table_pipeline module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a> |
| <a href="apache_beam.io.gcp.bigquery_avro_tools.html" class="btn btn-neutral float-right" title="apache_beam.io.gcp.bigquery_avro_tools module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a> |
| </div> |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
<p>© Copyright, Apache Beam.</p> 
| </div> |
| |
| Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a |
| <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a> |
| provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| |
| |
| </footer> |
| </div> |
| </div> |
| </section> |
| </div> |
| <script> |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| </body> |
| </html> |