pydoc/2.36.0/apache_beam.io.gcp.bigquery.html - beam-site - Git at Google



 <!DOCTYPE html>
 <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
 <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
 <head>
   <meta charset="utf-8">

   <meta name="viewport" content="width=device-width, initial-scale=1.0">

   <title>apache_beam.io.gcp.bigquery module &mdash; Apache Beam 2.36.0 documentation</title>


   <script type="text/javascript" src="_static/js/modernizr.min.js"></script>


       <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
         <script type="text/javascript" src="_static/jquery.js"></script>
         <script type="text/javascript" src="_static/underscore.js"></script>
         <script type="text/javascript" src="_static/doctools.js"></script>
         <script type="text/javascript" src="_static/language_data.js"></script>
         <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>

     <script type="text/javascript" src="_static/js/theme.js"></script>


   <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
   <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
     <link rel="index" title="Index" href="genindex.html" />
     <link rel="search" title="Search" href="search.html" />
     <link rel="next" title="apache_beam.io.gcp.bigquery_avro_tools module" href="apache_beam.io.gcp.bigquery_avro_tools.html" />
     <link rel="prev" title="apache_beam.io.gcp.big_query_query_to_table_pipeline module" href="apache_beam.io.gcp.big_query_query_to_table_pipeline.html" />
 </head>

 <body class="wy-body-for-nav">


   <div class="wy-grid-for-nav">

     <nav data-toggle="wy-nav-shift" class="wy-nav-side">
       <div class="wy-side-scroll">
         <div class="wy-side-nav-search" >


             <a href="index.html" class="icon icon-home"> Apache Beam


           </a>


               <div class="version">
                 2.36.0
               </div>


 <div role="search">
   <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
     <input type="text" name="q" placeholder="Search docs" />
     <input type="hidden" name="check_keywords" value="yes" />
     <input type="hidden" name="area" value="default" />
   </form>
 </div>


         </div>

         <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">


               <ul class="current">
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li>
 <li class="toctree-l1 current"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a><ul class="current">
 <li class="toctree-l2 current"><a class="reference internal" href="apache_beam.io.html#subpackages">Subpackages</a><ul class="current">
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.io.aws.html">apache_beam.io.aws package</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.io.azure.html">apache_beam.io.azure package</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.io.external.html">apache_beam.io.external package</a></li>
 <li class="toctree-l3"><a class="reference internal" href="apache_beam.io.flink.html">apache_beam.io.flink package</a></li>
 <li class="toctree-l3 current"><a class="reference internal" href="apache_beam.io.gcp.html">apache_beam.io.gcp package</a><ul class="current">
 <li class="toctree-l4"><a class="reference internal" href="apache_beam.io.gcp.html#subpackages">Subpackages</a></li>
 <li class="toctree-l4 current"><a class="reference internal" href="apache_beam.io.gcp.html#submodules">Submodules</a></li>
 </ul>
 </li>
 </ul>
 </li>
 <li class="toctree-l2"><a class="reference internal" href="apache_beam.io.html#submodules">Submodules</a></li>
 </ul>
 </li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
 </ul>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
 <li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
 </ul>


         </div>
       </div>
     </nav>

     <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">


       <nav class="wy-nav-top" aria-label="top navigation">

           <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
           <a href="index.html">Apache Beam</a>

       </nav>


       <div class="wy-nav-content">

         <div class="rst-content">


 <div role="navigation" aria-label="breadcrumbs navigation">

   <ul class="wy-breadcrumbs">

       <li><a href="index.html">Docs</a> &raquo;</li>

           <li><a href="apache_beam.io.html">apache_beam.io package</a> &raquo;</li>

           <li><a href="apache_beam.io.gcp.html">apache_beam.io.gcp package</a> &raquo;</li>

       <li>apache_beam.io.gcp.bigquery module</li>


       <li class="wy-breadcrumbs-aside">


             <a href="_sources/apache_beam.io.gcp.bigquery.rst.txt" rel="nofollow"> View page source</a>


       </li>

   </ul>


   <hr/>
 </div>
           <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
            <div itemprop="articleBody">

   <div class="section" id="module-apache_beam.io.gcp.bigquery">
 <span id="apache-beam-io-gcp-bigquery-module"></span><h1>apache_beam.io.gcp.bigquery module<a class="headerlink" href="#module-apache_beam.io.gcp.bigquery" title="Permalink to this headline">¶</a></h1>
 <p>BigQuery sources and sinks.</p>
 <p>This module implements reading from and writing to BigQuery tables. It relies
 on several classes exposed by the BigQuery API: TableSchema, TableFieldSchema,
 TableRow, and TableCell. The default mode is to return table rows read from a
 BigQuery source as dictionaries. Similarly a Write transform to a BigQuerySink
 accepts PCollections of dictionaries. This is done for more convenient
 programming.  If desired, the native TableRow objects can be used throughout to
 represent rows (use an instance of TableRowJsonCoder as a coder argument when
 creating the sources or sinks respectively).</p>
 <p>Also, for programming convenience, instances of TableReference and TableSchema
 have a string representation that can be used for the corresponding arguments:</p>
 <blockquote>
 <div><ul class="simple">
 <li>TableReference can be a PROJECT:DATASET.TABLE or DATASET.TABLE string.</li>
 <li>TableSchema can be a NAME:TYPE{,NAME:TYPE}* string
 (e.g. ‘month:STRING,event_count:INTEGER’).</li>
 </ul>
 </div></blockquote>
 <p>The syntax supported is described here:
 <a class="reference external" href="https://cloud.google.com/bigquery/bq-command-line-tool-quickstart">https://cloud.google.com/bigquery/bq-command-line-tool-quickstart</a></p>
 <p>BigQuery sources can be used as main inputs or side inputs. A main input
 (common case) is expected to be massive and will be split into manageable chunks
 and processed in parallel. Side inputs are expected to be small and will be read
 completely every time a ParDo DoFn gets executed. In the example below the
 lambda function implementing the DoFn for the Map transform will get on each
 call <em>one</em> row of the main table and <em>all</em> rows of the side table. The runner
 may use some caching techniques to share the side inputs between calls in order
 to avoid excessive reading:</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">main_table</span> <span class="o">=</span> <span class="n">pipeline</span> <span class="o">|</span> <span class="s1">&#39;VeryBig&#39;</span> <span class="o">&gt;&gt;</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">ReadFromBigQuery</span><span class="p">(</span><span class="o">...</span><span class="p">)</span>
 <span class="n">side_table</span> <span class="o">=</span> <span class="n">pipeline</span> <span class="o">|</span> <span class="s1">&#39;NotBig&#39;</span> <span class="o">&gt;&gt;</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">ReadFromBigQuery</span><span class="p">(</span><span class="o">...</span><span class="p">)</span>
 <span class="n">results</span> <span class="o">=</span> <span class="p">(</span>
     <span class="n">main_table</span>
     <span class="o">|</span> <span class="s1">&#39;ProcessData&#39;</span> <span class="o">&gt;&gt;</span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span>
         <span class="k">lambda</span> <span class="n">element</span><span class="p">,</span> <span class="n">side_input</span><span class="p">:</span> <span class="o">...</span><span class="p">,</span> <span class="n">AsList</span><span class="p">(</span><span class="n">side_table</span><span class="p">)))</span>
 </pre></div>
 </div>
 <p>There is no difference in how main and side inputs are read. What makes the
 side_table a ‘side input’ is the AsList wrapper used when passing the table
 as a parameter to the Map transform. AsList signals to the execution framework
 that its input should be made available whole.</p>
 <p>The main and side inputs are implemented differently. Reading a BigQuery table
 as main input entails exporting the table to a set of GCS files (in AVRO or in
 JSON format) and then processing those files.</p>
 <p>Users may provide a query to read from rather than reading all of a BigQuery
 table. If specified, the result obtained by executing the specified query will
 be used as the data of the input transform:</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">query_results</span> <span class="o">=</span> <span class="n">pipeline</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">gcp</span><span class="o">.</span><span class="n">bigquery</span><span class="o">.</span><span class="n">ReadFromBigQuery</span><span class="p">(</span>
     <span class="n">query</span><span class="o">=</span><span class="s1">&#39;SELECT year, mean_temp FROM samples.weather_stations&#39;</span><span class="p">)</span>
 </pre></div>
 </div>
 <p>When creating a BigQuery input transform, users should provide either a query
 or a table. Pipeline construction will fail with a validation error if neither
 or both are specified.</p>
 <p>When reading via <cite>ReadFromBigQuery</cite>, bytes are returned decoded as bytes.
 This is due to the fact that ReadFromBigQuery uses Avro exports by default.
 When reading from BigQuery using <cite>apache_beam.io.BigQuerySource</cite>, bytes are
 returned as base64-encoded bytes. To get base64-encoded bytes using
 <cite>ReadFromBigQuery</cite>, you can use the flag <cite>use_json_exports</cite> to export
 data as JSON, and receive base64-encoded bytes.</p>
 <div class="section" id="readallfrombigquery">
 <h2>ReadAllFromBigQuery<a class="headerlink" href="#readallfrombigquery" title="Permalink to this headline">¶</a></h2>
 <p>Beam 2.27.0 introduces a new transform called <cite>ReadAllFromBigQuery</cite> which
 allows you to define table and query reads from BigQuery at pipeline
 runtime:</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">read_requests</span> <span class="o">=</span> <span class="n">p</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span>
     <span class="n">ReadFromBigQueryRequest</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="s1">&#39;SELECT * FROM mydataset.mytable&#39;</span><span class="p">),</span>
     <span class="n">ReadFromBigQueryRequest</span><span class="p">(</span><span class="n">table</span><span class="o">=</span><span class="s1">&#39;myproject.mydataset.mytable&#39;</span><span class="p">)])</span>
 <span class="n">results</span> <span class="o">=</span> <span class="n">read_requests</span> <span class="o">|</span> <span class="n">ReadAllFromBigQuery</span><span class="p">()</span>
 </pre></div>
 </div>
 <p>A good application for this transform is in streaming pipelines to
 refresh a side input coming from BigQuery. This would work like so:</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">side_input</span> <span class="o">=</span> <span class="p">(</span>
     <span class="n">p</span>
     <span class="o">|</span> <span class="s1">&#39;PeriodicImpulse&#39;</span> <span class="o">&gt;&gt;</span> <span class="n">PeriodicImpulse</span><span class="p">(</span>
         <span class="n">first_timestamp</span><span class="p">,</span> <span class="n">last_timestamp</span><span class="p">,</span> <span class="n">interval</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
     <span class="o">|</span> <span class="s1">&#39;MapToReadRequest&#39;</span> <span class="o">&gt;&gt;</span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span>
         <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">ReadFromBigQueryRequest</span><span class="p">(</span><span class="n">table</span><span class="o">=</span><span class="s1">&#39;dataset.table&#39;</span><span class="p">))</span>
     <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">ReadAllFromBigQuery</span><span class="p">())</span>
 <span class="n">main_input</span> <span class="o">=</span> <span class="p">(</span>
     <span class="n">p</span>
     <span class="o">|</span> <span class="s1">&#39;MpImpulse&#39;</span> <span class="o">&gt;&gt;</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">(</span><span class="n">sample_main_input_elements</span><span class="p">)</span>
     <span class="o">|</span>
     <span class="s1">&#39;MapMpToTimestamped&#39;</span> <span class="o">&gt;&gt;</span> <span class="n">beam</span><span class="o">.</span><span class="n">Map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">src</span><span class="p">:</span> <span class="n">TimestampedValue</span><span class="p">(</span><span class="n">src</span><span class="p">,</span> <span class="n">src</span><span class="p">))</span>
     <span class="o">|</span> <span class="s1">&#39;WindowMpInto&#39;</span> <span class="o">&gt;&gt;</span> <span class="n">beam</span><span class="o">.</span><span class="n">WindowInto</span><span class="p">(</span>
         <span class="n">window</span><span class="o">.</span><span class="n">FixedWindows</span><span class="p">(</span><span class="n">main_input_windowing_interval</span><span class="p">)))</span>
 <span class="n">result</span> <span class="o">=</span> <span class="p">(</span>
     <span class="n">main_input</span>
     <span class="o">|</span> <span class="s1">&#39;ApplyCrossJoin&#39;</span> <span class="o">&gt;&gt;</span> <span class="n">beam</span><span class="o">.</span><span class="n">FlatMap</span><span class="p">(</span>
         <span class="n">cross_join</span><span class="p">,</span> <span class="n">rights</span><span class="o">=</span><span class="n">beam</span><span class="o">.</span><span class="n">pvalue</span><span class="o">.</span><span class="n">AsIter</span><span class="p">(</span><span class="n">side_input</span><span class="p">)))</span>
 </pre></div>
 </div>
 <p><strong>Note</strong>: This transform is supported on Portable and Dataflow v2 runners.</p>
 <p><strong>Note</strong>: This transform does not currently clean up temporary datasets
 created for its execution. (BEAM-11359)</p>
 <div class="section" id="writing-data-to-bigquery">
 <h3>Writing Data to BigQuery<a class="headerlink" href="#writing-data-to-bigquery" title="Permalink to this headline">¶</a></h3>
 <p>The <cite>WriteToBigQuery</cite> transform is the recommended way of writing data to
 BigQuery. It supports a large set of parameters to customize how you’d like to
 write to BigQuery.</p>
 </div>
 </div>
 <div class="section" id="table-references">
 <h2>Table References<a class="headerlink" href="#table-references" title="Permalink to this headline">¶</a></h2>
 <p>This transform allows you to provide static <cite>project</cite>, <cite>dataset</cite> and <cite>table</cite>
 parameters which point to a specific BigQuery table to be created. The <cite>table</cite>
 parameter can also be a dynamic parameter (i.e. a callable), which receives an
 element to be written to BigQuery, and returns the table that that element
 should be sent to.</p>
 <p>You may also provide a tuple of PCollectionView elements to be passed as side
 inputs to your callable. For example, suppose that one wishes to send
 events of different types to different tables, and the table names are
 computed at pipeline runtime, one may do something like the following:</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">Pipeline</span><span class="p">()</span> <span class="k">as</span> <span class="n">p</span><span class="p">:</span>
   <span class="n">elements</span> <span class="o">=</span> <span class="p">(</span><span class="n">p</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span>
     <span class="p">{</span><span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;error&#39;</span><span class="p">,</span> <span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="s1">&#39;12:34:56&#39;</span><span class="p">,</span> <span class="s1">&#39;message&#39;</span><span class="p">:</span> <span class="s1">&#39;bad&#39;</span><span class="p">},</span>
     <span class="p">{</span><span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;user_log&#39;</span><span class="p">,</span> <span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="s1">&#39;12:34:59&#39;</span><span class="p">,</span> <span class="s1">&#39;query&#39;</span><span class="p">:</span> <span class="s1">&#39;flu symptom&#39;</span><span class="p">},</span>
   <span class="p">]))</span>

   <span class="n">table_names</span> <span class="o">=</span> <span class="p">(</span><span class="n">p</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span>
     <span class="p">(</span><span class="s1">&#39;error&#39;</span><span class="p">,</span> <span class="s1">&#39;my_project:dataset1.error_table_for_today&#39;</span><span class="p">),</span>
     <span class="p">(</span><span class="s1">&#39;user_log&#39;</span><span class="p">,</span> <span class="s1">&#39;my_project:dataset1.query_table_for_today&#39;</span><span class="p">),</span>
   <span class="p">])</span>

   <span class="n">table_names_dict</span> <span class="o">=</span> <span class="n">beam</span><span class="o">.</span><span class="n">pvalue</span><span class="o">.</span><span class="n">AsDict</span><span class="p">(</span><span class="n">table_names</span><span class="p">)</span>

   <span class="n">elements</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">gcp</span><span class="o">.</span><span class="n">bigquery</span><span class="o">.</span><span class="n">WriteToBigQuery</span><span class="p">(</span>
     <span class="n">table</span><span class="o">=</span><span class="k">lambda</span> <span class="n">row</span><span class="p">,</span> <span class="n">table_dict</span><span class="p">:</span> <span class="n">table_dict</span><span class="p">[</span><span class="n">row</span><span class="p">[</span><span class="s1">&#39;type&#39;</span><span class="p">]],</span>
     <span class="n">table_side_inputs</span><span class="o">=</span><span class="p">(</span><span class="n">table_names_dict</span><span class="p">,))</span>
 </pre></div>
 </div>
 <p>In the example above, the <cite>table_dict</cite> argument passed to the function in
 <cite>table</cite> is the side input coming from <cite>table_names_dict</cite>, which is passed
 as part of the <cite>table_side_inputs</cite> argument.</p>
 </div>
 <div class="section" id="schemas">
 <h2>Schemas<a class="headerlink" href="#schemas" title="Permalink to this headline">¶</a></h2>
 <p>This transform also allows you to provide a static or dynamic <cite>schema</cite>
 parameter (i.e. a callable).</p>
 <p>If providing a callable, this should take in a table reference (as returned by
 the <cite>table</cite> parameter), and return the corresponding schema for that table.
 This allows you to provide different schemas for different tables:</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">compute_table_name</span><span class="p">(</span><span class="n">row</span><span class="p">):</span>
   <span class="o">...</span>

 <span class="n">errors_schema</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;fields&#39;</span><span class="p">:</span> <span class="p">[</span>
   <span class="p">{</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="s1">&#39;type&#39;</span><span class="p">,</span> <span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;STRING&#39;</span><span class="p">,</span> <span class="s1">&#39;mode&#39;</span><span class="p">:</span> <span class="s1">&#39;NULLABLE&#39;</span><span class="p">},</span>
   <span class="p">{</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="s1">&#39;message&#39;</span><span class="p">,</span> <span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;STRING&#39;</span><span class="p">,</span> <span class="s1">&#39;mode&#39;</span><span class="p">:</span> <span class="s1">&#39;NULLABLE&#39;</span><span class="p">}]}</span>
 <span class="n">queries_schema</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;fields&#39;</span><span class="p">:</span> <span class="p">[</span>
   <span class="p">{</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="s1">&#39;type&#39;</span><span class="p">,</span> <span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;STRING&#39;</span><span class="p">,</span> <span class="s1">&#39;mode&#39;</span><span class="p">:</span> <span class="s1">&#39;NULLABLE&#39;</span><span class="p">},</span>
   <span class="p">{</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="s1">&#39;query&#39;</span><span class="p">,</span> <span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;STRING&#39;</span><span class="p">,</span> <span class="s1">&#39;mode&#39;</span><span class="p">:</span> <span class="s1">&#39;NULLABLE&#39;</span><span class="p">}]}</span>

 <span class="k">with</span> <span class="n">Pipeline</span><span class="p">()</span> <span class="k">as</span> <span class="n">p</span><span class="p">:</span>
   <span class="n">elements</span> <span class="o">=</span> <span class="p">(</span><span class="n">p</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span>
     <span class="p">{</span><span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;error&#39;</span><span class="p">,</span> <span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="s1">&#39;12:34:56&#39;</span><span class="p">,</span> <span class="s1">&#39;message&#39;</span><span class="p">:</span> <span class="s1">&#39;bad&#39;</span><span class="p">},</span>
     <span class="p">{</span><span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;user_log&#39;</span><span class="p">,</span> <span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="s1">&#39;12:34:59&#39;</span><span class="p">,</span> <span class="s1">&#39;query&#39;</span><span class="p">:</span> <span class="s1">&#39;flu symptom&#39;</span><span class="p">},</span>
   <span class="p">]))</span>

   <span class="n">elements</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">gcp</span><span class="o">.</span><span class="n">bigquery</span><span class="o">.</span><span class="n">WriteToBigQuery</span><span class="p">(</span>
     <span class="n">table</span><span class="o">=</span><span class="n">compute_table_name</span><span class="p">,</span>
     <span class="n">schema</span><span class="o">=</span><span class="k">lambda</span> <span class="n">table</span><span class="p">:</span> <span class="p">(</span><span class="n">errors_schema</span>
                           <span class="k">if</span> <span class="s1">&#39;errors&#39;</span> <span class="ow">in</span> <span class="n">table</span>
                           <span class="k">else</span> <span class="n">queries_schema</span><span class="p">))</span>
 </pre></div>
 </div>
 <p>It may be the case that schemas are computed at pipeline runtime. In cases
 like these, one can also provide a <cite>schema_side_inputs</cite> parameter, which is
 a tuple of PCollectionViews to be passed to the schema callable (much like
 the <cite>table_side_inputs</cite> parameter).</p>
 </div>
 <div class="section" id="additional-parameters-for-bigquery-tables">
 <h2>Additional Parameters for BigQuery Tables<a class="headerlink" href="#additional-parameters-for-bigquery-tables" title="Permalink to this headline">¶</a></h2>
 <p>This sink is able to create tables in BigQuery if they don’t already exist. It
 also relies on creating temporary tables when performing file loads.</p>
 <p>The WriteToBigQuery transform creates tables using the BigQuery API by
 inserting a load job (see the API reference [1]), or by inserting a new table
 (see the API reference for that [2][3]).</p>
 <p>When creating a new BigQuery table, there are a number of extra parameters
 that one may need to specify. For example, clustering, partitioning, data
 encoding, etc. It is possible to provide these additional parameters by
 passing a Python dictionary as <cite>additional_bq_parameters</cite> to the transform.
 As an example, to create a table that has specific partitioning, and
 clustering properties, one would do the following:</p>
 <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">additional_bq_parameters</span> <span class="o">=</span> <span class="p">{</span>
   <span class="s1">&#39;timePartitioning&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;DAY&#39;</span><span class="p">},</span>
   <span class="s1">&#39;clustering&#39;</span><span class="p">:</span> <span class="p">{</span><span class="s1">&#39;fields&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;country&#39;</span><span class="p">]}}</span>
 <span class="k">with</span> <span class="n">Pipeline</span><span class="p">()</span> <span class="k">as</span> <span class="n">p</span><span class="p">:</span>
   <span class="n">elements</span> <span class="o">=</span> <span class="p">(</span><span class="n">p</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">Create</span><span class="p">([</span>
     <span class="p">{</span><span class="s1">&#39;country&#39;</span><span class="p">:</span> <span class="s1">&#39;mexico&#39;</span><span class="p">,</span> <span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="s1">&#39;12:34:56&#39;</span><span class="p">,</span> <span class="s1">&#39;query&#39;</span><span class="p">:</span> <span class="s1">&#39;acapulco&#39;</span><span class="p">},</span>
     <span class="p">{</span><span class="s1">&#39;country&#39;</span><span class="p">:</span> <span class="s1">&#39;canada&#39;</span><span class="p">,</span> <span class="s1">&#39;timestamp&#39;</span><span class="p">:</span> <span class="s1">&#39;12:34:59&#39;</span><span class="p">,</span> <span class="s1">&#39;query&#39;</span><span class="p">:</span> <span class="s1">&#39;influenza&#39;</span><span class="p">},</span>
   <span class="p">]))</span>

   <span class="n">elements</span> <span class="o">|</span> <span class="n">beam</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">gcp</span><span class="o">.</span><span class="n">bigquery</span><span class="o">.</span><span class="n">WriteToBigQuery</span><span class="p">(</span>
     <span class="n">table</span><span class="o">=</span><span class="s1">&#39;project_name1:dataset_2.query_events_table&#39;</span><span class="p">,</span>
     <span class="n">additional_bq_parameters</span><span class="o">=</span><span class="n">additional_bq_parameters</span><span class="p">)</span>
 </pre></div>
 </div>
 <p>Much like the schema case, the parameter with <cite>additional_bq_parameters</cite> can
 also take a callable that receives a table reference.</p>
 <p>[1] <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload">https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload</a>
 [2] <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert">https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert</a>
 [3] <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource">https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource</a></p>
 <p><strong>Short introduction to BigQuery concepts</strong>
 Tables have rows (TableRow) and each row has cells (TableCell).
 A table has a schema (TableSchema), which in turn describes the schema of each
 cell (TableFieldSchema). The terms field and cell are used interchangeably.</p>
 <dl class="docutils">
 <dt>TableSchema: Describes the schema (types and order) for values in each row.</dt>
 <dd>Has one attribute, ‘fields’, which is a list of TableFieldSchema objects.</dd>
 <dt>TableFieldSchema: Describes the schema (type, name) for one field.</dt>
 <dd>Has several attributes, including ‘name’ and ‘type’. Common values for
 the type attribute are: ‘STRING’, ‘INTEGER’, ‘FLOAT’, ‘BOOLEAN’, ‘NUMERIC’,
 ‘GEOGRAPHY’.
 All possible values are described at:
 <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types">https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types</a></dd>
 <dt>TableRow: Holds all values in a table row.</dt>
 <dd>Has one attribute, ‘f’, which is a list of TableCell instances.</dd>
 <dt>TableCell: Holds the value for one cell (or field).</dt>
 <dd>Has one attribute, ‘v’, which is a JsonValue instance. The JsonValue class is
 defined in the apitools.base.py.extra_types module.</dd>
 </dl>
 <p>As of Beam 2.7.0, the NUMERIC data type is supported. This data type supports
 high-precision decimal numbers (precision of 38 digits, scale of 9 digits).
 The GEOGRAPHY data type works with Well-Known Text (See
 <a class="reference external" href="https://en.wikipedia.org/wiki/Well-known_text">https://en.wikipedia.org/wiki/Well-known_text</a>) format for reading and writing
 to BigQuery.
 BigQuery IO requires values of BYTES datatype to be encoded using base64
 encoding when writing to BigQuery.</p>
 <dl class="class">
 <dt id="apache_beam.io.gcp.bigquery.TableRowJsonCoder">
 <em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery.</code><code class="descname">TableRowJsonCoder</code><span class="sig-paren">(</span><em>table_schema=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#TableRowJsonCoder"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.TableRowJsonCoder" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference internal" href="apache_beam.coders.coders.html#apache_beam.coders.coders.Coder" title="apache_beam.coders.coders.Coder"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.coders.coders.Coder</span></code></a></p>
 <p>A coder for a TableRow instance to/from a JSON string.</p>
 <p>Note that the encoding operation (used when writing to sinks) requires the
 table schema in order to obtain the ordered list of field names. Reading from
 sources on the other hand does not need the table schema.</p>
 <dl class="method">
 <dt id="apache_beam.io.gcp.bigquery.TableRowJsonCoder.encode">
 <code class="descname">encode</code><span class="sig-paren">(</span><em>table_row</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#TableRowJsonCoder.encode"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.TableRowJsonCoder.encode" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="method">
 <dt id="apache_beam.io.gcp.bigquery.TableRowJsonCoder.decode">
 <code class="descname">decode</code><span class="sig-paren">(</span><em>encoded_table_row</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#TableRowJsonCoder.decode"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.TableRowJsonCoder.decode" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="class">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryDisposition">
 <em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery.</code><code class="descname">BigQueryDisposition</code><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQueryDisposition"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 <p>Class holding standard strings used for create and write dispositions.</p>
 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_NEVER">
 <code class="descname">CREATE_NEVER</code><em class="property"> = 'CREATE_NEVER'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_NEVER" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_IF_NEEDED">
 <code class="descname">CREATE_IF_NEEDED</code><em class="property"> = 'CREATE_IF_NEEDED'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_IF_NEEDED" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_TRUNCATE">
 <code class="descname">WRITE_TRUNCATE</code><em class="property"> = 'WRITE_TRUNCATE'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_TRUNCATE" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_APPEND">
 <code class="descname">WRITE_APPEND</code><em class="property"> = 'WRITE_APPEND'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_APPEND" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_EMPTY">
 <code class="descname">WRITE_EMPTY</code><em class="property"> = 'WRITE_EMPTY'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_EMPTY" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="staticmethod">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryDisposition.validate_create">
 <em class="property">static </em><code class="descname">validate_create</code><span class="sig-paren">(</span><em>disposition</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQueryDisposition.validate_create"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.validate_create" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="staticmethod">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryDisposition.validate_write">
 <em class="property">static </em><code class="descname">validate_write</code><span class="sig-paren">(</span><em>disposition</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQueryDisposition.validate_write"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.validate_write" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="class">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryQueryPriority">
 <em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery.</code><code class="descname">BigQueryQueryPriority</code><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQueryQueryPriority"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryQueryPriority" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 <p>Class holding standard strings used for query priority.</p>
 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryQueryPriority.INTERACTIVE">
 <code class="descname">INTERACTIVE</code><em class="property"> = 'INTERACTIVE'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryQueryPriority.INTERACTIVE" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.BigQueryQueryPriority.BATCH">
 <code class="descname">BATCH</code><em class="property"> = 'BATCH'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQueryQueryPriority.BATCH" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.io.gcp.bigquery.BigQuerySource">
 <code class="descclassname">apache_beam.io.gcp.bigquery.</code><code class="descname">BigQuerySource</code><span class="sig-paren">(</span><em>table=None</em>, <em>dataset=None</em>, <em>project=None</em>, <em>query=None</em>, <em>validate=False</em>, <em>coder=None</em>, <em>use_standard_sql=False</em>, <em>flatten_results=True</em>, <em>kms_key=None</em>, <em>use_dataflow_native_source=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQuerySource"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQuerySource" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="function">
 <dt id="apache_beam.io.gcp.bigquery.BigQuerySink">
 <code class="descclassname">apache_beam.io.gcp.bigquery.</code><code class="descname">BigQuerySink</code><span class="sig-paren">(</span><em>table</em>, <em>dataset=None</em>, <em>project=None</em>, <em>schema=None</em>, <em>create_disposition='CREATE_IF_NEEDED'</em>, <em>write_disposition='WRITE_EMPTY'</em>, <em>validate=False</em>, <em>coder=None</em>, <em>kms_key=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#BigQuerySink"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.BigQuerySink" title="Permalink to this definition">¶</a></dt>
 <dd><p>A sink based on a BigQuery table.</p>
 <p>This BigQuery sink triggers a Dataflow native sink for BigQuery
 that only supports batch pipelines.
 Instead of using this sink directly, please use WriteToBigQuery
 transform that works for both batch and streaming pipelines.</p>
 </dd></dl>

 <dl class="class">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery">
 <em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery.</code><code class="descname">WriteToBigQuery</code><span class="sig-paren">(</span><em>table</em>, <em>dataset=None</em>, <em>project=None</em>, <em>schema=None</em>, <em>create_disposition='CREATE_IF_NEEDED'</em>, <em>write_disposition='WRITE_APPEND'</em>, <em>kms_key=None</em>, <em>batch_size=None</em>, <em>max_file_size=None</em>, <em>max_files_per_bundle=None</em>, <em>test_client=None</em>, <em>custom_gcs_temp_location=None</em>, <em>method=None</em>, <em>insert_retry_strategy=None</em>, <em>additional_bq_parameters=None</em>, <em>table_side_inputs=None</em>, <em>schema_side_inputs=None</em>, <em>triggering_frequency=None</em>, <em>validate=True</em>, <em>temp_file_format=None</em>, <em>ignore_insert_ids=False</em>, <em>with_auto_sharding=False</em>, <em>ignore_unknown_columns=False</em>, <em>load_job_project_id=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
 <p>Write data to BigQuery.</p>
 <p>This transform receives a PCollection of elements to be inserted into BigQuery
 tables. The elements would come in as Python dictionaries, or as <cite>TableRow</cite>
 instances.</p>
 <p>Initialize a WriteToBigQuery transform.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
 <li><strong>table</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>callable</em><em>, </em><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><em>ValueProvider</em></a>) – The ID of the table, or a callable
 that returns it. The ID must contain only letters <code class="docutils literal notranslate"><span class="pre">a-z</span></code>, <code class="docutils literal notranslate"><span class="pre">A-Z</span></code>,
 numbers <code class="docutils literal notranslate"><span class="pre">0-9</span></code>, or connectors <code class="docutils literal notranslate"><span class="pre">-_</span></code>. If dataset argument is
 <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a> then the table argument must contain the entire table
 reference specified as: <code class="docutils literal notranslate"><span class="pre">'DATASET.TABLE'</span></code>
 or <code class="docutils literal notranslate"><span class="pre">'PROJECT:DATASET.TABLE'</span></code>. If it’s a callable, it must receive one
 argument representing an element to be written to BigQuery, and return
 a TableReference, or a string table name as specified above.</li>
 <li><strong>dataset</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The ID of the dataset containing this table or
 <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a> if the table reference is specified entirely by the table
 argument.</li>
 <li><strong>project</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The ID of the project containing this table or
 <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a> if the table reference is specified entirely by the table
 argument.</li>
 <li><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>,</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>,</em><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><em>ValueProvider</em></a><em>,</em><em>callable</em>) – The schema to be used if the
 BigQuery table to write has to be created. This can be specified either
 as a <code class="xref py py-class docutils literal notranslate"><span class="pre">TableSchema</span></code> object, a <cite>ValueProvider</cite> that has a JSON string
 or a Python dictionary, the string or dictionary itself,
 or a single string of the form
 <code class="docutils literal notranslate"><span class="pre">'field1:type1,field2:type2,field3:type3'</span></code> that defines a comma
 separated list of fields. Here <code class="docutils literal notranslate"><span class="pre">'type'</span></code> should specify the BigQuery
 type of the field. Single string based schemas do not support nested
 fields, repeated fields, or specifying a BigQuery mode for fields
 (mode will always be set to <code class="docutils literal notranslate"><span class="pre">'NULLABLE'</span></code>).
 If a callable, then it should receive a destination (in the form of
 a str), and return a str, dict or TableSchema.
 One may also pass <code class="docutils literal notranslate"><span class="pre">SCHEMA_AUTODETECT</span></code> here when using JSON-based
 file loads, and BigQuery will try to infer the schema for the files
 that are being loaded.</li>
 <li><strong>create_disposition</strong> (<a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition" title="apache_beam.io.gcp.bigquery.BigQueryDisposition"><em>BigQueryDisposition</em></a>) – <p>A string describing what
 happens if the table does not exist. Possible values are:</p>
 <ul>
 <li><a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_IF_NEEDED" title="apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_IF_NEEDED"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryDisposition.CREATE_IF_NEEDED</span></code></a>: create the table if it does not
 exist.</li>
 <li><a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_NEVER" title="apache_beam.io.gcp.bigquery.BigQueryDisposition.CREATE_NEVER"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryDisposition.CREATE_NEVER</span></code></a>: fail the write if the table does not
 exist.</li>
 </ul>
 </li>
 <li><strong>write_disposition</strong> (<a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition" title="apache_beam.io.gcp.bigquery.BigQueryDisposition"><em>BigQueryDisposition</em></a>) – <p>A string describing what happens
 if the table already has some data. Possible values are:</p>
 <ul>
 <li><a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_TRUNCATE" title="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_TRUNCATE"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryDisposition.WRITE_TRUNCATE</span></code></a>: delete existing rows.</li>
 <li><a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_APPEND" title="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_APPEND"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryDisposition.WRITE_APPEND</span></code></a>: add to existing rows.</li>
 <li><a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_EMPTY" title="apache_beam.io.gcp.bigquery.BigQueryDisposition.WRITE_EMPTY"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryDisposition.WRITE_EMPTY</span></code></a>: fail the write if the table is not
 empty.</li>
 </ul>
 <p>For streaming pipelines, WRITE_TRUNCATE cannot be used.</p>
 </li>
 <li><strong>kms_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Optional Cloud KMS key name for use when creating new
 tables.</li>
 <li><strong>batch_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – Number of rows to be written to BQ per streaming API
 insert. The default is 500.</li>
 <li><strong>test_client</strong> – Override the default bigquery client used for testing.</li>
 <li><strong>max_file_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – The maximum size for a file to be written and then
 loaded into BigQuery. The default value is 4TB, which is 80% of the
 limit of 5TB for BigQuery to load any file.</li>
 <li><strong>max_files_per_bundle</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – The maximum number of files to be concurrently
 written by a worker. The default here is 20. Larger values will allow
 writing to multiple destinations without having to reshard - but they
 increase the memory burden on the workers.</li>
 <li><strong>custom_gcs_temp_location</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – A GCS location to store files to be used
 for file loads into BigQuery. By default, this will use the pipeline’s
 temp_location, but for pipelines whose temp_location is not appropriate
 for BQ File Loads, users should pass a specific one.</li>
 <li><strong>method</strong> – The method to use to write to BigQuery. It may be
 STREAMING_INSERTS, FILE_LOADS, or DEFAULT. An introduction on loading
 data to BigQuery: <a class="reference external" href="https://cloud.google.com/bigquery/docs/loading-data">https://cloud.google.com/bigquery/docs/loading-data</a>.
 DEFAULT will use STREAMING_INSERTS on Streaming pipelines and
 FILE_LOADS on Batch pipelines.</li>
 <li><strong>insert_retry_strategy</strong> – <p>The strategy to use when retrying streaming inserts
 into BigQuery. Options are shown in bigquery_tools.RetryStrategy attrs.
 Default is to retry always. This means that whenever there are rows
 that fail to be inserted to BigQuery, they will be retried indefinitely.
 Other retry strategy settings will produce a deadletter PCollection
 as output. Appropriate values are:</p>
 <ul>
 <li><cite>RetryStrategy.RETRY_ALWAYS</cite>: retry all rows if
 there are any kind of errors. Note that this will hold your pipeline
 back if there are errors until you cancel or update it.</li>
 <li><cite>RetryStrategy.RETRY_NEVER</cite>: rows with errors
 will not be retried. Instead they will be output to a dead letter
 queue under the <cite>‘FailedRows’</cite> tag.</li>
 <li><cite>RetryStrategy.RETRY_ON_TRANSIENT_ERROR</cite>: retry
 rows with transient errors (e.g. timeouts). Rows with permanent errors
 will be output to dead letter queue under <cite>‘FailedRows’</cite> tag.</li>
 </ul>
 </li>
 <li><strong>additional_bq_parameters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>callable</em>) – Additional parameters to pass
 to BQ when creating / loading data into a table. If a callable, it
 should be a function that receives a table reference indicating
 the destination and returns a dictionary.
 These can be ‘timePartitioning’, ‘clustering’, etc. They are passed
 directly to the job load configuration. See
 <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload">https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload</a></li>
 <li><strong>table_side_inputs</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.10)"><em>tuple</em></a>) – A tuple with <code class="docutils literal notranslate"><span class="pre">AsSideInput</span></code> PCollections to be
 passed to the table callable (if one is provided).</li>
 <li><strong>schema_side_inputs</strong> – A tuple with <code class="docutils literal notranslate"><span class="pre">AsSideInput</span></code> PCollections to be
 passed to the schema callable (if one is provided).</li>
 <li><strong>triggering_frequency</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a>) – <p>When method is FILE_LOADS:
 Value will be converted to int. Every triggering_frequency seconds, a
 BigQuery load job will be triggered for all the data written since the
 last load job. BigQuery has limits on how many load jobs can be
 triggered per day, so be careful not to set this duration too low, or
 you may exceed daily quota. Often this is set to 5 or 10 minutes to
 ensure that the project stays well under the BigQuery quota. See
 <a class="reference external" href="https://cloud.google.com/bigquery/quota-policy">https://cloud.google.com/bigquery/quota-policy</a> for more information
 about BigQuery quotas.</p>
 <p>When method is STREAMING_INSERTS and with_auto_sharding=True:
 A streaming inserts batch will be submitted at least every
 triggering_frequency seconds when data is waiting. The batch can be
 sent earlier if it reaches the maximum batch size set by batch_size.
 Default value is 0.2 seconds.</p>
 </li>
 <li><strong>validate</strong> – Indicates whether to perform validation checks on
 inputs. This parameter is primarily used for testing.</li>
 <li><strong>temp_file_format</strong> – The format to use for file loads into BigQuery. The
 options are NEWLINE_DELIMITED_JSON or AVRO, with NEWLINE_DELIMITED_JSON
 being used by default. For advantages and limitations of the two
 formats, see
 <a class="reference external" href="https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro">https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro</a>
 and
 <a class="reference external" href="https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-json">https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-json</a>.</li>
 <li><strong>ignore_insert_ids</strong> – When using the STREAMING_INSERTS method to write data
 to BigQuery, <cite>insert_ids</cite> are a feature of BigQuery that supports
 deduplication of events. If your use case is not sensitive to
 duplication of data inserted to BigQuery, set <cite>ignore_insert_ids</cite>
 to True to increase the throughput for BQ writing. See:
 <a class="reference external" href="https://cloud.google.com/bigquery/streaming-data-into-bigquery#disabling_best_effort_de-duplication">https://cloud.google.com/bigquery/streaming-data-into-bigquery#disabling_best_effort_de-duplication</a></li>
 <li><strong>with_auto_sharding</strong> – Experimental. If true, enables using a dynamically
 determined number of shards to write to BigQuery. This can be used for
 both FILE_LOADS and STREAMING_INSERTS. Only applicable to unbounded
 input.</li>
 <li><strong>ignore_unknown_columns</strong> – Accept rows that contain values that do not match
 the schema. The unknown values are ignored. Default is False,
 which treats unknown values as errors. This option is only valid for
 method=STREAMING_INSERTS. See reference:
 <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll">https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll</a></li>
 <li><strong>load_job_project_id</strong> – Specifies an alternate GCP project id to use for
 billing Batch File Loads. By default, the project id of the table is
 used.</li>
 </ul>
 </td>
 </tr>
 </tbody>
 </table>
 <dl class="class">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.Method">
 <em class="property">class </em><code class="descname">Method</code><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery.Method"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.Method" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.DEFAULT">
 <code class="descname">DEFAULT</code><em class="property"> = 'DEFAULT'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.DEFAULT" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.STREAMING_INSERTS">
 <code class="descname">STREAMING_INSERTS</code><em class="property"> = 'STREAMING_INSERTS'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.STREAMING_INSERTS" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.FILE_LOADS">
 <code class="descname">FILE_LOADS</code><em class="property"> = 'FILE_LOADS'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.Method.FILE_LOADS" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="staticmethod">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.get_table_schema_from_string">
 <em class="property">static </em><code class="descname">get_table_schema_from_string</code><span class="sig-paren">(</span><em>schema</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.get_table_schema_from_string" title="Permalink to this definition">¶</a></dt>
 <dd><p>Transform the string table schema into a
 <code class="xref py py-class docutils literal notranslate"><span class="pre">TableSchema</span></code> instance.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The string schema to be used if the BigQuery table to write
 has to be created.</td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">The schema to be used if the BigQuery table to write has to be created
 but in the <code class="xref py py-class docutils literal notranslate"><span class="pre">TableSchema</span></code> format.</td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">TableSchema</td>
 </tr>
 </tbody>
 </table>
 </dd></dl>

 <dl class="staticmethod">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.table_schema_to_dict">
 <em class="property">static </em><code class="descname">table_schema_to_dict</code><span class="sig-paren">(</span><em>table_schema</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.table_schema_to_dict" title="Permalink to this definition">¶</a></dt>
 <dd><p>Create a dictionary representation of a table schema for serialization.</p>
 </dd></dl>

 <dl class="staticmethod">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.get_dict_table_schema">
 <em class="property">static </em><code class="descname">get_dict_table_schema</code><span class="sig-paren">(</span><em>schema</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.get_dict_table_schema" title="Permalink to this definition">¶</a></dt>
 <dd><p>Transform the table schema into a dictionary instance.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>schema</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>TableSchema</em>) – The schema to be used if the BigQuery table to write has to be created.
 This can either be a dict or string or in the TableSchema format.</td>
 </tr>
 <tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">The schema to be used if the BigQuery table to write has
 to be created but in the dictionary format.</td>
 </tr>
 <tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">Dict[<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)">str</a>, Any]</td>
 </tr>
 </tbody>
 </table>
 </dd></dl>

 <dl class="method">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.expand">
 <code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.expand" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="method">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.display_data">
 <code class="descname">display_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery.display_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.display_data" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="method">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.to_runner_api_parameter">
 <code class="descname">to_runner_api_parameter</code><span class="sig-paren">(</span><em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery.to_runner_api_parameter"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.to_runner_api_parameter" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="method">
 <dt id="apache_beam.io.gcp.bigquery.WriteToBigQuery.from_runner_api">
 <code class="descname">from_runner_api</code><span class="sig-paren">(</span><em>payload</em>, <em>context</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#WriteToBigQuery.from_runner_api"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.WriteToBigQuery.from_runner_api" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="class">
 <dt id="apache_beam.io.gcp.bigquery.ReadFromBigQuery">
 <em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery.</code><code class="descname">ReadFromBigQuery</code><span class="sig-paren">(</span><em>gcs_location=None</em>, <em>method=None</em>, <em>use_native_datetime=False</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadFromBigQuery"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
 <p>Read data from BigQuery.</p>
 <blockquote>
 <div>This PTransform uses a BigQuery export job to take a snapshot of the table
 on GCS, and then reads from each produced file. File format is Avro by
 default.</div></blockquote>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
 <li><strong>method</strong> – The method to use to read from BigQuery. It may be EXPORT or
 DIRECT_READ. EXPORT invokes a BigQuery export request
 (<a class="reference external" href="https://cloud.google.com/bigquery/docs/exporting-data">https://cloud.google.com/bigquery/docs/exporting-data</a>). DIRECT_READ reads
 directly from BigQuery storage using the BigQuery Read API
 (<a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/storage">https://cloud.google.com/bigquery/docs/reference/storage</a>). If
 unspecified, the default is currently EXPORT.</li>
 <li><strong>use_native_datetime</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a>) – By default this transform exports BigQuery
 DATETIME fields as formatted strings (for example:
 2021-01-01T12:59:59). If <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>, BigQuery DATETIME fields will
 be returned as native Python datetime objects. This can only be used when
 ‘method’ is ‘DIRECT_READ’.</li>
 <li><strong>table</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>callable</em><em>, </em><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><em>ValueProvider</em></a>) – The ID of the table, or a callable
 that returns it. The ID must contain only letters <code class="docutils literal notranslate"><span class="pre">a-z</span></code>, <code class="docutils literal notranslate"><span class="pre">A-Z</span></code>,
 numbers <code class="docutils literal notranslate"><span class="pre">0-9</span></code>, or underscores <code class="docutils literal notranslate"><span class="pre">_</span></code>. If dataset argument is
 <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a> then the table argument must contain the entire table
 reference specified as: <code class="docutils literal notranslate"><span class="pre">'DATASET.TABLE'</span></code>
 or <code class="docutils literal notranslate"><span class="pre">'PROJECT:DATASET.TABLE'</span></code>. If it’s a callable, it must receive one
 argument representing an element to be written to BigQuery, and return
 a TableReference, or a string table name as specified above.</li>
 <li><strong>dataset</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The ID of the dataset containing this table or
 <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a> if the table reference is specified entirely by the table
 argument.</li>
 <li><strong>project</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The ID of the project containing this table.</li>
 <li><strong>query</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><em>ValueProvider</em></a>) – A query to be used instead of arguments
 table, dataset, and project.</li>
 <li><strong>validate</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a>) – If <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>, various checks will be done when source
 gets initialized (e.g., is table present?). This should be
 <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a> for most scenarios in order to catch errors as early as
 possible (pipeline construction instead of pipeline execution). It
 should be <a class="reference external" href="https://docs.python.org/3/library/constants.html#False" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">False</span></code></a> if the table is created during pipeline
 execution by a previous step.</li>
 <li><strong>coder</strong> (<a class="reference internal" href="apache_beam.coders.coders.html#apache_beam.coders.coders.Coder" title="apache_beam.coders.coders.Coder"><em>Coder</em></a>) – The coder for the table
 rows. If <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a>, then the default coder is
 _JsonToDictCoder, which will interpret every row as a JSON
 serialized dictionary.</li>
 <li><strong>use_standard_sql</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a>) – Specifies whether to use BigQuery’s standard SQL
 dialect for this query. The default value is <a class="reference external" href="https://docs.python.org/3/library/constants.html#False" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">False</span></code></a>.
 If set to <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>, the query will use BigQuery’s updated SQL
 dialect with improved standards compliance.
 This parameter is ignored for table inputs.</li>
 <li><strong>flatten_results</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a>) – Flattens all nested and repeated fields in the
 query results. The default value is <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>.</li>
 <li><strong>kms_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Optional Cloud KMS key name for use when creating new
 temporary tables.</li>
 <li><strong>gcs_location</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><em>ValueProvider</em></a>) – The name of the Google Cloud Storage
 bucket where the extracted table should be written as a string or
 a <a class="reference internal" href="apache_beam.options.value_provider.html#apache_beam.options.value_provider.ValueProvider" title="apache_beam.options.value_provider.ValueProvider"><code class="xref py py-class docutils literal notranslate"><span class="pre">ValueProvider</span></code></a>. If
 <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a>, then the temp_location parameter is used.</li>
 <li><strong>bigquery_job_labels</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – A dictionary with string labels to be passed
 to BigQuery export and query jobs created by this transform. See:
 <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration">https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration</a></li>
 <li><strong>use_json_exports</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a>) – By default, this transform works by exporting
 BigQuery data into Avro files, and reading those files. With this
 parameter, the transform will instead export to JSON files. JSON files
 are slower to read due to their larger size.
 When using JSON exports, the BigQuery types for DATE, DATETIME, TIME, and
 TIMESTAMP will be exported as strings. This behavior is consistent with
 BigQuerySource.
 When using Avro exports, these fields will be exported as native Python
 types (datetime.date, datetime.datetime, datetime.datetime,
 and datetime.datetime respectively). Avro exports are recommended.
 To learn more about BigQuery types, and Time-related type
 representations, see: <a class="reference external" href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types">https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types</a>
 To learn more about type conversions between BigQuery and Avro, see:
 <a class="reference external" href="https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#avro_conversions">https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#avro_conversions</a></li>
 <li><strong>temp_dataset</strong> (<code class="docutils literal notranslate"><span class="pre">apache_beam.io.gcp.internal.clients.bigquery.DatasetReference</span></code>) – Temporary dataset reference to use when reading from BigQuery using a
 query. When reading using a query, BigQuery source will create a
 temporary dataset and a temporary table to store the results of the
 query. With this option, you can set an existing dataset to create the
 temporary table in. BigQuery source will create a temporary table in
 that dataset, and will remove it once it is not needed. Job needs access
 to create and delete tables within the given dataset. Dataset name
 should <em>not</em> start with the reserved prefix <cite>beam_temp_dataset_</cite>.</li>
 <li><strong>query_priority</strong> (<a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryQueryPriority" title="apache_beam.io.gcp.bigquery.BigQueryQueryPriority"><em>BigQueryQueryPriority</em></a>) – By default, this transform runs
 queries with BATCH priority. Use <a class="reference internal" href="#apache_beam.io.gcp.bigquery.BigQueryQueryPriority.INTERACTIVE" title="apache_beam.io.gcp.bigquery.BigQueryQueryPriority.INTERACTIVE"><code class="xref py py-attr docutils literal notranslate"><span class="pre">BigQueryQueryPriority.INTERACTIVE</span></code></a>
 to run queries with INTERACTIVE priority. This option is ignored when
 reading from a table rather than a query. To learn more about query
 priority, see: <a class="reference external" href="https://cloud.google.com/bigquery/docs/running-queries">https://cloud.google.com/bigquery/docs/running-queries</a></li>
 </ul>
 </td>
 </tr>
 </tbody>
 </table>
 <dl class="class">
 <dt id="apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method">
 <em class="property">class </em><code class="descname">Method</code><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadFromBigQuery.Method"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method.EXPORT">
 <code class="descname">EXPORT</code><em class="property"> = 'EXPORT'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method.EXPORT" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method.DIRECT_READ">
 <code class="descname">DIRECT_READ</code><em class="property"> = 'DIRECT_READ'</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery.Method.DIRECT_READ" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="attribute">
 <dt id="apache_beam.io.gcp.bigquery.ReadFromBigQuery.COUNTER">
 <code class="descname">COUNTER</code><em class="property"> = 0</em><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery.COUNTER" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 <dl class="method">
 <dt id="apache_beam.io.gcp.bigquery.ReadFromBigQuery.expand">
 <code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadFromBigQuery.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQuery.expand" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="class">
 <dt id="apache_beam.io.gcp.bigquery.ReadFromBigQueryRequest">
 <em class="property">class </em><code class="descclassname">apache_beam.io.gcp.bigquery.</code><code class="descname">ReadFromBigQueryRequest</code><span class="sig-paren">(</span><em>query: str = None</em>, <em>use_standard_sql: bool = True</em>, <em>table: Union[str</em>, <em>apache_beam.io.gcp.internal.clients.bigquery.bigquery_v2_messages.TableReference] = None</em>, <em>flatten_results: bool = False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadFromBigQueryRequest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQueryRequest" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 <p>Class that defines data to read from BQ.</p>
 <p>Only one of query or table should be specified.</p>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
 <li><strong>query</strong> – SQL query to fetch data.</li>
 <li><strong>use_standard_sql</strong> – Specifies whether to use BigQuery’s standard SQL dialect for this query.
 The default value is <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>. If set to <a class="reference external" href="https://docs.python.org/3/library/constants.html#False" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">False</span></code></a>,
 the query will use BigQuery’s legacy SQL dialect.
 This parameter is ignored for table inputs.</li>
 <li><strong>table</strong> – The ID of the table to read. The ID must contain only letters
 <code class="docutils literal notranslate"><span class="pre">a-z</span></code>, <code class="docutils literal notranslate"><span class="pre">A-Z</span></code>, numbers <code class="docutils literal notranslate"><span class="pre">0-9</span></code>, or underscores <code class="docutils literal notranslate"><span class="pre">_</span></code>. Table should
 define project and dataset (ex.: <code class="docutils literal notranslate"><span class="pre">'PROJECT:DATASET.TABLE'</span></code>).</li>
 <li><strong>flatten_results</strong> – Flattens all nested and repeated fields in the query results.
 The default value is <a class="reference external" href="https://docs.python.org/3/library/constants.html#False" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">False</span></code></a>.</li>
 </ul>
 </td>
 </tr>
 </tbody>
 </table>
 <dl class="method">
 <dt id="apache_beam.io.gcp.bigquery.ReadFromBigQueryRequest.validate">
 <code class="descname">validate</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadFromBigQueryRequest.validate"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadFromBigQueryRequest.validate" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>

 </dd></dl>

 <dl class="function">
 <dt id="apache_beam.io.gcp.bigquery.ReadAllFromBigQuery">
 <code class="descclassname">apache_beam.io.gcp.bigquery.</code><code class="descname">ReadAllFromBigQuery</code><span class="sig-paren">(</span><em>gcs_location: Union[str</em>, <em>apache_beam.options.value_provider.ValueProvider] = None</em>, <em>validate: bool = False</em>, <em>kms_key: str = None</em>, <em>temp_dataset: Union[str</em>, <em>apache_beam.io.gcp.internal.clients.bigquery.bigquery_v2_messages.DatasetReference] = None</em>, <em>bigquery_job_labels: Dict[str</em>, <em>str] = None</em>, <em>query_priority: str = 'BATCH'</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/io/gcp/bigquery.html#ReadAllFromBigQuery"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.io.gcp.bigquery.ReadAllFromBigQuery" title="Permalink to this definition">¶</a></dt>
 <dd><p>Read data from BigQuery.</p>
 <blockquote>
 <div><p>PTransform:ReadFromBigQueryRequest-&gt;Rows</p>
 <p>This PTransform uses a BigQuery export job to take a snapshot of the table
 on GCS, and then reads from each produced file. Data is exported into
 a new subdirectory for each export using UUIDs generated in
 <cite>ReadFromBigQueryRequest</cite> objects.</p>
 <p>It is recommended not to use this PTransform for streaming jobs on
 GlobalWindow, since it will not be able to clean up snapshots.</p>
 </div></blockquote>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
 <tbody valign="top">
 <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
 <li><strong>gcs_location</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The name of the Google Cloud Storage
 bucket where the extracted table should be written as a string. If
 <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">None</span></code></a>, then the temp_location parameter is used.</li>
 <li><strong>validate</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a>) – If <a class="reference external" href="https://docs.python.org/3/library/constants.html#True" title="(in Python v3.10)"><code class="xref py py-data docutils literal notranslate"><span class="pre">True</span></code></a>, various checks will be done when source
 gets initialized (e.g., is table present?).</li>
 <li><strong>kms_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Experimental. Optional Cloud KMS key name for use when
 creating new temporary tables.</li>
 </ul>
 </td>
 </tr>
 </tbody>
 </table>
 </dd></dl>

 </div>
 </div>


            </div>

           </div>
           <footer>

     <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">

         <a href="apache_beam.io.gcp.bigquery_avro_tools.html" class="btn btn-neutral float-right" title="apache_beam.io.gcp.bigquery_avro_tools module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>


         <a href="apache_beam.io.gcp.big_query_query_to_table_pipeline.html" class="btn btn-neutral float-left" title="apache_beam.io.gcp.big_query_query_to_table_pipeline module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>

     </div>


   <hr/>

   <div role="contentinfo">
     <p>
         &copy; Copyright

     </p>
   </div>
   Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.

 </footer>

         </div>
       </div>

     </section>

   </div>


   <script type="text/javascript">
       // Passing a function to jQuery() registers it to run once the DOM is
       // ready, so the call below is deferred until the page markup exists.
       // NOTE(review): SphinxRtdTheme is presumably defined by
       // _static/js/theme.js (loaded in <head>); the meaning of the `true`
       // argument to enable() is not visible in this file -- confirm against
       // the theme source before changing it.
       jQuery(function () {
           SphinxRtdTheme.Navigation.enable(true);
       });
   </script>


 </body>
 </html>