| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc --> |
| <title>BigQueryIO (Apache Beam 2.38.0-SNAPSHOT)</title> |
| <link rel="stylesheet" type="text/css" href="../../../../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="BigQueryIO (Apache Beam 2.38.0-SNAPSHOT)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":41,"i1":9,"i2":9,"i3":9,"i4":9,"i5":9}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"],32:["t6","Deprecated Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryInsertErrorCoder.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../../../index.html?org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html" target="_top">Frames</a></li> |
| <li><a href="BigQueryIO.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li><a href="#field.summary">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li><a href="#field.detail">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.beam.sdk.io.gcp.bigquery</div> |
| <h2 title="Class BigQueryIO" class="title">Class BigQueryIO</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li>java.lang.Object</li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO</li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <hr> |
| <br> |
| <pre>public class <span class="typeNameLabel">BigQueryIO</span> |
| extends java.lang.Object</pre> |
| <div class="block"><a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a>s for reading and writing <a |
| href="https://developers.google.com/bigquery/">BigQuery</a> tables. |
| |
| <h3>Table References</h3> |
| |
| <p>A fully-qualified BigQuery table name consists of three components: |
| |
| <ul> |
| <li><code>projectId</code>: the Cloud project id (defaults to <a href="../../../../../../../org/apache/beam/sdk/extensions/gcp/options/GcpOptions.html#getProject--"><code>GcpOptions.getProject()</code></a>). |
| <li><code>datasetId</code>: the BigQuery dataset id, unique within a project. |
| <li><code>tableId</code>: a table id, unique within a dataset. |
| </ul> |
| |
| <p>BigQuery table references are stored as a <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableReference.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableReference</code></a>, which comes from the <a |
| href="https://cloud.google.com/bigquery/client-libraries">BigQuery Java Client API</a>. Tables |
| can be referred to as Strings, with or without the <code>projectId</code>. A helper function is |
| provided (<a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.html#parseTableSpec-java.lang.String-"><code>BigQueryHelpers.parseTableSpec(String)</code></a>) that parses the following string forms |
| into a <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableReference.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableReference</code></a>: |
| |
| <ul> |
| <li>[<code>project_id</code>]:[<code>dataset_id</code>].[<code>table_id</code>] |
| <li>[<code>dataset_id</code>].[<code>table_id</code>] |
| </ul> |
| |
| <h3>BigQuery Concepts</h3> |
| |
| <p>Tables have rows (<a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>) and each row has cells (<a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableCell.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableCell</code></a>). A table has a |
| schema (<a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableSchema.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableSchema</code></a>), which in turn describes the schema of each cell (<a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableFieldSchema.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableFieldSchema</code></a>). The terms field and cell are used interchangeably. |
| |
| <p><a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableSchema.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableSchema</code></a>: describes the schema (types and order) for values in each row. It has one |
| attribute, 'fields', which is a list of <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableFieldSchema.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableFieldSchema</code></a> objects. |
| |
| <p><a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableFieldSchema.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableFieldSchema</code></a>: describes the schema (type, name) for one field. It has several |
| attributes, including 'name' and 'type'. Common values for the type attribute are: 'STRING', |
| 'INTEGER', 'FLOAT', 'BOOLEAN', 'NUMERIC', 'GEOGRAPHY'. All possible values are described at: <a |
| href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types"> |
| https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types</a> |
| |
| <p><a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>: Holds all values in a table row. Has one attribute, 'f', which is a list of |
| <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableCell.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableCell</code></a> instances. |
| |
| <p><a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableCell.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableCell</code></a>: Holds the value for one cell (or field). Has one attribute, 'v', which is |
| the value of the table cell. |
| |
| <p>As of Beam 2.7.0, the NUMERIC data type is supported. This data type supports high-precision |
| decimal numbers (precision of 38 digits, scale of 9 digits). The GEOGRAPHY data type works with |
| Well-Known Text (See <a href="https://en.wikipedia.org/wiki/Well-known_text"> |
| https://en.wikipedia.org/wiki/Well-known_text</a>) format for reading and writing to BigQuery. |
| BigQuery IO requires values of BYTES datatype to be encoded using base64 encoding when writing to |
| BigQuery. When bytes are read from BigQuery they are returned as base64-encoded strings. |
| |
| <h3>Reading</h3> |
| |
| <p>Reading from BigQuery is supported by <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a>, which parses |
| records in <a href="https://cloud.google.com/bigquery/data-formats#avro_format">AVRO format</a> |
| into a custom type (see the table below for type conversion) using a specified parse function, |
| and by <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> which parses them into <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>, which may be more |
| convenient but has lower performance. |
| |
| <p>Both functions support reading either from a table or from the result of a query, via <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html#from-java.lang.String-"><code>BigQueryIO.TypedRead.from(String)</code></a> and <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html#fromQuery-java.lang.String-"><code>BigQueryIO.TypedRead.fromQuery(java.lang.String)</code></a> respectively. Exactly one of these must |
| be specified. |
| |
| <p>If you are reading from an authorized view with <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html#fromQuery-java.lang.String-"><code>BigQueryIO.TypedRead.fromQuery(java.lang.String)</code></a>, you need to use |
| <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html#withQueryLocation-java.lang.String-"><code>BigQueryIO.TypedRead.withQueryLocation(String)</code></a> to set the location of the BigQuery job. Otherwise, |
| Beam will try to determine that location by reading the metadata of the dataset that contains the |
| underlying tables. With authorized views, that will result in a 403 error and the query will not |
| be resolved. |
| |
| <p><b>Type Conversion Table</b> |
| |
| <table border="1" cellspacing="1"> |
| <tr> |
| <td> <b>BigQuery standard SQL type</b> </td> <td> <b>Avro type</b> </td> <td> <b>Java type</b> </td> |
| </tr> |
| <tr> |
| <td> BOOLEAN </td> <td> boolean </td> <td> Boolean </td> |
| </tr> |
| <tr> |
| <td> INT64 </td> <td> long </td> <td> Long </td> |
| </tr> |
| <tr> |
| <td> FLOAT64 </td> <td> double </td> <td> Double </td> |
| </tr> |
| <tr> |
| <td> BYTES </td> <td> bytes </td> <td> java.nio.ByteBuffer </td> |
| </tr> |
| <tr> |
| <td> STRING </td> <td> string </td> <td> CharSequence </td> |
| </tr> |
| <tr> |
| <td> DATE </td> <td> int </td> <td> Integer </td> |
| </tr> |
| <tr> |
| <td> DATETIME </td> <td> string </td> <td> CharSequence </td> |
| </tr> |
| <tr> |
| <td> TIMESTAMP </td> <td> long </td> <td> Long </td> |
| </tr> |
| <tr> |
| <td> TIME </td> <td> long </td> <td> Long </td> |
| </tr> |
| <tr> |
| <td> NUMERIC </td> <td> bytes </td> <td> java.nio.ByteBuffer </td> |
| </tr> |
| <tr> |
| <td> GEOGRAPHY </td> <td> string </td> <td> CharSequence </td> |
| </tr> |
| <tr> |
| <td> ARRAY </td> <td> array </td> <td> java.util.Collection </td> |
| </tr> |
| <tr> |
| <td> STRUCT </td> <td> record </td> <td> org.apache.avro.generic.GenericRecord </td> |
| </tr> |
| </table> |
| |
| <p><b>Example: Reading rows of a table as <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>.</b> |
| |
| <pre><code> |
| PCollection&lt;TableRow&gt; weatherData = pipeline.apply( |
| BigQueryIO.readTableRows().from("clouddataflow-readonly:samples.weather_stations")); |
| </code></pre> |
| |
| <b>Example: Reading rows of a table and parsing them into a custom type.</b> |
| |
| <pre><code> |
| PCollection&lt;WeatherRecord&gt; weatherData = pipeline.apply( |
| BigQueryIO |
| .read(new SerializableFunction&lt;SchemaAndRecord, WeatherRecord&gt;() { |
| public WeatherRecord apply(SchemaAndRecord schemaAndRecord) { |
| return new WeatherRecord(...); |
| } |
| }) |
| .from("clouddataflow-readonly:samples.weather_stations") |
| .withCoder(SerializableCoder.of(WeatherRecord.class))); |
| </code></pre> |
| |
| <p>Note: When using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a>, you may sometimes need to use <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html#withCoder-org.apache.beam.sdk.coders.Coder-"><code>BigQueryIO.TypedRead.withCoder(Coder)</code></a> to specify a <a href="../../../../../../../org/apache/beam/sdk/coders/Coder.html" title="class in org.apache.beam.sdk.coders"><code>Coder</code></a> for the result type, if Beam fails to |
| infer it automatically. |
| |
| <p><b>Example: Reading results of a query as <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>.</b> |
| |
| <pre><code> |
| PCollection&lt;TableRow&gt; meanTemperatureData = pipeline.apply(BigQueryIO.readTableRows() |
| .fromQuery("SELECT year, mean_temp FROM [samples.weather_stations]")); |
| </code></pre> |
| |
| <p>Users can optionally specify a query priority using <code>TypedRead#withQueryPriority(TypedRead.QueryPriority)</code> and a geographic location where the query |
| will be executed using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html#withQueryLocation-java.lang.String-"><code>BigQueryIO.TypedRead.withQueryLocation(String)</code></a>. Query location must be |
| specified for jobs that are not executed in US or EU, or if you are reading from an authorized |
| view. See <a href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query">BigQuery |
| Jobs: query</a>. |
| |
| <h3>Writing</h3> |
| |
| <p>To write to a BigQuery table, apply a <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><code>BigQueryIO.Write</code></a> transformation. This consumes a |
| <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of a user-defined type when using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#write--"><code>write()</code></a> (recommended), |
| or a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRows</code></a> as input when using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#writeTableRows--"><code>writeTableRows()</code></a> (not recommended). When using a user-defined type, one of the |
| following must be provided. |
| |
| <ul> |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withAvroFormatFunction-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.withAvroFormatFunction(SerializableFunction)</code></a> (recommended) to |
| write data using avro records. |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withAvroWriter-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.withAvroWriter(org.apache.beam.sdk.transforms.SerializableFunction&lt;org.apache.avro.Schema, org.apache.avro.io.DatumWriter&lt;T&gt;&gt;)</code></a> to write avro data using a user-specified <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/io/DatumWriter.html?is-external=true" title="class or interface in org.apache.avro.io"><code>DatumWriter</code></a> (and format function). |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withFormatFunction-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.withFormatFunction(SerializableFunction)</code></a> to write data as json |
| encoded <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRows</code></a>. |
| </ul> |
| |
| If <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withAvroFormatFunction-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.withAvroFormatFunction(SerializableFunction)</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withAvroWriter-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.withAvroWriter(org.apache.beam.sdk.transforms.SerializableFunction&lt;org.apache.avro.Schema, org.apache.avro.io.DatumWriter&lt;T&gt;&gt;)</code></a> is used, the table schema MUST be specified using one of the |
| <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withJsonSchema-java.lang.String-"><code>BigQueryIO.Write.withJsonSchema(String)</code></a>, <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withJsonSchema-org.apache.beam.sdk.options.ValueProvider-"><code>BigQueryIO.Write.withJsonSchema(ValueProvider)</code></a>, <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withSchemaFromView-org.apache.beam.sdk.values.PCollectionView-"><code>BigQueryIO.Write.withSchemaFromView(PCollectionView)</code></a> methods, or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#to-org.apache.beam.sdk.io.gcp.bigquery.DynamicDestinations-"><code>BigQueryIO.Write.to(DynamicDestinations)</code></a>. |
| |
| <pre><code> |
| class Quote { |
| final Instant timestamp; |
| final String exchange; |
| final String symbol; |
| final double price; |
| |
| Quote(Instant timestamp, String exchange, String symbol, double price) { |
| // initialize all member variables. |
| } |
| } |
| |
| PCollection&lt;Quote&gt; quotes = ... |
| |
| quotes.apply(BigQueryIO |
| .&lt;Quote&gt;write() |
| .to("my-project:my_dataset.my_table") |
| .withSchema(new TableSchema().setFields( |
| ImmutableList.of( |
| new TableFieldSchema().setName("timestamp").setType("TIMESTAMP"), |
| new TableFieldSchema().setName("exchange").setType("STRING"), |
| new TableFieldSchema().setName("symbol").setType("STRING"), |
| new TableFieldSchema().setName("price").setType("FLOAT")))) |
| .withFormatFunction(quote -> new TableRow().set(..set the columns..)) |
| .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); |
| </code></pre> |
| |
| <p>See <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><code>BigQueryIO.Write</code></a> for details on how to specify if a write should append to an |
| existing table, replace the table, or verify that the table is empty. Note that the dataset being |
| written to must already exist. Unbounded PCollections can only be written using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.WriteDisposition.html#WRITE_EMPTY"><code>BigQueryIO.Write.WriteDisposition.WRITE_EMPTY</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.WriteDisposition.html#WRITE_APPEND"><code>BigQueryIO.Write.WriteDisposition.WRITE_APPEND</code></a>. |
| |
| <p>BigQueryIO supports automatically inferring the BigQuery table schema from the Beam schema on |
| the input PCollection. Beam can also automatically format the input into a TableRow in this case, |
| if no format function is provided. In the above example, the quotes PCollection has a schema that |
| Beam infers from the Quote POJO. So the write could be done more simply as follows: |
| |
| <pre><code> |
| @DefaultSchema(JavaFieldSchema.class) |
| class Quote { |
| final Instant timestamp; |
| final String exchange; |
| final String symbol; |
| final double price; |
| |
| @SchemaCreate |
| Quote(Instant timestamp, String exchange, String symbol, double price) { |
| // initialize all member variables. |
| } |
| } |
| |
| PCollection&lt;Quote&gt; quotes = ... |
| |
| quotes.apply(BigQueryIO |
| .&lt;Quote&gt;write() |
| .to("my-project:my_dataset.my_table") |
| .useBeamSchema() |
| .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); |
| </code></pre> |
| |
| <h3>Loading historical data into time-partitioned BigQuery tables</h3> |
| |
| <p>To load historical data into a time-partitioned BigQuery table, specify <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withTimePartitioning-com.google.api.services.bigquery.model.TimePartitioning-"><code>BigQueryIO.Write.withTimePartitioning(com.google.api.services.bigquery.model.TimePartitioning)</code></a> with a <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TimePartitioning.html?is-external=true#setField-java.lang.String-" title="class or interface in com.google.api.services.bigquery.model"><code>field</code></a> |
| used for <a |
| href="https://cloud.google.com/bigquery/docs/partitioned-tables#partitioned_tables">column-based |
| partitioning</a>. For example: |
| |
| <pre><code> |
| PCollection&lt;Quote&gt; quotes = ...; |
| |
| quotes.apply(BigQueryIO.write() |
| .withSchema(schema) |
| .withFormatFunction(quote -> new TableRow() |
| .set("timestamp", quote.getTimestamp()) |
| .set(..other columns..)) |
| .to("my-project:my_dataset.my_table") |
| .withTimePartitioning(new TimePartitioning().setField("time"))); |
| </code></pre> |
| |
| <h3>Writing different values to different tables</h3> |
| |
| <p>A common use case is to dynamically generate BigQuery table names based on the current value. |
| To support this, <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#to-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.to(SerializableFunction)</code></a> accepts a function mapping the |
| current element to a tablespec. For example, here's code that outputs quotes of different stocks |
| to different tables: |
| |
| <pre><code> |
| PCollection&lt;Quote&gt; quotes = ...; |
| |
| quotes.apply(BigQueryIO.write() |
| .withSchema(schema) |
| .withFormatFunction(quote -> new TableRow()...) |
| .to((ValueInSingleWindow&lt;Quote&gt; quote) -> { |
| String symbol = quote.getValue().getSymbol(); |
| return new TableDestination( |
| "my-project:my_dataset.quotes_" + symbol, // Table spec |
| "Quotes of stock " + symbol // Table description |
| ); |
| })); |
| </code></pre> |
| |
| <p>Per-table schemas can also be provided using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withSchemaFromView-org.apache.beam.sdk.values.PCollectionView-"><code>BigQueryIO.Write.withSchemaFromView(org.apache.beam.sdk.values.PCollectionView&lt;java.util.Map&lt;java.lang.String, java.lang.String&gt;&gt;)</code></a>. This |
| allows the schemas to be calculated based on a previous pipeline stage or statically via a |
| <a href="../../../../../../../org/apache/beam/sdk/transforms/Create.html" title="class in org.apache.beam.sdk.transforms"><code>Create</code></a> transform. This method expects to receive a |
| map-valued <a href="../../../../../../../org/apache/beam/sdk/values/PCollectionView.html" title="interface in org.apache.beam.sdk.values"><code>PCollectionView</code></a>, mapping table specifications (project:dataset.table-id), to |
| JSON formatted <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableSchema.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableSchema</code></a> objects. All destination tables must be present in this map, |
| or the pipeline will fail to create tables. Care should be taken if the map value is based on a |
| triggered aggregation over an unbounded <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a>; the side input will contain the |
| entire history of all table schemas ever generated, which might blow up memory usage. This method |
| can also be useful when writing to a single table, as it allows a previous stage to calculate the |
| schema (possibly based on the full collection of records being written to BigQuery). |
| |
| <p>For the most general form of dynamic table destinations and schemas, look at <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#to-org.apache.beam.sdk.io.gcp.bigquery.DynamicDestinations-"><code>BigQueryIO.Write.to(DynamicDestinations)</code></a>. |
| |
| <h3>Insertion Method</h3> |
| |
| <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><code>BigQueryIO.Write</code></a> supports two methods of inserting data into BigQuery specified using |
| <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withMethod-org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method-"><code>BigQueryIO.Write.withMethod(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method)</code></a>. If no method is supplied, then a default method will be |
| chosen based on the input PCollection. See <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.Method.html" title="enum in org.apache.beam.sdk.io.gcp.bigquery"><code>BigQueryIO.Write.Method</code></a> for more information |
| about the methods. The different insertion methods provide different tradeoffs of cost, quota, |
| and data consistency; please see BigQuery documentation for more information about these |
| tradeoffs. |
| |
| <h3>Usage with templates</h3> |
| |
| <p>When using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read--"><code>read()</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> in a template, it's required to specify |
| <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html#withTemplateCompatibility--"><code>BigQueryIO.Read.withTemplateCompatibility()</code></a>. Specifying this in a non-template pipeline is not |
| recommended because it has somewhat lower performance. |
| |
| <p>When using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#write--"><code>write()</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#writeTableRows--"><code>writeTableRows()</code></a> with batch loads in a template, it is |
| recommended to specify <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withCustomGcsTempLocation-org.apache.beam.sdk.options.ValueProvider-"><code>BigQueryIO.Write.withCustomGcsTempLocation(org.apache.beam.sdk.options.ValueProvider&lt;java.lang.String&gt;)</code></a>. Writing to BigQuery via batch |
| loads involves writing temporary files to this location, so the location must be accessible at |
| pipeline execution time. By default, this location is captured at pipeline <i>construction</i> |
| time, and may be inaccessible if the template is reused from a different project or at a moment |
| when the original location no longer exists. <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withCustomGcsTempLocation-org.apache.beam.sdk.options.ValueProvider-"><code>BigQueryIO.Write.withCustomGcsTempLocation(ValueProvider)</code></a> allows specifying the location as an argument to |
| the template invocation. |
| |
| <h3>Permissions</h3> |
| |
| <p>Permission requirements depend on the <a href="../../../../../../../org/apache/beam/sdk/PipelineRunner.html" title="class in org.apache.beam.sdk"><code>PipelineRunner</code></a> that is used to execute the |
| pipeline. Please refer to the documentation of corresponding <a href="../../../../../../../org/apache/beam/sdk/PipelineRunner.html" title="class in org.apache.beam.sdk"><code>PipelineRunner</code></a>s for more |
| details. |
| |
| <p>Please see <a href="https://cloud.google.com/bigquery/access-control">BigQuery Access Control |
| </a> for security and permission related information specific to BigQuery.</div> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ======== NESTED CLASS SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="nested.class.summary"> |
| <!-- --> |
| </a> |
| <h3>Nested Class Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation"> |
| <caption><span>Nested Classes</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Class and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Read</a></span></code> |
| <div class="block">Implementation of <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read--"><code>read()</code></a>.</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a><<a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="type parameter in BigQueryIO.TypedRead">T</a>></span></code> |
| <div class="block">Implementation of <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a>.</div> |
| </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Write</a><<a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="type parameter in BigQueryIO.Write">T</a>></span></code> |
| <div class="block">Implementation of <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#write--"><code>write()</code></a>.</div> |
| </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- =========== FIELD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="field.summary"> |
| <!-- --> |
| </a> |
| <h3>Field Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation"> |
| <caption><span>Fields</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Field and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static java.lang.String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#BIGQUERY_JOB_TEMPLATE">BIGQUERY_JOB_TEMPLATE</a></span></code> |
| <div class="block">Template for BigQuery jobs created by BigQueryIO.</div> |
| </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span><span id="t6" class="tableTab"><span><a href="javascript:show(32);">Deprecated Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Read</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read--">read</a></span>()</code> |
| <div class="block"><span class="deprecatedLabel">Deprecated.</span> |
| <div class="block"><span class="deprecationComment">Use <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> instead. <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> does exactly the same as <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read--"><code>read()</code></a>, however <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> performs better.</span></div> |
| </div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>static <T> <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-">read</a></span>(<a href="../../../../../../../org/apache/beam/sdk/transforms/SerializableFunction.html" title="interface in org.apache.beam.sdk.transforms">SerializableFunction</a><<a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/SchemaAndRecord.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">SchemaAndRecord</a>,T> parseFn)</code> |
| <div class="block">Reads from a BigQuery table or query and returns a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> with one element per |
| row of the table or query result, parsed from the BigQuery AVRO format using the specified |
| function.</div> |
| </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a><<a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model">TableRow</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--">readTableRows</a></span>()</code> |
| <div class="block">Like <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> but represents each row as a <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>.</div> |
| </td> |
| </tr> |
| <tr id="i3" class="rowColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a><<a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model">TableRow</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRowsWithSchema--">readTableRowsWithSchema</a></span>()</code> |
| <div class="block">Like <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> but with <a href="../../../../../../../org/apache/beam/sdk/schemas/Schema.html" title="class in org.apache.beam.sdk.schemas"><code>Schema</code></a> support.</div> |
| </td> |
| </tr> |
| <tr id="i4" class="altColor"> |
| <td class="colFirst"><code>static <T> <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Write</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#write--">write</a></span>()</code> |
| <div class="block">A <a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that writes a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> to a BigQuery table.</div> |
| </td> |
| </tr> |
| <tr id="i5" class="rowColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Write</a><<a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model">TableRow</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#writeTableRows--">writeTableRows</a></span>()</code> |
| <div class="block">A <a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that writes a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> containing <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRows</code></a> to |
| a BigQuery table.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.Object</h3> |
| <code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ============ FIELD DETAIL =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="field.detail"> |
| <!-- --> |
| </a> |
| <h3>Field Detail</h3> |
| <a name="BIGQUERY_JOB_TEMPLATE"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>BIGQUERY_JOB_TEMPLATE</h4> |
| <pre>public static final java.lang.String BIGQUERY_JOB_TEMPLATE</pre> |
| <div class="block">Template for BigQuery jobs created by BigQueryIO. This template is: <code>"beam_bq_job_{TYPE}_{JOB_ID}_{STEP}_{RANDOM}"</code>, where: |
| |
| <ul> |
| <li><code>TYPE</code> represents the BigQuery job type (e.g. extract / copy / load / query). |
| <li><code>JOB_ID</code> is the Beam job name. |
| <li><code>STEP</code> is a UUID representing the Dataflow step that created the BQ job. |
| <li><code>RANDOM</code> is a random string. |
| </ul> |
| |
| <p><b>NOTE:</b> This job name template does not have backwards compatibility guarantees.</div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../../../../constant-values.html#org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.BIGQUERY_JOB_TEMPLATE">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="read--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>read</h4> |
| <pre>@Deprecated |
| public static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Read</a> read()</pre> |
| <div class="block"><span class="deprecatedLabel">Deprecated.</span> <span class="deprecationComment">Use <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> instead. <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> does exactly the same as <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read--"><code>read()</code></a>, however <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> performs better.</span></div> |
| </li> |
| </ul> |
| <a name="readTableRows--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>readTableRows</h4> |
| <pre>public static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a><<a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model">TableRow</a>> readTableRows()</pre> |
| <div class="block">Like <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> but represents each row as a <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>. |
| |
| <p>This method is more convenient to use in some cases, but usually has significantly lower |
| performance than using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> directly to parse data into a |
| domain-specific type, due to the overhead of converting the rows to <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>.</div> |
| </li> |
| </ul> |
| <a name="readTableRowsWithSchema--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>readTableRowsWithSchema</h4> |
| <pre>public static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a><<a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model">TableRow</a>> readTableRowsWithSchema()</pre> |
| <div class="block">Like <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> but with <a href="../../../../../../../org/apache/beam/sdk/schemas/Schema.html" title="class in org.apache.beam.sdk.schemas"><code>Schema</code></a> support.</div> |
| </li> |
| </ul> |
| <a name="read-org.apache.beam.sdk.transforms.SerializableFunction-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>read</h4> |
| <pre>public static <T> <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a><T> read(<a href="../../../../../../../org/apache/beam/sdk/transforms/SerializableFunction.html" title="interface in org.apache.beam.sdk.transforms">SerializableFunction</a><<a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/SchemaAndRecord.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">SchemaAndRecord</a>,T> parseFn)</pre> |
| <div class="block">Reads from a BigQuery table or query and returns a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> with one element per |
| row of the table or query result, parsed from the BigQuery AVRO format using the specified |
| function. |
| |
| <p>Each <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/SchemaAndRecord.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><code>SchemaAndRecord</code></a> contains a BigQuery <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableSchema.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableSchema</code></a> and a <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> representing the row, indexed by column name. Here is a sample parse function |
| that parses click events from a table. |
| |
| <pre><code> |
| class ClickEvent { long userId; String url; ... } |
| |
| p.apply(BigQueryIO.read(new SerializableFunction<SchemaAndRecord, ClickEvent>() { |
| public ClickEvent apply(SchemaAndRecord record) { |
| GenericRecord r = record.getRecord(); |
| return new ClickEvent((Long) r.get("userId"), (String) r.get("url")); |
| } |
| }).from("...")); |
| </code></pre></div> |
| </li> |
| </ul> |
| <a name="write--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>write</h4> |
| <pre>public static <T> <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Write</a><T> write()</pre> |
| <div class="block">A <a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that writes a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> to a BigQuery table. A formatting |
| function must be provided to convert each input element into a <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a> using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withFormatFunction-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.withFormatFunction(SerializableFunction)</code></a>. |
| |
| <p>In BigQuery, each table has an enclosing dataset. The dataset being written to must already |
| exist. |
| |
| <p>By default, tables will be created if they do not exist, which corresponds to a <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.CreateDisposition.html#CREATE_IF_NEEDED"><code>BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED</code></a> disposition that matches the default of BigQuery's |
| Jobs API. A schema must be provided (via <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withSchema-com.google.api.services.bigquery.model.TableSchema-"><code>BigQueryIO.Write.withSchema(TableSchema)</code></a>), or else the |
| transform may fail at runtime with an <code>IllegalArgumentException</code>. |
| |
| <p>By default, writes require an empty table, which corresponds to a <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.WriteDisposition.html#WRITE_EMPTY"><code>BigQueryIO.Write.WriteDisposition.WRITE_EMPTY</code></a> disposition that matches the default of BigQuery's Jobs |
| API. |
| |
| <p>Here is a sample transform that produces TableRow values containing "word" and "count" |
| columns: |
| |
| <pre><code> |
| static class FormatCountsFn extends DoFn<KV<String, Long>, TableRow> { |
| public void processElement(ProcessContext c) { |
| TableRow row = new TableRow() |
| .set("word", c.element().getKey()) |
| .set("count", c.element().getValue().intValue()); |
| c.output(row); |
| } |
| } |
| </code></pre></div> |
| </li> |
| </ul> |
| <a name="writeTableRows--"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>writeTableRows</h4> |
| <pre>public static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Write</a><<a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model">TableRow</a>> writeTableRows()</pre> |
| <div class="block">A <a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that writes a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> containing <a href="https://static.javadoc.io/com.google.apis/google-api-services-bigquery/v2-rev20211129-1.32.1/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRows</code></a> to |
| a BigQuery table. |
| |
| <p>It is recommended to instead use <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#write--"><code>write()</code></a> with <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withFormatFunction-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.withFormatFunction(SerializableFunction)</code></a>.</div> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryInsertErrorCoder.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../../../index.html?org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html" target="_top">Frames</a></li> |
| <li><a href="BigQueryIO.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li><a href="#field.summary">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li><a href="#field.detail">Field</a> | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| </body> |
| </html> |