| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_151-google-v7) on Tue Oct 02 14:37:29 PDT 2018 --> |
| <title>BigQueryIO (Apache Beam 2.7.0-SNAPSHOT)</title> |
| <meta name="date" content="2018-10-02"> |
| <link rel="stylesheet" type="text/css" href="../../../../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="BigQueryIO (Apache Beam 2.7.0-SNAPSHOT)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":41,"i1":9,"i2":9,"i3":9,"i4":9}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"],32:["t6","Deprecated Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryInsertErrorCoder.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../../../index.html?org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html" target="_top">Frames</a></li> |
| <li><a href="BigQueryIO.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.beam.sdk.io.gcp.bigquery</div> |
| <h2 title="Class BigQueryIO" class="title">Class BigQueryIO</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li>java.lang.Object</li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO</li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <hr> |
| <br> |
| <pre>public class <span class="typeNameLabel">BigQueryIO</span> |
| extends java.lang.Object</pre> |
| <div class="block"><a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a>s for reading and writing <a |
| href="https://developers.google.com/bigquery/">BigQuery</a> tables. |
| |
| <h3>Table References</h3> |
| |
| <p>A fully-qualified BigQuery table name consists of three components: |
| |
| <ul> |
| <li><code>projectId</code>: the Cloud project id (defaults to <a href="../../../../../../../org/apache/beam/sdk/extensions/gcp/options/GcpOptions.html#getProject--"><code>GcpOptions.getProject()</code></a>). |
| <li><code>datasetId</code>: the BigQuery dataset id, unique within a project. |
| <li><code>tableId</code>: a table id, unique within a dataset. |
| </ul> |
| |
| <p>BigQuery table references are stored as a <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableReference.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableReference</code></a>, which comes from the <a |
| href="https://cloud.google.com/bigquery/client-libraries">BigQuery Java Client API</a>. Tables |
| can be referred to as Strings, with or without the <code>projectId</code>. A helper function is |
| provided (<a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.html#parseTableSpec-java.lang.String-"><code>BigQueryHelpers.parseTableSpec(String)</code></a>) that parses the following string forms |
| into a <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableReference.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableReference</code></a>: |
| |
| <ul> |
| <li>[<code>project_id</code>]:[<code>dataset_id</code>].[<code>table_id</code>] |
| <li>[<code>dataset_id</code>].[<code>table_id</code>] |
| </ul> |
| |
| <h3>Reading</h3> |
| |
| <p>Reading from BigQuery is supported by <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a>, which parses |
| records in <a href="https://cloud.google.com/bigquery/data-formats#avro_format">AVRO format</a> |
| into a custom type using a specified parse function, and by <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> which parses |
| them into <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>, which may be more convenient but has lower performance. |
| |
| <p>Both functions support reading either from a table or from the result of a query, via <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html#from-java.lang.String-"><code>BigQueryIO.TypedRead.from(String)</code></a> and <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html#fromQuery-java.lang.String-"><code>BigQueryIO.TypedRead.fromQuery(java.lang.String)</code></a> respectively. Exactly one of these must |
| be specified. |
| |
| <p><b>Example: Reading rows of a table as <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>.</b> |
| |
| <pre><code> |
| PCollection&lt;TableRow&gt; weatherData = pipeline.apply( |
| BigQueryIO.readTableRows().from("clouddataflow-readonly:samples.weather_stations")); |
| </code></pre> |
| |
| <b>Example: Reading rows of a table and parsing them into a custom type.</b> |
| |
| <pre><code> |
| PCollection&lt;WeatherRecord&gt; weatherData = pipeline.apply( |
| BigQueryIO |
| .read(new SerializableFunction&lt;SchemaAndRecord, WeatherRecord&gt;() { |
| public WeatherRecord apply(SchemaAndRecord schemaAndRecord) { |
| return new WeatherRecord(...); |
| } |
| }) |
| .from("clouddataflow-readonly:samples.weather_stations") |
| .withCoder(SerializableCoder.of(WeatherRecord.class))); |
| </code></pre> |
| |
| <p>Note: When using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a>, you may sometimes need to use <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html#withCoder-org.apache.beam.sdk.coders.Coder-"><code>BigQueryIO.TypedRead.withCoder(Coder)</code></a> to specify a <a href="../../../../../../../org/apache/beam/sdk/coders/Coder.html" title="class in org.apache.beam.sdk.coders"><code>Coder</code></a> for the result type, if Beam fails to |
| infer it automatically. |
| |
| <p><b>Example: Reading results of a query as <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>.</b> |
| |
| <pre><code> |
| PCollection&lt;TableRow&gt; meanTemperatureData = pipeline.apply(BigQueryIO.readTableRows() |
| .fromQuery("SELECT year, mean_temp FROM [samples.weather_stations]")); |
| </code></pre> |
| |
| <p>Users can optionally specify a query priority using <code>TypedRead#withQueryPriority( |
| TypedRead.QueryPriority)</code> and a geographic location where the query will be executed using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html#withQueryLocation-java.lang.String-"><code>BigQueryIO.TypedRead.withQueryLocation(String)</code></a>. Query location must be specified for jobs that are not |
| executed in US or EU. See <a |
| href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query">BigQuery Jobs: |
| query</a>. |
| |
| <h3>Writing</h3> |
| |
| <p>To write to a BigQuery table, apply a <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><code>BigQueryIO.Write</code></a> transformation. This consumes a |
| <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of a user-defined type when using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#write--"><code>write()</code></a> (recommended), |
| or a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRows</code></a> as input when using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#writeTableRows--"><code>writeTableRows()</code></a> (not recommended). When using a user-defined type, a function must |
| be provided to turn this type into a <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a> using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withFormatFunction-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.withFormatFunction(SerializableFunction)</code></a>. |
| |
| <pre><code> |
| class Quote { Instant timestamp; String exchange; String symbol; double price; } |
| |
| PCollection&lt;Quote&gt; quotes = ... |
| |
| quotes.apply(BigQueryIO |
| .&lt;Quote&gt;write() |
| .to("my-project:my_dataset.my_table") |
| .withSchema(new TableSchema().setFields( |
| ImmutableList.of( |
| new TableFieldSchema().setName("timestamp").setType("TIMESTAMP"), |
| new TableFieldSchema().setName("exchange").setType("STRING"), |
| new TableFieldSchema().setName("symbol").setType("STRING"), |
| new TableFieldSchema().setName("price").setType("FLOAT")))) |
| .withFormatFunction(quote -> new TableRow().set(..set the columns..)) |
| .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); |
| </code></pre> |
| |
| <p>See <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><code>BigQueryIO.Write</code></a> for details on how to specify if a write should append to an |
| existing table, replace the table, or verify that the table is empty. Note that the dataset being |
| written to must already exist. Unbounded PCollections can only be written using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.WriteDisposition.html#WRITE_EMPTY"><code>BigQueryIO.Write.WriteDisposition.WRITE_EMPTY</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.WriteDisposition.html#WRITE_APPEND"><code>BigQueryIO.Write.WriteDisposition.WRITE_APPEND</code></a>. |
| |
| <h3>Loading historical data into time-partitioned BigQuery tables</h3> |
| |
| <p>To load historical data into a time-partitioned BigQuery table, specify <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withTimePartitioning-com.google.api.services.bigquery.model.TimePartitioning-"><code>BigQueryIO.Write.withTimePartitioning(com.google.api.services.bigquery.model.TimePartitioning)</code></a> with a <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TimePartitioning.html?is-external=true#setField-java.lang.String-" title="class or interface in com.google.api.services.bigquery.model"><code>field</code></a> |
| used for <a |
| href="https://cloud.google.com/bigquery/docs/partitioned-tables#partitioned_tables">column-based |
| partitioning</a>. For example: |
| |
| <pre><code> |
| PCollection&lt;Quote&gt; quotes = ...; |
| |
| quotes.apply(BigQueryIO.write() |
| .withSchema(schema) |
| .withFormatFunction(quote -> new TableRow() |
| .set("timestamp", quote.getTimestamp()) |
| .set(..other columns..)) |
| .to("my-project:my_dataset.my_table") |
| .withTimePartitioning(new TimePartitioning().setField("time"))); |
| </code></pre> |
| |
| <h3>Writing different values to different tables</h3> |
| |
| <p>A common use case is to dynamically generate BigQuery table names based on the current value. |
| To support this, <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#to-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.to(SerializableFunction)</code></a> accepts a function mapping the |
| current element to a tablespec. For example, here's code that outputs quotes of different stocks |
| to different tables: |
| |
| <pre><code> |
| PCollection&lt;Quote&gt; quotes = ...; |
| |
| quotes.apply(BigQueryIO.write() |
| .withSchema(schema) |
| .withFormatFunction(quote -> new TableRow()...) |
| .to((ValueInSingleWindow&lt;Quote&gt; quote) -> { |
| String symbol = quote.getValue().getSymbol(); |
| return new TableDestination( |
| "my-project:my_dataset.quotes_" + symbol, // Table spec |
| "Quotes of stock " + symbol // Table description |
| ); |
| })); |
| </code></pre> |
| |
| <p>Per-table schemas can also be provided using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withSchemaFromView-org.apache.beam.sdk.values.PCollectionView-"><code>BigQueryIO.Write.withSchemaFromView(org.apache.beam.sdk.values.PCollectionView&lt;java.util.Map&lt;java.lang.String, java.lang.String&gt;&gt;)</code></a>. This |
| allows the schemas to be calculated based on a previous pipeline stage or statically via a |
| <a href="../../../../../../../org/apache/beam/sdk/transforms/Create.html" title="class in org.apache.beam.sdk.transforms"><code>Create</code></a> transform. This method expects to receive a |
| map-valued <a href="../../../../../../../org/apache/beam/sdk/values/PCollectionView.html" title="interface in org.apache.beam.sdk.values"><code>PCollectionView</code></a>, mapping table specifications (project:dataset.table-id), to |
| JSON formatted <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableSchema.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableSchema</code></a> objects. All destination tables must be present in this map, |
| or the pipeline will fail to create tables. Care should be taken if the map value is based on a |
| triggered aggregation over an unbounded <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a>; the side input will contain the |
| entire history of all table schemas ever generated, which might blow up memory usage. This method |
| can also be useful when writing to a single table, as it allows a previous stage to calculate the |
| schema (possibly based on the full collection of records being written to BigQuery). |
| |
| <p>For the most general form of dynamic table destinations and schemas, look at <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#to-org.apache.beam.sdk.io.gcp.bigquery.DynamicDestinations-"><code>BigQueryIO.Write.to(DynamicDestinations)</code></a>. |
| |
| <h3>Insertion Method</h3> |
| |
| <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><code>BigQueryIO.Write</code></a> supports two methods of inserting data into BigQuery specified using |
| <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withMethod-org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method-"><code>BigQueryIO.Write.withMethod(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method)</code></a>. If no method is supplied, then a default method will be |
| chosen based on the input PCollection. See <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.Method.html" title="enum in org.apache.beam.sdk.io.gcp.bigquery"><code>BigQueryIO.Write.Method</code></a> for more information |
| about the methods. The different insertion methods provide different tradeoffs of cost, quota, |
| and data consistency; please see BigQuery documentation for more information about these |
| tradeoffs. |
| |
| <h3>Usage with templates</h3> |
| |
| <p>When using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read--"><code>read()</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> in a template, it's required to specify |
| <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html#withTemplateCompatibility--"><code>BigQueryIO.Read.withTemplateCompatibility()</code></a>. Specifying this in a non-template pipeline is not |
| recommended because it has somewhat lower performance. |
| |
| <p>When using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#write--"><code>write()</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#writeTableRows--"><code>writeTableRows()</code></a> with batch loads in a template, it is |
| recommended to specify <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withCustomGcsTempLocation-org.apache.beam.sdk.options.ValueProvider-"><code>BigQueryIO.Write.withCustomGcsTempLocation(org.apache.beam.sdk.options.ValueProvider&lt;java.lang.String&gt;)</code></a>. Writing to BigQuery via batch |
| loads involves writing temporary files to this location, so the location must be accessible at |
| pipeline execution time. By default, this location is captured at pipeline <i>construction</i> |
| time, and may be inaccessible if the template is reused from a different project or at a moment |
| when the original location no longer exists. <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withCustomGcsTempLocation-org.apache.beam.sdk.options.ValueProvider-"><code>BigQueryIO.Write.withCustomGcsTempLocation(ValueProvider)</code></a> allows specifying the location as an argument to |
| the template invocation. |
| |
| <h3>Permissions</h3> |
| |
| <p>Permission requirements depend on the <a href="../../../../../../../org/apache/beam/sdk/PipelineRunner.html" title="class in org.apache.beam.sdk"><code>PipelineRunner</code></a> that is used to execute the |
| pipeline. Please refer to the documentation of corresponding <a href="../../../../../../../org/apache/beam/sdk/PipelineRunner.html" title="class in org.apache.beam.sdk"><code>PipelineRunner</code></a>s for more |
| details. |
| |
| <p>Please see <a href="https://cloud.google.com/bigquery/access-control">BigQuery Access Control |
| </a> for security and permission related information specific to BigQuery.</div> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ======== NESTED CLASS SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="nested.class.summary"> |
| <!-- --> |
| </a> |
| <h3>Nested Class Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation"> |
| <caption><span>Nested Classes</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Class and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Read</a></span></code> |
| <div class="block">Implementation of <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read--"><code>read()</code></a>.</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a><<a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="type parameter in BigQueryIO.TypedRead">T</a>></span></code> |
| <div class="block">Implementation of <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a>.</div> |
| </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Write</a><<a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="type parameter in BigQueryIO.Write">T</a>></span></code> |
| <div class="block">Implementation of <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#write--"><code>write()</code></a>.</div> |
| </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span><span id="t6" class="tableTab"><span><a href="javascript:show(32);">Deprecated Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Read</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read--">read</a></span>()</code> |
| <div class="block"><span class="deprecatedLabel">Deprecated.</span> |
| <div class="block"><span class="deprecationComment">Use <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> instead. <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> does exactly the same as <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read--"><code>read()</code></a>, however <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> performs better.</span></div> |
| </div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>static &lt;T&gt; <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a>&lt;T&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-">read</a></span>(<a href="../../../../../../../org/apache/beam/sdk/transforms/SerializableFunction.html" title="interface in org.apache.beam.sdk.transforms">SerializableFunction</a><<a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/SchemaAndRecord.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">SchemaAndRecord</a>,T> parseFn)</code> |
| <div class="block">Reads from a BigQuery table or query and returns a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> with one element per |
| each row of the table or query result, parsed from the BigQuery AVRO format using the specified |
| function.</div> |
| </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a><<a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model">TableRow</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--">readTableRows</a></span>()</code> |
| <div class="block">Like <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> but represents each row as a <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>.</div> |
| </td> |
| </tr> |
| <tr id="i3" class="rowColor"> |
| <td class="colFirst"><code>static &lt;T&gt; <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Write</a>&lt;T&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#write--">write</a></span>()</code> |
| <div class="block">A <a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that writes a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> to a BigQuery table.</div> |
| </td> |
| </tr> |
| <tr id="i4" class="altColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Write</a><<a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model">TableRow</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#writeTableRows--">writeTableRows</a></span>()</code> |
| <div class="block">A <a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that writes a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> containing <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRows</code></a> to |
| a BigQuery table.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.Object</h3> |
| <code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="read--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>read</h4> |
| <pre>@Deprecated |
| public static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Read</a> read()</pre> |
| <div class="block"><span class="deprecatedLabel">Deprecated.</span> <span class="deprecationComment">Use <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> or <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> instead. <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#readTableRows--"><code>readTableRows()</code></a> does exactly the same as <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read--"><code>read()</code></a>; however, <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> performs better.</span></div> |
| </li> |
| </ul> |
| <a name="readTableRows--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>readTableRows</h4> |
| <pre>public static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a><<a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model">TableRow</a>> readTableRows()</pre> |
| <div class="block">Like <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> but represents each row as a <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>. |
| |
| <p>This method is more convenient to use in some cases, but usually has significantly lower |
| performance than using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#read-org.apache.beam.sdk.transforms.SerializableFunction-"><code>read(SerializableFunction)</code></a> directly to parse data into a |
| domain-specific type, due to the overhead of converting the rows to <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a>.</div> |
| </li> |
| </ul> |
| <a name="read-org.apache.beam.sdk.transforms.SerializableFunction-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>read</h4> |
| <pre>public static &lt;T&gt; <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.TypedRead.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.TypedRead</a>&lt;T&gt; read(<a href="../../../../../../../org/apache/beam/sdk/transforms/SerializableFunction.html" title="interface in org.apache.beam.sdk.transforms">SerializableFunction</a>&lt;<a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/SchemaAndRecord.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">SchemaAndRecord</a>,T&gt; parseFn)</pre> |
| <div class="block">Reads from a BigQuery table or query and returns a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> with one element per |
| row of the table or query result, parsed from the BigQuery Avro format using the specified |
| function. |
| |
| <p>Each <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/SchemaAndRecord.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><code>SchemaAndRecord</code></a> contains a BigQuery <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableSchema.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableSchema</code></a> and a <a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> representing the row, indexed by column name. Here is a sample parse function |
| that parses click events from a table. |
| |
| <pre><code> |
| class ClickEvent { long userId; String url; ... } |
| |
| p.apply(BigQueryIO.read(new SerializableFunction&lt;SchemaAndRecord, ClickEvent&gt;() { |
|   public ClickEvent apply(SchemaAndRecord record) { |
|     GenericRecord r = record.getRecord(); |
|     return new ClickEvent((Long) r.get("userId"), (String) r.get("url")); |
|   } |
| }).from("...")); |
| </code></pre></div> |
| </li> |
| </ul> |
| <a name="write--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>write</h4> |
| <pre>public static &lt;T&gt; <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Write</a>&lt;T&gt; write()</pre> |
| <div class="block">A <a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that writes a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> to a BigQuery table. A formatting |
| function must be provided to convert each input element into a <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRow</code></a> using <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withFormatFunction-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.withFormatFunction(SerializableFunction)</code></a>. |
| |
| <p>In BigQuery, each table has an enclosing dataset. The dataset being written to must already |
| exist. |
| |
| <p>By default, tables will be created if they do not exist, which corresponds to a <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.CreateDisposition.html#CREATE_IF_NEEDED"><code>BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED</code></a> disposition that matches the default of BigQuery's |
| Jobs API. A schema must be provided (via <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withSchema-com.google.api.services.bigquery.model.TableSchema-"><code>BigQueryIO.Write.withSchema(TableSchema)</code></a>), or else the |
| transform may fail at runtime with an <code>IllegalArgumentException</code>. |
| |
| <p>By default, writes require an empty table, which corresponds to a <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.WriteDisposition.html#WRITE_EMPTY"><code>BigQueryIO.Write.WriteDisposition.WRITE_EMPTY</code></a> disposition that matches the default of BigQuery's Jobs |
| API. |
| |
| <p>Here is a sample transform that produces TableRow values containing "word" and "count" |
| columns: |
| |
| <pre><code> |
| static class FormatCountsFn extends DoFn&lt;KV&lt;String, Long&gt;, TableRow&gt; { |
|   public void processElement(ProcessContext c) { |
|     TableRow row = new TableRow() |
|       .set("word", c.element().getKey()) |
|       .set("count", c.element().getValue().intValue()); |
|     c.output(row); |
|   } |
| } |
| </code></pre></div> |
| </li> |
| </ul> |
| <a name="writeTableRows--"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>writeTableRows</h4> |
| <pre>public static <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html" title="class in org.apache.beam.sdk.io.gcp.bigquery">BigQueryIO.Write</a><<a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model">TableRow</a>> writeTableRows()</pre> |
| <div class="block">A <a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that writes a <a href="../../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> containing <a href="https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableRow.html?is-external=true" title="class or interface in com.google.api.services.bigquery.model"><code>TableRows</code></a> to |
| a BigQuery table. |
| |
| <p>It is recommended to instead use <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html#write--"><code>write()</code></a> with <a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Write.html#withFormatFunction-org.apache.beam.sdk.transforms.SerializableFunction-"><code>BigQueryIO.Write.withFormatFunction(SerializableFunction)</code></a>.</div> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryInsertErrorCoder.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../../../org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.Read.html" title="class in org.apache.beam.sdk.io.gcp.bigquery"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../../../index.html?org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.html" target="_top">Frames</a></li> |
| <li><a href="BigQueryIO.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| </body> |
| </html> |