blob: 3d572cf0bba06d24d0b4e219fd74cc8f3c42e104 [file] [log] [blame]
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- GitHub action javadoc-cleanup -->
<link rel="canonical" href="https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/parquet/ParquetIO.html">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- End javadoc-cleanup block -->
<!-- Generated by javadoc -->
<title>ParquetIO (Apache Beam 2.53.0)</title>
<link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="ParquetIO (Apache Beam 2.53.0)";
}
}
catch(err) {
}
//-->
var methods = {"i0":9,"i1":9,"i2":9,"i3":9,"i4":9};
var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"]};
var altColor = "altColor";
var rowColor = "rowColor";
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Class</li>
<li><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Parse.html" title="class in org.apache.beam.sdk.io.parquet"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../index.html?org/apache/beam/sdk/io/parquet/ParquetIO.html" target="_top">Frames</a></li>
<li><a href="ParquetIO.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_top");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.beam.sdk.io.parquet</div>
<h2 title="Class ParquetIO" class="title">Class ParquetIO</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>java.lang.Object</li>
<li>
<ul class="inheritance">
<li>org.apache.beam.sdk.io.parquet.ParquetIO</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<hr>
<br>
<pre>public class <span class="typeNameLabel">ParquetIO</span>
extends java.lang.Object</pre>
<div class="block">IO to read and write Parquet files.
<h3>Reading Parquet files</h3>
<p><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO</code></a> source returns a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> for Parquet files. The elements in the
<a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> are Avro <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a>.
<p>To configure the <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Read</code></a>, you have to provide the file patterns (from) of the Parquet
files and the schema.
<p>For example:
<pre><code>
PCollection&lt;GenericRecord&gt; records = pipeline.apply(ParquetIO.read(SCHEMA).from("/foo/bar"));
...
</code></pre>
<p>As <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Read</code></a> is based on <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html" title="class in org.apache.beam.sdk.io"><code>FileIO</code></a>, it supports any filesystem (hdfs, ...).
<p>When using schemas created via reflection, it may be useful to generate <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a>
instances rather than instances of the class associated with the schema. <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Read</code></a> and <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.ReadFiles</code></a> provide <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html#withAvroDataModel-org.apache.avro.generic.GenericData-"><code>ParquetIO.Read.withAvroDataModel(GenericData)</code></a> allowing implementations
to set the data model associated with the <code>AvroParquetReader</code>.
<p>For more advanced use cases, like reading each file in a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="../../../../../../org/apache/beam/sdk/io/FileIO.ReadableFile.html" title="class in org.apache.beam.sdk.io"><code>FileIO.ReadableFile</code></a>, use the <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.ReadFiles</code></a> transform.
<p>For example:
<pre><code>
PCollection&lt;FileIO.ReadableFile&gt; files = pipeline
.apply(FileIO.match().filepattern(options.getInputFilepattern()))
.apply(FileIO.readMatches());
PCollection&lt;GenericRecord&gt; output = files.apply(ParquetIO.readFiles(SCHEMA));
</code></pre>
<p>ParquetIO leverages splittable reading by using Splittable DoFn. It initially splits the files
into blocks of 64MB and may dynamically split further for higher read efficiency.
<p>Reading with projection can be enabled by supplying a projection schema, as shown below.
Splittable reading is enabled when reading with projection. The projection_schema contains only
the columns that we would like to read, and the encoder_schema contains the schema used to encode
the output, with the unwanted columns changed to nullable. Partial reading decreases the reading
time because the data is only partially processed and partially encoded. The decrease in reading
time depends on the relative position of the columns. Memory allocation is optimised depending on
the encoding schema. Note that the improvement is smaller than the proportion of the data
requested might suggest: the only processing time saved is the time needed to read the unwanted
columns, and the reader still goes over the data set according to the encoding schema, since the
data for each column in a row is stored interleaved.
<pre><code>
PCollection&lt;GenericRecord&gt; records =
pipeline
.apply(
ParquetIO.read(SCHEMA).from("/foo/bar").withProjection(Projection_schema,Encoder_Schema));
</code></pre>
<h3>Reading records of an unknown schema</h3>
<p>To read records from files whose schema is unknown at pipeline construction time or differs
between files, use <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#parseGenericRecords-org.apache.beam.sdk.transforms.SerializableFunction-"><code>parseGenericRecords(SerializableFunction)</code></a> - in this case, you will
need to specify a parsing function for converting each <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> into a value of your
custom type.
<p>For example:
<pre><code>
Pipeline p = ...;
PCollection&lt;Foo&gt; records =
p.apply(
ParquetIO.parseGenericRecords(
new SerializableFunction&lt;GenericRecord, Foo&gt;() {
public Foo apply(GenericRecord record) {
// If needed, access the schema of the record using record.getSchema()
return ...;
}
})
.from(...));
// For reading from files
PCollection&lt;FileIO.ReadableFile&gt; files = p.apply(...);
PCollection&lt;Foo&gt; records =
files
.apply(
ParquetIO.parseFilesGenericRecords(
new SerializableFunction&lt;GenericRecord, Foo&gt;() {
public Foo apply(GenericRecord record) {
// If needed, access the schema of the record using record.getSchema()
return ...;
}
}));
</code></pre>
<h3>Inferring Beam schemas from Parquet files</h3>
<p>If you want to use SQL or schema-based operations on a Parquet-based PCollection, you must
configure the read transform to infer the Beam schema and automatically set up the Beam-related
coders by doing:
<pre><code>
PCollection&lt;GenericRecord&gt; parquetRecords =
p.apply(ParquetIO.read(...).from(...).withBeamSchemas(true));
</code></pre>
You can also use it when reading a list of filenames from a <code>PCollection&lt;String&gt;</code>:
<pre><code>
PCollection&lt;String&gt; filePatterns = p.apply(...);
PCollection&lt;GenericRecord&gt; parquetRecords =
filePatterns
.apply(ParquetIO.readFiles(...).withBeamSchemas(true));
</code></pre>
<h3>Writing Parquet files</h3>
<p><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Sink</code></a> allows you to write a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> into
a Parquet file. It can be used with the general-purpose <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html" title="class in org.apache.beam.sdk.io"><code>FileIO</code></a> transforms with
FileIO.write/writeDynamic specifically.
<p>By default, <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Sink</code></a> produces output files that are compressed using the <code>CompressionCodec.SNAPPY</code>. This default can be changed or overridden
using <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html#withCompressionCodec-org.apache.parquet.hadoop.metadata.CompressionCodecName-"><code>ParquetIO.Sink.withCompressionCodec(CompressionCodecName)</code></a>.
<p>For example:
<pre><code>
pipeline
.apply(...) // PCollection&lt;GenericRecord&gt;
.apply(FileIO
.&lt;GenericRecord&gt;write()
.via(ParquetIO.sink(SCHEMA)
.withCompressionCodec(CompressionCodecName.SNAPPY))
.to("destination/path")
.withSuffix(".parquet"));
</code></pre></div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="https://beam.apache.org/documentation/io/built-in/parquet/">Beam ParquetIO
documentation</a></dd>
</dl>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== NESTED CLASS SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="nested.class.summary">
<!-- -->
</a>
<h3>Nested Class Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation">
<caption><span>Nested Classes</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Class and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Parse.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Parse</a>&lt;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Parse.html" title="type parameter in ParquetIO.Parse">T</a>&gt;</span></code>
<div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#parseGenericRecords-org.apache.beam.sdk.transforms.SerializableFunction-"><code>parseGenericRecords(SerializableFunction)</code></a>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ParseFiles.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.ParseFiles</a>&lt;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ParseFiles.html" title="type parameter in ParquetIO.ParseFiles">T</a>&gt;</span></code>
<div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#parseFilesGenericRecords-org.apache.beam.sdk.transforms.SerializableFunction-"><code>parseFilesGenericRecords(SerializableFunction)</code></a>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Read</a></span></code>
<div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#read-org.apache.avro.Schema-"><code>read(Schema)</code></a>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.ReadFiles</a></span></code>
<div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#readFiles-org.apache.avro.Schema-"><code>readFiles(Schema)</code></a>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Sink</a></span></code>
<div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#sink-org.apache.avro.Schema-"><code>sink(org.apache.avro.Schema)</code></a>.</div>
</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>static &lt;T&gt;&nbsp;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ParseFiles.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.ParseFiles</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#parseFilesGenericRecords-org.apache.beam.sdk.transforms.SerializableFunction-">parseFilesGenericRecords</a></span>(<a href="../../../../../../org/apache/beam/sdk/transforms/SerializableFunction.html" title="interface in org.apache.beam.sdk.transforms">SerializableFunction</a>&lt;<a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic">GenericRecord</a>,T&gt;&nbsp;parseFn)</code>
<div class="block">Reads <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> from Parquet files and converts to user defined type using provided
<code>parseFn</code>.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>static &lt;T&gt;&nbsp;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Parse.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Parse</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#parseGenericRecords-org.apache.beam.sdk.transforms.SerializableFunction-">parseGenericRecords</a></span>(<a href="../../../../../../org/apache/beam/sdk/transforms/SerializableFunction.html" title="interface in org.apache.beam.sdk.transforms">SerializableFunction</a>&lt;<a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic">GenericRecord</a>,T&gt;&nbsp;parseFn)</code>
<div class="block">Reads <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> from a Parquet file (or multiple Parquet files matching the
pattern) and converts to user defined type using provided parseFn.</div>
</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Read</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#read-org.apache.avro.Schema-">read</a></span>(<a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</code>
<div class="block">Reads <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> from a Parquet file (or multiple Parquet files matching the
pattern).</div>
</td>
</tr>
<tr id="i3" class="rowColor">
<td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.ReadFiles</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#readFiles-org.apache.avro.Schema-">readFiles</a></span>(<a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</code>
<div class="block">Like <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#read-org.apache.avro.Schema-"><code>read(Schema)</code></a>, but reads each file in a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="../../../../../../org/apache/beam/sdk/io/FileIO.ReadableFile.html" title="class in org.apache.beam.sdk.io"><code>FileIO.ReadableFile</code></a>, which allows more flexible usage.</div>
</td>
</tr>
<tr id="i4" class="altColor">
<td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Sink</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#sink-org.apache.avro.Schema-">sink</a></span>(<a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</code>
<div class="block">Creates a <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Sink</code></a> for use with <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html#write--"><code>FileIO.write()</code></a>.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="read-org.apache.avro.Schema-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>read</h4>
<pre>public static&nbsp;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Read</a>&nbsp;read(<a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</pre>
<div class="block">Reads <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> from a Parquet file (or multiple Parquet files matching the
pattern).</div>
</li>
</ul>
<a name="readFiles-org.apache.avro.Schema-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>readFiles</h4>
<pre>public static&nbsp;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.ReadFiles</a>&nbsp;readFiles(<a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</pre>
<div class="block">Like <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#read-org.apache.avro.Schema-"><code>read(Schema)</code></a>, but reads each file in a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="../../../../../../org/apache/beam/sdk/io/FileIO.ReadableFile.html" title="class in org.apache.beam.sdk.io"><code>FileIO.ReadableFile</code></a>, which allows more flexible usage.</div>
</li>
</ul>
<a name="parseGenericRecords-org.apache.beam.sdk.transforms.SerializableFunction-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>parseGenericRecords</h4>
<pre>public static&nbsp;&lt;T&gt;&nbsp;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Parse.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Parse</a>&lt;T&gt;&nbsp;parseGenericRecords(<a href="../../../../../../org/apache/beam/sdk/transforms/SerializableFunction.html" title="interface in org.apache.beam.sdk.transforms">SerializableFunction</a>&lt;<a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic">GenericRecord</a>,T&gt;&nbsp;parseFn)</pre>
<div class="block">Reads <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> from a Parquet file (or multiple Parquet files matching the
pattern) and converts to user defined type using provided parseFn.</div>
</li>
</ul>
<a name="parseFilesGenericRecords-org.apache.beam.sdk.transforms.SerializableFunction-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>parseFilesGenericRecords</h4>
<pre>public static&nbsp;&lt;T&gt;&nbsp;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ParseFiles.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.ParseFiles</a>&lt;T&gt;&nbsp;parseFilesGenericRecords(<a href="../../../../../../org/apache/beam/sdk/transforms/SerializableFunction.html" title="interface in org.apache.beam.sdk.transforms">SerializableFunction</a>&lt;<a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic">GenericRecord</a>,T&gt;&nbsp;parseFn)</pre>
<div class="block">Reads <a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> from Parquet files and converts to user defined type using provided
<code>parseFn</code>.</div>
</li>
</ul>
<a name="sink-org.apache.avro.Schema-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>sink</h4>
<pre>public static&nbsp;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Sink</a>&nbsp;sink(<a href="https://static.javadoc.io/org.apache.avro/avro/1.8.2/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</pre>
<div class="block">Creates a <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Sink</code></a> for use with <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html#write--"><code>FileIO.write()</code></a>.</div>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Class</li>
<li><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Parse.html" title="class in org.apache.beam.sdk.io.parquet"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../index.html?org/apache/beam/sdk/io/parquet/ParquetIO.html" target="_top">Frames</a></li>
<li><a href="ParquetIO.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_bottom");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</body>
</html>