blob: 4647343693ef78af9bface1c8ff4817ace042737 [file] [log] [blame]
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_161-google-v7) on Thu Oct 18 16:14:35 PDT 2018 -->
<title>ParquetIO (Apache Beam 2.8.0-SNAPSHOT)</title>
<meta name="date" content="2018-10-18">
<link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// Copy this page's title onto the parent document, but only when the URL
// does not contain the 'is-external=true' marker.
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="ParquetIO (Apache Beam 2.8.0-SNAPSHOT)";
}
}
catch(err) {
// Any error raised while updating the parent title is swallowed: this catch block is empty.
}
//-->
// The globals below are not read by any inline code on this page.
// NOTE(review): presumably consumed by the external script.js (e.g. by the
// show() calls in the Method Summary caption) - confirm against that file.
// methods: keyed by the ids of the method-summary rows (i0..i2); every row carries the value 9 here.
var methods = {"i0":9,"i1":9,"i2":9};
// tabs: numeric key mapped to [tab element id, tab label]; keys 1 and 8 match the show(1)/show(8) links in the caption.
var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"]};
// Names that match the CSS classes used on the summary table rows and caption tabs.
var altColor = "altColor";
var rowColor = "rowColor";
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Class</li>
<li><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../index.html?org/apache/beam/sdk/io/parquet/ParquetIO.html" target="_top">Frames</a></li>
<li><a href="ParquetIO.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the top "All Classes" list only when this page is the top-level
// window; hide it when the page is loaded inside another frame.
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window==top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.beam.sdk.io.parquet</div>
<h2 title="Class ParquetIO" class="title">Class ParquetIO</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>java.lang.Object</li>
<li>
<ul class="inheritance">
<li>org.apache.beam.sdk.io.parquet.ParquetIO</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<hr>
<br>
<pre><a href="../../../../../../org/apache/beam/sdk/annotations/Experimental.html" title="annotation in org.apache.beam.sdk.annotations">@Experimental</a>(<a href="../../../../../../org/apache/beam/sdk/annotations/Experimental.html#value--">value</a>=<a href="../../../../../../org/apache/beam/sdk/annotations/Experimental.Kind.html#SOURCE_SINK">SOURCE_SINK</a>)
public class <span class="typeNameLabel">ParquetIO</span>
extends java.lang.Object</pre>
<div class="block">IO to read and write Parquet files.
<h3>Reading Parquet files</h3>
<p><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO</code></a> source returns a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> for Parquet files. The elements in the
<a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> are Avro <a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a>.
<p>To configure the <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Read</code></a>, you have to provide the file patterns (from) of the Parquet
files and the schema.
<p>For example:
<pre><code>
PCollection&lt;GenericRecord&gt; records = pipeline.apply(ParquetIO.read(SCHEMA).from("/foo/bar"));
...
</code></pre>
<p>As <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Read</code></a> is based on <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html" title="class in org.apache.beam.sdk.io"><code>FileIO</code></a>, it supports any filesystem (hdfs, ...).
<p>For more advanced use cases, like reading each file in a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <code>FileIO.ReadableFile</code>, use the <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.ReadFiles</code></a> transform.
<p>For example:
<pre><code>
PCollection&lt;FileIO.ReadableFile&gt; files = pipeline
.apply(FileIO.match().filepattern(options.getInputFilepattern()))
.apply(FileIO.readMatches());
PCollection&lt;GenericRecord&gt; output = files.apply(ParquetIO.readFiles(SCHEMA));
</code></pre>
<h3>Writing Parquet files</h3>
<p><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Sink</code></a> allows you to write a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> into
a Parquet file. It can be used with the general-purpose <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html" title="class in org.apache.beam.sdk.io"><code>FileIO</code></a> transforms with
FileIO.write/writeDynamic specifically.
<p>For example:
<pre><code>
pipeline
.apply(...) // PCollection&lt;GenericRecord&gt;
.apply(FileIO
.&lt;GenericRecord&gt;write()
.via(ParquetIO.sink(SCHEMA))
.to("destination/path"));
</code></pre>
<p>This IO API is considered experimental and may break or receive backwards-incompatible changes
in future versions of the Apache Beam SDK.</div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== NESTED CLASS SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="nested.class.summary">
<!-- -->
</a>
<h3>Nested Class Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation">
<caption><span>Nested Classes</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Class and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Read</a></span></code>
<div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#read-org.apache.avro.Schema-"><code>read(Schema)</code></a>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.ReadFiles</a></span></code>
<div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#readFiles-org.apache.avro.Schema-"><code>readFiles(Schema)</code></a>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Sink</a></span></code>
<div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#sink-org.apache.avro.Schema-"><code>sink(org.apache.avro.Schema)</code></a>.</div>
</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Read</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#read-org.apache.avro.Schema-">read</a></span>(<a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</code>
<div class="block">Reads <a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> from a Parquet file (or multiple Parquet files matching the
pattern).</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.ReadFiles</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#readFiles-org.apache.avro.Schema-">readFiles</a></span>(<a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</code>
<div class="block">Like <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#read-org.apache.avro.Schema-"><code>read(Schema)</code></a>, but reads each file in a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="../../../../../../org/apache/beam/sdk/io/FileIO.ReadableFile.html" title="class in org.apache.beam.sdk.io"><code>FileIO.ReadableFile</code></a>, which allows more flexible usage.</div>
</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Sink</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#sink-org.apache.avro.Schema-">sink</a></span>(<a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</code>
<div class="block">Creates a <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Sink</code></a> for use with <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html#write--"><code>FileIO.write()</code></a>.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="read-org.apache.avro.Schema-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>read</h4>
<pre>public static&nbsp;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Read</a>&nbsp;read(<a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</pre>
<div class="block">Reads <a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/generic/GenericRecord.html?is-external=true" title="class or interface in org.apache.avro.generic"><code>GenericRecord</code></a> from a Parquet file (or multiple Parquet files matching the
pattern).</div>
</li>
</ul>
<a name="readFiles-org.apache.avro.Schema-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>readFiles</h4>
<pre>public static&nbsp;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.ReadFiles</a>&nbsp;readFiles(<a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</pre>
<div class="block">Like <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.html#read-org.apache.avro.Schema-"><code>read(Schema)</code></a>, but reads each file in a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="../../../../../../org/apache/beam/sdk/io/FileIO.ReadableFile.html" title="class in org.apache.beam.sdk.io"><code>FileIO.ReadableFile</code></a>, which allows more flexible usage.</div>
</li>
</ul>
<a name="sink-org.apache.avro.Schema-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>sink</h4>
<pre>public static&nbsp;<a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet">ParquetIO.Sink</a>&nbsp;sink(<a href="http://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/Schema.html?is-external=true" title="class or interface in org.apache.avro">Schema</a>&nbsp;schema)</pre>
<div class="block">Creates a <a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Sink.html" title="class in org.apache.beam.sdk.io.parquet"><code>ParquetIO.Sink</code></a> for use with <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html#write--"><code>FileIO.write()</code></a>.</div>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Class</li>
<li><a href="../../../../../../org/apache/beam/sdk/io/parquet/ParquetIO.Read.html" title="class in org.apache.beam.sdk.io.parquet"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../index.html?org/apache/beam/sdk/io/parquet/ParquetIO.html" target="_top">Frames</a></li>
<li><a href="ParquetIO.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the bottom "All Classes" list only when this page is the top-level
// window; hide it when the page is loaded inside another frame.
allClassesLink = document.getElementById("allclasses_navbar_bottom");
allClassesLink.style.display = (window==top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</body>
</html>