<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_181-google-v7) on Mon Jan 27 16:42:29 PST 2020 -->
<title>HadoopFormatIO (Apache Beam 2.20.0-SNAPSHOT)</title>
<meta name="date" content="2020-01-27">
<link rel="stylesheet" type="text/css" href="../../../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="HadoopFormatIO (Apache Beam 2.20.0-SNAPSHOT)";
}
}
catch(err) {
}
//-->
var methods = {"i0":9,"i1":9};
var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"]};
var altColor = "altColor";
var rowColor = "rowColor";
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/ExternalSynchronization.html" title="interface in org.apache.beam.sdk.io.hadoop.format"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.HadoopInputFormatBoundedSource.html" title="class in org.apache.beam.sdk.io.hadoop.format"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../../index.html?org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html" target="_top">Frames</a></li>
<li><a href="HadoopFormatIO.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_top");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li><a href="#field.summary">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li><a href="#field.detail">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.beam.sdk.io.hadoop.format</div>
<h2 title="Class HadoopFormatIO" class="title">Class HadoopFormatIO</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>java.lang.Object</li>
<li>
<ul class="inheritance">
<li>org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<hr>
<br>
<pre><a href="../../../../../../../org/apache/beam/sdk/annotations/Experimental.html" title="annotation in org.apache.beam.sdk.annotations">@Experimental</a>(<a href="../../../../../../../org/apache/beam/sdk/annotations/Experimental.html#value--">value</a>=<a href="../../../../../../../org/apache/beam/sdk/annotations/Experimental.Kind.html#SOURCE_SINK">SOURCE_SINK</a>)
public class <span class="typeNameLabel">HadoopFormatIO</span>
extends java.lang.Object</pre>
<div class="block">A <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html" title="class in org.apache.beam.sdk.io.hadoop.format"><code>HadoopFormatIO</code></a> is a Transform for reading data from any source or writing data to any
sink which implements Hadoop <code>InputFormat</code> or <code>OutputFormat</code>. For example: Cassandra,
Elasticsearch, HBase, Redis, Postgres etc. <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html" title="class in org.apache.beam.sdk.io.hadoop.format"><code>HadoopFormatIO</code></a> has to make several performance
trade-offs in connecting to <code>InputFormat</code> or <code>OutputFormat</code>, so if there is another
Beam IO Transform specifically for connecting to your data source of choice, we would recommend
using that one, but this IO Transform allows you to connect to many data sources/sinks that do
not yet have a Beam IO Transform.
<h3>Reading using Hadoop <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html" title="class in org.apache.beam.sdk.io.hadoop.format"><code>HadoopFormatIO</code></a></h3>
<p>You will need to pass a Hadoop <code>Configuration</code> with parameters specifying how the read
will occur. Many properties of the Configuration are optional, and some are required for certain
<code>InputFormat</code> classes, but the following properties must be set for all InputFormats:
<ul>
<li><code>mapreduce.job.inputformat.class</code>: The <code>InputFormat</code> class used to connect to
your data source of choice.
<li><code>key.class</code>: The key class returned by the <code>InputFormat</code> in <code>mapreduce.job.inputformat.class</code>.
<li><code>value.class</code>: The value class returned by the <code>InputFormat</code> in <code>mapreduce.job.inputformat.class</code>.
</ul>
For example:
<pre><code>
Configuration myHadoopConfiguration = new Configuration(false);
// Set Hadoop InputFormat, key and value class in configuration
myHadoopConfiguration.setClass(&quot;mapreduce.job.inputformat.class&quot;,
    MyDbInputFormatClass, InputFormat.class);
myHadoopConfiguration.setClass(&quot;key.class&quot;, MyDbInputFormatKeyClass, Object.class);
myHadoopConfiguration.setClass(&quot;value.class&quot;,
    MyDbInputFormatValueClass, Object.class);
</code></pre>
<p>You will need to check whether the key and value classes output by the <code>InputFormat</code>
have a Beam <a href="../../../../../../../org/apache/beam/sdk/coders/Coder.html" title="class in org.apache.beam.sdk.coders"><code>Coder</code></a> available. If not, you can use withKeyTranslation/withValueTranslation
to specify a method that transforms instances of those classes into another class supported
by a Beam <a href="../../../../../../../org/apache/beam/sdk/coders/Coder.html" title="class in org.apache.beam.sdk.coders"><code>Coder</code></a>. These settings are optional and you don't need to specify a translation
for both key and value. If you do specify a translation, make sure the K or V of
the read transform matches the output type of the translation.
<p>You will need to set the appropriate InputFormat key and value classes (i.e. "key.class" and
"value.class") in the Hadoop <code>Configuration</code>. If the key or value class you set differs from the
InputFormat's actual key or value class, decoding the key/value objects may fail with an error such as
"unexpected extra bytes after decoding". Hence, it is important that these classes match the InputFormat.
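<p>As an illustration of matching these properties to an InputFormat, the following is a minimal sketch
for Hadoop's <code>TextInputFormat</code>, whose actual key and value types are <code>LongWritable</code>
and <code>Text</code> (the input path is an illustrative placeholder):
<pre><code>
// Minimal sketch: "key.class" and "value.class" must match what TextInputFormat actually emits.
Configuration textConf = new Configuration(false);
textConf.setClass(&quot;mapreduce.job.inputformat.class&quot;, TextInputFormat.class, InputFormat.class);
textConf.setClass(&quot;key.class&quot;, LongWritable.class, Object.class);
textConf.setClass(&quot;value.class&quot;, Text.class, Object.class);
// FileInputFormat-based formats also need an input directory (placeholder path).
textConf.set(&quot;mapreduce.input.fileinputformat.inputdir&quot;, &quot;hdfs://namenode/path/to/input&quot;);
</code></pre>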
<pre><code>
Pipeline p = ...; // Create pipeline.
// Read data only with Hadoop configuration.
p.apply("read",
    HadoopFormatIO.&lt;InputFormatKeyClass, InputFormatValueClass&gt;read()
        .withConfiguration(myHadoopConfiguration));
</code></pre>
<pre><code>
// Read data with configuration and key translation (example scenario: a Beam Coder is not
// available for the key class, hence a key translation is required).
SimpleFunction&lt;InputFormatKeyClass, MyKeyClass&gt; myOutputKeyType =
    new SimpleFunction&lt;InputFormatKeyClass, MyKeyClass&gt;() {
      public MyKeyClass apply(InputFormatKeyClass input) {
        // ...logic to transform InputFormatKeyClass to MyKeyClass
      }
    };
</code></pre>
<pre><code>
p.apply("read",
HadoopFormatIO.&lt;MyKeyClass, InputFormatKeyClass&gt;read()
.withConfiguration(myHadoopConfiguration)
.withKeyTranslation(myOutputKeyType);
</code></pre>
<p>Read data with configuration and value translation (example scenario: a Beam <code>Coder</code> is not
available for the value class, hence a value translation is required):
<pre><code>
SimpleFunction&lt;InputFormatValueClass, MyValueClass&gt; myOutputValueType =
    new SimpleFunction&lt;InputFormatValueClass, MyValueClass&gt;() {
      public MyValueClass apply(InputFormatValueClass input) {
        // ...logic to transform InputFormatValueClass to MyValueClass
      }
    };
</code></pre>
<pre><code>
p.apply("read",
HadoopFormatIO.&lt;InputFormatKeyClass, MyValueClass&gt;read()
.withConfiguration(myHadoopConfiguration)
.withValueTranslation(myOutputValueType);
</code></pre>
<p>IMPORTANT! When using <code>DBInputFormat</code> to read data from an RDBMS, Beam parallelizes
the read by using LIMIT and OFFSET clauses in the SQL query, so that different workers fetch different
ranges of records (splits). To guarantee a consistent order and a correct split of the results, you
need to order them by one or more PRIMARY or UNIQUE keys. This can be done during the configuration
step, for example:
<pre><code>
Configuration conf = new Configuration();
conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tableName);
conf.setStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, "id", "name");
conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, "id ASC");
</code></pre>
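<p>For reference, a fuller sketch of a <code>DBInputFormat</code> read configuration (the JDBC connection
details, table name, and <code>MyDbWritable</code> value class are illustrative placeholders):
<pre><code>
Configuration conf = new Configuration(false);
// DBInputFormat emits LongWritable keys and DBWritable values.
conf.setClass("mapreduce.job.inputformat.class", DBInputFormat.class, InputFormat.class);
conf.setClass("key.class", LongWritable.class, Object.class);
conf.setClass("value.class", MyDbWritable.class, Object.class);
// JDBC connection settings (placeholders).
DBConfiguration.configureDB(conf,
    "com.mysql.jdbc.Driver", "jdbc:mysql://localhost/mydb", "user", "password");
conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, "my_table");
conf.setStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, "id", "name");
// Order by a PRIMARY or UNIQUE key so that LIMIT/OFFSET splits are deterministic.
conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, "id ASC");
// A value translation or a registered Coder may be needed for MyDbWritable (see above).
</code></pre>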
<h3>Writing using Hadoop <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html" title="class in org.apache.beam.sdk.io.hadoop.format"><code>HadoopFormatIO</code></a></h3>
<p>You will need to pass a Hadoop <code>Configuration</code> with parameters specifying how the write
will occur. Many properties of the Configuration are optional, and some are required for certain
<code>OutputFormat</code> classes, but the following properties must be set for all OutputFormats:
<ul>
<li><code>mapreduce.job.id</code>: The identifier of the write job, e.g. the end timestamp of the window.
<li><code>mapreduce.job.outputformat.class</code>: The <code>OutputFormat</code> class used to connect to
your data sink of choice.
<li><code>mapreduce.job.output.key.class</code>: The key class passed to the <code>OutputFormat</code> in
<code>mapreduce.job.outputformat.class</code>.
<li><code>mapreduce.job.output.value.class</code>: The value class passed to the <code>OutputFormat</code> in <code>mapreduce.job.outputformat.class</code>.
<li><code>mapreduce.job.reduces</code>: The number of reduce tasks, which equals the number of write
tasks that will be generated. This property is not required for a <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Write.PartitionedWriterBuilder.html#withoutPartitioning--"><code>HadoopFormatIO.Write.PartitionedWriterBuilder.withoutPartitioning()</code></a> write.
<li><code>mapreduce.job.partitioner.class</code>: The Hadoop partitioner class used to
distribute records among partitions. This property is not required for a <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Write.PartitionedWriterBuilder.html#withoutPartitioning--"><code>HadoopFormatIO.Write.PartitionedWriterBuilder.withoutPartitioning()</code></a> write.
</ul>
<b>Note:</b> All of the property names mentioned above have corresponding constants on this class, e.g. <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#OUTPUT_FORMAT_CLASS_ATTR"><code>OUTPUT_FORMAT_CLASS_ATTR</code></a>; an equivalent configuration using these constants is shown after the example below.
<p>For example:
<pre><code>
Configuration myHadoopConfiguration = new Configuration(false);
// Set Hadoop OutputFormat, key and value class in configuration
myHadoopConfiguration.setClass(&quot;mapreduce.job.outputformat.class&quot;,
    MyDbOutputFormatClass, OutputFormat.class);
myHadoopConfiguration.setClass(&quot;mapreduce.job.output.key.class&quot;,
    MyDbOutputFormatKeyClass, Object.class);
myHadoopConfiguration.setClass(&quot;mapreduce.job.output.value.class&quot;,
    MyDbOutputFormatValueClass, Object.class);
myHadoopConfiguration.setClass(&quot;mapreduce.job.partitioner.class&quot;,
    MyPartitionerClass, Object.class);
myHadoopConfiguration.setInt(&quot;mapreduce.job.reduces&quot;, 2);
</code></pre>
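<p>Equivalently, the same configuration can be written against the constants declared on this class
(a sketch; the <code>MyDb*</code> placeholder classes are the same as in the example above):
<pre><code>
Configuration myHadoopConfiguration = new Configuration(false);
// Same property names as above, referenced via the HadoopFormatIO constants.
myHadoopConfiguration.setClass(HadoopFormatIO.OUTPUT_FORMAT_CLASS_ATTR,
    MyDbOutputFormatClass, OutputFormat.class);
myHadoopConfiguration.setClass(HadoopFormatIO.OUTPUT_KEY_CLASS,
    MyDbOutputFormatKeyClass, Object.class);
myHadoopConfiguration.setClass(HadoopFormatIO.OUTPUT_VALUE_CLASS,
    MyDbOutputFormatValueClass, Object.class);
myHadoopConfiguration.setClass(HadoopFormatIO.PARTITIONER_CLASS_ATTR,
    MyPartitionerClass, Object.class);
myHadoopConfiguration.setInt(HadoopFormatIO.NUM_REDUCES, 2);
</code></pre>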
<p>You will need to set the OutputFormat key and value classes (i.e. "mapreduce.job.output.key.class"
and "mapreduce.job.output.value.class") in the Hadoop <code>Configuration</code>; they must be equal to
<code>KeyT</code> and <code>ValueT</code>. If the OutputFormat key or value class you set differs from the
OutputFormat's actual key or value class, an <code>IllegalArgumentException</code> is thrown.
<h4>Batch writing</h4>
<pre><code>
// Data which we want to write
PCollection&lt;KV&lt;Text, LongWritable&gt;&gt; boundedWordsCount = ...;
// Hadoop configuration for the write
// This is a partitioned write, so the Partitioner class and the reducer count have to be set - see withPartitioning() javadoc
Configuration myHadoopConfiguration = ...;
// Path to the directory with locks
String locksDirPath = ...;
boundedWordsCount.apply(
"writeBatch",
HadoopFormatIO.&lt;Text, LongWritable&gt;write()
.withConfiguration(myHadoopConfiguration)
.withPartitioning()
.withExternalSynchronization(new HDFSSynchronization(locksDirPath)));
</code></pre>
<h4>Stream writing</h4>
<pre><code>
// Data which we want to write
PCollection&lt;KV&lt;Text, LongWritable&gt;&gt; unboundedWordsCount = ...;
// Transformation which derives one Hadoop Configuration from the data of one window
PTransform&lt;PCollection&lt;? extends KV&lt;Text, LongWritable&gt;&gt;, PCollectionView&lt;Configuration&gt;&gt;
    configTransform = ...;
// Path to the directory with locks (see the batch example above)
String locksDirPath = ...;
unboundedWordsCount.apply(
"writeStream",
HadoopFormatIO.&lt;Text, LongWritable&gt;write()
.withConfigurationTransform(configTransform)
.withExternalSynchronization(new HDFSSynchronization(locksDirPath)));
</code></pre></div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== NESTED CLASS SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="nested.class.summary">
<!-- -->
</a>
<h3>Nested Class Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation">
<caption><span>Nested Classes</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Class and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.HadoopInputFormatBoundedSource.html" title="class in org.apache.beam.sdk.io.hadoop.format">HadoopFormatIO.HadoopInputFormatBoundedSource</a>&lt;<a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.HadoopInputFormatBoundedSource.html" title="type parameter in HadoopFormatIO.HadoopInputFormatBoundedSource">K</a>,<a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.HadoopInputFormatBoundedSource.html" title="type parameter in HadoopFormatIO.HadoopInputFormatBoundedSource">V</a>&gt;</span></code>
<div class="block">Bounded source implementation for <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html" title="class in org.apache.beam.sdk.io.hadoop.format"><code>HadoopFormatIO</code></a>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Read.html" title="class in org.apache.beam.sdk.io.hadoop.format">HadoopFormatIO.Read</a>&lt;<a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Read.html" title="type parameter in HadoopFormatIO.Read">K</a>,<a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Read.html" title="type parameter in HadoopFormatIO.Read">V</a>&gt;</span></code>
<div class="block">A <a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that reads from any data source which implements Hadoop InputFormat.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.SerializableSplit.html" title="class in org.apache.beam.sdk.io.hadoop.format">HadoopFormatIO.SerializableSplit</a></span></code>
<div class="block">A wrapper to allow Hadoop <code>InputSplit</code> to be serialized using
Java's standard serialization mechanisms.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Write.html" title="class in org.apache.beam.sdk.io.hadoop.format">HadoopFormatIO.Write</a>&lt;<a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Write.html" title="type parameter in HadoopFormatIO.Write">KeyT</a>,<a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Write.html" title="type parameter in HadoopFormatIO.Write">ValueT</a>&gt;</span></code>
<div class="block">A <a href="../../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that writes to any data sink which implements Hadoop OutputFormat.</div>
</td>
</tr>
</table>
</li>
</ul>
<!-- =========== FIELD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="field.summary">
<!-- -->
</a>
<h3>Field Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation">
<caption><span>Fields</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Field and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#JOB_ID">JOB_ID</a></span></code>
<div class="block"><code>MRJobConfig.ID</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#NUM_REDUCES">NUM_REDUCES</a></span></code>
<div class="block"><code>MRJobConfig.NUM_REDUCES</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#OUTPUT_DIR">OUTPUT_DIR</a></span></code>
<div class="block"><code>MRJobConfig.MAPREDUCE_JOB_DIR</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#OUTPUT_FORMAT_CLASS_ATTR">OUTPUT_FORMAT_CLASS_ATTR</a></span></code>
<div class="block"><code>MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#OUTPUT_KEY_CLASS">OUTPUT_KEY_CLASS</a></span></code>
<div class="block"><code>MRJobConfig.OUTPUT_KEY_CLASS</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#OUTPUT_VALUE_CLASS">OUTPUT_VALUE_CLASS</a></span></code>
<div class="block"><code>MRJobConfig.OUTPUT_VALUE_CLASS</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#PARTITIONER_CLASS_ATTR">PARTITIONER_CLASS_ATTR</a></span></code>
<div class="block"><code>MRJobConfig.PARTITIONER_CLASS_ATTR</code>.</div>
</td>
</tr>
</table>
</li>
</ul>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#HadoopFormatIO--">HadoopFormatIO</a></span>()</code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>static &lt;K,V&gt;&nbsp;<a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Read.html" title="class in org.apache.beam.sdk.io.hadoop.format">HadoopFormatIO.Read</a>&lt;K,V&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#read--">read</a></span>()</code>
<div class="block">Creates an uninitialized <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Read.html" title="class in org.apache.beam.sdk.io.hadoop.format"><code>HadoopFormatIO.Read</code></a>.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>static &lt;KeyT,ValueT&gt;<br><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Write.WriteBuilder.html" title="interface in org.apache.beam.sdk.io.hadoop.format">HadoopFormatIO.Write.WriteBuilder</a>&lt;KeyT,ValueT&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html#write--">write</a></span>()</code>
<div class="block">Creates an <code>Write.Builder</code> for creation of Write Transformation.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ============ FIELD DETAIL =========== -->
<ul class="blockList">
<li class="blockList"><a name="field.detail">
<!-- -->
</a>
<h3>Field Detail</h3>
<a name="OUTPUT_FORMAT_CLASS_ATTR">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>OUTPUT_FORMAT_CLASS_ATTR</h4>
<pre>public static final&nbsp;java.lang.String OUTPUT_FORMAT_CLASS_ATTR</pre>
<div class="block"><code>MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR</code>.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../../../constant-values.html#org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.OUTPUT_FORMAT_CLASS_ATTR">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="OUTPUT_KEY_CLASS">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>OUTPUT_KEY_CLASS</h4>
<pre>public static final&nbsp;java.lang.String OUTPUT_KEY_CLASS</pre>
<div class="block"><code>MRJobConfig.OUTPUT_KEY_CLASS</code>.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../../../constant-values.html#org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.OUTPUT_KEY_CLASS">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="OUTPUT_VALUE_CLASS">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>OUTPUT_VALUE_CLASS</h4>
<pre>public static final&nbsp;java.lang.String OUTPUT_VALUE_CLASS</pre>
<div class="block"><code>MRJobConfig.OUTPUT_VALUE_CLASS</code>.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../../../constant-values.html#org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.OUTPUT_VALUE_CLASS">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="NUM_REDUCES">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>NUM_REDUCES</h4>
<pre>public static final&nbsp;java.lang.String NUM_REDUCES</pre>
<div class="block"><code>MRJobConfig.NUM_REDUCES</code>.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../../../constant-values.html#org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.NUM_REDUCES">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="PARTITIONER_CLASS_ATTR">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>PARTITIONER_CLASS_ATTR</h4>
<pre>public static final&nbsp;java.lang.String PARTITIONER_CLASS_ATTR</pre>
<div class="block"><code>MRJobConfig.PARTITIONER_CLASS_ATTR</code>.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../../../constant-values.html#org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.PARTITIONER_CLASS_ATTR">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="JOB_ID">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>JOB_ID</h4>
<pre>public static final&nbsp;java.lang.String JOB_ID</pre>
<div class="block"><code>MRJobConfig.ID</code>.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../../../constant-values.html#org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.JOB_ID">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a name="OUTPUT_DIR">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>OUTPUT_DIR</h4>
<pre>public static final&nbsp;java.lang.String OUTPUT_DIR</pre>
<div class="block"><code>MRJobConfig.MAPREDUCE_JOB_DIR</code>.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../../../constant-values.html#org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO.OUTPUT_DIR">Constant Field Values</a></dd>
</dl>
</li>
</ul>
</li>
</ul>
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="HadoopFormatIO--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>HadoopFormatIO</h4>
<pre>public&nbsp;HadoopFormatIO()</pre>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="read--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>read</h4>
<pre>public static&nbsp;&lt;K,V&gt;&nbsp;<a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Read.html" title="class in org.apache.beam.sdk.io.hadoop.format">HadoopFormatIO.Read</a>&lt;K,V&gt;&nbsp;read()</pre>
<div class="block">Creates an uninitialized <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Read.html" title="class in org.apache.beam.sdk.io.hadoop.format"><code>HadoopFormatIO.Read</code></a>. Before use, the <code>Read</code> must be
initialized with a HadoopFormatIO.Read#withConfiguration(HadoopConfiguration) that specifies
the source. A key/value translation may also optionally be specified using <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Read.html#withKeyTranslation-org.apache.beam.sdk.transforms.SimpleFunction-"><code>HadoopFormatIO.Read.withKeyTranslation(org.apache.beam.sdk.transforms.SimpleFunction&lt;?, K&gt;)</code></a>/ <a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Read.html#withValueTranslation-org.apache.beam.sdk.transforms.SimpleFunction-"><code>HadoopFormatIO.Read.withValueTranslation(org.apache.beam.sdk.transforms.SimpleFunction&lt;?, V&gt;)</code></a>.</div>
</li>
</ul>
<a name="write--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>write</h4>
<pre>public static&nbsp;&lt;KeyT,ValueT&gt;&nbsp;<a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.Write.WriteBuilder.html" title="interface in org.apache.beam.sdk.io.hadoop.format">HadoopFormatIO.Write.WriteBuilder</a>&lt;KeyT,ValueT&gt;&nbsp;write()</pre>
<div class="block">Creates an <code>Write.Builder</code> for creation of Write Transformation. Before creation of the
transformation, chain of builders must be set.</div>
<dl>
<dt><span class="paramLabel">Type Parameters:</span></dt>
<dd><code>KeyT</code> - Type of keys to be written.</dd>
<dd><code>ValueT</code> - Type of values to be written.</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>Write builder</dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/ExternalSynchronization.html" title="interface in org.apache.beam.sdk.io.hadoop.format"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../../../../org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.HadoopInputFormatBoundedSource.html" title="class in org.apache.beam.sdk.io.hadoop.format"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../../index.html?org/apache/beam/sdk/io/hadoop/format/HadoopFormatIO.html" target="_top">Frames</a></li>
<li><a href="HadoopFormatIO.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_bottom");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li><a href="#field.summary">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li><a href="#field.detail">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</body>
</html>