<!-- blob: 24ebe8e0187e82d6f9223b83f9b6477eccc8698e [file] [log] [blame] -->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc -->
<title>CoGroup (Apache Beam 2.38.0-SNAPSHOT)</title>
<link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// If the page URL does not contain 'is-external=true', set the parent
// document's title to this page's title.
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="CoGroup (Apache Beam 2.38.0-SNAPSHOT)";
}
}
catch(err) {
// Best effort: any error raised while setting the parent title is ignored.
}
//-->
// The globals below are not read by any inline script in this page;
// presumably they are consumed by script.js (loaded in the page head) to
// drive the Method Summary tabs -- TODO confirm against script.js.
// methods: keys match the Method Summary row ids (i0, i1). The value 9 is
// presumably a bit mask (1 | 8) matching the keys of tabs -- TODO confirm.
var methods = {"i0":9,"i1":9};
// tabs: each entry pairs a caption tab id (t0, t1, t4) with its label; the
// keys 1 and 8 are the arguments passed to show() by the caption links.
var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"]};
// Class names used on the alternating rows of the summary tables.
var altColor = "altColor";
var rowColor = "rowColor";
// Class names used on the inactive / active tab spans in the table caption.
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/Cast.Widening.html" title="class in org.apache.beam.sdk.schemas.transforms"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../index.html?org/apache/beam/sdk/schemas/transforms/CoGroup.html" target="_top">Frames</a></li>
<li><a href="CoGroup.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the top "All Classes" link only when this page is the topmost window;
// hide it otherwise.
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window==top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.beam.sdk.schemas.transforms</div>
<h2 title="Class CoGroup" class="title">Class CoGroup</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>java.lang.Object</li>
<li>
<ul class="inheritance">
<li>org.apache.beam.sdk.schemas.transforms.CoGroup</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<hr>
<br>
<pre><a href="../../../../../../org/apache/beam/sdk/annotations/Experimental.html" title="annotation in org.apache.beam.sdk.annotations">@Experimental</a>(<a href="../../../../../../org/apache/beam/sdk/annotations/Experimental.html#value--">value</a>=<a href="../../../../../../org/apache/beam/sdk/annotations/Experimental.Kind.html#SCHEMAS">SCHEMAS</a>)
public class <span class="typeNameLabel">CoGroup</span>
extends java.lang.Object</pre>
<div class="block">A transform that performs equijoins across multiple schema <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a>s.
<p>This transform has similarities to <a href="../../../../../../org/apache/beam/sdk/transforms/join/CoGroupByKey.html" title="class in org.apache.beam.sdk.transforms.join"><code>CoGroupByKey</code></a>, but it works on PCollections that
have schemas. This allows users of the transform to simply specify schema fields to join on. The
output type of the transform is <code>Row</code> that contains one row field for the key and an ITERABLE
field for each input containing the rows that joined on that key; by default the cross product is
not expanded, but the cross product can be optionally expanded. By default the key field is named
"key" (the name can be overridden using withKeyField) and has index 0. The tags in the
PCollectionTuple control the names of the value fields in the Row.
<p>For example, the following demonstrates joining three PCollections on the "user" and "country"
fields:
<pre><code> PCollection&lt;Row&gt; joined =
PCollectionTuple.of("input1", input1, "input2", input2, "input3", input3)
.apply(CoGroup.join(By.fieldNames("user", "country")));
</code></pre>
<p>In the above case, the key schema will contain the two string fields "user" and "country"; in
this case, the schemas for Input1, Input2, Input3 must all have fields named "user" and
"country". The remainder of the Row will contain three iterable-of-Row fields named "input1",
"input2" and "input3". These contain all rows that came in on each of the inputs for that key.
Standard join types (inner join, outer join, etc.) can be accomplished by expanding the cross
product of these iterables in various ways.
<p>To put it in other words, the key schema is convertible to the following POJO:
<pre> @DefaultSchema(JavaFieldSchema.class)
public class JoinedKey {
public String user;
public String country;
}</pre>
<p>The value schema is convertible to the following POJO:
<pre><code>@DefaultSchema(JavaFieldSchema.class)
public class JoinedValue {
  public JoinedKey key;
  // The below lists contain all values from each of the three inputs that match on the given
  // key.
  public Iterable&lt;Input1Type&gt; input1;
  public Iterable&lt;Input2Type&gt; input2;
  public Iterable&lt;Input3Type&gt; input3;
}
PCollection&lt;JoinedValue&gt; values = joined.apply(Convert.to(JoinedValue.class));
PCollection&lt;JoinedKey&gt; keys = values
    .apply(Select.fieldNames("key"))
    .apply(Convert.to(JoinedKey.class));
</code></pre>
<p>It's also possible to join between different fields in two inputs, as long as the types of
those fields match. In this case, fields must be specified for every input PCollection. For
example:
<pre><code>PCollection&lt;Row&gt; joined
    = PCollectionTuple.of("input1Tag", input1, "input2Tag", input2)
        .apply(CoGroup
            .join("input1Tag", By.fieldNames("referringUser"))
            .join("input2Tag", By.fieldNames("user")));
</code></pre>
<p>Traditional (SQL) joins are cross-product joins. All rows that match the join condition are
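combined into individual rows and returned; in fact any SQL inner join is a subset of the
cross-product of two tables. This transform also supports the same functionality using the
<a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html#crossProductJoin--"><code>CoGroup.Impl.crossProductJoin()</code></a> method.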
<p>For example, consider the SQL join: SELECT * FROM input1 INNER JOIN input2 ON input1.user =
input2.user
<p>You could express this with:
<pre><code>PCollection&lt;Row&gt; joined = PCollectionTuple.of("input1", input1, "input2", input2)
    .apply(CoGroup.join(By.fieldNames("user")).crossProductJoin());
</code></pre>
<p>The schema of the output PCollection contains a nested message for each of input1 and input2.
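Like above, you could use the <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/Convert.html" title="class in org.apache.beam.sdk.schemas.transforms"><code>Convert</code></a> transform to convert it to the following POJO: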
<pre><code>@DefaultSchema(JavaFieldSchema.class)
public class JoinedValue {
  public Input1Type input1;
  public Input2Type input2;
}
</code></pre>
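<p><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/Select.html#flattenedSchema--"><code>Select.flattenedSchema()</code></a> can then be used to flatten all the subfields into one single
top-level row containing all the fields in both Input1 and Input2; this will often be combined
with a <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/Select.html" title="class in org.apache.beam.sdk.schemas.transforms"><code>Select</code></a> transform to select out the fields of interest, as the key fields will be
identical between input1 and input2.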
<p>This transform also supports outer-join semantics. By default, all input PCollections must
participate fully in the join, providing inner-join semantics. This means that the join will only
produce values for "Bob" if all inputs have values for "Bob;" if even a single input does not
have a value for "Bob," an inner-join will produce no value. However, if you mark that input as
having optional participation then the join will contain values for "Bob," as long as at least
one input has a "Bob" value; null values will be added for inputs that have no "Bob" values. To
continue the SQL example:
<p>SELECT * FROM input1 LEFT OUTER JOIN input2 ON input1.user = input2.user
<p>Is equivalent to:
<pre><code>PCollection&lt;Row&gt; joined = PCollectionTuple.of("input1", input1, "input2", input2)
    .apply(CoGroup.join("input1", By.fieldNames("user").withOptionalParticipation())
        .join("input2", By.fieldNames("user"))
        .crossProductJoin());
</code></pre>
<p>SELECT * FROM input1 RIGHT OUTER JOIN input2 ON input1.user = input2.user
<p>Is equivalent to:
<pre><code>PCollection&lt;Row&gt; joined = PCollectionTuple.of("input1", input1, "input2", input2)
    .apply(CoGroup.join("input1", By.fieldNames("user"))
        .join("input2", By.fieldNames("user").withOptionalParticipation())
        .crossProductJoin());
</code></pre>
<p>and SELECT * FROM input1 FULL OUTER JOIN input2 ON input1.user = input2.user
<p>Is equivalent to:
<pre><code>PCollection&lt;Row&gt; joined = PCollectionTuple.of("input1", input1, "input2", input2)
    .apply(CoGroup.join("input1", By.fieldNames("user").withOptionalParticipation())
        .join("input2", By.fieldNames("user").withOptionalParticipation())
        .crossProductJoin());
</code></pre>
<p>While the above examples use two inputs to mimic SQL's left and right join semantics, the
<code>CoGroup</code> transform supports any number of inputs, and optional participation can be
specified on any subset of them.
<p>Do note that cross-product joins, while simpler and easier to program, can cause performance problems.</div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== NESTED CLASS SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="nested.class.summary">
<!-- -->
</a>
<h3>Nested Class Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation">
<caption><span>Nested Classes</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Class and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.By</a></span></code>
<div class="block">Defines the set of fields to extract for the join key, as well as other per-input join options.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.ExpandCrossProduct.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.ExpandCrossProduct</a></span></code>
<div class="block">A <a href="../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that calculates the cross-product join.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Impl</a></span></code>
<div class="block">The implementing PTransform.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Result.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Result</a></span></code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.html#CoGroup--">CoGroup</a></span>()</code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Impl</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.html#join-org.apache.beam.sdk.schemas.transforms.CoGroup.By-">join</a></span>(<a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.By</a>&nbsp;clause)</code>
<div class="block">Join all input PCollections using the same args.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Impl</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.html#join-java.lang.String-org.apache.beam.sdk.schemas.transforms.CoGroup.By-">join</a></span>(java.lang.String&nbsp;tag,
<a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.By</a>&nbsp;clause)</code>
<div class="block">Specify the following join arguments (including fields to join by) for the specified
PCollection.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="CoGroup--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>CoGroup</h4>
<pre>public&nbsp;CoGroup()</pre>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="join-org.apache.beam.sdk.schemas.transforms.CoGroup.By-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>join</h4>
<pre>public static&nbsp;<a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Impl</a>&nbsp;join(<a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.By</a>&nbsp;clause)</pre>
<div class="block">Join all input PCollections using the same args.
<p>The same fields and other options are used in all input PCollections.</div>
</li>
</ul>
<a name="join-java.lang.String-org.apache.beam.sdk.schemas.transforms.CoGroup.By-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>join</h4>
<pre>public static&nbsp;<a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Impl</a>&nbsp;join(java.lang.String&nbsp;tag,
<a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.By</a>&nbsp;clause)</pre>
<div class="block">Specify the following join arguments (including fields to join by) for the specified
PCollection.
<p>Each PCollection in the input must have args specified for the join key.</div>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/Cast.Widening.html" title="class in org.apache.beam.sdk.schemas.transforms"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../index.html?org/apache/beam/sdk/schemas/transforms/CoGroup.html" target="_top">Frames</a></li>
<li><a href="CoGroup.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the bottom "All Classes" link only when this page is the topmost
// window; hide it otherwise.
allClassesLink = document.getElementById("allclasses_navbar_bottom");
allClassesLink.style.display = (window==top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</body>
</html>