| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc --> |
| <title>CoGroup (Apache Beam 2.38.0-SNAPSHOT)</title> |
| <link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="CoGroup (Apache Beam 2.38.0-SNAPSHOT)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":9,"i1":9}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/Cast.Widening.html" title="class in org.apache.beam.sdk.schemas.transforms"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../../index.html?org/apache/beam/sdk/schemas/transforms/CoGroup.html" target="_top">Frames</a></li> |
| <li><a href="CoGroup.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li>Field | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.beam.sdk.schemas.transforms</div> |
| <h2 title="Class CoGroup" class="title">Class CoGroup</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li>java.lang.Object</li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.beam.sdk.schemas.transforms.CoGroup</li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <hr> |
| <br> |
| <pre><a href="../../../../../../org/apache/beam/sdk/annotations/Experimental.html" title="annotation in org.apache.beam.sdk.annotations">@Experimental</a>(<a href="../../../../../../org/apache/beam/sdk/annotations/Experimental.html#value--">value</a>=<a href="../../../../../../org/apache/beam/sdk/annotations/Experimental.Kind.html#SCHEMAS">SCHEMAS</a>) |
| public class <span class="typeNameLabel">CoGroup</span> |
| extends java.lang.Object</pre> |
| <div class="block">A transform that performs equijoins across multiple schema <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a>s. |
| |
| <p>This transform has similarities to <a href="../../../../../../org/apache/beam/sdk/transforms/join/CoGroupByKey.html" title="class in org.apache.beam.sdk.transforms.join"><code>CoGroupByKey</code></a>; however, it works on PCollections that |
| have schemas. This allows users of the transform to simply specify schema fields to join on. The |
| output type of the transform is <code>Row</code> that contains one row field for the key and an ITERABLE |
| field for each input containing the rows that joined on that key; by default the cross product is |
| not expanded, but the cross product can be optionally expanded. By default the key field is named |
| "key" (the name can be overridden using withKeyField) and has index 0. The tags in the |
| PCollectionTuple control the names of the value fields in the Row. |
| |
| <p>For example, the following demonstrates joining three PCollections on the "user" and "country" |
| fields: |
| |
| <pre><code> PCollection&lt;Row&gt; joined = |
| PCollectionTuple.of("input1", input1, "input2", input2, "input3", input3) |
| .apply(CoGroup.join(By.fieldNames("user", "country"))); |
| </code></pre> |
| |
| <p>In the above case, the key schema will contain the two string fields "user" and "country"; in |
| this case, the schemas for Input1, Input2, Input3 must all have fields named "user" and |
| "country". The remainder of the Row will contain three iterable-of-Row fields named "input1", |
| "input2" and "input3". Each of these contains all rows from the corresponding input for that key. |
| Standard join types (inner join, outer join, etc.) can be accomplished by expanding the cross |
| product of these iterables in various ways. |
| |
| <p>To put it in other words, the key schema is convertible to the following POJO: |
| |
| <pre> @DefaultSchema(JavaFieldSchema.class) |
| public class JoinedKey { |
| public String user; |
| public String country; |
| }</pre> |
| |
| <p>The value schema is convertible to the following POJO: |
| |
| <pre><code> @DefaultSchema(JavaFieldSchema.class) |
| public class JoinedValue { |
| public JoinedKey key; |
| // The below lists contain all values from each of the three inputs that match on the given |
| // key. |
| public Iterable&lt;Input1Type&gt; input1; |
| public Iterable&lt;Input2Type&gt; input2; |
| public Iterable&lt;Input3Type&gt; input3; |
| } |
| |
| PCollection&lt;JoinedValue&gt; values = joined.apply(Convert.to(JoinedValue.class)); |
| |
| PCollection&lt;JoinedKey&gt; keys = values |
| .apply(Select.fieldNames("key")) |
| .apply(Convert.to(JoinedKey.class)); |
| </code></pre> |
| |
| |
| |
| <p>It's also possible to join between different fields in two inputs, as long as the types of |
| those fields match. In this case, fields must be specified for every input PCollection. For |
| example: |
| |
| <pre><code> PCollection&lt;Row&gt; joined |
| = PCollectionTuple.of("input1Tag", input1, "input2Tag", input2) |
| .apply(CoGroup |
| .join("input1Tag", By.fieldNames("referringUser")) |
| .join("input2Tag", By.fieldNames("user"))); |
| </code></pre> |
| |
| <p>Traditional (SQL) joins are cross-product joins. All rows that match the join condition are |
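| combined into individual rows and returned; in fact any SQL inner join is a subset of the |
| cross-product of two tables. This transform also supports the same functionality using the |
| <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html#crossProductJoin--"><code>CoGroup.Impl.crossProductJoin()</code></a> method. |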
| |
| <p>For example, consider the SQL join: SELECT * FROM input1 INNER JOIN input2 ON input1.user = |
| input2.user |
| |
| <p>You could express this with: |
| |
| <pre><code> PCollection&lt;Row&gt; joined = PCollectionTuple.of("input1", input1, "input2", input2) |
| .apply(CoGroup.join(By.fieldNames("user")).crossProductJoin()); |
| </code></pre> |
| |
| <p>The schema of the output PCollection contains a nested message for each of input1 and input2. |
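| Like above, you could use the <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/Convert.html" title="class in org.apache.beam.sdk.schemas.transforms"><code>Convert</code></a> transform to convert it to the following POJO: |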
| |
| <pre><code> @DefaultSchema(JavaFieldSchema.class) |
| public class JoinedValue { |
| public Input1Type input1; |
| public Input2Type input2; |
| } |
| </code></pre> |
| |
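| <p> <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/Select.html#flattenedSchema--"><code>Select.flattenedSchema()</code></a> can then be used to flatten all the subfields into one single |
| top-level row containing all the fields in both Input1 and Input2; this will often be combined |
| with a <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/Select.html" title="class in org.apache.beam.sdk.schemas.transforms"><code>Select</code></a> transform to select out the fields of interest, as the key fields will be |
| identical between input1 and input2. |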
| |
| <p>This transform also supports outer-join semantics. By default, all input PCollections must |
| participate fully in the join, providing inner-join semantics. This means that the join will only |
| produce values for "Bob" if all inputs have values for "Bob;" if even a single input does not |
| have a value for "Bob," an inner-join will produce no value. However, if you mark that input as |
| having optional participation then the join will contain values for "Bob," as long as at least |
| one input has a "Bob" value; null values will be added for inputs that have no "Bob" values. To |
| continue the SQL example: |
| |
| <p>SELECT * FROM input1 LEFT OUTER JOIN input2 ON input1.user = input2.user |
| |
| <p>Is equivalent to: |
| |
| <pre><code> PCollection&lt;Row&gt; joined = PCollectionTuple.of("input1", input1, "input2", input2) |
| .apply(CoGroup.join("input1", By.fieldNames("user").withOptionalParticipation()) |
| .join("input2", By.fieldNames("user")) |
| .crossProductJoin()); |
| </code></pre> |
| |
| <p>SELECT * FROM input1 RIGHT OUTER JOIN input2 ON input1.user = input2.user |
| |
| <p>Is equivalent to: |
| |
| <pre><code> PCollection&lt;Row&gt; joined = PCollectionTuple.of("input1", input1, "input2", input2) |
| .apply(CoGroup.join("input1", By.fieldNames("user")) |
| .join("input2", By.fieldNames("user").withOptionalParticipation()) |
| .crossProductJoin()); |
| </code></pre> |
| |
| <p>and SELECT * FROM input1 FULL OUTER JOIN input2 ON input1.user = input2.user |
| |
| <p>Is equivalent to: |
| |
| <pre><code> PCollection&lt;Row&gt; joined = PCollectionTuple.of("input1", input1, "input2", input2) |
| .apply(CoGroup.join("input1", By.fieldNames("user").withOptionalParticipation()) |
| .join("input2", By.fieldNames("user").withOptionalParticipation()) |
| .crossProductJoin()); |
| </code></pre> |
| |
| <p>While the above examples use two inputs to mimic SQL's left and right join semantics, the |
| <code>CoGroup</code> transform supports any number of inputs, and optional participation can be |
| specified on any subset of them. |
| |
| <p>Do note that cross-product joins, while simpler and easier to program, can cause performance problems.</div> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ======== NESTED CLASS SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="nested.class.summary"> |
| <!-- --> |
| </a> |
| <h3>Nested Class Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation"> |
| <caption><span>Nested Classes</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Class and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.By</a></span></code> |
| <div class="block">Defines the set of fields to extract for the join key, as well as other per-input join options.</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.ExpandCrossProduct.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.ExpandCrossProduct</a></span></code> |
| <div class="block">A <a href="../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that calculates the cross-product join.</div> |
| </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Impl</a></span></code> |
| <div class="block">The implementing PTransform.</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Result.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Result</a></span></code> </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ======== CONSTRUCTOR SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.summary"> |
| <!-- --> |
| </a> |
| <h3>Constructor Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation"> |
| <caption><span>Constructors</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colOne" scope="col">Constructor and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.html#CoGroup--">CoGroup</a></span>()</code> </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Impl</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.html#join-org.apache.beam.sdk.schemas.transforms.CoGroup.By-">join</a></span>(<a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.By</a> clause)</code> |
| <div class="block">Join all input PCollections using the same args.</div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Impl</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.html#join-java.lang.String-org.apache.beam.sdk.schemas.transforms.CoGroup.By-">join</a></span>(java.lang.String tag, |
| <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.By</a> clause)</code> |
| <div class="block">Specify the following join arguments (including fields to join by) for the specified |
| PCollection.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.Object</h3> |
| <code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ========= CONSTRUCTOR DETAIL ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.detail"> |
| <!-- --> |
| </a> |
| <h3>Constructor Detail</h3> |
| <a name="CoGroup--"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>CoGroup</h4> |
| <pre>public CoGroup()</pre> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="join-org.apache.beam.sdk.schemas.transforms.CoGroup.By-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>join</h4> |
| <pre>public static <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Impl</a> join(<a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.By</a> clause)</pre> |
| <div class="block">Join all input PCollections using the same args. |
| |
| <p>The same fields and other options are used in all input PCollections.</div> |
| </li> |
| </ul> |
| <a name="join-java.lang.String-org.apache.beam.sdk.schemas.transforms.CoGroup.By-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>join</h4> |
| <pre>public static <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.Impl.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.Impl</a> join(java.lang.String tag, |
| <a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms">CoGroup.By</a> clause)</pre> |
| <div class="block">Specify the following join arguments (including fields to join by) for the specified |
| PCollection. |
| |
| <p>Each PCollection in the input must have args specified for the join key.</div> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/Cast.Widening.html" title="class in org.apache.beam.sdk.schemas.transforms"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../../org/apache/beam/sdk/schemas/transforms/CoGroup.By.html" title="class in org.apache.beam.sdk.schemas.transforms"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../../index.html?org/apache/beam/sdk/schemas/transforms/CoGroup.html" target="_top">Frames</a></li> |
| <li><a href="CoGroup.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li>Field | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| </body> |
| </html> |