| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc --> |
| <title>ContextualTextIO (Apache Beam 2.38.0-SNAPSHOT)</title> |
| <link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="ContextualTextIO (Apache Beam 2.38.0-SNAPSHOT)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":9,"i1":9}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li>Prev Class</li> |
| <li><a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html" title="class in org.apache.beam.sdk.io.contextualtextio"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../../index.html?org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html" target="_top">Frames</a></li> |
| <li><a href="ContextualTextIO.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.beam.sdk.io.contextualtextio</div> |
| <h2 title="Class ContextualTextIO" class="title">Class ContextualTextIO</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li>java.lang.Object</li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.beam.sdk.io.contextualtextio.ContextualTextIO</li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <hr> |
| <br> |
| <pre>public class <span class="typeNameLabel">ContextualTextIO</span> |
| extends java.lang.Object</pre> |
| <div class="block"><a href="../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a>s that read text files and collect contextual information of the elements in |
| the input. |
| |
| <p>Prefer <a href="../../../../../../org/apache/beam/sdk/io/TextIO.html" title="class in org.apache.beam.sdk.io"><code>TextIO</code></a> when the files do not contain multi-line records and additional record |
| metadata is not required. |
| |
| <h2>Reading from text files</h2> |
| |
| <p>To read a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> from one or more text files, use <code>ContextualTextIO.read()</code>. To instantiate a transform use <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html#from-java.lang.String-"><code>ContextualTextIO.Read.from(String)</code></a> and specify the path of the file(s) to be read. |
| Alternatively, if the filenames to be read are themselves in a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> you can use |
| <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html" title="class in org.apache.beam.sdk.io"><code>FileIO</code></a> to match them and <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#readFiles--"><code>readFiles()</code></a> to read them. |
| |
| <p><a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#read--"><code>read()</code></a> returns a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="../../../../../../org/apache/beam/sdk/values/Row.html" title="class in org.apache.beam.sdk.values"><code>Row</code></a>s with schema <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/RecordWithMetadata.html#getSchema--"><code>RecordWithMetadata.getSchema()</code></a>, each corresponding to one line of an input UTF-8 text file |
| (split into lines delimited by '\n', '\r', '\r\n', or a custom delimiter specified via <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html#withDelimiter-byte:A-"><code>ContextualTextIO.Read.withDelimiter(byte[])</code></a>). |
| |
| <h3>Filepattern expansion and watching</h3> |
| |
| <p>By default, the filepatterns are expanded only once. The combination of <code>FileIO.Match#continuously(Duration, TerminationCondition)</code> and <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#readFiles--"><code>readFiles()</code></a> allows |
| streaming of new files matching the filepattern(s). |
| |
| <p>By default, <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#read--"><code>read()</code></a> prohibits filepatterns that match no files, and <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#readFiles--"><code>readFiles()</code></a> |
| allows them in case the filepattern contains a glob wildcard character. Use <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html#withEmptyMatchTreatment-org.apache.beam.sdk.io.fs.EmptyMatchTreatment-"><code>ContextualTextIO.Read.withEmptyMatchTreatment(org.apache.beam.sdk.io.fs.EmptyMatchTreatment)</code></a> or <code>FileIO.Match#withEmptyMatchTreatment(EmptyMatchTreatment)</code> plus <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#readFiles--"><code>readFiles()</code></a> to configure |
| this behavior. |
| |
| <p>Example 1: reading a file or filepattern. |
| |
| <pre><code> |
| Pipeline p = ...; |
| |
| // A simple Read of a file: |
| PCollection&lt;Row&gt; records = p.apply(ContextualTextIO.read().from("/local/path/to/file.txt")); |
| </code></pre> |
| |
| <p>Example 2: reading a PCollection of filenames. |
| |
| <pre><code> |
| Pipeline p = ...; |
| |
| // E.g. the filenames might be computed from other data in the pipeline, or |
| // read from a data source. |
| PCollection&lt;String&gt; filenames = ...; |
| |
| // Read all files in the collection. |
| PCollection&lt;Row&gt; records = |
| filenames |
| .apply(FileIO.matchAll()) |
| .apply(FileIO.readMatches()) |
| .apply(ContextualTextIO.readFiles()); |
| </code></pre> |
| |
| <p>Example 3: streaming new files matching a filepattern. |
| |
| <pre><code> |
| Pipeline p = ...; |
| |
| PCollection&lt;Row&gt; records = p.apply(ContextualTextIO.read() |
| .from("/local/path/to/files/*") |
| .watchForNewFiles( |
| // Check for new files every minute |
| Duration.standardMinutes(1), |
| // Stop watching the filepattern if no new files appear within an hour |
| afterTimeSinceNewOutput(Duration.standardHours(1)))); |
| </code></pre> |
| |
| <p>Example 4: reading a file or file pattern of RFC4180-compliant CSV files with fields that may |
| contain line breaks. |
| |
| <p>Example of such a file could be: |
| |
| <p>"aaa","b CRLF bb","ccc" CRLF zzz,yyy,xxx |
| |
| <pre><code> |
| Pipeline p = ...; |
| |
| PCollection&lt;Row&gt; records = p.apply(ContextualTextIO.read() |
| .from("/local/path/to/files/*.csv") |
| .withHasMultilineCSVRecords(true)); |
| </code></pre> |
| |
| <p>Example 5: reading while watching for new files. |
| |
| <pre><code> |
| Pipeline p = ...; |
| |
| PCollection&lt;Row&gt; records = p.apply(FileIO.match() |
| .filepattern("filepattern") |
| .continuously( |
| Duration.millis(100), |
| Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3)))) |
| .apply(FileIO.readMatches()) |
| .apply(ContextualTextIO.readFiles()); |
| </code></pre> |
| |
| <p>Example 6: reading with recordNum metadata. |
| |
| <pre><code> |
| Pipeline p = ...; |
| |
| PCollection&lt;Row&gt; records = p.apply(ContextualTextIO.read() |
| .from("/local/path/to/files/*.csv") |
| .withRecordNumMetadata()); |
| </code></pre> |
| |
| <p>NOTE: When using <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html#withHasMultilineCSVRecords-java.lang.Boolean-"><code>ContextualTextIO.Read.withHasMultilineCSVRecords(Boolean)</code></a>, a single |
| reader will be used to process the file, rather than multiple readers which can read from |
| different offsets. For a large file this can result in lower performance. |
| |
| <p>NOTE: Use <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html#withRecordNumMetadata--"><code>ContextualTextIO.Read.withRecordNumMetadata()</code></a> when recordNum metadata is required. Computing |
| absolute record positions currently introduces a grouping step, which increases the resources |
| used by the pipeline. By default withRecordNumMetadata is set to false; in this case record |
| objects will not contain absolute record positions within the entire file, but will still contain |
| relative positions in respective offsets. |
| |
| <h3>Reading a very large number of files</h3> |
| |
| <p>If it is known that the filepattern will match a very large number of files (e.g. tens of |
| thousands or more), use <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html#withHintMatchesManyFiles--"><code>ContextualTextIO.Read.withHintMatchesManyFiles()</code></a> for better |
| performance and scalability. Note that it may decrease performance if the filepattern matches |
| only a small number of files.</div> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ======== NESTED CLASS SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="nested.class.summary"> |
| <!-- --> |
| </a> |
| <h3>Nested Class Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation"> |
| <caption><span>Nested Classes</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Class and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html" title="class in org.apache.beam.sdk.io.contextualtextio">ContextualTextIO.Read</a></span></code> |
| <div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#read--"><code>read()</code></a>.</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.contextualtextio">ContextualTextIO.ReadFiles</a></span></code> |
| <div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#readFiles--"><code>readFiles()</code></a>.</div> |
| </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html" title="class in org.apache.beam.sdk.io.contextualtextio">ContextualTextIO.Read</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#read--">read</a></span>()</code> |
| <div class="block">A <a href="../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that reads from one or more text files and returns a bounded <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> containing one <a href="../../../../../../org/apache/beam/sdk/values/Row.html" title="class in org.apache.beam.sdk.values"><code>element</code></a> for each line in the input files.</div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>static <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.contextualtextio">ContextualTextIO.ReadFiles</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#readFiles--">readFiles</a></span>()</code> |
| <div class="block">Like <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#read--"><code>read()</code></a>, but reads each file in a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <code>FileIO.ReadableFile</code>, returned by <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html#readMatches--"><code>FileIO.readMatches()</code></a>.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.Object</h3> |
| <code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="read--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>read</h4> |
| <pre>public static <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html" title="class in org.apache.beam.sdk.io.contextualtextio">ContextualTextIO.Read</a> read()</pre> |
| <div class="block">A <a href="../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a> that reads from one or more text files and returns a bounded <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> containing one <a href="../../../../../../org/apache/beam/sdk/values/Row.html" title="class in org.apache.beam.sdk.values"><code>element</code></a> for each line in the input files.</div> |
| </li> |
| </ul> |
| <a name="readFiles--"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>readFiles</h4> |
| <pre>public static <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.ReadFiles.html" title="class in org.apache.beam.sdk.io.contextualtextio">ContextualTextIO.ReadFiles</a> readFiles()</pre> |
| <div class="block">Like <a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html#read--"><code>read()</code></a>, but reads each file in a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <code>FileIO.ReadableFile</code>, returned by <a href="../../../../../../org/apache/beam/sdk/io/FileIO.html#readMatches--"><code>FileIO.readMatches()</code></a>.</div> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li>Prev Class</li> |
| <li><a href="../../../../../../org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.Read.html" title="class in org.apache.beam.sdk.io.contextualtextio"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../../index.html?org/apache/beam/sdk/io/contextualtextio/ContextualTextIO.html" target="_top">Frames</a></li> |
| <li><a href="ContextualTextIO.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| </body> |
| </html> |