blob: 4640011d5e493f3621888f9ab11104294bffeb12 [file] [log] [blame]
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc -->
<title>TextRowCountEstimator (Apache Beam 2.38.0-SNAPSHOT)</title>
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
    // Unless the URL marks this page as an external link (is-external=true),
    // copy this page's title onto the parent document.
    try {
        if (location.href.indexOf('is-external=true') < 0) {
            parent.document.title = "TextRowCountEstimator (Apache Beam 2.38.0-SNAPSHOT)";
        }
    } catch (ignored) {
        // Best-effort only: any failure while setting the parent title is deliberately ignored.
    }
//-->
    // Page-level globals, presumably read by the shared script.js (the tab links call show(n)).
    //   methods: method-summary row id mapped to a category bitmask; each value is the sum of
    //            the matching keys in tabs (e.g. 9 = static 1 + concrete 8).
    //   tabs:    category bit mapped to [tab element id, tab caption]; 65535 is "All Methods".
    //   The remaining four values are the CSS class names used for row striping and tab state.
    var methods = {"i0":9,"i1":10,"i2":6,"i3":6,"i4":6,"i5":6,"i6":6,"i7":6,"i8":6},
        tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]},
        altColor = "altColor",
        rowColor = "rowColor",
        tableTab = "tableTab",
        activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../org/apache/beam/sdk/io/TextIO.Write.html" title="class in org.apache.beam.sdk.io"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.Builder.html" title="class in org.apache.beam.sdk.io"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/beam/sdk/io/TextRowCountEstimator.html" target="_top">Frames</a></li>
<li><a href="TextRowCountEstimator.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
    // Show the top navbar's "All Classes" list only when this page is the
    // top-level window (not displayed inside a frame); otherwise hide it.
    allClassesLink = document.getElementById("allclasses_navbar_top");
    allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.beam.sdk.io</div>
<h2 title="Class TextRowCountEstimator" class="title">Class TextRowCountEstimator</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>java.lang.Object</li>
<li>
<ul class="inheritance">
<li>org.apache.beam.sdk.io.TextRowCountEstimator</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<hr>
<br>
<pre>public abstract class <span class="typeNameLabel">TextRowCountEstimator</span>
extends java.lang.Object</pre>
<div class="block">This returns a row count estimation for files associated with a file pattern.</div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== NESTED CLASS SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="nested.class.summary">
<!-- -->
</a>
<h3>Nested Class Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation">
<caption><span>Nested Classes</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Class and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.Builder.html" title="class in org.apache.beam.sdk.io">TextRowCountEstimator.Builder</a></span></code>
<div class="block">Builder for <a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html" title="class in org.apache.beam.sdk.io"><code>TextRowCountEstimator</code></a>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.LimitNumberOfFiles.html" title="class in org.apache.beam.sdk.io">TextRowCountEstimator.LimitNumberOfFiles</a></span></code>
<div class="block">This strategy stops sampling once enough files have been sampled.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.LimitNumberOfTotalBytes.html" title="class in org.apache.beam.sdk.io">TextRowCountEstimator.LimitNumberOfTotalBytes</a></span></code>
<div class="block">This strategy stops sampling when the total number of sampled bytes exceeds some threshold.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.NoEstimationException.html" title="class in org.apache.beam.sdk.io">TextRowCountEstimator.NoEstimationException</a></span></code>
<div class="block">An exception that will be thrown if the estimator cannot get an estimation of the number of
lines.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.SampleAllFiles.html" title="class in org.apache.beam.sdk.io">TextRowCountEstimator.SampleAllFiles</a></span></code>
<div class="block">This strategy samples all the files.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static interface&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.SamplingStrategy.html" title="interface in org.apache.beam.sdk.io">TextRowCountEstimator.SamplingStrategy</a></span></code>
<div class="block">A sampling strategy tells us when we should stop reading further files.</div>
</td>
</tr>
</table>
</li>
</ul>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html#TextRowCountEstimator--">TextRowCountEstimator</a></span>()</code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t3" class="tableTab"><span><a href="javascript:show(4);">Abstract Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>static <a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.Builder.html" title="class in org.apache.beam.sdk.io">TextRowCountEstimator.Builder</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html#builder--">builder</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>java.lang.Double</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html#estimateRowCount-org.apache.beam.sdk.options.PipelineOptions-">estimateRowCount</a></span>(<a href="../../../../../org/apache/beam/sdk/options/PipelineOptions.html" title="interface in org.apache.beam.sdk.options">PipelineOptions</a>&nbsp;pipelineOptions)</code>
<div class="block">Estimates the number of non-empty rows.</div>
</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code>abstract <a href="../../../../../org/apache/beam/sdk/io/Compression.html" title="enum in org.apache.beam.sdk.io">Compression</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html#getCompression--">getCompression</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i3" class="rowColor">
<td class="colFirst"><code>abstract byte[]</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html#getDelimiters--">getDelimiters</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i4" class="altColor">
<td class="colFirst"><code>abstract <a href="../../../../../org/apache/beam/sdk/io/FileIO.ReadMatches.DirectoryTreatment.html" title="enum in org.apache.beam.sdk.io">FileIO.ReadMatches.DirectoryTreatment</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html#getDirectoryTreatment--">getDirectoryTreatment</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i5" class="rowColor">
<td class="colFirst"><code>abstract <a href="../../../../../org/apache/beam/sdk/io/fs/EmptyMatchTreatment.html" title="enum in org.apache.beam.sdk.io.fs">EmptyMatchTreatment</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html#getEmptyMatchTreatment--">getEmptyMatchTreatment</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i6" class="altColor">
<td class="colFirst"><code>abstract java.lang.String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html#getFilePattern--">getFilePattern</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i7" class="rowColor">
<td class="colFirst"><code>abstract long</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html#getNumSampledBytesPerFile--">getNumSampledBytesPerFile</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i8" class="altColor">
<td class="colFirst"><code>abstract <a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.SamplingStrategy.html" title="interface in org.apache.beam.sdk.io">TextRowCountEstimator.SamplingStrategy</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.html#getSamplingStrategy--">getSamplingStrategy</a></span>()</code>&nbsp;</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="TextRowCountEstimator--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>TextRowCountEstimator</h4>
<pre>public&nbsp;TextRowCountEstimator()</pre>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="getNumSampledBytesPerFile--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getNumSampledBytesPerFile</h4>
<pre>public abstract&nbsp;long&nbsp;getNumSampledBytesPerFile()</pre>
</li>
</ul>
<a name="getDelimiters--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getDelimiters</h4>
<pre>public abstract&nbsp;byte[]&nbsp;getDelimiters()</pre>
</li>
</ul>
<a name="getFilePattern--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getFilePattern</h4>
<pre>public abstract&nbsp;java.lang.String&nbsp;getFilePattern()</pre>
</li>
</ul>
<a name="getCompression--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getCompression</h4>
<pre>public abstract&nbsp;<a href="../../../../../org/apache/beam/sdk/io/Compression.html" title="enum in org.apache.beam.sdk.io">Compression</a>&nbsp;getCompression()</pre>
</li>
</ul>
<a name="getSamplingStrategy--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getSamplingStrategy</h4>
<pre>public abstract&nbsp;<a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.SamplingStrategy.html" title="interface in org.apache.beam.sdk.io">TextRowCountEstimator.SamplingStrategy</a>&nbsp;getSamplingStrategy()</pre>
</li>
</ul>
<a name="getEmptyMatchTreatment--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getEmptyMatchTreatment</h4>
<pre>public abstract&nbsp;<a href="../../../../../org/apache/beam/sdk/io/fs/EmptyMatchTreatment.html" title="enum in org.apache.beam.sdk.io.fs">EmptyMatchTreatment</a>&nbsp;getEmptyMatchTreatment()</pre>
</li>
</ul>
<a name="getDirectoryTreatment--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getDirectoryTreatment</h4>
<pre>public abstract&nbsp;<a href="../../../../../org/apache/beam/sdk/io/FileIO.ReadMatches.DirectoryTreatment.html" title="enum in org.apache.beam.sdk.io">FileIO.ReadMatches.DirectoryTreatment</a>&nbsp;getDirectoryTreatment()</pre>
</li>
</ul>
<a name="builder--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>builder</h4>
<pre>public static&nbsp;<a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.Builder.html" title="class in org.apache.beam.sdk.io">TextRowCountEstimator.Builder</a>&nbsp;builder()</pre>
</li>
</ul>
<a name="estimateRowCount-org.apache.beam.sdk.options.PipelineOptions-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>estimateRowCount</h4>
<pre>public&nbsp;java.lang.Double&nbsp;estimateRowCount(<a href="../../../../../org/apache/beam/sdk/options/PipelineOptions.html" title="interface in org.apache.beam.sdk.options">PipelineOptions</a>&nbsp;pipelineOptions)
throws java.io.IOException,
<a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.NoEstimationException.html" title="class in org.apache.beam.sdk.io">TextRowCountEstimator.NoEstimationException</a></pre>
<div class="block">Estimates the number of non-empty rows. It samples NumSampledBytesPerFile bytes from every file
until the condition in the sampling strategy is met. Then it takes the average line size of the
rows and divides the total file sizes by that number. If all the sampled rows are empty, and it
has not sampled all the lines (due to the sampling strategy), it throws a NoEstimationException.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>Number of estimated rows.</dd>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.NoEstimationException.html" title="class in org.apache.beam.sdk.io">TextRowCountEstimator.NoEstimationException</a></code> - if all the sampled
lines are empty and we have not read all the lines in the matched files.</dd>
<dd><code>java.io.IOException</code></dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../org/apache/beam/sdk/io/TextIO.Write.html" title="class in org.apache.beam.sdk.io"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../../org/apache/beam/sdk/io/TextRowCountEstimator.Builder.html" title="class in org.apache.beam.sdk.io"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/beam/sdk/io/TextRowCountEstimator.html" target="_top">Frames</a></li>
<li><a href="TextRowCountEstimator.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
    // Show the bottom navbar's "All Classes" list only when this page is the
    // top-level window (not displayed inside a frame); otherwise hide it.
    allClassesLink = document.getElementById("allclasses_navbar_bottom");
    allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</body>
</html>