| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_161-google-v7) on Thu Oct 18 16:14:33 PDT 2018 --> |
| <title>ApproximateUnique (Apache Beam 2.8.0-SNAPSHOT)</title> |
| <meta name="date" content="2018-10-18"> |
| <link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="ApproximateUnique (Apache Beam 2.8.0-SNAPSHOT)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":9,"i1":9,"i2":9,"i3":9}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../org/apache/beam/sdk/transforms/ApproximateQuantiles.ApproximateQuantilesCombineFn.html" title="class in org.apache.beam.sdk.transforms"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.ApproximateUniqueCombineFn.html" title="class in org.apache.beam.sdk.transforms"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../index.html?org/apache/beam/sdk/transforms/ApproximateUnique.html" target="_top">Frames</a></li> |
| <li><a href="ApproximateUnique.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li>Field | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.beam.sdk.transforms</div> |
| <h2 title="Class ApproximateUnique" class="title">Class ApproximateUnique</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li>java.lang.Object</li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.beam.sdk.transforms.ApproximateUnique</li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <hr> |
| <br> |
| <pre>public class <span class="typeNameLabel">ApproximateUnique</span> |
| extends java.lang.Object</pre> |
| <div class="block"><code>PTransform</code>s for estimating the number of distinct elements in a <code>PCollection</code>, or |
| the number of distinct values associated with each key in a <code>PCollection</code> of <code>KV</code>s.</div> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ======== NESTED CLASS SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="nested.class.summary"> |
| <!-- --> |
| </a> |
| <h3>Nested Class Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation"> |
| <caption><span>Nested Classes</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Class and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static class </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.ApproximateUniqueCombineFn.html" title="class in org.apache.beam.sdk.transforms">ApproximateUnique.ApproximateUniqueCombineFn</a>&lt;<a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.ApproximateUniqueCombineFn.html" title="type parameter in ApproximateUnique.ApproximateUniqueCombineFn">T</a>&gt;</span></code> |
| <div class="block"><code>CombineFn</code> that computes an estimate of the number of distinct values that were |
| combined.</div> |
| </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ======== CONSTRUCTOR SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.summary"> |
| <!-- --> |
| </a> |
| <h3>Constructor Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation"> |
| <caption><span>Constructors</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colOne" scope="col">Constructor and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#ApproximateUnique--">ApproximateUnique</a></span>()</code> </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>static &lt;T&gt; org.apache.beam.sdk.transforms.ApproximateUnique.Globally&lt;T&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#globally-double-">globally</a></span>(double maximumEstimationError)</code> |
| <div class="block">Like <a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#globally-int-"><code>globally(int)</code></a>, but specifies the desired maximum estimation error instead of the |
| sample size.</div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>static &lt;T&gt; org.apache.beam.sdk.transforms.ApproximateUnique.Globally&lt;T&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#globally-int-">globally</a></span>(int sampleSize)</code> |
| <div class="block">Returns a <code>PTransform</code> that takes a <code>PCollection&lt;T&gt;</code> and returns a <code>PCollection&lt;Long&gt;</code> containing a single value that is an estimate of the number of distinct |
| elements in the input <code>PCollection</code>.</div> |
| </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code>static &lt;K,V&gt; org.apache.beam.sdk.transforms.ApproximateUnique.PerKey&lt;K,V&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#perKey-double-">perKey</a></span>(double maximumEstimationError)</code> |
| <div class="block">Like <a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#perKey-int-"><code>perKey(int)</code></a>, but specifies the desired maximum estimation error instead of the |
| sample size.</div> |
| </td> |
| </tr> |
| <tr id="i3" class="rowColor"> |
| <td class="colFirst"><code>static &lt;K,V&gt; org.apache.beam.sdk.transforms.ApproximateUnique.PerKey&lt;K,V&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#perKey-int-">perKey</a></span>(int sampleSize)</code> |
| <div class="block">Returns a <code>PTransform</code> that takes a <code>PCollection&lt;KV&lt;K, V&gt;&gt;</code> and returns a <code>PCollection&lt;KV&lt;K, Long&gt;&gt;</code> that contains an output element mapping each distinct key in the |
| input <code>PCollection</code> to an estimate of the number of distinct values associated with that |
| key in the input <code>PCollection</code>.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.Object</h3> |
| <code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ========= CONSTRUCTOR DETAIL ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.detail"> |
| <!-- --> |
| </a> |
| <h3>Constructor Detail</h3> |
| <a name="ApproximateUnique--"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>ApproximateUnique</h4> |
| <pre>public ApproximateUnique()</pre> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="globally-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>globally</h4> |
| <pre>public static &lt;T&gt; org.apache.beam.sdk.transforms.ApproximateUnique.Globally&lt;T&gt; globally(int sampleSize)</pre> |
| <div class="block">Returns a <code>PTransform</code> that takes a <code>PCollection&lt;T&gt;</code> and returns a <code>PCollection&lt;Long&gt;</code> containing a single value that is an estimate of the number of distinct |
| elements in the input <code>PCollection</code>. |
| |
| <p>The <code>sampleSize</code> parameter controls the estimation error. The error is about <code>2 |
| / sqrt(sampleSize)</code>, so for <code>ApproximateUnique.globally(10000)</code> the estimation error is |
| about 2%. Similarly, for <code>ApproximateUnique.globally(16)</code> the estimation error is about 50%. If |
| there are fewer than <code>sampleSize</code> distinct elements then the returned result will be |
| exact with extremely high probability (the chance of a hash collision is about <code>sampleSize^2 / 2^65</code>). |
| |
| <p>This transform approximates the number of elements in a set by computing the top <code>sampleSize</code> hash values, and using that to extrapolate the size of the entire set of hash |
| values by assuming the rest of the hash values are as densely distributed as the top <code>sampleSize</code>. |
| |
| <p>See also <a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#globally-double-"><code>globally(double)</code></a>. |
| |
| <p>Example of use: |
| |
| <pre><code> |
| PCollection&lt;String&gt; pc = ...; |
| PCollection&lt;Long&gt; approxNumDistinct = |
| pc.apply(ApproximateUnique.&lt;String&gt;globally(1000)); |
| </code></pre></div> |
| <dl> |
| <dt><span class="paramLabel">Type Parameters:</span></dt> |
| <dd><code>T</code> - the type of the elements in the input <code>PCollection</code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>sampleSize</code> - the number of entries in the statistical sample; the higher this number, the |
| more accurate the estimate will be; should be <code>>= 16</code></dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code>java.lang.IllegalArgumentException</code> - if the <code>sampleSize</code> argument is too small</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="globally-double-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>globally</h4> |
| <pre>public static &lt;T&gt; org.apache.beam.sdk.transforms.ApproximateUnique.Globally&lt;T&gt; globally(double maximumEstimationError)</pre> |
| <div class="block">Like <a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#globally-int-"><code>globally(int)</code></a>, but specifies the desired maximum estimation error instead of the |
| sample size.</div> |
| <dl> |
| <dt><span class="paramLabel">Type Parameters:</span></dt> |
| <dd><code>T</code> - the type of the elements in the input <code>PCollection</code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>maximumEstimationError</code> - the maximum estimation error, which should be in the range <code>[0.01, 0.5]</code></dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code>java.lang.IllegalArgumentException</code> - if the <code>maximumEstimationError</code> argument is out of range</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="perKey-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>perKey</h4> |
| <pre>public static &lt;K,V&gt; org.apache.beam.sdk.transforms.ApproximateUnique.PerKey&lt;K,V&gt; perKey(int sampleSize)</pre> |
| <div class="block">Returns a <code>PTransform</code> that takes a <code>PCollection&lt;KV&lt;K, V&gt;&gt;</code> and returns a <code>PCollection&lt;KV&lt;K, Long&gt;&gt;</code> that contains an output element mapping each distinct key in the |
| input <code>PCollection</code> to an estimate of the number of distinct values associated with that |
| key in the input <code>PCollection</code>. |
| |
| <p>See <a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#globally-int-"><code>globally(int)</code></a> for an explanation of the <code>sampleSize</code> parameter. A |
| separate sampling is computed for each distinct key of the input. |
| |
| <p>See also <a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#perKey-double-"><code>perKey(double)</code></a>. |
| |
| <p>Example of use: |
| |
| <pre><code> |
| PCollection&lt;KV&lt;Integer, String&gt;&gt; pc = ...; |
| PCollection&lt;KV&lt;Integer, Long&gt;&gt; approxNumDistinctPerKey = |
| pc.apply(ApproximateUnique.&lt;Integer, String&gt;perKey(1000)); |
| </code></pre></div> |
| <dl> |
| <dt><span class="paramLabel">Type Parameters:</span></dt> |
| <dd><code>K</code> - the type of the keys in the input and output <code>PCollection</code>s</dd> |
| <dd><code>V</code> - the type of the values in the input <code>PCollection</code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>sampleSize</code> - the number of entries in the statistical sample; the higher this number, the |
| more accurate the estimate will be; should be <code>>= 16</code></dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code>java.lang.IllegalArgumentException</code> - if the <code>sampleSize</code> argument is too small</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="perKey-double-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>perKey</h4> |
| <pre>public static &lt;K,V&gt; org.apache.beam.sdk.transforms.ApproximateUnique.PerKey&lt;K,V&gt; perKey(double maximumEstimationError)</pre> |
| <div class="block">Like <a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html#perKey-int-"><code>perKey(int)</code></a>, but specifies the desired maximum estimation error instead of the |
| sample size.</div> |
| <dl> |
| <dt><span class="paramLabel">Type Parameters:</span></dt> |
| <dd><code>K</code> - the type of the keys in the input and output <code>PCollection</code>s</dd> |
| <dd><code>V</code> - the type of the values in the input <code>PCollection</code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>maximumEstimationError</code> - the maximum estimation error, which should be in the range <code>[0.01, 0.5]</code></dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code>java.lang.IllegalArgumentException</code> - if the <code>maximumEstimationError</code> argument is out of range</dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../org/apache/beam/sdk/transforms/ApproximateQuantiles.ApproximateQuantilesCombineFn.html" title="class in org.apache.beam.sdk.transforms"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.ApproximateUniqueCombineFn.html" title="class in org.apache.beam.sdk.transforms"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../index.html?org/apache/beam/sdk/transforms/ApproximateUnique.html" target="_top">Frames</a></li> |
| <li><a href="ApproximateUnique.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li>Field | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| </body> |
| </html> |