blob: 4ab56324e147bcbcfdc29712f5da7257753b4a01 [file] [log] [blame]
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_161-google-v7) on Thu Oct 18 16:14:32 PDT 2018 -->
<title>ApproximateDistinct (Apache Beam 2.8.0-SNAPSHOT)</title>
<meta name="date" content="2018-10-18">
<link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// Copy this page's title onto the parent document, but only when the URL
// does not carry the 'is-external=true' marker.
try {
if (location.href.indexOf('is-external=true') < 0) {
parent.document.title="ApproximateDistinct (Apache Beam 2.8.0-SNAPSHOT)";
}
}
catch(err) {
// Any failure while reading the location or writing the parent title is
// swallowed on purpose. NOTE(review): presumably this guards against an
// inaccessible parent frame; confirm.
}
//-->
// Page-level globals. NOTE(review): presumably read by the show() helper in
// script.js to filter and re-stripe the Method Summary table; confirm there.
// Row id mapped to a numeric flag (every method on this page carries 9).
var methods = {
"i0": 9,
"i1": 9,
"i2": 9,
"i3": 9
};
// Numeric flag mapped to [tab element id, tab caption].
var tabs = {
65535: ["t0", "All Methods"],
1: ["t1", "Static Methods"],
8: ["t4", "Concrete Methods"]
};
// CSS class names for row striping and for inactive/active tabs.
var altColor = "altColor",
rowColor = "rowColor",
tableTab = "tableTab",
activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Class</li>
<li><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.ApproximateDistinctFn.html" title="class in org.apache.beam.sdk.extensions.sketching"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../index.html?org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html" target="_top">Frames</a></li>
<li><a href="ApproximateDistinct.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the top "All Classes" list only when this page is the topmost window;
// hide it otherwise (i.e. when the page is displayed inside a frame).
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window==top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.beam.sdk.extensions.sketching</div>
<h2 title="Class ApproximateDistinct" class="title">Class ApproximateDistinct</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>java.lang.Object</li>
<li>
<ul class="inheritance">
<li>org.apache.beam.sdk.extensions.sketching.ApproximateDistinct</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<hr>
<br>
<pre><a href="../../../../../../org/apache/beam/sdk/annotations/Experimental.html" title="annotation in org.apache.beam.sdk.annotations">@Experimental</a>
public final class <span class="typeNameLabel">ApproximateDistinct</span>
extends java.lang.Object</pre>
<div class="block"><a href="../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a>s for computing the approximate number of distinct elements in a stream.
<p>This class relies on the HyperLogLog algorithm, and more precisely HyperLogLog+, Google's
improved version of it.
<h2>References</h2>
<p>The implementation comes from <a href="https://github.com/addthis/stream-lib">Addthis'
Stream-lib library</a>. <br>
The original paper of the HyperLogLog is available <a
href="http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf">here</a>. <br>
A paper from the same authors to have a clearer view of the algorithm is available <a
href="http://cscubs.cs.uni-bonn.de/2016/proceedings/paper-03.pdf">here</a>. <br>
Google's HyperLogLog+ version is detailed in <a
href="https://research.google.com/pubs/pub40671.html">this paper</a>.
<h2>Parameters</h2>
<p>Two parameters can be tuned in order to control the computation's accuracy:
<ul>
<li><b>Precision: <code>p</code></b> <br>
Controls the accuracy of the estimation. The precision value will have an impact on the
number of buckets used to store information about the distinct elements. <br>
In general one can expect a relative error of about <code>1.1 / sqrt(2^p)</code>. The value
should be at least 4 to guarantee a minimal accuracy. <br>
By default, the precision is set to <code>12</code> for a relative error of around <code>2%</code>.
<li><b>Sparse Precision: <code>sp</code></b> <br>
Used to create a sparse representation in order to optimize memory and improve accuracy at
small cardinalities. <br>
The value of <code>sp</code> should be greater than <code>p</code>, but lower than 32. <br>
By default, the sparse representation is not used (<code>sp = 0</code>). One should use it if
the cardinality may be less than <code>12000</code>.
</ul>
<h2>Examples</h2>
<p>There are 2 ways of using this class:
<ul>
<li>Use the <a href="../../../../../../org/apache/beam/sdk/transforms/PTransform.html" title="class in org.apache.beam.sdk.transforms"><code>PTransform</code></a>s that return <code>PCollection&lt;Long&gt;</code> corresponding to the
estimated number of distinct elements in the input <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of objects or for
each key in a <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a> of <a href="../../../../../../org/apache/beam/sdk/values/KV.html" title="class in org.apache.beam.sdk.values"><code>KV</code></a>s.
<li>Use the <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.ApproximateDistinctFn.html" title="class in org.apache.beam.sdk.extensions.sketching"><code>ApproximateDistinct.ApproximateDistinctFn</code></a> <code>CombineFn</code> that is exposed in order to make
advanced processing involving the <code>HyperLogLogPlus</code> structure which summarizes the
stream.
</ul>
<h3>Using the Transforms</h3>
<h4>Example 1: globally default use</h4>
<pre><code>
PCollection&lt;Integer&gt; input = ...;
PCollection&lt;Long&gt; hllSketch = input.apply(ApproximateDistinct.&lt;Integer&gt;globally());
</code></pre>
<h4>Example 2: per key default use</h4>
<pre><code>
PCollection&lt;KV&lt;Integer, String&gt;&gt; input = ...;
PCollection&lt;KV&lt;Integer, Long&gt;&gt; hllSketches = input.apply(ApproximateDistinct
.&lt;Integer, String&gt;perKey());
</code></pre>
<h4>Example 3: tune precision and use sparse representation</h4>
<p>One can tune the precision and sparse precision parameters in order to control the accuracy
and the memory. The tuning works exactly the same for <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#globally--"><code>globally()</code></a> and <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#perKey--"><code>perKey()</code></a>.
<pre><code>
int precision = 15;
int sparsePrecision = 25;
PCollection&lt;Double&gt; input = ...;
PCollection&lt;Long&gt; hllSketch = input.apply(ApproximateDistinct
.&lt;Double&gt;globally()
.withPrecision(precision)
.withSparsePrecision(sparsePrecision));
</code></pre>
<h3>Using the <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.ApproximateDistinctFn.html" title="class in org.apache.beam.sdk.extensions.sketching"><code>ApproximateDistinct.ApproximateDistinctFn</code></a> CombineFn</h3>
<p>The CombineFn does the same thing as the transform but it can be used in cases where you want
to manipulate the <code>HyperLogLogPlus</code> sketch, for example if you want to store it in a
database to have a backup. It can also be used in stateful processing or in <a href="../../../../../../org/apache/beam/sdk/transforms/CombineFns.ComposedCombineFn.html" title="class in org.apache.beam.sdk.transforms"><code>CombineFns.ComposedCombineFn</code></a>.
<h4>Example 1: basic use</h4>
<p>This example is not really interesting but shows how you can properly create an <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.ApproximateDistinctFn.html" title="class in org.apache.beam.sdk.extensions.sketching"><code>ApproximateDistinct.ApproximateDistinctFn</code></a>. One must always specify a coder using the <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.ApproximateDistinctFn.html#create-org.apache.beam.sdk.coders.Coder-"><code>ApproximateDistinct.ApproximateDistinctFn.create(Coder)</code></a> method.
<pre><code>
PCollection&lt;Integer&gt; input = ...;
PCollection&lt;HyperLogLogPlus&gt; output = input.apply(Combine.globally(ApproximateDistinctFn
.&lt;Integer&gt;create(BigEndianIntegerCoder.of())));
</code></pre>
<h4>Example 2: use the <a href="../../../../../../org/apache/beam/sdk/transforms/Combine.CombineFn.html" title="class in org.apache.beam.sdk.transforms"><code>Combine.CombineFn</code></a> in a stateful <a href="../../../../../../org/apache/beam/sdk/transforms/ParDo.html" title="class in org.apache.beam.sdk.transforms"><code>ParDo</code></a></h4>
<p>One may want to use the <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.ApproximateDistinctFn.html" title="class in org.apache.beam.sdk.extensions.sketching"><code>ApproximateDistinct.ApproximateDistinctFn</code></a> in a stateful ParDo in order to make
some processing depending on the current cardinality of the stream. <br>
For more information about stateful processing see the blog post on this topic <a
href="https://beam.apache.org/blog/2017/02/13/stateful-processing.html">here</a>.
<p>Here is an example of <a href="../../../../../../org/apache/beam/sdk/transforms/DoFn.html" title="class in org.apache.beam.sdk.transforms"><code>DoFn</code></a> using an <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.ApproximateDistinctFn.html" title="class in org.apache.beam.sdk.extensions.sketching"><code>ApproximateDistinct.ApproximateDistinctFn</code></a> as a <a href="../../../../../../org/apache/beam/sdk/state/CombiningState.html" title="interface in org.apache.beam.sdk.state"><code>CombiningState</code></a>:
<pre><code>
class StatefulCardinality&lt;V&gt; extends DoFn&lt;V, OutputT&gt; {
@StateId("hyperloglog")
private final StateSpec&lt;CombiningState&lt;V, HyperLogLogPlus, HyperLogLogPlus&gt;&gt;
indexSpec;
public StatefulCardinality(ApproximateDistinctFn&lt;V&gt; fn) {
indexSpec = StateSpecs.combining(fn);
}
@ProcessElement
public void processElement(
ProcessContext context,
@StateId("hyperloglog")
CombiningState&lt;V, HyperLogLogPlus, HyperLogLogPlus&gt; hllSketch) {
long current = MoreObjects.firstNonNull(hllSketch.getAccum().cardinality(), 0L);
hllSketch.add(context.element());
context.output(...);
}
}
</code></pre>
<p>Then the <a href="../../../../../../org/apache/beam/sdk/transforms/DoFn.html" title="class in org.apache.beam.sdk.transforms"><code>DoFn</code></a> can be called like this:
<pre><code>
PCollection&lt;V&gt; input = ...;
ApproximateDistinctFn&lt;V&gt; myFn = ApproximateDistinctFn.create(input.getCoder());
PCollection&lt;OutputT&gt; output = input.apply(ParDo.of(new StatefulCardinality&lt;&gt;(myFn)));
</code></pre>
<h4>Example 3: use the <code>RetrieveCardinality</code> utility class</h4>
<p>One may want to retrieve the cardinality as a long after performing some advanced processing using
the <code>HyperLogLogPlus</code> structure. <br>
The <code>RetrieveCardinality</code> utility class provides an easy way to do so:
<pre><code>
PCollection&lt;MyObject&gt; input = ...;
PCollection&lt;HyperLogLogPlus&gt; hll = input.apply(Combine.globally(ApproximateDistinctFn
.&lt;MyObject&gt;create(new MyObjectCoder())
.withSparseRepresentation(20)));
// Some advanced processing
PCollection&lt;SomeObject&gt; advancedResult = hll.apply(...);
PCollection&lt;Long&gt; cardinality = hll.apply(ApproximateDistinct.RetrieveCardinality.globally());
</code></pre>
<p><b>Warning: this class is experimental.</b> Its API is subject to change in future versions of
Beam. For example, it may be merged with the <a href="../../../../../../org/apache/beam/sdk/transforms/ApproximateUnique.html" title="class in org.apache.beam.sdk.transforms"><code>ApproximateUnique</code></a> transform.</div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== NESTED CLASS SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="nested.class.summary">
<!-- -->
</a>
<h3>Nested Class Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation">
<caption><span>Nested Classes</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Class and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.ApproximateDistinctFn.html" title="class in org.apache.beam.sdk.extensions.sketching">ApproximateDistinct.ApproximateDistinctFn</a>&lt;<a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.ApproximateDistinctFn.html" title="type parameter in ApproximateDistinct.ApproximateDistinctFn">InputT</a>&gt;</span></code>
<div class="block">Implements the <a href="../../../../../../org/apache/beam/sdk/transforms/Combine.CombineFn.html" title="class in org.apache.beam.sdk.transforms"><code>Combine.CombineFn</code></a> of <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html" title="class in org.apache.beam.sdk.extensions.sketching"><code>ApproximateDistinct</code></a> transforms.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.GloballyDistinct.html" title="class in org.apache.beam.sdk.extensions.sketching">ApproximateDistinct.GloballyDistinct</a>&lt;<a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.GloballyDistinct.html" title="type parameter in ApproximateDistinct.GloballyDistinct">InputT</a>&gt;</span></code>
<div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#globally--"><code>globally()</code></a>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.HyperLogLogPlusCoder.html" title="class in org.apache.beam.sdk.extensions.sketching">ApproximateDistinct.HyperLogLogPlusCoder</a></span></code>
<div class="block">Coder for <code>HyperLogLogPlus</code> class.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static class&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.PerKeyDistinct.html" title="class in org.apache.beam.sdk.extensions.sketching">ApproximateDistinct.PerKeyDistinct</a>&lt;<a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.PerKeyDistinct.html" title="type parameter in ApproximateDistinct.PerKeyDistinct">K</a>,<a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.PerKeyDistinct.html" title="type parameter in ApproximateDistinct.PerKeyDistinct">V</a>&gt;</span></code>
<div class="block">Implementation of <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#perKey--"><code>perKey()</code></a>.</div>
</td>
</tr>
</table>
</li>
</ul>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#ApproximateDistinct--">ApproximateDistinct</a></span>()</code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>static &lt;InputT&gt;&nbsp;<a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.GloballyDistinct.html" title="class in org.apache.beam.sdk.extensions.sketching">ApproximateDistinct.GloballyDistinct</a>&lt;InputT&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#globally--">globally</a></span>()</code>
<div class="block">Computes the approximate number of distinct elements in the input <code>PCollection&lt;InputT&gt;</code>
and returns a <code>PCollection&lt;Long&gt;</code>.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>static &lt;K,V&gt;&nbsp;<a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.PerKeyDistinct.html" title="class in org.apache.beam.sdk.extensions.sketching">ApproximateDistinct.PerKeyDistinct</a>&lt;K,V&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#perKey--">perKey</a></span>()</code>
<div class="block">Like <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#globally--"><code>globally()</code></a> but per key, i.e. it computes the approximate number of distinct values per
key in a <code>PCollection&lt;KV&lt;K, V&gt;&gt;</code> and returns <code>PCollection&lt;KV&lt;K, Long&gt;&gt;</code>.</div>
</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code>static long</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#precisionForRelativeError-double-">precisionForRelativeError</a></span>(double&nbsp;relativeError)</code>
<div class="block">Computes the precision based on the desired relative error.</div>
</td>
</tr>
<tr id="i3" class="rowColor">
<td class="colFirst"><code>static double</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#relativeErrorForPrecision-int-">relativeErrorForPrecision</a></span>(int&nbsp;p)</code>&nbsp;</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="ApproximateDistinct--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>ApproximateDistinct</h4>
<pre>public&nbsp;ApproximateDistinct()</pre>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="globally--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>globally</h4>
<pre>public static&nbsp;&lt;InputT&gt;&nbsp;<a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.GloballyDistinct.html" title="class in org.apache.beam.sdk.extensions.sketching">ApproximateDistinct.GloballyDistinct</a>&lt;InputT&gt;&nbsp;globally()</pre>
<div class="block">Computes the approximate number of distinct elements in the input <code>PCollection&lt;InputT&gt;</code>
and returns a <code>PCollection&lt;Long&gt;</code>.</div>
<dl>
<dt><span class="paramLabel">Type Parameters:</span></dt>
<dd><code>InputT</code> - the type of the elements in the input <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a></dd>
</dl>
</li>
</ul>
<a name="perKey--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>perKey</h4>
<pre>public static&nbsp;&lt;K,V&gt;&nbsp;<a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.PerKeyDistinct.html" title="class in org.apache.beam.sdk.extensions.sketching">ApproximateDistinct.PerKeyDistinct</a>&lt;K,V&gt;&nbsp;perKey()</pre>
<div class="block">Like <a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html#globally--"><code>globally()</code></a> but per key, i.e. it computes the approximate number of distinct values per
key in a <code>PCollection&lt;KV&lt;K, V&gt;&gt;</code> and returns <code>PCollection&lt;KV&lt;K, Long&gt;&gt;</code>.</div>
<dl>
<dt><span class="paramLabel">Type Parameters:</span></dt>
<dd><code>K</code> - type of the keys mapping the elements</dd>
<dd><code>V</code> - type of the values being combined per key</dd>
</dl>
</li>
</ul>
<a name="precisionForRelativeError-double-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>precisionForRelativeError</h4>
<pre>public static&nbsp;long&nbsp;precisionForRelativeError(double&nbsp;relativeError)</pre>
<div class="block">Computes the precision based on the desired relative error.
<p>According to the paper, the mean squared error is bounded by the following formula:
<pre>b(m) / sqrt(m)
Where m is the number of buckets used (<code> p = log2(m)</code>)
and <code> b(m) &lt; 1.106</code> for <code> m &gt; 16 (and p &gt; 4)</code>.
</pre>
<br>
<b>WARNING:</b> <br>
This does not mean relative error in the estimation <b>can't</b> be higher. <br>
This only means that on average the relative error will be lower than the desired relative
error. <br>
Nevertheless, the more elements arrive in the <a href="../../../../../../org/apache/beam/sdk/values/PCollection.html" title="class in org.apache.beam.sdk.values"><code>PCollection</code></a>, the lower the variation will
be. <br>
Indeed, this is like rolling a die millions of times: the relative frequency of each
different result <code>{1,2,3,4,5,6}</code> will get closer to <code>1/6</code>.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>relativeError</code> - the mean squared error should be in the interval ]0,1]</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>the minimum precision p in order to have the desired relative error on average.</dd>
</dl>
</li>
</ul>
<a name="relativeErrorForPrecision-int-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>relativeErrorForPrecision</h4>
<pre>public static&nbsp;double&nbsp;relativeErrorForPrecision(int&nbsp;p)</pre>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>p</code> - the precision i.e. the number of bits used for indexing the buckets</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>the mean squared error of the cardinality estimate to expect for the given value
of p.</dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Class</li>
<li><a href="../../../../../../org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.ApproximateDistinctFn.html" title="class in org.apache.beam.sdk.extensions.sketching"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../index.html?org/apache/beam/sdk/extensions/sketching/ApproximateDistinct.html" target="_top">Frames</a></li>
<li><a href="ApproximateDistinct.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the bottom "All Classes" list only when this page is the topmost
// window; hide it otherwise (i.e. when the page is displayed inside a frame).
allClassesLink = document.getElementById("allclasses_navbar_bottom");
allClassesLink.style.display = (window==top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</body>
</html>