<!-- blob: 5bfea5e82b05683e7739816766f6dee13eb5122c [file] [log] [blame] -->
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>apache_beam.transforms.stats module &mdash; Apache Beam documentation</title>
<script type="text/javascript" src="_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/language_data.js"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="apache_beam.transforms.timeutil module" href="apache_beam.transforms.timeutil.html" />
<link rel="prev" title="apache_beam.transforms.sideinputs module" href="apache_beam.transforms.sideinputs.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> Apache Beam
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.internal.html">apache_beam.internal package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.testing.html">apache_beam.testing package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.tools.html">apache_beam.tools package</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="apache_beam.transforms.html#submodules">Submodules</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.combiners.html">apache_beam.transforms.combiners module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.core.html">apache_beam.transforms.core module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.create_source.html">apache_beam.transforms.create_source module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.deduplicate.html">apache_beam.transforms.deduplicate module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.display.html">apache_beam.transforms.display module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.environments.html">apache_beam.transforms.environments module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.external.html">apache_beam.transforms.external module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.external_java.html">apache_beam.transforms.external_java module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.external_test_py3.html">apache_beam.transforms.external_test_py3 module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.external_test_py37.html">apache_beam.transforms.external_test_py37 module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.ptransform.html">apache_beam.transforms.ptransform module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.sideinputs.html">apache_beam.transforms.sideinputs module</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="#">apache_beam.transforms.stats module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.timeutil.html">apache_beam.transforms.timeutil module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.transforms_keyword_only_args_test_py3.html">apache_beam.transforms.transforms_keyword_only_args_test_py3 module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.trigger.html">apache_beam.transforms.trigger module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.userstate.html">apache_beam.transforms.userstate module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.util.html">apache_beam.transforms.util module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.transforms.window.html">apache_beam.transforms.window module</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.version.html">apache_beam.version module</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Apache Beam</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> &raquo;</li>
<li><a href="apache_beam.transforms.html">apache_beam.transforms package</a> &raquo;</li>
<li>apache_beam.transforms.stats module</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/apache_beam.transforms.stats.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="module-apache_beam.transforms.stats">
<span id="apache-beam-transforms-stats-module"></span><h1>apache_beam.transforms.stats module<a class="headerlink" href="#module-apache_beam.transforms.stats" title="Permalink to this headline"></a></h1>
<p>This module contains all statistics-related transforms.</p>
<dl class="class">
<dt id="apache_beam.transforms.stats.ApproximateUnique">
<em class="property">class </em><code class="descclassname">apache_beam.transforms.stats.</code><code class="descname">ApproximateUnique</code><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateUnique"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateUnique" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.8)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>Hashes input elements and uses those to extrapolate the size of the entire
set of hash values by assuming the rest of the hash values are as densely
distributed as the sample space.</p>
<dl class="staticmethod">
<dt id="apache_beam.transforms.stats.ApproximateUnique.parse_input_params">
<em class="property">static </em><code class="descname">parse_input_params</code><span class="sig-paren">(</span><em>size=None</em>, <em>error=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateUnique.parse_input_params"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateUnique.parse_input_params" title="Permalink to this definition"></a></dt>
<dd><p>Check if input params are valid and return sample size.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>size</strong> – an int not smaller than 16, which is used to estimate the
number of unique values.</li>
<li><strong>error</strong> – max estimation error, which is a float between 0.01 and 0.50.
If error is given, the sample size will be calculated from the error with the
_get_sample_size_from_est_error function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">sample size</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last">ValueError: If both size and error are given, or neither is given, or
values are out of range.</p>
</td>
</tr>
</tbody>
</table>
</dd></dl>
<dl class="class">
<dt id="apache_beam.transforms.stats.ApproximateUnique.Globally">
<em class="property">class </em><code class="descname">Globally</code><span class="sig-paren">(</span><em>size=None</em>, <em>error=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateUnique.Globally"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateUnique.Globally" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>ApproximateUnique.Globally approximates the number of unique values.</p>
<dl class="method">
<dt id="apache_beam.transforms.stats.ApproximateUnique.Globally.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateUnique.Globally.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateUnique.Globally.expand" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.transforms.stats.ApproximateUnique.PerKey">
<em class="property">class </em><code class="descname">PerKey</code><span class="sig-paren">(</span><em>size=None</em>, <em>error=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateUnique.PerKey"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateUnique.PerKey" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>ApproximateUnique.PerKey approximates the number of unique values per key.</p>
<dl class="method">
<dt id="apache_beam.transforms.stats.ApproximateUnique.PerKey.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateUnique.PerKey.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateUnique.PerKey.expand" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.transforms.stats.ApproximateQuantiles">
<em class="property">class </em><code class="descclassname">apache_beam.transforms.stats.</code><code class="descname">ApproximateQuantiles</code><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateQuantiles"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateQuantiles" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.8)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
<p>PTransforms for getting an idea of the data distribution using approximate N-tiles
(e.g. quartiles, percentiles, etc.), either globally or per-key.</p>
<dl class="class">
<dt id="apache_beam.transforms.stats.ApproximateQuantiles.Globally">
<em class="property">class </em><code class="descname">Globally</code><span class="sig-paren">(</span><em>num_quantiles</em>, <em>key=None</em>, <em>reverse=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateQuantiles.Globally"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateQuantiles.Globally" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>PTransform that takes a PCollection and returns a list whose single value is
the approximate N-tiles of the input collection, computed globally.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>num_quantiles</strong> – number of elements in the resulting quantiles values list.</li>
<li><strong>key</strong> – (optional) Key is a mapping of elements to a comparable key, similar
to the key argument of Python’s sorting methods.</li>
<li><strong>reverse</strong> – (optional) whether to order things largest to smallest, rather
than smallest to largest.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="apache_beam.transforms.stats.ApproximateQuantiles.Globally.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateQuantiles.Globally.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateQuantiles.Globally.expand" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.transforms.stats.ApproximateQuantiles.Globally.display_data">
<code class="descname">display_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateQuantiles.Globally.display_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateQuantiles.Globally.display_data" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.transforms.stats.ApproximateQuantiles.PerKey">
<em class="property">class </em><code class="descname">PerKey</code><span class="sig-paren">(</span><em>num_quantiles</em>, <em>key=None</em>, <em>reverse=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateQuantiles.PerKey"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateQuantiles.PerKey" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform" title="apache_beam.transforms.ptransform.PTransform"><code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.transforms.ptransform.PTransform</span></code></a></p>
<p>PTransform that takes a PCollection of KVs and returns a list for each key
whose single value is the list of approximate N-tiles of the input elements
for that key.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>num_quantiles</strong> – number of elements in the resulting quantiles values list.</li>
<li><strong>key</strong> – (optional) Key is a mapping of elements to a comparable key, similar
to the key argument of Python’s sorting methods.</li>
<li><strong>reverse</strong> – (optional) whether to order things largest to smallest, rather
than smallest to largest.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<dl class="method">
<dt id="apache_beam.transforms.stats.ApproximateQuantiles.PerKey.expand">
<code class="descname">expand</code><span class="sig-paren">(</span><em>pcoll</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateQuantiles.PerKey.expand"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateQuantiles.PerKey.expand" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.transforms.stats.ApproximateQuantiles.PerKey.display_data">
<code class="descname">display_data</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/transforms/stats.html#ApproximateQuantiles.PerKey.display_data"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.transforms.stats.ApproximateQuantiles.PerKey.display_data" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</dd></dl>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="apache_beam.transforms.timeutil.html" class="btn btn-neutral float-right" title="apache_beam.transforms.timeutil module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="apache_beam.transforms.sideinputs.html" class="btn btn-neutral float-left" title="apache_beam.transforms.sideinputs module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
// Wait for the DOM to be ready, then switch on the theme's sidebar navigation.
// NOTE(review): the `true` argument is handed straight to Navigation.enable;
// it presumably turns on sticky navigation -- confirm against _static/js/theme.js.
jQuery(document).ready(function () {
  SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>