| <!DOCTYPE html> |
| <html class="writer-html5" lang="en" > |
| <head> |
| <meta charset="utf-8" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" /> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| <title>Frequency Estimation — datasketches 0.1 documentation</title> |
| <link rel="stylesheet" href="_static/pygments.css" type="text/css" /> |
| <link rel="stylesheet" href="_static/css/theme.css" type="text/css" /> |
| <!--[if lt IE 9]> |
| <script src="_static/js/html5shiv.min.js"></script> |
| <![endif]--> |
| |
| <script src="_static/jquery.js"></script> |
| <script src="_static/_sphinx_javascript_frameworks_compat.js"></script> |
| <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script> |
| <script src="_static/doctools.js"></script> |
| <script src="_static/sphinx_highlight.js"></script> |
| <script src="_static/js/theme.js"></script> |
| <link rel="index" title="Index" href="genindex.html" /> |
| <link rel="search" title="Search" href="search.html" /> |
| </head> |
| |
| <body class="wy-body-for-nav"> |
| <div class="wy-grid-for-nav"> |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search" > |
| |
| |
| |
| <a href="index.html" class="icon icon-home"> |
| datasketches |
| </a> |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" /> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu"> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="hyper_log_log.html">HyperLogLog</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="cpc.html">Compressed Probabilistic Counting (CPC)</a></li> |
| <li class="toctree-l1"><a class="reference internal" href="theta.html">Theta Sketch</a></li> |
| </ul> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="kll.html">KLL Sketch</a></li> |
| </ul> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="frequent_items.html">Frequent Items</a></li> |
| </ul> |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" > |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="index.html">datasketches</a> |
| </nav> |
| |
| <div class="wy-nav-content"> |
| <div class="rst-content"> |
| <div role="navigation" aria-label="Page navigation"> |
| <ul class="wy-breadcrumbs"> |
| <li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li> |
| <li class="breadcrumb-item active">Frequency Estimation</li> |
| <li class="wy-breadcrumbs-aside"> |
| <a href="_sources/count_min.rst.txt" rel="nofollow"> View page source</a> |
| </li> |
| </ul> |
| <hr/> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <section id="frequency-estimation"> |
| <h1>Frequency Estimation<a class="headerlink" href="#frequency-estimation" title="Permalink to this heading"></a></h1> |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch"> |
| <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">_datasketches.</span></span><span class="sig-name descname"><span class="pre">count_min_sketch</span></span><a class="headerlink" href="#datasketches.count_min_sketch" title="Permalink to this definition"></a></dt> |
| <dd><dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.deserialize"> |
| <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">deserialize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">bytes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></span><a class="headerlink" href="#datasketches.count_min_sketch.deserialize" title="Permalink to this definition"></a></dt> |
| <dd><p>Reads a bytes object and returns the corresponding count_min_sketch</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.get_estimate"> |
| <span class="sig-name descname"><span class="pre">get_estimate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datasketches.count_min_sketch.get_estimate" title="Permalink to this definition"></a></dt> |
| <dd><p>Overloaded function.</p> |
| <ol class="arabic simple"> |
| <li><p>get_estimate(self: _datasketches.count_min_sketch, item: int) -> float</p></li> |
| </ol> |
| <p>Returns an estimate of the frequency of the provided 64-bit integer value</p> |
| <ol class="arabic simple" start="2"> |
| <li><p>get_estimate(self: _datasketches.count_min_sketch, item: str) -> float</p></li> |
| </ol> |
| <p>Returns an estimate of the frequency of the provided string</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.get_lower_bound"> |
| <span class="sig-name descname"><span class="pre">get_lower_bound</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datasketches.count_min_sketch.get_lower_bound" title="Permalink to this definition"></a></dt> |
| <dd><p>Overloaded function.</p> |
| <ol class="arabic simple"> |
| <li><p>get_lower_bound(self: _datasketches.count_min_sketch, item: int) -> float</p></li> |
| </ol> |
| <p>Returns a lower bound on the estimate for the given 64-bit integer value</p> |
| <ol class="arabic simple" start="2"> |
| <li><p>get_lower_bound(self: _datasketches.count_min_sketch, item: str) -> float</p></li> |
| </ol> |
| <p>Returns a lower bound on the estimate for the provided string</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.get_num_buckets"> |
| <span class="sig-name descname"><span class="pre">get_num_buckets</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">int</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.get_num_buckets" title="Permalink to this definition"></a></dt> |
| <dd><p>Returns the configured number of buckets for the sketch</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.get_num_hashes"> |
| <span class="sig-name descname"><span class="pre">get_num_hashes</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">int</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.get_num_hashes" title="Permalink to this definition"></a></dt> |
| <dd><p>Returns the configured number of hashes for the sketch</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.get_relative_error"> |
| <span class="sig-name descname"><span class="pre">get_relative_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">float</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.get_relative_error" title="Permalink to this definition"></a></dt> |
| <dd><p>Returns the maximum permissible error for any frequency estimate query</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.get_seed"> |
| <span class="sig-name descname"><span class="pre">get_seed</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">int</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.get_seed" title="Permalink to this definition"></a></dt> |
| <dd><p>Returns the base hash seed for the sketch</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.get_serialized_size_bytes"> |
| <span class="sig-name descname"><span class="pre">get_serialized_size_bytes</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">int</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.get_serialized_size_bytes" title="Permalink to this definition"></a></dt> |
| <dd><p>Returns the size in bytes of the serialized image of the sketch</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.get_total_weight"> |
| <span class="sig-name descname"><span class="pre">get_total_weight</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">float</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.get_total_weight" title="Permalink to this definition"></a></dt> |
| <dd><p>Returns the total weight currently inserted into the sketch</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.get_upper_bound"> |
| <span class="sig-name descname"><span class="pre">get_upper_bound</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datasketches.count_min_sketch.get_upper_bound" title="Permalink to this definition"></a></dt> |
| <dd><p>Overloaded function.</p> |
| <ol class="arabic simple"> |
| <li><p>get_upper_bound(self: _datasketches.count_min_sketch, item: int) -> float</p></li> |
| </ol> |
| <p>Returns an upper bound on the estimate for the given 64-bit integer value</p> |
| <ol class="arabic simple" start="2"> |
| <li><p>get_upper_bound(self: _datasketches.count_min_sketch, item: str) -> float</p></li> |
| </ol> |
| <p>Returns an upper bound on the estimate for the provided string</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.is_empty"> |
| <span class="sig-name descname"><span class="pre">is_empty</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">bool</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.is_empty" title="Permalink to this definition"></a></dt> |
| <dd><p>Returns True if the sketch has seen no items, otherwise False</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.merge"> |
| <span class="sig-name descname"><span class="pre">merge</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">other</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.merge" title="Permalink to this definition"></a></dt> |
| <dd><p>Merges the provided other sketch into this one</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.serialize"> |
| <span class="sig-name descname"><span class="pre">serialize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">bytes</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.serialize" title="Permalink to this definition"></a></dt> |
| <dd><p>Serializes the sketch into a bytes object</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.suggest_num_buckets"> |
| <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">suggest_num_buckets</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">relative_error</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">int</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.suggest_num_buckets" title="Permalink to this definition"></a></dt> |
| <dd><p>Suggests the number of buckets needed to achieve an accuracy within the provided relative_error. For example, when relative_error = 0.05, the returned frequency estimates satisfy the ‘relative_error’ guarantee: they never underestimate the weights but may overestimate the weights by up to 5% of the total weight in the sketch. Returns the number of hash buckets at every level of the sketch required in order to obtain the specified relative error.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.suggest_num_hashes"> |
| <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">suggest_num_hashes</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">confidence</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">int</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.suggest_num_hashes" title="Permalink to this definition"></a></dt> |
| <dd><p>Suggests the number of hashes needed to achieve the provided confidence. For example, with 95% confidence, frequency estimates satisfy the ‘relative_error’ guarantee. Returns the number of hash functions that are required in order to achieve the specified confidence of the sketch. confidence = 1 - delta, with delta denoting the sketch failure probability.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.to_string"> |
| <span class="sig-name descname"><span class="pre">to_string</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#datasketches.count_min_sketch" title="_datasketches.count_min_sketch"><span class="pre">_datasketches.count_min_sketch</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="headerlink" href="#datasketches.count_min_sketch.to_string" title="Permalink to this definition"></a></dt> |
| <dd><p>Produces a string summary of the sketch</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.count_min_sketch.update"> |
| <span class="sig-name descname"><span class="pre">update</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datasketches.count_min_sketch.update" title="Permalink to this definition"></a></dt> |
| <dd><p>Overloaded function.</p> |
| <ol class="arabic simple"> |
| <li><p>update(self: _datasketches.count_min_sketch, item: int, weight: float = 1.0) -> None</p></li> |
| </ol> |
| <p>Updates the sketch with the given 64-bit integer value</p> |
| <ol class="arabic simple" start="2"> |
| <li><p>update(self: _datasketches.count_min_sketch, item: str, weight: float = 1.0) -> None</p></li> |
| </ol> |
| <p>Updates the sketch with the given string</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </section> |
| |
| |
| </div> |
| </div> |
| <footer> |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p>© Copyright 2023, charlie.</p> |
| </div> |
| |
| Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a |
| <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a> |
| provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| |
| |
| </footer> |
| </div> |
| </div> |
| </section> |
| </div> |
| <script> |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| </body> |
| </html> |