| |
| |
| <!DOCTYPE html> |
| <html class="writer-html5" lang="en" data-content_root="../"> |
| <head> |
| <meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" /> |
| |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| <title>Variance Optimal Sampling (VarOpt) — datasketches 0.1 documentation</title> |
| <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=b86133f3" /> |
| <link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=e59714d7" /> |
| |
| |
| <script src="../_static/jquery.js?v=5d32c60e"></script> |
| <script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script> |
| <script src="../_static/documentation_options.js?v=2709fde1"></script> |
| <script src="../_static/doctools.js?v=9bcbadda"></script> |
| <script src="../_static/sphinx_highlight.js?v=dc90522c"></script> |
| <script src="../_static/js/theme.js"></script> |
| <link rel="index" title="Index" href="../genindex.html" /> |
| <link rel="search" title="Search" href="../search.html" /> |
| <link rel="next" title="Exact and Bounded, Probabilitiy Proportional to Size (EBPPS) Sampling" href="ebpps.html" /> |
| <link rel="prev" title="Random Sampling Sketches" href="index.html" /> |
| </head> |
| |
| <body class="wy-body-for-nav"> |
| <div class="wy-grid-for-nav"> |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side"> |
| <div class="wy-side-scroll"> |
| <div class="wy-side-nav-search" > |
| |
| |
| |
| <a href="../index.html" class="icon icon-home"> |
| datasketches |
| </a> |
| <div role="search"> |
| <form id="rtd-search-form" class="wy-form" action="../search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" /> |
| <input type="hidden" name="check_keywords" value="yes" /> |
| <input type="hidden" name="area" value="default" /> |
| </form> |
| </div> |
| </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu"> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="../distinct_counting/index.html">Distinct Counting</a></li> |
| </ul> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="../quantiles/index.html">Quantiles Sketches</a></li> |
| </ul> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="../frequency/index.html">Frequency Sketches</a></li> |
| </ul> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="../vector/index.html">Vector Sketches</a></li> |
| </ul> |
| <ul class="current"> |
| <li class="toctree-l1 current"><a class="reference internal" href="index.html">Random Sampling Sketches</a><ul class="current"> |
| <li class="toctree-l2 current"><a class="current reference internal" href="#">Variance Optimal Sampling (VarOpt)</a><ul> |
| <li class="toctree-l3"><a class="reference internal" href="#datasketches.var_opt_sketch"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch</span></code></a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.deserialize"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.deserialize()</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.__init__"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.__init__()</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.estimate_subset_sum"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.estimate_subset_sum</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.get_serialized_size_bytes"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.get_serialized_size_bytes</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.is_empty"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.is_empty</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.k"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.k</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.n"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.n</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.num_samples"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.num_samples</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.serialize"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.serialize</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.to_string"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.to_string</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_sketch.update"><code class="docutils literal notranslate"><span class="pre">var_opt_sketch.update</span></code></a></li> |
| </ul> |
| </li> |
| <li class="toctree-l3"><a class="reference internal" href="#datasketches.var_opt_union"><code class="docutils literal notranslate"><span class="pre">var_opt_union</span></code></a><ul> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_union.deserialize"><code class="docutils literal notranslate"><span class="pre">var_opt_union.deserialize()</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_union.__init__"><code class="docutils literal notranslate"><span class="pre">var_opt_union.__init__()</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_union.get_result"><code class="docutils literal notranslate"><span class="pre">var_opt_union.get_result</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_union.get_serialized_size_bytes"><code class="docutils literal notranslate"><span class="pre">var_opt_union.get_serialized_size_bytes</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_union.reset"><code class="docutils literal notranslate"><span class="pre">var_opt_union.reset</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_union.serialize"><code class="docutils literal notranslate"><span class="pre">var_opt_union.serialize</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_union.to_string"><code class="docutils literal notranslate"><span class="pre">var_opt_union.to_string</span></code></a></li> |
| <li class="toctree-l4"><a class="reference internal" href="#datasketches.var_opt_union.update"><code class="docutils literal notranslate"><span class="pre">var_opt_union.update</span></code></a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li class="toctree-l2"><a class="reference internal" href="ebpps.html">Exact and Bounded, Probabilitiy Proportional to Size (EBPPS) Sampling</a></li> |
| </ul> |
| </li> |
| </ul> |
| <ul> |
| <li class="toctree-l1"><a class="reference internal" href="../helper/index.html">Helper Classes</a></li> |
| </ul> |
| |
| </div> |
| </div> |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" > |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="../index.html">datasketches</a> |
| </nav> |
| |
| <div class="wy-nav-content"> |
| <div class="rst-content"> |
| <div role="navigation" aria-label="Page navigation"> |
| <ul class="wy-breadcrumbs"> |
| <li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li> |
| <li class="breadcrumb-item"><a href="index.html">Random Sampling Sketches</a></li> |
| <li class="breadcrumb-item active">Variance Optimal Sampling (VarOpt)</li> |
| <li class="wy-breadcrumbs-aside"> |
| <a href="../_sources/sampling/varopt.rst.txt" rel="nofollow"> View page source</a> |
| </li> |
| </ul> |
| <hr/> |
| </div> |
| <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> |
| <div itemprop="articleBody"> |
| |
| <section id="variance-optimal-sampling-varopt"> |
| <h1>Variance Optimal Sampling (VarOpt)<a class="headerlink" href="#variance-optimal-sampling-varopt" title="Link to this heading"></a></h1> |
| <p>A VarOpt sketch samples data from a stream of items. The sketch is desinged for optimal (minimum) |
| variance when querying the sketch to estimate subset sums of items matching a provided predicate. |
| The sketch will produce a sample of size <cite>k</cite> (or smaller if fewer items have been presented), with |
| the probability of including an item roughly corresponding it the item’s weight relative to the total |
| weight of all items presented to the sketch.</p> |
| <p>VarOpt sampling is related to reservoir sampling, with improved error bounds for subset sum estimation. |
| Feeding the sketch items with a uniform weight value will produce a sample equivalent to reservoir sampling.</p> |
| <div class="admonition note"> |
| <p class="admonition-title">Note</p> |
| <p>Serializing and deserializing this sketch requires the use of a <a class="reference internal" href="../helper/serde.html#datasketches.PyObjectSerDe" title="datasketches.PyObjectSerDe"><code class="xref py py-class docutils literal notranslate"><span class="pre">PyObjectSerDe</span></code></a>.</p> |
| </div> |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch"> |
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">var_opt_sketch</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datasketches.var_opt_sketch" title="Link to this definition"></a></dt> |
| <dd><p class="rubric">Static Methods:</p> |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.deserialize"> |
| <span class="sig-name descname"><span class="pre">deserialize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">bytes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bytes</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">serde</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../helper/serde.html#datasketches.PyObjectSerDe" title="_datasketches.PyObjectSerDe"><span class="pre">_datasketches.PyObjectSerDe</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datasketches.var_opt_sketch" title="_datasketches.var_opt_sketch"><span class="pre">_datasketches.var_opt_sketch</span></a></span></span><a class="headerlink" href="#datasketches.var_opt_sketch.deserialize" title="Link to this definition"></a></dt> |
| <dd><p>Reads a bytes object and returns the corresponding var opt sketch</p> |
| </dd></dl> |
| |
| <p class="rubric">Non-static Methods:</p> |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.__init__"> |
| <span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datasketches.var_opt_sketch.__init__" title="Link to this definition"></a></dt> |
| <dd><p>Creates a new Var Opt sketch instance</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters<span class="colon">:</span></dt> |
| <dd class="field-odd"><p><strong>k</strong> (<em>int</em>) – Maximum number of samples in the sketch</p> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.estimate_subset_sum"> |
| <span class="sig-name descname"><span class="pre">estimate_subset_sum</span></span><a class="headerlink" href="#datasketches.var_opt_sketch.estimate_subset_sum" title="Link to this definition"></a></dt> |
| <dd><p>Applies a provided predicate to the sketch and returns the estimated total weight matching the predicate, as well as upper and lower bounds on the estimate and the total weight processed by the sketch</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.get_serialized_size_bytes"> |
| <span class="sig-name descname"><span class="pre">get_serialized_size_bytes</span></span><a class="headerlink" href="#datasketches.var_opt_sketch.get_serialized_size_bytes" title="Link to this definition"></a></dt> |
| <dd><p>Computes the size in bytes needed to serialize the current sketch</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.is_empty"> |
| <span class="sig-name descname"><span class="pre">is_empty</span></span><a class="headerlink" href="#datasketches.var_opt_sketch.is_empty" title="Link to this definition"></a></dt> |
| <dd><p>Returns True if the sketch is empty, otherwise False</p> |
| </dd></dl> |
| |
| <dl class="py property"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.k"> |
| <em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">k</span></span><a class="headerlink" href="#datasketches.var_opt_sketch.k" title="Link to this definition"></a></dt> |
| <dd><p>Returns the sketch’s maximum configured sample size</p> |
| </dd></dl> |
| |
| <dl class="py property"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.n"> |
| <em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n</span></span><a class="headerlink" href="#datasketches.var_opt_sketch.n" title="Link to this definition"></a></dt> |
| <dd><p>Returns the total stream length</p> |
| </dd></dl> |
| |
| <dl class="py property"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.num_samples"> |
| <em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">num_samples</span></span><a class="headerlink" href="#datasketches.var_opt_sketch.num_samples" title="Link to this definition"></a></dt> |
| <dd><p>Returns the number of samples currently in the sketch</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.serialize"> |
| <span class="sig-name descname"><span class="pre">serialize</span></span><a class="headerlink" href="#datasketches.var_opt_sketch.serialize" title="Link to this definition"></a></dt> |
| <dd><p>Serializes the sketch into a bytes object</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.to_string"> |
| <span class="sig-name descname"><span class="pre">to_string</span></span><a class="headerlink" href="#datasketches.var_opt_sketch.to_string" title="Link to this definition"></a></dt> |
| <dd><p>Produces a string summary of the sketch and optionally prints the items</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_sketch.update"> |
| <span class="sig-name descname"><span class="pre">update</span></span><a class="headerlink" href="#datasketches.var_opt_sketch.update" title="Link to this definition"></a></dt> |
| <dd><p>Updates the sketch with the given value and weight</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| <dl class="py class"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_union"> |
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">var_opt_union</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datasketches.var_opt_union" title="Link to this definition"></a></dt> |
| <dd><p class="rubric">Static Methods:</p> |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_union.deserialize"> |
| <span class="sig-name descname"><span class="pre">deserialize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">bytes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bytes</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">serde</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../helper/serde.html#datasketches.PyObjectSerDe" title="_datasketches.PyObjectSerDe"><span class="pre">_datasketches.PyObjectSerDe</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="#datasketches.var_opt_union" title="_datasketches.var_opt_union"><span class="pre">_datasketches.var_opt_union</span></a></span></span><a class="headerlink" href="#datasketches.var_opt_union.deserialize" title="Link to this definition"></a></dt> |
| <dd><p>Constructs a var opt union from the given bytes using the provided serde</p> |
| </dd></dl> |
| |
| <p class="rubric">Non-static Methods:</p> |
| <dl class="py method"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_union.__init__"> |
| <span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datasketches.var_opt_union.__init__" title="Link to this definition"></a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_union.get_result"> |
| <span class="sig-name descname"><span class="pre">get_result</span></span><a class="headerlink" href="#datasketches.var_opt_union.get_result" title="Link to this definition"></a></dt> |
| <dd><p>Returns a sketch corresponding to the union result</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_union.get_serialized_size_bytes"> |
| <span class="sig-name descname"><span class="pre">get_serialized_size_bytes</span></span><a class="headerlink" href="#datasketches.var_opt_union.get_serialized_size_bytes" title="Link to this definition"></a></dt> |
| <dd><p>Computes the size in bytes needed to serialize the current union</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_union.reset"> |
| <span class="sig-name descname"><span class="pre">reset</span></span><a class="headerlink" href="#datasketches.var_opt_union.reset" title="Link to this definition"></a></dt> |
| <dd><p>Resets the union to the empty state</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_union.serialize"> |
| <span class="sig-name descname"><span class="pre">serialize</span></span><a class="headerlink" href="#datasketches.var_opt_union.serialize" title="Link to this definition"></a></dt> |
| <dd><p>Serializes the union into a bytes object with the provided serde</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_union.to_string"> |
| <span class="sig-name descname"><span class="pre">to_string</span></span><a class="headerlink" href="#datasketches.var_opt_union.to_string" title="Link to this definition"></a></dt> |
| <dd><p>Produces a string summary of the sketch</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt class="sig sig-object py" id="datasketches.var_opt_union.update"> |
| <span class="sig-name descname"><span class="pre">update</span></span><a class="headerlink" href="#datasketches.var_opt_union.update" title="Link to this definition"></a></dt> |
| <dd><p>Updates the union with the given sketch</p> |
| </dd></dl> |
| |
| </dd></dl> |
| |
| </section> |
| |
| |
| </div> |
| </div> |
| <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer"> |
| <a href="index.html" class="btn btn-neutral float-left" title="Random Sampling Sketches" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a> |
| <a href="ebpps.html" class="btn btn-neutral float-right" title="Exact and Bounded, Probabilitiy Proportional to Size (EBPPS) Sampling" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a> |
| </div> |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <p>© Copyright 2023.</p> |
| </div> |
| |
| Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a |
| <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a> |
| provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| |
| |
| </footer> |
| </div> |
| </div> |
| </section> |
| </div> |
| <script> |
| jQuery(function () { |
| SphinxRtdTheme.Navigation.enable(true); |
| }); |
| </script> |
| |
| </body> |
| </html> |