blob: d8867ed764e21c7dbc80d0074700496c45e2978d [file] [log] [blame]
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>KLL Sketch &mdash; datasketches 0.1 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=b86133f3" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=e59714d7" />
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=2709fde1"></script>
<script src="../_static/doctools.js?v=9bcbadda"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Relative Error Quantiles (REQ) Sketch" href="req.html" />
<link rel="prev" title="Quantiles Sketches" href="index.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
datasketches
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../distinct_counting/index.html">Distinct Counting</a></li>
</ul>
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="index.html">Quantiles Sketches</a><ul class="current">
<li class="toctree-l2 current"><a class="current reference internal" href="#">KLL Sketch</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#datasketches.kll_ints_sketch"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch</span></code></a><ul>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.deserialize"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.deserialize()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.get_normalized_rank_error"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.get_normalized_rank_error()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.__init__"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.__init__()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.get_cdf"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.get_cdf</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.get_max_value"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.get_max_value</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.get_min_value"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.get_min_value</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.get_pmf"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.get_pmf</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.get_quantile"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.get_quantile</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.get_quantiles"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.get_quantiles</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.get_rank"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.get_rank</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.is_empty"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.is_empty</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.is_estimation_mode"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.is_estimation_mode</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.k"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.k</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.merge"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.merge</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.n"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.n</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.normalized_rank_error"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.normalized_rank_error</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.num_retained"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.num_retained</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.serialize"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.serialize</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.to_string"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.to_string</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_ints_sketch.update"><code class="docutils literal notranslate"><span class="pre">kll_ints_sketch.update</span></code></a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#datasketches.kll_floats_sketch"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch</span></code></a><ul>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.deserialize"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.deserialize()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.get_normalized_rank_error"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.get_normalized_rank_error()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.__init__"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.__init__()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.get_cdf"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.get_cdf</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.get_max_value"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.get_max_value</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.get_min_value"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.get_min_value</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.get_pmf"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.get_pmf</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.get_quantile"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.get_quantile</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.get_quantiles"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.get_quantiles</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.get_rank"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.get_rank</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.is_empty"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.is_empty</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.is_estimation_mode"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.is_estimation_mode</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.k"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.k</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.merge"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.merge</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.n"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.n</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.normalized_rank_error"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.normalized_rank_error</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.num_retained"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.num_retained</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.serialize"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.serialize</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.to_string"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.to_string</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_floats_sketch.update"><code class="docutils literal notranslate"><span class="pre">kll_floats_sketch.update</span></code></a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#datasketches.kll_doubles_sketch"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch</span></code></a><ul>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.deserialize"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.deserialize()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.get_normalized_rank_error"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.get_normalized_rank_error()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.__init__"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.__init__()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.get_cdf"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.get_cdf</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.get_max_value"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.get_max_value</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.get_min_value"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.get_min_value</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.get_pmf"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.get_pmf</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.get_quantile"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.get_quantile</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.get_quantiles"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.get_quantiles</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.get_rank"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.get_rank</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.is_empty"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.is_empty</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.is_estimation_mode"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.is_estimation_mode</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.k"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.k</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.merge"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.merge</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.n"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.n</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.normalized_rank_error"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.normalized_rank_error</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.num_retained"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.num_retained</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.serialize"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.serialize</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.to_string"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.to_string</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_doubles_sketch.update"><code class="docutils literal notranslate"><span class="pre">kll_doubles_sketch.update</span></code></a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#datasketches.kll_items_sketch"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch</span></code></a><ul>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.deserialize"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.deserialize()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.get_normalized_rank_error"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.get_normalized_rank_error()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.__init__"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.__init__()</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.get_cdf"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.get_cdf</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.get_max_value"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.get_max_value</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.get_min_value"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.get_min_value</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.get_pmf"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.get_pmf</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.get_quantile"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.get_quantile</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.get_quantiles"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.get_quantiles</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.get_rank"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.get_rank</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.is_empty"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.is_empty</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.is_estimation_mode"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.is_estimation_mode</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.k"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.k</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.merge"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.merge</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.n"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.n</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.normalized_rank_error"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.normalized_rank_error</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.num_retained"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.num_retained</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.serialize"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.serialize</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.to_string"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.to_string</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="#datasketches.kll_items_sketch.update"><code class="docutils literal notranslate"><span class="pre">kll_items_sketch.update</span></code></a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="req.html">Relative Error Quantiles (REQ) Sketch</a></li>
<li class="toctree-l2"><a class="reference internal" href="tdigest.html">t-digest</a></li>
<li class="toctree-l2"><a class="reference internal" href="quantiles_depr.html">Quantiles Sketch (Deprecated)</a></li>
</ul>
</li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../frequency/index.html">Frequency Sketches</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../vector/index.html">Vector Sketches</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../sampling/index.html">Random Sampling Sketches</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../helper/index.html">Helper Classes</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">datasketches</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="index.html">Quantiles Sketches</a></li>
<li class="breadcrumb-item active">KLL Sketch</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/quantiles/kll.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="kll-sketch">
<h1>KLL Sketch<a class="headerlink" href="#kll-sketch" title="Link to this heading"></a></h1>
<p>Implementation of a very compact quantiles sketch with lazy compaction scheme
and nearly optimal accuracy per retained item.
See <cite>Optimal Quantile Approximation in Streams</cite>.</p>
<p>This is a stochastic streaming sketch that enables near real-time analysis of the
approximate distribution of items from a very large stream in a single pass, requiring only
that the items are comparable.
The analysis is obtained using the <cite>get_quantile()</cite> function or the
inverse functions <cite>get_rank()</cite>, <cite>get_pmf()</cite> (Probability Mass Function), and <cite>get_cdf()</cite>
(Cumulative Distribution Function).</p>
<p>Given an input stream of <cite>N</cite> items, the <cite>natural rank</cite> of any specific
item is defined as its index <cite>(1 to N)</cite> in inclusive mode
or <cite>(0 to N-1)</cite> in exclusive mode
in the hypothetical sorted stream of all <cite>N</cite> input items.</p>
<p>The <cite>normalized rank</cite> (<cite>rank</cite>) of any specific item is defined as its
<cite>natural rank</cite> divided by <cite>N</cite>.
Thus, the <cite>normalized rank</cite> is between zero and one.
In the documentation for this sketch <cite>natural rank</cite> is never used so any
reference to just <cite>rank</cite> should be interpreted to mean <cite>normalized rank</cite>.</p>
<p>This sketch is configured with a parameter <cite>k</cite>, which affects the size of the sketch
and its estimation error.</p>
<p>The estimation error is commonly called <cite>epsilon</cite> (or <cite>eps</cite>) and is a fraction
between zero and one. Larger values of <cite>k</cite> result in smaller values of <cite>epsilon</cite>.
Epsilon is always with respect to the rank and cannot be applied to the
corresponding items.</p>
<p>The relationship between the <cite>normalized rank</cite> and the corresponding items can be viewed
as a two-dimensional monotonic plot with the <cite>normalized rank</cite> on one axis and the
corresponding items on the other axis. If the y-axis is specified as the item-axis and
the x-axis as the <cite>normalized rank</cite>, then <cite>y = get_quantile(x)</cite> is a monotonically
increasing function.</p>
<p>The function <cite>get_quantile(rank)</cite> translates ranks into
corresponding quantiles. The functions <cite>get_rank(item)</cite>,
<cite>get_cdf(…)</cite> (Cumulative Distribution Function), and <cite>get_pmf(…)</cite>
(Probability Mass Function) perform the opposite operation and translate items into ranks.</p>
<p>The <cite>get_pmf(…)</cite> function has about 13 to 47% worse rank error (depending
on <cite>k</cite>) than the other queries because the mass of each “bin” of the PMF has
“double-sided” error from the upper and lower edges of the bin as a result of a subtraction,
as the errors from the two edges can sometimes add.</p>
<p>The default <cite>k</cite> of 200 yields a “single-sided” <cite>epsilon</cite> of about 1.33% and a
“double-sided” (PMF) <cite>epsilon</cite> of about 1.65%.</p>
<p>A <cite>get_quantile(rank)</cite> query has the following guarantees:</p>
<ul class="simple">
<li><p>Let <cite>q = get_quantile(r)</cite> where <cite>r</cite> is the rank between zero and one.</p></li>
<li><p>The quantile <cite>q</cite> will be an item from the input stream.</p></li>
<li><p>Let <cite>true_rank</cite> be the true rank of <cite>q</cite> derived from the hypothetical sorted
stream of all <cite>N</cite> items.</p></li>
<li><p>Let <cite>eps = get_normalized_rank_error(false)</cite>.</p></li>
<li><p>Then <cite>r - eps ≤ true_rank ≤ r + eps</cite> with a confidence of 99%. Note that the
error is on the rank, not the quantile.</p></li>
</ul>
<p>A <cite>get_rank(item)</cite> query has the following guarantees:</p>
<ul class="simple">
<li><p>Let <cite>r = get_rank(i)</cite> where <cite>i</cite> is an item between the min and max items of
the input stream.</p></li>
<li><p>Let <cite>true_rank</cite> be the true rank of <cite>i</cite> derived from the hypothetical sorted
stream of all <cite>N</cite> items.</p></li>
<li><p>Let <cite>eps = get_normalized_rank_error(false)</cite>.</p></li>
<li><p>Then <cite>r - eps ≤ true_rank ≤ r + eps</cite> with a confidence of 99%.</p></li>
</ul>
<p>A <cite>get_pmf()</cite> query has the following guarantees:</p>
<ul class="simple">
<li><p>Let <cite>{r1, r2, …, r(m+1)} = get_pmf(s1, s2, …, sm)</cite> where <cite>s1, s2, …</cite> are
split points (items from the input domain) between the min and max items of
the input stream.</p></li>
<li><p>Let <cite>mass_i = estimated mass between s_i and s_i+1</cite>.</p></li>
<li><p>Let <cite>true_mass</cite> be the true mass between the items of <cite>s_i</cite>,
<cite>s_i+1</cite> derived from the hypothetical sorted stream of all <cite>N</cite> items.</p></li>
<li><p>Let <cite>eps = get_normalized_rank_error(true)</cite>.</p></li>
<li><p>Then <cite>mass_i - eps ≤ true_mass ≤ mass_i + eps</cite> with a confidence of 99%.</p></li>
<li><p><cite>r(m+1)</cite> includes the mass of all points larger than <cite>s_m</cite>.</p></li>
</ul>
<p>A <cite>get_cdf(…)</cite> query has the following guarantees:</p>
<ul class="simple">
<li><p>Let <cite>{r1, r2, …, r(m+1)} = get_cdf(s1, s2, …, sm)</cite> where <cite>s1, s2, …</cite> are
split points (items from the input domain) between the min and max items of
the input stream.</p></li>
<li><p>Let <cite>mass_i = r_(i+1) - r_i</cite>.</p></li>
<li><p>Let <cite>true_mass</cite> be the true mass between the true ranks of <cite>s_i</cite>,
<cite>s_i+1</cite> derived from the hypothetical sorted stream of all <cite>N</cite> items.</p></li>
<li><p>Let <cite>eps = get_normalized_rank_error(true)</cite>.</p></li>
<li><p>Then <cite>mass_i - eps ≤ true_mass ≤ mass_i + eps</cite> with a confidence of 99%.</p></li>
<li><p><cite>1 - r(m+1)</cite> includes the mass of all points larger than <cite>s_m</cite>.</p></li>
</ul>
<p>From the above, it might seem like we could make some estimates to bound the
<cite>item</cite> returned from a call to <cite>get_quantile()</cite>. The sketch, however, does not
let us derive error bounds or confidences around items. Because errors are independent, we
can approximately bracket a value as shown below, but there are no error estimates available.
Additionally, the interval may be quite large for certain distributions.</p>
<ul class="simple">
<li><p>Let <cite>q = get_quantile(r)</cite>, the estimated quantile of rank <cite>r</cite>.</p></li>
<li><p>Let <cite>eps = get_normalized_rank_error(false)</cite>.</p></li>
<li><p>Let <cite>q_lo = estimated quantile of rank (r - eps)</cite>.</p></li>
<li><p>Let <cite>q_hi = estimated quantile of rank (r + eps)</cite>.</p></li>
<li><p>Then <cite>q_lo ≤ q ≤ q_hi</cite>, with 99% confidence.</p></li>
</ul>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>For the <a class="reference internal" href="#datasketches.kll_items_sketch" title="datasketches.kll_items_sketch"><code class="xref py py-class docutils literal notranslate"><span class="pre">kll_items_sketch</span></code></a>, objects must be comparable with <code class="docutils literal notranslate"><span class="pre">__lt__</span></code>.</p>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Serializing and deserializing a <a class="reference internal" href="#datasketches.kll_items_sketch" title="datasketches.kll_items_sketch"><code class="xref py py-class docutils literal notranslate"><span class="pre">kll_items_sketch</span></code></a> requires the use of a <a class="reference internal" href="../helper/serde.html#datasketches.PyObjectSerDe" title="datasketches.PyObjectSerDe"><code class="xref py py-class docutils literal notranslate"><span class="pre">PyObjectSerDe</span></code></a>.</p>
</div>
<dl class="py class">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch">
<em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">kll_ints_sketch</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datasketches.kll_ints_sketch" title="Link to this definition"></a></dt>
<dd><p class="rubric">Static Methods:</p>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.deserialize">
<span class="sig-name descname"><span class="pre">deserialize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">bytes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bytes</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datasketches.kll_ints_sketch" title="_datasketches.kll_ints_sketch"><span class="pre">_datasketches.kll_ints_sketch</span></a></span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.deserialize" title="Link to this definition"></a></dt>
<dd><p>Deserializes the sketch from a bytes object.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.get_normalized_rank_error">
<span class="sig-name descname"><span class="pre">get_normalized_rank_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">as_pmf</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">float</span></span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.get_normalized_rank_error" title="Link to this definition"></a></dt>
<dd><p>Gets the normalized rank error given parameters k and the pmf flag.
If as_pmf is True, returns the ‘double-sided’ normalized rank error for the get_pmf() function.
Otherwise, it is the ‘single-sided’ normalized rank error for all the other queries.
Constants were derived as the best fit to the 99th percentile of empirically measured max error in thousands of trials.</p>
</dd></dl>
<p class="rubric">Non-static Methods:</p>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">200</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.__init__" title="Link to this definition"></a></dt>
<dd><p>Creates a KLL sketch instance with the given value of k.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>k</strong> (<em>int</em><em>, </em><em>optional</em>) – Controls the size/accuracy trade-off of the sketch. Default is 200.</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.get_cdf">
<span class="sig-name descname"><span class="pre">get_cdf</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.get_cdf" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analog of the PMF, of the input stream given a set of split points (values).
The resulting approximations have a probabilistic guarantee that can be obtained from the get_normalized_rank_error(True) function.
If the sketch is empty this returns an empty vector.
split_points is an array of m unique, monotonically increasing float values that divide the real number line into m+1 consecutive disjoint intervals.
If the parameter inclusive=false, the definition of an ‘interval’ is inclusive of the left split point (or minimum value) and exclusive of the right split point, with the exception that the last interval will include the maximum value.
If the parameter inclusive=true, the definition of an ‘interval’ is exclusive of the left split point (or minimum value) and inclusive of the right split point.
It is not necessary to include either the min or max values in these split points.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.get_max_value">
<span class="sig-name descname"><span class="pre">get_max_value</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.get_max_value" title="Link to this definition"></a></dt>
<dd><p>Returns the maximum value from the stream. If empty, kll_floats_sketch returns nan; kll_ints_sketch throws a RuntimeError</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.get_min_value">
<span class="sig-name descname"><span class="pre">get_min_value</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.get_min_value" title="Link to this definition"></a></dt>
<dd><p>Returns the minimum value from the stream. If empty, kll_floats_sketch returns nan; kll_ints_sketch throws a RuntimeError</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.get_pmf">
<span class="sig-name descname"><span class="pre">get_pmf</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.get_pmf" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of split points (values).
The resulting approximations have a probabilistic guarantee that can be obtained from the get_normalized_rank_error(True) function.
If the sketch is empty this returns an empty vector.
split_points is an array of m unique, monotonically increasing float values that divide the real number line into m+1 consecutive disjoint intervals.
If the parameter inclusive=false, the definition of an ‘interval’ is inclusive of the left split point (or minimum value) and exclusive of the right split point, with the exception that the last interval will include the maximum value.
If the parameter inclusive=true, the definition of an ‘interval’ is exclusive of the left split point (or minimum value) and inclusive of the right split point.
It is not necessary to include either the min or max values in these split points.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.get_quantile">
<span class="sig-name descname"><span class="pre">get_quantile</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.get_quantile" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the data value associated with the given normalized rank in a hypothetical sorted version of the input stream so far.
For kll_floats_sketch: if the sketch is empty this returns nan. For kll_ints_sketch: if the sketch is empty this throws a RuntimeError.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.get_quantiles">
<span class="sig-name descname"><span class="pre">get_quantiles</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.get_quantiles" title="Link to this definition"></a></dt>
<dd><p>This returns an array that could have been generated by using get_quantile() for each normalized rank separately.
If the sketch is empty this returns an empty vector.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.get_rank">
<span class="sig-name descname"><span class="pre">get_rank</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.get_rank" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the normalized rank of the given value from 0 to 1, inclusive.
The resulting approximation has a probabilistic guarantee that can be obtained from the get_normalized_rank_error(False) function.
With the parameter inclusive=true the weight of the given value is included into the rank. Otherwise the rank equals the sum of the weights of values less than the given value.
If the sketch is empty this returns nan.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.is_empty">
<span class="sig-name descname"><span class="pre">is_empty</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.is_empty" title="Link to this definition"></a></dt>
<dd><p>Returns True if the sketch is empty, otherwise False</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.is_estimation_mode">
<span class="sig-name descname"><span class="pre">is_estimation_mode</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.is_estimation_mode" title="Link to this definition"></a></dt>
<dd><p>Returns True if the sketch is in estimation mode, otherwise False</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.k">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">k</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.k" title="Link to this definition"></a></dt>
<dd><p>The configured parameter k</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.merge">
<span class="sig-name descname"><span class="pre">merge</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.merge" title="Link to this definition"></a></dt>
<dd><p>Merges the provided sketch into this one</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.n">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.n" title="Link to this definition"></a></dt>
<dd><p>The length of the input stream</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.normalized_rank_error">
<span class="sig-name descname"><span class="pre">normalized_rank_error</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.normalized_rank_error" title="Link to this definition"></a></dt>
<dd><p>Gets the normalized rank error for this sketch.
If pmf is True, returns the ‘double-sided’ normalized rank error for the get_pmf() function.
Otherwise, it is the ‘single-sided’ normalized rank error for all the other queries.
Constants were derived as the best fit to the 99th percentile of empirically measured max error in thousands of trials.</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.num_retained">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">num_retained</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.num_retained" title="Link to this definition"></a></dt>
<dd><p>The number of retained items (samples) in the sketch</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.serialize">
<span class="sig-name descname"><span class="pre">serialize</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.serialize" title="Link to this definition"></a></dt>
<dd><p>Serializes the sketch into a bytes object.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.to_string">
<span class="sig-name descname"><span class="pre">to_string</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.to_string" title="Link to this definition"></a></dt>
<dd><p>Produces a string summary of the sketch</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_ints_sketch.update">
<span class="sig-name descname"><span class="pre">update</span></span><a class="headerlink" href="#datasketches.kll_ints_sketch.update" title="Link to this definition"></a></dt>
<dd><p>Overloaded function.</p>
<ol class="arabic simple">
<li><p><code class="docutils literal notranslate"><span class="pre">update(self,</span> <span class="pre">item:</span> <span class="pre">int)</span> <span class="pre">-&gt;</span> <span class="pre">None</span></code></p></li>
</ol>
<p>Updates the sketch with the given value</p>
<ol class="arabic simple" start="2">
<li><p><code class="docutils literal notranslate"><span class="pre">update(self,</span> <span class="pre">array:</span> <span class="pre">ndarray[dtype=int32])</span> <span class="pre">-&gt;</span> <span class="pre">None</span></code></p></li>
</ol>
<p>Updates the sketch with the values in the given array</p>
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch">
<em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">kll_floats_sketch</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datasketches.kll_floats_sketch" title="Link to this definition"></a></dt>
<dd><p class="rubric">Static Methods:</p>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.deserialize">
<span class="sig-name descname"><span class="pre">deserialize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">bytes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bytes</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datasketches.kll_floats_sketch" title="_datasketches.kll_floats_sketch"><span class="pre">_datasketches.kll_floats_sketch</span></a></span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.deserialize" title="Link to this definition"></a></dt>
<dd><p>Deserializes the sketch from a bytes object.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.get_normalized_rank_error">
<span class="sig-name descname"><span class="pre">get_normalized_rank_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">as_pmf</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">float</span></span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.get_normalized_rank_error" title="Link to this definition"></a></dt>
<dd><p>Gets the normalized rank error given parameters k and the pmf flag.
If as_pmf is True, returns the ‘double-sided’ normalized rank error for the get_pmf() function.
Otherwise, it is the ‘single-sided’ normalized rank error for all the other queries.
Constants were derived as the best fit to the 99th percentile of empirically measured max error in thousands of trials.</p>
</dd></dl>
<p class="rubric">Non-static Methods:</p>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">200</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.__init__" title="Link to this definition"></a></dt>
<dd><p>Creates a KLL sketch instance with the given value of k.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>k</strong> (<em>int</em><em>, </em><em>optional</em>) – Controls the size/accuracy trade-off of the sketch. Default is 200.</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.get_cdf">
<span class="sig-name descname"><span class="pre">get_cdf</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.get_cdf" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analog of the PMF, of the input stream given a set of split points (values).
The resulting approximations have a probabilistic guarantee that can be obtained from the get_normalized_rank_error(True) function.
If the sketch is empty this returns an empty vector.
split_points is an array of m unique, monotonically increasing float values that divide the real number line into m+1 consecutive disjoint intervals.
If the parameter inclusive=false, the definition of an ‘interval’ is inclusive of the left split point (or minimum value) and exclusive of the right split point, with the exception that the last interval will include the maximum value.
If the parameter inclusive=true, the definition of an ‘interval’ is exclusive of the left split point (or minimum value) and inclusive of the right split point.
It is not necessary to include either the min or max values in these split points.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.get_max_value">
<span class="sig-name descname"><span class="pre">get_max_value</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.get_max_value" title="Link to this definition"></a></dt>
<dd><p>Returns the maximum value from the stream. If empty, kll_floats_sketch returns nan; kll_ints_sketch throws a RuntimeError</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.get_min_value">
<span class="sig-name descname"><span class="pre">get_min_value</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.get_min_value" title="Link to this definition"></a></dt>
<dd><p>Returns the minimum value from the stream. If empty, kll_floats_sketch returns nan; kll_ints_sketch throws a RuntimeError</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.get_pmf">
<span class="sig-name descname"><span class="pre">get_pmf</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.get_pmf" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of split points (values).
The resulting approximations have a probabilistic guarantee that can be obtained from the get_normalized_rank_error(True) function.
If the sketch is empty this returns an empty vector.
split_points is an array of m unique, monotonically increasing float values that divide the real number line into m+1 consecutive disjoint intervals.
If the parameter inclusive=false, the definition of an ‘interval’ is inclusive of the left split point (or minimum value) and exclusive of the right split point, with the exception that the last interval will include the maximum value.
If the parameter inclusive=true, the definition of an ‘interval’ is exclusive of the left split point (or minimum value) and inclusive of the right split point.
It is not necessary to include either the min or max values in these split points.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.get_quantile">
<span class="sig-name descname"><span class="pre">get_quantile</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.get_quantile" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the data value associated with the given normalized rank in a hypothetical sorted version of the input stream so far.
For kll_floats_sketch: if the sketch is empty this returns nan. For kll_ints_sketch: if the sketch is empty this throws a RuntimeError.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.get_quantiles">
<span class="sig-name descname"><span class="pre">get_quantiles</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.get_quantiles" title="Link to this definition"></a></dt>
<dd><p>This returns an array that could have been generated by using get_quantile() for each normalized rank separately.
If the sketch is empty this returns an empty vector.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.get_rank">
<span class="sig-name descname"><span class="pre">get_rank</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.get_rank" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the normalized rank of the given value from 0 to 1, inclusive.
The resulting approximation has a probabilistic guarantee that can be obtained from the get_normalized_rank_error(False) function.
With the parameter inclusive=true the weight of the given value is included into the rank. Otherwise the rank equals the sum of the weights of values less than the given value.
If the sketch is empty this returns nan.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.is_empty">
<span class="sig-name descname"><span class="pre">is_empty</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.is_empty" title="Link to this definition"></a></dt>
<dd><p>Returns True if the sketch is empty, otherwise False</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.is_estimation_mode">
<span class="sig-name descname"><span class="pre">is_estimation_mode</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.is_estimation_mode" title="Link to this definition"></a></dt>
<dd><p>Returns True if the sketch is in estimation mode, otherwise False</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.k">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">k</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.k" title="Link to this definition"></a></dt>
<dd><p>The configured parameter k</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.merge">
<span class="sig-name descname"><span class="pre">merge</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.merge" title="Link to this definition"></a></dt>
<dd><p>Merges the provided sketch into this one</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.n">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.n" title="Link to this definition"></a></dt>
<dd><p>The length of the input stream</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.normalized_rank_error">
<span class="sig-name descname"><span class="pre">normalized_rank_error</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.normalized_rank_error" title="Link to this definition"></a></dt>
<dd><p>Gets the normalized rank error for this sketch.
If pmf is True, returns the ‘double-sided’ normalized rank error for the get_pmf() function.
Otherwise, it is the ‘single-sided’ normalized rank error for all the other queries.
Constants were derived as the best fit to the 99th percentile of empirically measured max error in thousands of trials.</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.num_retained">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">num_retained</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.num_retained" title="Link to this definition"></a></dt>
<dd><p>The number of retained items (samples) in the sketch</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.serialize">
<span class="sig-name descname"><span class="pre">serialize</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.serialize" title="Link to this definition"></a></dt>
<dd><p>Serializes the sketch into a bytes object.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.to_string">
<span class="sig-name descname"><span class="pre">to_string</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.to_string" title="Link to this definition"></a></dt>
<dd><p>Produces a string summary of the sketch</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_floats_sketch.update">
<span class="sig-name descname"><span class="pre">update</span></span><a class="headerlink" href="#datasketches.kll_floats_sketch.update" title="Link to this definition"></a></dt>
<dd><p>Overloaded function.</p>
<ol class="arabic simple">
<li><p><code class="docutils literal notranslate"><span class="pre">update(self,</span> <span class="pre">item:</span> <span class="pre">float)</span> <span class="pre">-&gt;</span> <span class="pre">None</span></code></p></li>
</ol>
<p>Updates the sketch with the given value</p>
<ol class="arabic simple" start="2">
<li><p><code class="docutils literal notranslate"><span class="pre">update(self,</span> <span class="pre">array:</span> <span class="pre">ndarray[dtype=float32])</span> <span class="pre">-&gt;</span> <span class="pre">None</span></code></p></li>
</ol>
<p>Updates the sketch with the values in the given array</p>
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch">
<em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">kll_doubles_sketch</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datasketches.kll_doubles_sketch" title="Link to this definition"></a></dt>
<dd><p class="rubric">Static Methods:</p>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.deserialize">
<span class="sig-name descname"><span class="pre">deserialize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">bytes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bytes</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datasketches.kll_doubles_sketch" title="_datasketches.kll_doubles_sketch"><span class="pre">_datasketches.kll_doubles_sketch</span></a></span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.deserialize" title="Link to this definition"></a></dt>
<dd><p>Deserializes the sketch from a bytes object.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.get_normalized_rank_error">
<span class="sig-name descname"><span class="pre">get_normalized_rank_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">as_pmf</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">float</span></span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.get_normalized_rank_error" title="Link to this definition"></a></dt>
<dd><p>Gets the normalized rank error given the parameter k and the as_pmf flag.
If as_pmf is True, returns the ‘double-sided’ normalized rank error for the get_pmf() function.
Otherwise, returns the ‘single-sided’ normalized rank error for all the other queries.
Constants were derived as the best fit to the 99th-percentile empirically measured maximum error over thousands of trials.</p>
</dd></dl>
<p class="rubric">Non-static Methods:</p>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">200</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.__init__" title="Link to this definition"></a></dt>
<dd><p>Creates a KLL sketch instance with the given value of k.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>k</strong> (<em>int</em><em>, </em><em>optional</em>) – Controls the size/accuracy trade-off of the sketch. Default is 200.</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.get_cdf">
<span class="sig-name descname"><span class="pre">get_cdf</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.get_cdf" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analog of the PMF, of the input stream given a set of split points (values).
The resulting approximations have a probabilistic guarantee that can be obtained from the get_normalized_rank_error(True) function.
If the sketch is empty this returns an empty vector.
split_points is an array of m unique, monotonically increasing float values that divide the real number line into m+1 consecutive disjoint intervals.
If the parameter inclusive=False, the definition of an ‘interval’ is inclusive of the left split point (or minimum value) and exclusive of the right split point, with the exception that the last interval will include the maximum value.
If the parameter inclusive=True, the definition of an ‘interval’ is exclusive of the left split point (or minimum value) and inclusive of the right split point.
It is not necessary to include either the min or max values in these split points.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.get_max_value">
<span class="sig-name descname"><span class="pre">get_max_value</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.get_max_value" title="Link to this definition"></a></dt>
<dd><p>Returns the maximum value from the stream. If empty, kll_floats_sketch returns nan; kll_ints_sketch throws a RuntimeError.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.get_min_value">
<span class="sig-name descname"><span class="pre">get_min_value</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.get_min_value" title="Link to this definition"></a></dt>
<dd><p>Returns the minimum value from the stream. If empty, kll_floats_sketch returns nan; kll_ints_sketch throws a RuntimeError.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.get_pmf">
<span class="sig-name descname"><span class="pre">get_pmf</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.get_pmf" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of split points (values).
The resulting approximations have a probabilistic guarantee that can be obtained from the get_normalized_rank_error(True) function.
If the sketch is empty this returns an empty vector.
split_points is an array of m unique, monotonically increasing float values that divide the real number line into m+1 consecutive disjoint intervals.
If the parameter inclusive=False, the definition of an ‘interval’ is inclusive of the left split point (or minimum value) and exclusive of the right split point, with the exception that the last interval will include the maximum value.
If the parameter inclusive=True, the definition of an ‘interval’ is exclusive of the left split point (or minimum value) and inclusive of the right split point.
It is not necessary to include either the min or max values in these split points.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.get_quantile">
<span class="sig-name descname"><span class="pre">get_quantile</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.get_quantile" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the data value associated with the given normalized rank in a hypothetical sorted version of the input stream so far.
For kll_floats_sketch: if the sketch is empty this returns nan. For kll_ints_sketch: if the sketch is empty this throws a RuntimeError.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.get_quantiles">
<span class="sig-name descname"><span class="pre">get_quantiles</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.get_quantiles" title="Link to this definition"></a></dt>
<dd><p>This returns an array that could have been generated by using get_quantile() for each normalized rank separately.
If the sketch is empty this returns an empty vector.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.get_rank">
<span class="sig-name descname"><span class="pre">get_rank</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.get_rank" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the normalized rank of the given value from 0 to 1, inclusive.
The resulting approximation has a probabilistic guarantee that can be obtained from the get_normalized_rank_error(False) function.
With the parameter inclusive=True, the weight of the given value is included in the rank. Otherwise, the rank equals the sum of the weights of values less than the given value.
If the sketch is empty this returns nan.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.is_empty">
<span class="sig-name descname"><span class="pre">is_empty</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.is_empty" title="Link to this definition"></a></dt>
<dd><p>Returns True if the sketch is empty, otherwise False</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.is_estimation_mode">
<span class="sig-name descname"><span class="pre">is_estimation_mode</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.is_estimation_mode" title="Link to this definition"></a></dt>
<dd><p>Returns True if the sketch is in estimation mode, otherwise False</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.k">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">k</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.k" title="Link to this definition"></a></dt>
<dd><p>The configured parameter k</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.merge">
<span class="sig-name descname"><span class="pre">merge</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.merge" title="Link to this definition"></a></dt>
<dd><p>Merges the provided sketch into this one</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.n">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.n" title="Link to this definition"></a></dt>
<dd><p>The length of the input stream</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.normalized_rank_error">
<span class="sig-name descname"><span class="pre">normalized_rank_error</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.normalized_rank_error" title="Link to this definition"></a></dt>
<dd><p>Gets the normalized rank error for this sketch.
If pmf is True, returns the ‘double-sided’ normalized rank error for the get_pmf() function.
Otherwise, returns the ‘single-sided’ normalized rank error for all the other queries.
Constants were derived as the best fit to the 99th-percentile empirically measured maximum error over thousands of trials.</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.num_retained">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">num_retained</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.num_retained" title="Link to this definition"></a></dt>
<dd><p>The number of retained items (samples) in the sketch</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.serialize">
<span class="sig-name descname"><span class="pre">serialize</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.serialize" title="Link to this definition"></a></dt>
<dd><p>Serializes the sketch into a bytes object.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.to_string">
<span class="sig-name descname"><span class="pre">to_string</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.to_string" title="Link to this definition"></a></dt>
<dd><p>Produces a string summary of the sketch</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_doubles_sketch.update">
<span class="sig-name descname"><span class="pre">update</span></span><a class="headerlink" href="#datasketches.kll_doubles_sketch.update" title="Link to this definition"></a></dt>
<dd><p>Overloaded function.</p>
<ol class="arabic simple">
<li><p><code class="docutils literal notranslate"><span class="pre">update(self,</span> <span class="pre">item:</span> <span class="pre">float)</span> <span class="pre">-&gt;</span> <span class="pre">None</span></code></p></li>
</ol>
<p>Updates the sketch with the given value</p>
<ol class="arabic simple" start="2">
<li><p><code class="docutils literal notranslate"><span class="pre">update(self,</span> <span class="pre">array:</span> <span class="pre">ndarray[dtype=float64])</span> <span class="pre">-&gt;</span> <span class="pre">None</span></code></p></li>
</ol>
<p>Updates the sketch with the values in the given array</p>
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch">
<em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">kll_items_sketch</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#datasketches.kll_items_sketch" title="Link to this definition"></a></dt>
<dd><p class="rubric">Static Methods:</p>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.deserialize">
<span class="sig-name descname"><span class="pre">deserialize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">bytes</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bytes</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">serde</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../helper/serde.html#datasketches.PyObjectSerDe" title="_datasketches.PyObjectSerDe"><span class="pre">_datasketches.PyObjectSerDe</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#datasketches.kll_items_sketch" title="_datasketches.kll_items_sketch"><span class="pre">_datasketches.kll_items_sketch</span></a></span></span><a class="headerlink" href="#datasketches.kll_items_sketch.deserialize" title="Link to this definition"></a></dt>
<dd><p>Deserializes the sketch from a bytes object using the provided serde.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.get_normalized_rank_error">
<span class="sig-name descname"><span class="pre">get_normalized_rank_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">as_pmf</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">float</span></span></span><a class="headerlink" href="#datasketches.kll_items_sketch.get_normalized_rank_error" title="Link to this definition"></a></dt>
<dd><p>Gets the normalized rank error given the parameter k and the as_pmf flag.
If as_pmf is True, returns the ‘double-sided’ normalized rank error for the get_pmf() function.
Otherwise, returns the ‘single-sided’ normalized rank error for all the other queries.
Constants were derived as the best fit to the 99th-percentile empirically measured maximum error over thousands of trials.</p>
</dd></dl>
<p class="rubric">Non-static Methods:</p>
<dl class="py method">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">self</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">200</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#datasketches.kll_items_sketch.__init__" title="Link to this definition"></a></dt>
<dd><p>Creates a KLL sketch instance with the given value of k.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>k</strong> (<em>int</em><em>, </em><em>optional</em>) – Controls the size/accuracy trade-off of the sketch. Default is 200.</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.get_cdf">
<span class="sig-name descname"><span class="pre">get_cdf</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.get_cdf" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analog of the PMF, of the input stream given a set of split points (values).
The resulting approximations have a probabilistic guarantee that can be obtained from the get_normalized_rank_error(True) function.
If the sketch is empty this returns an empty vector.
split_points is an array of m unique, monotonically increasing float values that divide the real number line into m+1 consecutive disjoint intervals.
If the parameter inclusive=False, the definition of an ‘interval’ is inclusive of the left split point (or minimum value) and exclusive of the right split point, with the exception that the last interval will include the maximum value.
If the parameter inclusive=True, the definition of an ‘interval’ is exclusive of the left split point (or minimum value) and inclusive of the right split point.
It is not necessary to include either the min or max values in these split points.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.get_max_value">
<span class="sig-name descname"><span class="pre">get_max_value</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.get_max_value" title="Link to this definition"></a></dt>
<dd><p>Returns the maximum value from the stream. If empty, kll_floats_sketch returns nan; kll_ints_sketch throws a RuntimeError.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.get_min_value">
<span class="sig-name descname"><span class="pre">get_min_value</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.get_min_value" title="Link to this definition"></a></dt>
<dd><p>Returns the minimum value from the stream. If empty, kll_floats_sketch returns nan; kll_ints_sketch throws a RuntimeError.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.get_pmf">
<span class="sig-name descname"><span class="pre">get_pmf</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.get_pmf" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of split points (values).
The resulting approximations have a probabilistic guarantee that can be obtained from the get_normalized_rank_error(True) function.
If the sketch is empty this returns an empty vector.
split_points is an array of m unique, monotonically increasing float values that divide the real number line into m+1 consecutive disjoint intervals.
If the parameter inclusive=False, the definition of an ‘interval’ is inclusive of the left split point (or minimum value) and exclusive of the right split point, with the exception that the last interval will include the maximum value.
If the parameter inclusive=True, the definition of an ‘interval’ is exclusive of the left split point (or minimum value) and inclusive of the right split point.
It is not necessary to include either the min or max values in these split points.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.get_quantile">
<span class="sig-name descname"><span class="pre">get_quantile</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.get_quantile" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the data value associated with the given normalized rank in a hypothetical sorted version of the input stream so far.
For kll_floats_sketch: if the sketch is empty this returns nan. For kll_ints_sketch: if the sketch is empty this throws a RuntimeError.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.get_quantiles">
<span class="sig-name descname"><span class="pre">get_quantiles</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.get_quantiles" title="Link to this definition"></a></dt>
<dd><p>This returns an array that could have been generated by using get_quantile() for each normalized rank separately.
If the sketch is empty this returns an empty vector.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.get_rank">
<span class="sig-name descname"><span class="pre">get_rank</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.get_rank" title="Link to this definition"></a></dt>
<dd><p>Returns an approximation to the normalized rank of the given value from 0 to 1, inclusive.
The resulting approximation has a probabilistic guarantee that can be obtained from the get_normalized_rank_error(False) function.
With the parameter inclusive=True, the weight of the given value is included in the rank. Otherwise, the rank equals the sum of the weights of values less than the given value.
If the sketch is empty this returns nan.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.is_empty">
<span class="sig-name descname"><span class="pre">is_empty</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.is_empty" title="Link to this definition"></a></dt>
<dd><p>Returns True if the sketch is empty, otherwise False</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.is_estimation_mode">
<span class="sig-name descname"><span class="pre">is_estimation_mode</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.is_estimation_mode" title="Link to this definition"></a></dt>
<dd><p>Returns True if the sketch is in estimation mode, otherwise False</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.k">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">k</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.k" title="Link to this definition"></a></dt>
<dd><p>The configured parameter k</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.merge">
<span class="sig-name descname"><span class="pre">merge</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.merge" title="Link to this definition"></a></dt>
<dd><p>Merges the provided sketch into this one</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.n">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.n" title="Link to this definition"></a></dt>
<dd><p>The length of the input stream</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.normalized_rank_error">
<span class="sig-name descname"><span class="pre">normalized_rank_error</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.normalized_rank_error" title="Link to this definition"></a></dt>
<dd><p>Gets the normalized rank error for this sketch.
If pmf is True, returns the ‘double-sided’ normalized rank error for the get_pmf() function.
Otherwise, returns the ‘single-sided’ normalized rank error for all the other queries.
Constants were derived as the best fit to the 99th-percentile empirically measured maximum error over thousands of trials.</p>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.num_retained">
<em class="property"><span class="k"><span class="pre">property</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">num_retained</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.num_retained" title="Link to this definition"></a></dt>
<dd><p>The number of retained items (samples) in the sketch</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.serialize">
<span class="sig-name descname"><span class="pre">serialize</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.serialize" title="Link to this definition"></a></dt>
<dd><p>Serializes the sketch into a bytes object using the provided serde.</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.to_string">
<span class="sig-name descname"><span class="pre">to_string</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.to_string" title="Link to this definition"></a></dt>
<dd><p>Produces a string summary of the sketch</p>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="datasketches.kll_items_sketch.update">
<span class="sig-name descname"><span class="pre">update</span></span><a class="headerlink" href="#datasketches.kll_items_sketch.update" title="Link to this definition"></a></dt>
<dd><p>Updates the sketch with the given value</p>
</dd></dl>
</dd></dl>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="index.html" class="btn btn-neutral float-left" title="Quantiles Sketches" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="req.html" class="btn btn-neutral float-right" title="Relative Error Quantiles (REQ) Sketch" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2023.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>