blob: 873bdb67f3801e24e46cc2c88eebcbf2798949a5 [file] [log] [blame]
<!DOCTYPE html>
<!-- Start _layouts/doc_page.html-->
<html lang="en">
<head>
<!-- Start _include/site_head.html -->
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="">
<meta name="author" content="datasketches">
<title>DataSketches | </title>
<link rel="shortcut icon" href="/img/favicon.png">
<!-- original source: https://maxcdn.bootstrapcdn.com/font-awesome/4.1.0/css/font-awesome.min.css -->
<link rel="stylesheet" href="/css/font-awesome.min.css">
<!-- original source: https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap.min.css -->
<link rel="stylesheet" href="/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/fonts.css" type="text/css">
<link rel="stylesheet" href="/css/main.css">
<link rel="stylesheet" href="/css/header.css">
<link rel="stylesheet" href="/css/footer.css">
<link rel="stylesheet" href="/css/syntax.css">
<link rel="stylesheet" href="/css/docs.css">
<script type="text/x-mathjax-config">
MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]},showMathMenu:false,showMathMenuMSIE:false,showProcessingMessages:false});
</script>
<!-- original source: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMX_HTML-full -->
<script type="text/javascript" src="/js/MathJax.js?config=TeX-AMS_HTML"></script>
<!-- original source: https://code.jquery.com/jquery.min.js -->
<script src="/js/jquery.min.js"></script>
<!-- original source: https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/js/bootstrap.min.js -->
<script src="/js/bootstrap.min.js"></script> <!-- 3.2.0-->
<!-- End _include/site_head.html -->
</head>
<body>
<!-- Start _include/nav_bar.html -->
<div class="navbar navbar-inverse navbar-static-top ds-nav">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="/" style="padding-top: 0px; padding-bottom: 0px;">
<span class="ds-small-h-logo"></span></a>
</div>
<div class="navbar-collapse collapse">
<ul class="nav navbar-nav navbar-right">
<li>
<a href="/docs/Background/TheChallenge.html">
<span class="fa fa-info-circle"></span> DOCUMENTATION</a>
</li>
<li>
<a href="/docs/Community/Downloads.html">
<span class="fa fa-download"></span> DOWNLOAD</a>
</li>
<!--
<li>
<a href="/docs/Architecture/Components.html">
<span class="fa fa-github"></span> GITHUB</a>
</li>
-->
<li>
<a href="/docs/Community/Research.html">
<span class="fa fa-paper-plane"></span> RESEARCH</a>
</li>
<li>
<a href="/docs/Community/index.html" style="padding-top: 0; padding-bottom: 0;">
<img class="ds-small-man" src="/img/datasketches-ManWhite.svg"/>COMMUNITY</a>
</li>
<li>
<ul class="nav navbar-nav navbar-right ds-nav">
<li class="dropdown ds-nav" >
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false" style="padding-top: 0; padding-bottom: 0;"><img class="apache-logo" src="/img/feather.svg"/>Apache <span class="caret"></span></a>
<ul class="dropdown-menu ds-nav">
<li><a href="https://www.apache.org/" target="_blank">Foundation</a></li>
<li><a href="https://www.apache.org/events/current-event" target="_blank">Events</a></li>
<li><a href="https://www.apache.org/licenses/" target="_blank">License</a></li>
<li><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank">Privacy Policy</a></li>
<li><a href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li>
<li><a href="https://www.apache.org/security/" target="_blank">Security</a></li>
<li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Sponsorship</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
</div>
<!-- End _include/nav_bar.html -->
<!-- Start _include/javadocs.html -->
<div class="ds-header">
<div class="container">
<h4>API Snapshots:
<a href="https://apache.github.io/datasketches-java/4.2.0/">Java Core</a>,
<a href="https://apache.github.io/datasketches-cpp/5.0.0/">C++ Core</a>,
<a href="https://apache.github.io/datasketches-python/main/">Python</a>,
<a href="https://apache.github.io/datasketches-memory/master/">Memory</a>,
<a href="/api/pig/snapshot/apidocs/index.html">Pig</a>,
<a href="/api/hive/snapshot/apidocs/index.html">Hive</a>,
</h4>
</div>
</div>
<!-- End _include/javadocs.html -->
<div class="container">
<div class="row">
<!-- Start ToC Block -->
<div class="col-md-3">
<div class="searchbox" style="position:relative">
<gcse:searchbox-only></gcse:searchbox-only>
</div>
<!-- Start _includes/toc.html -->
<!-- Computer Generated File, Do Not Edit! -->
<link rel="stylesheet" href="/css/toc.css">
<div id="toc" class="nav toc hidden-print">
<p id="background">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_background">Background</a>
</p>
<div class="collapse" id="collapse_background">
<li><a href="/docs/Background/TheChallenge.html">•The Challenge</a></li>
<li><a href="/docs/Background/SketchOrigins.html">•Sketch Origins</a></li>
<li><a href="/docs/Background/SketchElements.html">•Sketch Elements</a></li>
<li><a href="/docs/Background/Presentations.html">•Presentations</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/DataSketches_deck.pdf">•Overview Slide Deck</a></li>
</div>
<p id="architecture-and-design">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_architecture_and_design">Architecture And Design</a>
</p>
<div class="collapse" id="collapse_architecture_and_design">
<li><a href="/docs/Architecture/MajorSketchFamilies.html">•The Major Sketch Families</a></li>
<li><a href="/docs/Architecture/LargeScale.html">•Large Scale Computing</a></li>
<li><a href="/docs/Architecture/KeyFeatures.html">•Key Features</a></li>
<li><a href="/docs/Architecture/SketchFeaturesMatrix.html">•Sketch Features Matrix</a></li>
<li><a href="/docs/Architecture/Components.html">•Components</a></li>
<li><a href="/docs/Architecture/SketchesByComponent.html">•Sketches by Component</a></li>
<li><a href="/docs/Architecture/SketchCriteria.html">•Sketch Criteria</a></li>
<p id="memory-component">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_memory_component">Memory Component</a>
</p>
<div class="collapse" id="collapse_memory_component">
<li><a href="/docs/Memory/MemoryComponent.html">•Memory Component</a></li>
<li><a href="/docs/Memory/MemoryPerformance.html">•Memory Component Performance</a></li>
</div>
<li><a href="/docs/Architecture/OrderSensitivity.html">•Notes on Order Sensitivity</a></li>
<li><a href="/docs/Architecture/Concurrency.html">•Notes on Concurrency</a></li>
</div>
<p id="sketch-families">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sketch_families">Sketch Families</a>
</p>
<div class="collapse" id="collapse_sketch_families">
<p id="distinct-counting">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_distinct_counting">Distinct Counting</a>
</p>
<div class="collapse" id="collapse_distinct_counting">
<li><a href="/docs/DistinctCountFeaturesMatrix.html">•Features Matrix</a></li>
<li><a href="/docs/DistinctCountMeritComparisons.html">•Figures-of-Merit Comparison</a></li>
<p id="cpc-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_cpc_sketches">CPC Sketches</a>
</p>
<div class="collapse" id="collapse_cpc_sketches">
<li><a href="/docs/CPC/CPC.html">•CPC Sketch</a></li>
<li><a href="/docs/CPC/CpcPerformance.html">•CPC Sketch Performance</a></li>
<p id="cpc-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_cpc_examples">CPC Examples</a>
</p>
<div class="collapse" id="collapse_cpc_examples">
<li><a href="/docs/CPC/CpcJavaExample.html">•CPC Sketch Java Example</a></li>
<li><a href="/docs/CPC/CpcCppExample.html">•CPC Sketch C++ Example</a></li>
<li><a href="/docs/CPC/CpcPigExample.html">•CPC Sketch Pig UDFs</a></li>
<li><a href="/docs/CPC/CpcHiveExample.html">•CPC Sketch Hive UDFs</a></li>
</div>
</div>
<p id="hyperloglog-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hyperloglog_sketches">HyperLogLog Sketches</a>
</p>
<div class="collapse" id="collapse_hyperloglog_sketches">
<li><a href="/docs/HLL/HLL.html">•HLL Sketch</a></li>
<li><a href="/docs/HLL/HllMap.html">•HLL Map Sketch</a></li>
<p id="hll-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hll_examples">HLL Examples</a>
</p>
<div class="collapse" id="collapse_hll_examples">
<li><a href="/docs/HLL/HllJavaExample.html">•HLL Sketch Java Example</a></li>
<li><a href="/docs/HLL/HllCppExample.html">•HLL Sketch C++ Example</a></li>
<li><a href="/docs/HLL/HllPigUDFs.html">•HLL Sketch Pig UDFs</a></li>
<li><a href="/docs/HLL/HllHiveUDFs.html">•HLL Sketch Hive UDFs</a></li>
</div>
<p id="hll-studies">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hll_studies">HLL Studies</a>
</p>
<div class="collapse" id="collapse_hll_studies">
<li><a href="/docs/HLL/HllPerformance.html">•HLL Sketch Performance</a></li>
<li><a href="/docs/HLL/Hll_vs_CS_Hllpp.html">•HLL vs Clearspring HLL++</a></li>
<li><a href="/docs/HLL/HllSketchVsDruidHyperLogLogCollector.html">•HLL Sketch vs Druid HyperLogLogCollector</a></li>
</div>
</div>
<p id="theta-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_sketches">Theta Sketches</a>
</p>
<div class="collapse" id="collapse_theta_sketches">
<li><a href="/docs/Theta/ThetaSketchFramework.html">•Theta Sketch Framework</a></li>
<p id="theta-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_examples">Theta Examples</a>
</p>
<div class="collapse" id="collapse_theta_examples">
<li><a href="/docs/Theta/ConcurrentThetaSketch.html">•Concurrent Theta Sketch</a></li>
<li><a href="/docs/Theta/ThetaJavaExample.html">•Theta Sketch Java Example</a></li>
<li><a href="/docs/Theta/ThetaSparkExample.html">•Theta Sketch Spark Example</a></li>
<li><a href="/docs/Theta/ThetaPigUDFs.html">•Theta Sketch Pig UDFs</a></li>
<li><a href="/docs/Theta/ThetaHiveUDFs.html">•Theta Sketch Hive UDFs</a></li>
</div>
<p id="kmv-tutorial">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_kmv_tutorial">KMV Tutorial</a>
</p>
<div class="collapse" id="collapse_kmv_tutorial">
<li><a href="/docs/Theta/InverseEstimate.html">•The Inverse Estimate</a></li>
<li><a href="/docs/Theta/KMVempty.html">•Empty Sketch</a></li>
<li><a href="/docs/Theta/KMVfirstEst.html">•First Estimator</a></li>
<li><a href="/docs/Theta/KMVbetterEst.html">•Better Estimator</a></li>
<li><a href="/docs/Theta/KMVrejection.html">•Rejection Rules</a></li>
<li><a href="/docs/Theta/KMVupdateVkth.html">•Update V(kth) Rule</a></li>
</div>
<p id="set-operations-and-p-sampling">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_set_operations_and_p-sampling">Set Operations and P-sampling</a>
</p>
<div class="collapse" id="collapse_set_operations_and_p-sampling">
<li><a href="/docs/Theta/ThetaSketchSetOps.html">•Set Operations</a></li>
<li><a href="/docs/Theta/ThetaSetOpsCornerCases.html">•Model & Test Set Operations</a></li>
<li><a href="/docs/Theta/ThetaPSampling.html"><i>p</i>-Sampling</a></li>
</div>
<p id="accuracy">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_accuracy">Accuracy</a>
</p>
<div class="collapse" id="collapse_accuracy">
<li><a href="/docs/Theta/ThetaAccuracy.html">•Basic Accuracy</a></li>
<li><a href="/docs/Theta/ThetaAccuracyPlots.html">•Accuracy Plots</a></li>
<li><a href="/docs/Theta/ThetaErrorTable.html">•Relative Error Table</a></li>
<li><a href="/docs/Theta/ThetaSketchSetOpsAccuracy.html">•SetOp Accuracy</a></li>
<li><a href="/docs/Theta/AccuracyOfDifferentKUnions.html">•Unions With Different k</a></li>
</div>
<p id="size">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_size">Size</a>
</p>
<div class="collapse" id="collapse_size">
<li><a href="/docs/Theta/ThetaSize.html">•Theta Sketch Size</a></li>
</div>
<p id="speed">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_speed">Speed</a>
</p>
<div class="collapse" id="collapse_speed">
<li><a href="/docs/Theta/ThetaUpdateSpeed.html">•Update Speed</a></li>
<li><a href="/docs/Theta/ThetaMergeSpeed.html">•Merge Speed</a></li>
</div>
<p id="theta-sketch-theory">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_sketch_theory">Theta Sketch Theory</a>
</p>
<div class="collapse" id="collapse_theta_sketch_theory">
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/ThetaSketchFramework.pdf">•Theta Sketch Framework (PDF)</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/ThetaSketchEquations.pdf">•Theta Sketch Equations (PDF)</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/DataSketches.pdf">•DataSketches (PDF)</a></li>
<li><a href="/docs/Theta/ThetaConfidenceIntervals.html">•Confidence Intervals Notes</a></li>
<li><a href="/docs/Theta/ThetaMergingAlgorithm.html">•Merging Algorithm Notes</a></li>
<li><a href="/docs/Theta/ThetaReferences.html">•Theta References</a></li>
</div>
</div>
<p id="tuple-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_tuple_sketches">Tuple Sketches</a>
</p>
<div class="collapse" id="collapse_tuple_sketches">
<li><a href="/docs/Tuple/TupleOverview.html">•Tuple Overview</a></li>
<p id="tuple-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_tuple_examples">Tuple Examples</a>
</p>
<div class="collapse" id="collapse_tuple_examples">
<li><a href="/docs/Tuple/TupleJavaExample.html">•Tuple Java Example</a></li>
<li><a href="/docs/Tuple/TupleEngagementExample.html">•Tuple Engagement Example</a></li>
<li><a href="/docs/Tuple/TuplePigUDFs.html">•Tuple Pig UDFs</a></li>
<li><a href="/docs/Tuple/TupleHiveUDFs.html">•Tuple Hive UDFs</a></li>
</div>
</div>
</div>
<p id="most-frequent">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_most_frequent">Most Frequent</a>
</p>
<div class="collapse" id="collapse_most_frequent">
<li><a href="/docs/Frequency/FrequencySketchesOverview.html">•Frequency Sketches Overview</a></li>
<p id="frequent-item-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_frequent_item_sketches">Frequent Item Sketches</a>
</p>
<div class="collapse" id="collapse_frequent_item_sketches">
<li><a href="/docs/Frequency/FrequentItemsOverview.html">•Frequent Items Overview</a></li>
<li><a href="/docs/Frequency/FrequentItemsErrorTable.html">•Frequent Items Error Table</a></li>
<li><a href="/docs/Frequency/FrequentItemsReferences.html">•Frequent Items References</a></li>
<li><a href="/docs/Frequency/FrequentItemsPerformance.html">•Frequent Items Performance</a></li>
<p id="most-frequent-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_most_frequent_examples">Most Frequent Examples</a>
</p>
<div class="collapse" id="collapse_most_frequent_examples">
<li><a href="/docs/Frequency/FrequentItemsJavaExample.html">•Frequent Items Java Example</a></li>
<li><a href="/docs/Frequency/FrequentItemsCppExample.html">•Frequent Items C++ Example</a></li>
<li><a href="/docs/Frequency/FrequentItemsPigUDFs.html">•Frequent Items Pig UDFs</a></li>
<li><a href="/docs/Frequency/FrequentItemsHiveUDFs.html">•Frequent Items Hive UDFs</a></li>
</div>
</div>
<p id="frequent-distinct-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_frequent_distinct_sketches">Frequent Distinct Sketches</a>
</p>
<div class="collapse" id="collapse_frequent_distinct_sketches">
<li><a href="/docs/Frequency/FrequentDistinctTuplesSketch.html">•Frequent Distinct Tuples Sketch</a></li>
</div>
</div>
<p id="quantiles-and-histograms">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_and_histograms">Quantiles And Histograms</a>
</p>
<div class="collapse" id="collapse_quantiles_and_histograms">
<li><a href="/docs/Quantiles/SketchingQuantilesAndRanksTutorial.html">•Quantiles and Ranks Tutorial</a></li>
<li><a href="/docs/Quantiles/QuantilesOverview.html">•Quantiles Overview</a></li>
<li><a href="/docs/KLL/KLLSketch.html">•KLL Floats sketch</a></li>
<li><a href="/docs/KLL/KLLAccuracyAndSize.html">•KLL Sketch Accuracy and Size</a></li>
<li><a href="/docs/REQ/ReqSketch.html">•REQ Floats sketch</a></li>
<li><a href="/docs/Quantiles/OrigQuantilesSketch.html">•Original QuantilesSketch</a></li>
<p id="quantiles-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_examples">Quantiles Examples</a>
</p>
<div class="collapse" id="collapse_quantiles_examples">
<li><a href="/docs/Quantiles/QuantilesJavaExample.html">•Quantiles Sketch Java Example</a></li>
<li><a href="/docs/KLL/KLLCppExample.html">•KLL Quantiles Sketch C++ Example</a></li>
<li><a href="/docs/Quantiles/QuantilesPigUDFs.html">•Quantiles Sketch Pig UDFs</a></li>
<li><a href="/docs/Quantiles/QuantilesHiveUDFs.html">•Quantiles Sketch Hive UDFs</a></li>
</div>
<p id="quantiles-studies">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_studies">Quantiles Studies</a>
</p>
<div class="collapse" id="collapse_quantiles_studies">
<li><a href="/docs/QuantilesStudies/DruidApproxHistogramStudy.html">•Druid Approximate Histogram</a></li>
<li><a href="/docs/QuantilesStudies/MomentsSketchStudy.html">•Moments Sketch Study</a></li>
<li><a href="/docs/QuantilesStudies/QuantilesStreamAStudy.html">•Quantiles StreamA Study</a></li>
<li><a href="/docs/QuantilesStudies/ExactQuantiles.html">•Exact Quantiles for Studies</a></li>
</div>
<p id="quantiles-sketch-theory">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_sketch_theory">Quantiles Sketch Theory</a>
</p>
<div class="collapse" id="collapse_quantiles_sketch_theory">
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/Quantiles_KLL.pdf">•Optimal Quantile Approximation in Streams</a></li>
<li><a href="/docs/Quantiles/QuantilesReferences.html">•Quantiles References</a></li>
</div>
</div>
<p id="sampling">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sampling">Sampling</a>
</p>
<div class="collapse" id="collapse_sampling">
<li><a href="/docs/Sampling/ReservoirSampling.html">•Reservoir Sampling</a></li>
<li><a href="/docs/Sampling/ReservoirSamplingPerformance.html">•Reservoir Sampling Performance</a></li>
<li><a href="/docs/Sampling/VarOptSampling.html">•VarOpt Sampling</a></li>
<p id="sampling-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sampling_examples">Sampling Examples</a>
</p>
<div class="collapse" id="collapse_sampling_examples">
<li><a href="/docs/Sampling/ReservoirSamplingJava.html">•Reservoir Sampling Java Example</a></li>
<li><a href="/docs/Sampling/ReservoirSamplingPigUDFs.html">•Reservoir Sampling Pig UDFs</a></li>
<li><a href="/docs/Sampling/VarOptSamplingJava.html">•VarOpt Sampling Java Example</a></li>
<li><a href="/docs/Sampling/VarOptPigUDFs.html">•VarOpt Sampling Pig UDFs</a></li>
</div>
</div>
</div>
<p id="system-integrations">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_system_integrations">System Integrations</a>
</p>
<div class="collapse" id="collapse_system_integrations">
<li><a href="/docs/SystemIntegrations/ApacheDruidIntegration.html">•Using Sketches in ApacheDruid</a></li>
<li><a href="/docs/SystemIntegrations/ApacheHiveIntegration.html">•Using Sketches in Apache Hive</a></li>
<li><a href="/docs/SystemIntegrations/ApachePigIntegration.html">•Using Sketches in Apache Pig</a></li>
<li><a href="/docs/SystemIntegrations/PostgreSQLIntegration.html">•Using Sketches in PostgreSQL</a></li>
</div>
<p id="community">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_community">Community</a>
</p>
<div class="collapse" id="collapse_community">
<li><a href="/docs/Community/index.html">•Community</a></li>
<li><a href="/docs/Community/Downloads.html">•Downloads</a></li>
<li><a href="/docs/Community/NewCommitterProcess.html">•Committer Process</a></li>
<li><a href="/docs/Community/ReleaseProcessForCppComponents.html">•Release Process For CPP Components</a></li>
<li><a href="/docs/Community/ReleaseProcessForJavaComponents.html">•Release Process For Java Components</a></li>
<li><a href="/docs/Community/Transitioning.html">•Transitioning from prior GitHub Site</a></li>
</div>
<p id="research">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_research">Research</a>
</p>
<div class="collapse" id="collapse_research">
<li><a href="/docs/Community/Research.html">•Research</a></li>
</div>
</div>
<!-- End _includes/toc.html -->
<!-- Start _includes/tocScript.html -->
<script>
(function () {
var findLineItem = function (path) {
return document.querySelector(`#toc [href="${path}"]`);
};
function findNavItem(path) {
return document.querySelector(`.nav [href="${path}"]`);
}
var highlighLineItem = function (element) {
element.classList.add('highlight');
};
var checkHasClass = function (element, className) {
return element.className.split(' ').find(function (item) { return item === className || '' })
}
var findAllCollapseParents = function (element) {
var collapseMenus = [];
var elementPointer = element;
while (elementPointer !== document.body) {
if (checkHasClass(elementPointer, 'collapse')) {
collapseMenus.push(elementPointer);
}
elementPointer = elementPointer.parentElement
}
return collapseMenus
};
var openMenuItem = function (element) {
// $(element).collapse('show') would start a transition, adding `in` class instead.
element.classList.add('in');
};
var openAllFromList = function (elementList) {
elementList.forEach(openMenuItem);
};
var highlightAndOpenMenu = function () {
// Highlight & expand nav item in the TOC
var currentLineItem = findLineItem(document.location.pathname);
highlighLineItem(currentLineItem);
openAllFromList(findAllCollapseParents(currentLineItem));
// Highlight nav item in top navigation
highlighLineItem(findNavItem(document.location.pathname));
};
$(highlightAndOpenMenu);
}());
</script>
<!-- End _includes/tocScript.html -->
</div>
<!-- End ToC Block -->
<div class="col-md-9 doc-content">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<h1 id="sketching-quantiles-and-ranks-tutorial">Sketching Quantiles and Ranks Tutorial</h1>
<p>Streaming quantiles algorithms, or quantiles sketches, enable us to analyze the distributions
of massive data very quickly using only a small amount of space.<br />
They allow us to compute quantile values given a desired rank, or compute a rank given
a quantile value. Quantile sketches enable us to plot the CDF, PMF or histograms of a distribution.</p>
<p>The goal of this short tutorial it to introduce the reader to some of the basic concepts
of quantiles, ranks and their functions.</p>
<h2 id="what-is-a-rank">What is a rank?</h2>
<h3 id="a-rank-identifies-the-numeric-position-of-a-specific-value-in-an-enumerated-ordered-set-of-values">A <strong><em>rank</em></strong> identifies the numeric position of a specific value in an enumerated, ordered set of values.</h3>
<p>The actual enumeration can be done in several ways, but for our use here we will define the two common ways that <em>rank</em> can be specified and that we will use.</p>
<ul>
<li>
<p>The <strong>natural rank</strong> is a <strong>natural number</strong> from the set of one-based, natural numbers, ℕ<sub>1</sub>, and is derived by enumerating an ordered set of values, starting with the value 1, up to <em>n</em>, the number of values in the original set.</p>
</li>
<li>The <strong><em>normalized rank</em></strong> is a number between 0.0 and 1.0 computed by dividing the <em>natural rank</em> by the total number of values in the set, <em>n</em>. Thus, for finite sets, any <em>normalized rank</em> is in the range (0, 1]. Normalized ranks are often written as a percent. But don’t confuse percent with percentile! This will be explained below.</li>
<li>A rank of 0, whether natural or normalized, represents the empty set.</li>
</ul>
<p>In our sketch library and documentation, when we refer to <em>rank</em>, we imply <em>normalized rank</em>. However, in this tutorial, we will sometimes use <em>natural ranks</em> to simplify the examples.</p>
<h3 id="rank-and-mass">Rank and Mass</h3>
<p><em>Normalized rank</em> is closely associated with the concept of <em>mass</em>. The value associated with the rank 0.5 represents the median value, or the center of <em>mass</em> of the entire set, where half of the values are below the median and half are above. The concept of mass is important to understanding the Probability Mass Function (PMF) offered by all the quantile sketches in the library.</p>
<h2 id="what-is-a-quantile">What is a quantile?</h2>
<h3 id="a-quantile-is-a-value-that-is-associated-with-a-particular-rank">A <strong><em>quantile</em></strong> is a <strong><em>value</em></strong> that is associated with a particular <strong><em>rank</em></strong>.</h3>
<p><em>Quantile</em> is the general term that includes other terms that are also quantiles.
To wit:</p>
<ul>
<li>A percentile is a quantile where the rank domain is divided into hundredths. For example, “An SAT Math score of 740 is at the 95th percentile”. The score of 740 is the quantile and .95 is the normalized rank.</li>
<li>A decile is a quantile where the rank domain is divided into tenths. For example, “An SAT Math score of 690 is at the 9th decile (rank = 0.9).</li>
<li>A quartile is a quantile where the rank domain is divided into fourths. For example, “An SAT Math score of 600 is at the third quartile (rank = 0.75).</li>
<li>The median is a quantile that splits the rank domain in half. For example, “An SAT Math score of 520 is at the median (rank = 0.5).</li>
</ul>
<h2 id="the-simple-quantile-and-rank-functions">The simple quantile and rank functions</h2>
<p>Let’s examine the following table:</p>
<table>
<thead>
<tr>
<th>Quantile:</th>
<th>10</th>
<th>20</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank</td>
<td>1</td>
<td>2</td>
<td>3</td>
<td>4</td>
<td>5</td>
</tr>
<tr>
<td>Normalized Rank</td>
<td>.2</td>
<td>.4</td>
<td>.6</td>
<td>.8</td>
<td>1.0</td>
</tr>
</tbody>
</table>
<h3 id="note">Note:</h3>
<p>The term “value” can be ambiguous because items that we stream into a sketch are values and numeric ranks are also values. To avoid this ambiguity, we will use the term “quantiles” to refer to values that are streamed into a sketch even before they have been associated with a rank.</p>
<p>Let’s define the simple functions</p>
<h3 id="quantilerank-or-qr--return-the-quantile-q-associated-with-a-given-rank-r"><strong><em>quantile(rank)</em></strong> or <strong><em>q(r)</em></strong> := return the quantile <strong><em>q</em></strong> associated with a given <strong><em>rank, r</em></strong>.</h3>
<h3 id="rankquantile-or-rq--return-the-rank-r-associated-with-a-given-quantile-q"><strong><em>rank(quantile)</em></strong> or <strong><em>r(q)</em></strong> := return the rank <strong><em>r</em></strong> associated with a given <strong><em>quantile, q</em></strong>.</h3>
<p>Using an example from the table:</p>
<ul>
<li>Using natural ranks:
<ul>
<li><em>q(3) = 30</em></li>
<li><em>r(30) = 3</em></li>
</ul>
</li>
<li>Using normalized ranks:
<ul>
<li><em>q(.6) = 30</em></li>
<li><em>r(30) = .6</em></li>
</ul>
</li>
</ul>
<p>Because of the close, two-way relationship of quantiles and ranks,<br />
<em>r(q)</em> and <em>q(r)</em> form a <em>1:1 functional pair</em> if, and only if</p>
<ul>
<li><em>q = q(r(q))</em></li>
<li><em>r = r(q(r))</em></li>
</ul>
<p>And this is certainly true of the table above.</p>
<h2 id="the-challenge-of-duplicates">The challenge of duplicates</h2>
<p>With real data we often encounter duplicate quantiles in the stream. Let’s examine this next table.</p>
<table>
<thead>
<tr>
<th>Quantile:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>20</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank</td>
<td>1</td>
<td>2</td>
<td>3</td>
<td>4</td>
<td>5</td>
</tr>
</tbody>
</table>
<p>As you can see <em>q(r)</em> is straightforward. But how about <em>r(q)</em>? Which of the ranks 2, 3, or 4 should the function return, given the quantile 20? Given this data, and our definitions so far, the function <em>r(q)</em> is ambiguous. We will see how to resolve this shortly.</p>
<h2 id="the-challenge-of-approximation">The challenge of approximation</h2>
<p>By definition, sketching algorithms are approximate, and they achieve their high performance by discarding data. Suppose you feed <em>n</em> quantiles into a sketch that retains only <em>m &lt; n</em> quantiles. This means <em>n-m</em> quantiles were discarded. The sketch must track the quantity <em>n</em> used for computing the rank and quantile functions. When the sketch reconstructs the relationship between ranks and quantiles, <em>n-m</em> quantiles are missing creating holes in the ordered sequence. For example, examine the following tables.</p>
<p>The raw data might look like this, with its associated natural ranks.</p>
<table>
<thead>
<tr>
<th>Quantile:</th>
<th>10</th>
<th>20</th>
<th>30</th>
<th>40</th>
<th>50</th>
<th>60</th>
<th>70</th>
<th>80</th>
<th>90</th>
<th>100</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank</td>
<td>1</td>
<td>2</td>
<td>3</td>
<td>4</td>
<td>5</td>
<td>6</td>
<td>7</td>
<td>8</td>
<td>9</td>
<td>10</td>
</tr>
</tbody>
</table>
<p>The sketch might discard the even numbered quantiles producing something like this:</p>
<table>
<thead>
<tr>
<th>Quantile:</th>
<th>10</th>
<th>30</th>
<th>50</th>
<th>70</th>
<th>90</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank</td>
<td>2</td>
<td>4</td>
<td>6</td>
<td>8</td>
<td>10</td>
</tr>
</tbody>
</table>
<p>When the sketch deletes quantiles it adjusts the associated ranks by effectively increasing the “weight” of adjacent quantiles so that they are approximately positionally correct and the top natural rank corresponds to <em>n</em>.</p>
<p>How do we resolve <em>q(3)</em> or <em>r(20)</em>?</p>
<h2 id="the-need-for-inequality-search">The need for inequality search</h2>
<p>The quantile sketch algorithms discussed in the literature primarily differ by how they choose which quantiles in the stream should be discarded. After the elimination process, all of the quantiles sketch implementations are left with the challenge of how to reconstruct the actual distribution, approximately and with good accuracy.</p>
<p>Given the presence of duplicates and absence of values from the stream we must redefine the above quantile and rank functions as inequalities <strong>while retaining the properties of 1:1 functions.</strong></p>
<p>One can find examples of the following definitions in the research literature. All of our library quantile sketches allow the user to choose the searching criteria.</p>
<p>These next examples use a small data set that mimics what could be the result of both duplication and sketch data deletion.</p>
<h2 id="the-rules-for-returned-quantiles-or-ranks">The rules for returned quantiles or ranks</h2>
<ul>
<li>
<p><strong>Rule 1:</strong> Every quantile that exists in the input stream or retained by the sketch has an associated rank.</p>
</li>
<li>
<p><strong>Rule 2:</strong> All of our quantile sketches only retain quantiles that exist in the actual input stream of quantiles.</p>
</li>
<li>
<p><strong>Rule 3:</strong> For the <em>getQuantile(rank)</em> queries, all of our quantile sketches only return quantiles that were retained by the sketch. (i.e, we do not interpolate between quantiles.)</p>
</li>
<li>
<p><strong>Rule 4:</strong> For the <em>getRank(quantile)</em> queries, all of our quantile sketches only return ranks that are associated with quantiles retained by the sketch. (i.e, we do not interpolate between ranks.)</p>
</li>
<li>
<p><strong>Rule 5:</strong> All of our quantile algorithms compensate for quantiles removed during the sketch quantile selection and compression process by increasing the weights of some of the quantiles not selected for removal, such that:</p>
<ul>
<li>The sum of the natural weights of all quantiles retained by the sketch equals <strong>n</strong>, the total count of all quantiles given to the sketch.</li>
<li>And by corollary, the largest quantile, when sorted by cumulative rank, has a cumulative natural rank of <strong>n</strong>, or equivalently, a cumulative normalized rank of <strong>1.0</strong>.</li>
</ul>
</li>
</ul>
<h2 id="the-rank-functions-with-inequalities">The rank functions with inequalities</h2>
<h3 id="rankquantile-inclusive-or-rq-le-given-q-return-the-rank-r-of-the-largest-quantile-that-is-less-than-or-equal-to-q"><strong><em>rank(quantile, INCLUSIVE)</em></strong> or <strong><em>r(q, LE)</em></strong> :=<br />Given <em>q</em>, return the rank, <em>r</em>, of the largest quantile that is less than or equal to <em>q</em>.</h3>
<p><b>Implementation:</b></p>
<ul>
<li>Given <em>q</em>, search the quantile array until we find the adjacent pair <em>{q1, q2}</em> where <em>q1 &lt;= q &lt; q2</em>.</li>
<li>Return the rank, <em>r</em>, associated with <em>q1</em>, the first of the pair.</li>
</ul>
<p><b>Boundary Exceptions:</b></p>
<ul>
<li><strong>Boundary Rule 1:</strong> If the given <em>q</em> is <em>&gt;=</em> the quantile associated with the largest cumulative rank retained by the sketch, the function will return the largest cumulative rank, <em>1.0</em>.</li>
<li><strong>Boundary Rule 2:</strong> If the given <em>q</em> is <em>&lt;</em> the quantile associated with the smallest cumulative rank retained by the sketch, the function will return a rank of <em>0.0</em>.</li>
</ul>
<p><b>Examples using normalized ranks:</b></p>
<ul>
<li><em>r(30) = .786</em> Normal rule applies: <em>30 &lt;= 30 &lt; 40</em>, return <em>r(q1) = .786</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
</tr>
<tr>
<td>Quantile input</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>30</td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>q1</td>
<td>q2</td>
<td> </td>
</tr>
<tr>
<td>Rank result</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>.786</td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
<ul>
<li><em>r(55) = 1.0</em> Use Boundary Rule 1: <em>50 &lt;= 55</em>, return <em>1.0</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
<th>?</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
<td> </td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
<td> </td>
</tr>
<tr>
<td>Quantile input</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>55</td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>q1</td>
<td>(q2)</td>
</tr>
<tr>
<td>Rank result</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>1.0</td>
<td> </td>
</tr>
</tbody>
</table>
<ul>
<li><em>r(5) = 0.0</em> Use Boundary Rule 2: <em>5 &lt; 10</em>, return <em>0.0</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>?</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td> </td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td> </td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
</tr>
<tr>
<td>Quantile input</td>
<td>5</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td>(q1)</td>
<td>q2</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Rank result</td>
<td>0</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
<hr />
<h3 id="rankquantile-exclusive-or-rq-lt-given-q-return-the-rank-r-of-the-largest-quantile-that-is-strictly-less-than-q"><strong><em>rank(quantile, EXCLUSIVE)</em></strong> or <strong><em>r(q, LT)</em></strong> :=<br />Given <em>q</em>, return the rank, <em>r</em>, of the largest quantile that is strictly <em>Less Than</em> <em>q</em>.</h3>
<p><b>Implementation:</b></p>
<ul>
<li>Given <em>q</em>, search the quantile array until we find the adjacent pair <em>{q1, q2}</em> where <em>q1 &lt; q &lt;= q2</em>.</li>
<li>Return the rank, <em>r</em>, associated with <em>q1</em>, the first of the pair.</li>
</ul>
<p><b>Boundary Exceptions:</b></p>
<ul>
<li><strong>Boundary Rule 1:</strong> If the given <em>q</em> is <em>&gt;</em> the quantile associated with the largest cumulative rank retained by the sketch, the sketch will return the the largest cumulative rank, <em>1.0</em>.</li>
<li><strong>Boundary Rule 2:</strong> If the given <em>q</em> is <em>&lt;=</em> the quantile associated with the smallest cumulative rank retained by the sketch, the sketch will return a rank of <em>0.0</em>.</li>
</ul>
<p><b>Examples using normalized ranks:</b></p>
<ul>
<li><em>r(30) = .357</em> Normal rule applies: <em>20 &lt; 30 &lt;= 30</em>, return <em>r(q1) = .357</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.000</td>
</tr>
<tr>
<td>Quantile input</td>
<td> </td>
<td> </td>
<td> </td>
<td>30</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td>q1</td>
<td>q2</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Rank result</td>
<td> </td>
<td> </td>
<td>.357</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
<ul>
<li><em>r(55) = 1.0</em> Use Boundary Rule 1: <em>50 &lt; 55</em>, return <em>1.0</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
<th>?</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
<td> </td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
<td> </td>
</tr>
<tr>
<td>Quantile input</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>55</td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>q1</td>
<td>(q2)</td>
</tr>
<tr>
<td>Rank result</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>1.000</td>
<td> </td>
</tr>
</tbody>
</table>
<ul>
<li><em>r(5) = 0.0</em> Use Boundary Rule 2: <em>5 &lt;= 10</em>, return <em>0</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>?</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td> </td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td> </td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
</tr>
<tr>
<td>Quantile input</td>
<td>5</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td>(q1)</td>
<td>q2</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Rank result</td>
<td>0</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
<h2 id="the-quantile-functions-with-inequalities">The quantile functions with inequalities</h2>
<h3 id="quantilerank-inclusive-or-qr-ge-given-r-return-the-quantile-q-of-the-smallest-rank-that-is-strictly-greater-than-or-equal-to-r"><strong><em>quantile(rank, INCLUSIVE)</em></strong> or <strong><em>q(r, GE)</em></strong> :=<br />Given <em>r</em>, return the quantile, <em>q</em>, of the smallest rank that is strictly Greater than or Equal to <em>r</em>.</h3>
<p><b>Implementation:</b></p>
<ul>
<li>Given <em>r</em>, search the rank array until we find the adjacent pair <em>{r1, r2}</em> where <em>r1 &lt; r &lt;= r2</em>.</li>
<li>Return the quantile, <em>q</em>, associated with <em>r2</em>, the second of the pair.</li>
</ul>
<p><b>Boundary Exceptions:</b></p>
<ul>
<li><strong>Boundary Rule 2:</strong> If the given normalized rank, <em>r</em>, is <em>&lt;=</em> the smallest rank, the function will return the <strong>quantile</strong> associated with the smallest cumulative rank.</li>
</ul>
<p><b>Examples using normalized ranks:</b></p>
<ul>
<li><em>q(.786) = 30</em> Normal rule applies: <em>.643 &lt; .786 &lt;= .786</em>, return <em>q(r2) = 30</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.000</td>
</tr>
<tr>
<td>Rank input</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>.786</td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>r1</td>
<td>r2</td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Quantile result</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>30</td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
<ul>
<li><em>q(1.0) = 50</em> Normal rule applies: <em>.929 &lt; 1.0 &lt;= 1.0</em>, return <em>q(r2) = 50</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
</tr>
<tr>
<td>Rank input</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>1.0</td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>r1</td>
<td>r2</td>
</tr>
<tr>
<td>Quantile result</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>50</td>
</tr>
</tbody>
</table>
<ul>
<li><em>q(0.0 &lt;= .071) = 10</em> Use Boundary Rule 2: <em>0.0 &lt;= .071</em>, return <em>10</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>?</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td> </td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td> </td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
</tr>
<tr>
<td>Rank input</td>
<td>0.0</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td>(r1)</td>
<td>r2</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Rank result</td>
<td> </td>
<td>10</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
<hr />
<h3 id="quantilerank-exclusive-or-qr-gt-given-r-return-the-quantile-q-of-the-smallest-rank-that-is-strictly-greater-than-r"><strong><em>quantile(rank, EXCLUSIVE)</em></strong> or <strong><em>q(r, GT)</em></strong> :=<br />Given <em>r</em>, return the quantile, <em>q</em>, of the smallest rank that is strictly Greater Than <em>r</em>.</h3>
<p><b>Implementation:</b></p>
<ul>
<li>Given <em>r</em>, search the rank array until we find the adjacent pair <em>{r1, r2}</em> where <em>r1 &lt;= r &lt; r2</em>.</li>
<li>Return the quantile, <em>q</em>, associated with <em>r2</em>, the second of the pair.</li>
</ul>
<p><b>Boundary Exceptions:</b></p>
<ul>
<li><strong>Boundary Rule 1:</strong> If the given normalized rank, <em>r</em>, is equal to 1.0, there is no quantile that satisfies this criterion. However, for convenience, the function will return quantile associated with the largest cumulative rank retained by the sketch.</li>
<li><strong>Boundary Rule 2:</strong> If the given normalized rank, <em>r</em>, is less than the smallest rank, the function will return the quantile associated with the smallest cumulative rank retained by the sketch.</li>
</ul>
<p><b>Examples using normalized ranks:</b></p>
<ul>
<li><em>q(.357) = 30</em> Normal rule applies: <em>.357 &lt;= .357 &lt; .500</em>, return <em>q(r2) = 30</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.000</td>
</tr>
<tr>
<td>Rank input</td>
<td> </td>
<td> </td>
<td>.357</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td>r1</td>
<td>r2</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Quantile result</td>
<td> </td>
<td> </td>
<td> </td>
<td>30</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
<ul>
<li><em>q(1.0) = 50</em> Use Boundary Rule 1 <em>1.0 &lt;= 1.0 &lt; ?</em>, return <em>50</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
<th>?</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
<td> </td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
<td> </td>
</tr>
<tr>
<td>Rank input</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>1.0</td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>r1</td>
<td>(r2)</td>
</tr>
<tr>
<td>Quantile result</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>50</td>
</tr>
</tbody>
</table>
<ul>
<li><em>q(0.99) = 50</em> Normal rule applies <em>.929 &lt;= .99 &lt; 1.0</em>, return <em>50</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
</tr>
<tr>
<td>Rank input</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>.99</td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>r1</td>
<td>r2</td>
</tr>
<tr>
<td>Quantile result</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>50</td>
</tr>
</tbody>
</table>
<ul>
<li><em>q(0.0 &lt;= .071) = 10</em> Use Boundary Rule 2: <em>0.0 &lt; .071</em>, return <em>10</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>?</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td> </td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td> </td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
</tr>
<tr>
<td>Rank input</td>
<td>0.0</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td>(r1)</td>
<td>r2</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Rank result</td>
<td> </td>
<td>10</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
<hr />
<h3 id="quantilerank-exclusive_strict-or-qr-gt_strict-given-r-return-the-quantile-q-of-the-smallest-rank-that-is-strictly-greater-than-r"><strong><em>quantile(rank, EXCLUSIVE_STRICT)</em></strong> or <strong><em>q(r, GT_STRICT)</em></strong> :=<br />Given <em>r</em>, return the quantile, <em>q</em>, of the smallest rank that is strictly Greater Than <em>r</em>.</h3>
<h3 id="note-this-rule-is-marginal-in-its-usefulness-so-it-is-not-currently-implemented">Note: This rule is marginal in its usefulness so it is not currently implemented.</h3>
<p><b>Implementation:</b></p>
<ul>
<li>Given <em>r</em>, search the rank array until we find the adjacent pair <em>{r1, r2}</em> where <em>r1 &lt;= r &lt; r2</em>.</li>
<li>Return the quantile, <em>q</em>, associated with <em>r2</em>, the second of the pair.</li>
</ul>
<p><b>Boundary Exceptions:</b></p>
<ul>
<li><strong>Boundary Rule 1:</strong> If the given normalized rank, <em>r</em>, is equal to <em>1.0</em>, there is no quantile that satisfies this criterion. Return <em>NaN</em> or <em>null</em>.</li>
<li><strong>Boundary Rule 2:</strong> If the given normalized rank, <em>r</em>, is less than the smallest rank, the function will return the quantile associated with the smallest cumulative rank retained by the sketch..</li>
</ul>
<p><b>Examples using normalized ranks:</b></p>
<ul>
<li><em>q(.357) = 30</em> Normal rule applies: <em>.357 &lt;= .357 &lt; .500</em>, return <em>q(r2) = 30</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.000</td>
</tr>
<tr>
<td>Rank input</td>
<td> </td>
<td> </td>
<td>.357</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td>r1</td>
<td>r2</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Quantile result</td>
<td> </td>
<td> </td>
<td> </td>
<td>30</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
<ul>
<li><em>q(1.0) = 50</em> Use Boundary Rule 1 <em>1.0 &lt;= 1.0 &lt; ?</em>, return <em>NaN or null</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
<th>?</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
<td> </td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
<td> </td>
</tr>
<tr>
<td>Rank input</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>1.0</td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>r1</td>
<td>(r2)</td>
</tr>
<tr>
<td>Quantile result</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>NaN or null</td>
</tr>
</tbody>
</table>
<ul>
<li><em>q(0.99) = 50</em> Normal rule applies <em>.929 &lt;= .99 &lt; 1.0</em>, return <em>50</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
</tr>
<tr>
<td>Rank input</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>.99</td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>r1</td>
<td>r2</td>
</tr>
<tr>
<td>Quantile result</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>50</td>
</tr>
</tbody>
</table>
<ul>
<li><em>q(0.0 &lt;= .071) = 10</em> Use Boundary Rule 2: <em>0.0 &lt; .071</em>, return <em>10</em>.</li>
</ul>
<table>
<thead>
<tr>
<th>Quantile[]:</th>
<th>?</th>
<th>10</th>
<th>20</th>
<th>20</th>
<th>30</th>
<th>30</th>
<th>30</th>
<th>40</th>
<th>50</th>
</tr>
</thead>
<tbody>
<tr>
<td>Natural Rank[]:</td>
<td> </td>
<td>1</td>
<td>3</td>
<td>5</td>
<td>7</td>
<td>9</td>
<td>11</td>
<td>13</td>
<td>14</td>
</tr>
<tr>
<td>Normalized Rank[]:</td>
<td> </td>
<td>.071</td>
<td>.214</td>
<td>.357</td>
<td>.500</td>
<td>.643</td>
<td>.786</td>
<td>.929</td>
<td>1.0</td>
</tr>
<tr>
<td>Rank input</td>
<td>0.0</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Qualifying pair</td>
<td>(r1)</td>
<td>r2</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Rank result</td>
<td> </td>
<td>10</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
<h2 id="these-inequality-functions-maintain-the-11-functional-relationship-approximately">These inequality functions maintain the 1:1 functional relationship, approximately.</h2>
<h3 id="the-exclusive-search-for-qr-is-the-inverse-of-the-exclusive-search-for-rq">The <em>exclusive</em> search for q(r) is the inverse of the <em>exclusive</em> search for r(q).</h3>
<h5 id="therefore-q--qrq-and-r--rqr">Therefore, <em>q = q(r(q))</em> and <em>r = r(q(r))</em>.</h5>
<h3 id="the-inclusive-search-for-qr-is-the-inverse-of-the-inclusive-search-for-rq">The <em>inclusive</em> search for q(r) is the inverse of the <em>inclusive</em> search for r(q).</h3>
<h5 id="therefore-q--qrq-and-r--rqr-1">Therefore, <em>q = q(r(q))</em> and <em>r = r(q(r))</em>.</h5>
<h2 id="summary">Summary</h2>
<p>The power of these inequality search algorithms is that they produce repeatable and accurate results, are insensitive to duplicates and sketch deletions, and maintain the property of 1:1 functions.</p>
</div> <!-- End content -->
</div> <!-- End row -->
</div> <!-- End Container -->
<!-- Start _include/page_footer.html -->
<footer class="ds-footer">
<div class="container">
<div class="text-center">
<p>
<div>Copyright © 2024 <a href="https://www.apache.org">Apache Software Foundation</a>,
Licensed under the Apache License, Version 2.0. All Rights Reserved.
| <a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy Policy</a><br/>
Apache DataSketches, Apache, the Apache feather logo, and the Apache DataSketches project logos are trademarks of The Apache Software Foundation.<br/>
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
</div>
</p>
</div>
</div>
</footer>
<!-- End _include/page_footer.html -->
</body>
</html>
<!-- End _layouts/doc_page.html-->