blob: fa8b0a5fdf642734ccc60a18cf49b51760d414a4 [file] [log] [blame]
<!DOCTYPE html>
<!-- Start _layouts/doc_page.html-->
<html lang="en">
<head>
<!-- Start _include/site_head.html -->
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="">
<meta name="author" content="datasketches">
<title>DataSketches | </title>
<link rel="shortcut icon" href="/img/favicon.png">
<!-- original source: https://maxcdn.bootstrapcdn.com/font-awesome/4.1.0/css/font-awesome.min.css -->
<link rel="stylesheet" href="/css/font-awesome.min.css">
<!-- original source: https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap.min.css -->
<link rel="stylesheet" href="/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/fonts.css" type="text/css">
<link rel="stylesheet" href="/css/main.css">
<link rel="stylesheet" href="/css/header.css">
<link rel="stylesheet" href="/css/footer.css">
<link rel="stylesheet" href="/css/syntax.css">
<link rel="stylesheet" href="/css/docs.css">
<script type="text/x-mathjax-config">
MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]},showMathMenu:false,showMathMenuMSIE:false,showProcessingMessages:false});
</script>
<!-- original source: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMX_HTML-full -->
<script type="text/javascript" src="/js/MathJax.js?config=TeX-AMS_HTML"></script>
<!-- original source: https://code.jquery.com/jquery.min.js -->
<script src="/js/jquery.min.js"></script>
<!-- original source: https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/js/bootstrap.min.js -->
<script src="/js/bootstrap.min.js"></script> <!-- 3.2.0-->
<!-- End _include/site_head.html -->
</head>
<body>
<!-- Start _include/nav_bar.html -->
<div class="navbar navbar-inverse navbar-static-top ds-nav">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="/" style="padding-top: 0px; padding-bottom: 0px;">
<span class="ds-small-h-logo"></span></a>
</div>
<div class="navbar-collapse collapse">
<ul class="nav navbar-nav navbar-right">
<li>
<a href="/docs/Background/TheChallenge.html">
<span class="fa fa-info-circle"></span> DOCUMENTATION</a>
</li>
<li>
<a href="/docs/Community/Downloads.html">
<span class="fa fa-download"></span> DOWNLOAD</a>
</li>
<!--
<li>
<a href="/docs/Architecture/Components.html">
<span class="fa fa-github"></span> GITHUB</a>
</li>
-->
<li>
<a href="/docs/Community/Research.html">
<span class="fa fa-paper-plane"></span> RESEARCH</a>
</li>
<li>
<a href="/docs/Community/index.html" style="padding-top: 0; padding-bottom: 0;">
<img class="ds-small-man" src="/img/datasketches-ManWhite.svg"/>COMMUNITY</a>
</li>
<li>
<ul class="nav navbar-nav navbar-right ds-nav">
<li class="dropdown ds-nav" >
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false" style="padding-top: 0; padding-bottom: 0;"><img class="apache-logo" src="/img/feather.svg"/>Apache <span class="caret"></span></a>
<ul class="dropdown-menu ds-nav">
<li><a href="https://www.apache.org/" target="_blank">Foundation</a></li>
<li><a href="https://www.apache.org/events/current-event" target="_blank">Events</a></li>
<li><a href="https://www.apache.org/licenses/" target="_blank">License</a></li>
<li><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank">Privacy Policy</a></li>
<li><a href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li>
<li><a href="https://www.apache.org/security/" target="_blank">Security</a></li>
<li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Sponsorship</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
</div>
<!-- End _include/nav_bar.html -->
<!-- Start _include/javadocs.html -->
<div class="ds-header">
<div class="container">
<h4>API Snapshots:
<a href="https://apache.github.io/datasketches-java/4.2.0/">Java Core</a>,
<a href="https://apache.github.io/datasketches-cpp/5.0.0/">C++ Core</a>,
<a href="https://apache.github.io/datasketches-python/main/">Python</a>,
<a href="https://apache.github.io/datasketches-memory/master/">Memory</a>,
<a href="/api/pig/snapshot/apidocs/index.html">Pig</a>,
<a href="/api/hive/snapshot/apidocs/index.html">Hive</a>,
</h4>
</div>
</div>
<!-- End _include/javadocs.html -->
<div class="container">
<div class="row">
<!-- Start ToC Block -->
<div class="col-md-3">
<div class="searchbox" style="position:relative">
<gcse:searchbox-only></gcse:searchbox-only>
</div>
<!-- Start _includes/toc.html -->
<!-- Computer Generated File, Do Not Edit! -->
<link rel="stylesheet" href="/css/toc.css">
<div id="toc" class="nav toc hidden-print">
<p id="background">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_background">Background</a>
</p>
<div class="collapse" id="collapse_background">
<li><a href="/docs/Background/TheChallenge.html">•The Challenge</a></li>
<li><a href="/docs/Background/SketchOrigins.html">•Sketch Origins</a></li>
<li><a href="/docs/Background/SketchElements.html">•Sketch Elements</a></li>
<li><a href="/docs/Background/Presentations.html">•Presentations</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/DataSketches_deck.pdf">•Overview Slide Deck</a></li>
</div>
<p id="architecture-and-design">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_architecture_and_design">Architecture And Design</a>
</p>
<div class="collapse" id="collapse_architecture_and_design">
<li><a href="/docs/Architecture/MajorSketchFamilies.html">•The Major Sketch Families</a></li>
<li><a href="/docs/Architecture/LargeScale.html">•Large Scale Computing</a></li>
<li><a href="/docs/Architecture/KeyFeatures.html">•Key Features</a></li>
<li><a href="/docs/Architecture/SketchFeaturesMatrix.html">•Sketch Features Matrix</a></li>
<li><a href="/docs/Architecture/Components.html">•Components</a></li>
<li><a href="/docs/Architecture/SketchesByComponent.html">•Sketches by Component</a></li>
<li><a href="/docs/Architecture/SketchCriteria.html">•Sketch Criteria</a></li>
<p id="memory-component">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_memory_component">Memory Component</a>
</p>
<div class="collapse" id="collapse_memory_component">
<li><a href="/docs/Memory/MemoryComponent.html">•Memory Component</a></li>
<li><a href="/docs/Memory/MemoryPerformance.html">•Memory Component Performance</a></li>
</div>
<li><a href="/docs/Architecture/OrderSensitivity.html">•Notes on Order Sensitivity</a></li>
<li><a href="/docs/Architecture/Concurrency.html">•Notes on Concurrency</a></li>
</div>
<p id="sketch-families">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sketch_families">Sketch Families</a>
</p>
<div class="collapse" id="collapse_sketch_families">
<p id="distinct-counting">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_distinct_counting">Distinct Counting</a>
</p>
<div class="collapse" id="collapse_distinct_counting">
<li><a href="/docs/DistinctCountFeaturesMatrix.html">•Features Matrix</a></li>
<li><a href="/docs/DistinctCountMeritComparisons.html">•Figures-of-Merit Comparison</a></li>
<p id="cpc-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_cpc_sketches">CPC Sketches</a>
</p>
<div class="collapse" id="collapse_cpc_sketches">
<li><a href="/docs/CPC/CPC.html">•CPC Sketch</a></li>
<li><a href="/docs/CPC/CpcPerformance.html">•CPC Sketch Performance</a></li>
<p id="cpc-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_cpc_examples">CPC Examples</a>
</p>
<div class="collapse" id="collapse_cpc_examples">
<li><a href="/docs/CPC/CpcJavaExample.html">•CPC Sketch Java Example</a></li>
<li><a href="/docs/CPC/CpcCppExample.html">•CPC Sketch C++ Example</a></li>
<li><a href="/docs/CPC/CpcPigExample.html">•CPC Sketch Pig UDFs</a></li>
<li><a href="/docs/CPC/CpcHiveExample.html">•CPC Sketch Hive UDFs</a></li>
</div>
</div>
<p id="hyperloglog-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hyperloglog_sketches">HyperLogLog Sketches</a>
</p>
<div class="collapse" id="collapse_hyperloglog_sketches">
<li><a href="/docs/HLL/HLL.html">•HLL Sketch</a></li>
<li><a href="/docs/HLL/HllMap.html">•HLL Map Sketch</a></li>
<p id="hll-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hll_examples">HLL Examples</a>
</p>
<div class="collapse" id="collapse_hll_examples">
<li><a href="/docs/HLL/HllJavaExample.html">•HLL Sketch Java Example</a></li>
<li><a href="/docs/HLL/HllCppExample.html">•HLL Sketch C++ Example</a></li>
<li><a href="/docs/HLL/HllPigUDFs.html">•HLL Sketch Pig UDFs</a></li>
<li><a href="/docs/HLL/HllHiveUDFs.html">•HLL Sketch Hive UDFs</a></li>
</div>
<p id="hll-studies">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hll_studies">HLL Studies</a>
</p>
<div class="collapse" id="collapse_hll_studies">
<li><a href="/docs/HLL/HllPerformance.html">•HLL Sketch Performance</a></li>
<li><a href="/docs/HLL/Hll_vs_CS_Hllpp.html">•HLL vs Clearspring HLL++</a></li>
<li><a href="/docs/HLL/HllSketchVsDruidHyperLogLogCollector.html">•HLL Sketch vs Druid HyperLogLogCollector</a></li>
</div>
</div>
<p id="theta-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_sketches">Theta Sketches</a>
</p>
<div class="collapse" id="collapse_theta_sketches">
<li><a href="/docs/Theta/ThetaSketchFramework.html">•Theta Sketch Framework</a></li>
<p id="theta-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_examples">Theta Examples</a>
</p>
<div class="collapse" id="collapse_theta_examples">
<li><a href="/docs/Theta/ConcurrentThetaSketch.html">•Concurrent Theta Sketch</a></li>
<li><a href="/docs/Theta/ThetaJavaExample.html">•Theta Sketch Java Example</a></li>
<li><a href="/docs/Theta/ThetaSparkExample.html">•Theta Sketch Spark Example</a></li>
<li><a href="/docs/Theta/ThetaPigUDFs.html">•Theta Sketch Pig UDFs</a></li>
<li><a href="/docs/Theta/ThetaHiveUDFs.html">•Theta Sketch Hive UDFs</a></li>
</div>
<p id="kmv-tutorial">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_kmv_tutorial">KMV Tutorial</a>
</p>
<div class="collapse" id="collapse_kmv_tutorial">
<li><a href="/docs/Theta/InverseEstimate.html">•The Inverse Estimate</a></li>
<li><a href="/docs/Theta/KMVempty.html">•Empty Sketch</a></li>
<li><a href="/docs/Theta/KMVfirstEst.html">•First Estimator</a></li>
<li><a href="/docs/Theta/KMVbetterEst.html">•Better Estimator</a></li>
<li><a href="/docs/Theta/KMVrejection.html">•Rejection Rules</a></li>
<li><a href="/docs/Theta/KMVupdateVkth.html">•Update V(kth) Rule</a></li>
</div>
<p id="set-operations-and-p-sampling">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_set_operations_and_p-sampling">Set Operations and P-sampling</a>
</p>
<div class="collapse" id="collapse_set_operations_and_p-sampling">
<li><a href="/docs/Theta/ThetaSketchSetOps.html">•Set Operations</a></li>
<li><a href="/docs/Theta/ThetaSetOpsCornerCases.html">•Model & Test Set Operations</a></li>
<li><a href="/docs/Theta/ThetaPSampling.html"><i>p</i>-Sampling</a></li>
</div>
<p id="accuracy">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_accuracy">Accuracy</a>
</p>
<div class="collapse" id="collapse_accuracy">
<li><a href="/docs/Theta/ThetaAccuracy.html">•Basic Accuracy</a></li>
<li><a href="/docs/Theta/ThetaAccuracyPlots.html">•Accuracy Plots</a></li>
<li><a href="/docs/Theta/ThetaErrorTable.html">•Relative Error Table</a></li>
<li><a href="/docs/Theta/ThetaSketchSetOpsAccuracy.html">•SetOp Accuracy</a></li>
<li><a href="/docs/Theta/AccuracyOfDifferentKUnions.html">•Unions With Different k</a></li>
</div>
<p id="size">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_size">Size</a>
</p>
<div class="collapse" id="collapse_size">
<li><a href="/docs/Theta/ThetaSize.html">•Theta Sketch Size</a></li>
</div>
<p id="speed">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_speed">Speed</a>
</p>
<div class="collapse" id="collapse_speed">
<li><a href="/docs/Theta/ThetaUpdateSpeed.html">•Update Speed</a></li>
<li><a href="/docs/Theta/ThetaMergeSpeed.html">•Merge Speed</a></li>
</div>
<p id="theta-sketch-theory">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_sketch_theory">Theta Sketch Theory</a>
</p>
<div class="collapse" id="collapse_theta_sketch_theory">
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/ThetaSketchFramework.pdf">•Theta Sketch Framework (PDF)</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/ThetaSketchEquations.pdf">•Theta Sketch Equations (PDF)</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/DataSketches.pdf">•DataSketches (PDF)</a></li>
<li><a href="/docs/Theta/ThetaConfidenceIntervals.html">•Confidence Intervals Notes</a></li>
<li><a href="/docs/Theta/ThetaMergingAlgorithm.html">•Merging Algorithm Notes</a></li>
<li><a href="/docs/Theta/ThetaReferences.html">•Theta References</a></li>
</div>
</div>
<p id="tuple-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_tuple_sketches">Tuple Sketches</a>
</p>
<div class="collapse" id="collapse_tuple_sketches">
<li><a href="/docs/Tuple/TupleOverview.html">•Tuple Overview</a></li>
<p id="tuple-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_tuple_examples">Tuple Examples</a>
</p>
<div class="collapse" id="collapse_tuple_examples">
<li><a href="/docs/Tuple/TupleJavaExample.html">•Tuple Java Example</a></li>
<li><a href="/docs/Tuple/TupleEngagementExample.html">•Tuple Engagement Example</a></li>
<li><a href="/docs/Tuple/TuplePigUDFs.html">•Tuple Pig UDFs</a></li>
<li><a href="/docs/Tuple/TupleHiveUDFs.html">•Tuple Hive UDFs</a></li>
</div>
</div>
</div>
<p id="most-frequent">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_most_frequent">Most Frequent</a>
</p>
<div class="collapse" id="collapse_most_frequent">
<li><a href="/docs/Frequency/FrequencySketchesOverview.html">•Frequency Sketches Overview</a></li>
<p id="frequent-item-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_frequent_item_sketches">Frequent Item Sketches</a>
</p>
<div class="collapse" id="collapse_frequent_item_sketches">
<li><a href="/docs/Frequency/FrequentItemsOverview.html">•Frequent Items Overview</a></li>
<li><a href="/docs/Frequency/FrequentItemsErrorTable.html">•Frequent Items Error Table</a></li>
<li><a href="/docs/Frequency/FrequentItemsReferences.html">•Frequent Items References</a></li>
<li><a href="/docs/Frequency/FrequentItemsPerformance.html">•Frequent Items Performance</a></li>
<p id="most-frequent-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_most_frequent_examples">Most Frequent Examples</a>
</p>
<div class="collapse" id="collapse_most_frequent_examples">
<li><a href="/docs/Frequency/FrequentItemsJavaExample.html">•Frequent Items Java Example</a></li>
<li><a href="/docs/Frequency/FrequentItemsCppExample.html">•Frequent Items C++ Example</a></li>
<li><a href="/docs/Frequency/FrequentItemsPigUDFs.html">•Frequent Items Pig UDFs</a></li>
<li><a href="/docs/Frequency/FrequentItemsHiveUDFs.html">•Frequent Items Hive UDFs</a></li>
</div>
</div>
<p id="frequent-distinct-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_frequent_distinct_sketches">Frequent Distinct Sketches</a>
</p>
<div class="collapse" id="collapse_frequent_distinct_sketches">
<li><a href="/docs/Frequency/FrequentDistinctTuplesSketch.html">•Frequent Distinct Tuples Sketch</a></li>
</div>
</div>
<p id="quantiles-and-histograms">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_and_histograms">Quantiles And Histograms</a>
</p>
<div class="collapse" id="collapse_quantiles_and_histograms">
<li><a href="/docs/Quantiles/SketchingQuantilesAndRanksTutorial.html">•Quantiles and Ranks Tutorial</a></li>
<li><a href="/docs/Quantiles/QuantilesOverview.html">•Quantiles Overview</a></li>
<li><a href="/docs/KLL/KLLSketch.html">•KLL Floats sketch</a></li>
<li><a href="/docs/KLL/KLLAccuracyAndSize.html">•KLL Sketch Accuracy and Size</a></li>
<li><a href="/docs/REQ/ReqSketch.html">•REQ Floats sketch</a></li>
<li><a href="/docs/Quantiles/OrigQuantilesSketch.html">•Original QuantilesSketch</a></li>
<p id="quantiles-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_examples">Quantiles Examples</a>
</p>
<div class="collapse" id="collapse_quantiles_examples">
<li><a href="/docs/Quantiles/QuantilesJavaExample.html">•Quantiles Sketch Java Example</a></li>
<li><a href="/docs/KLL/KLLCppExample.html">•KLL Quantiles Sketch C++ Example</a></li>
<li><a href="/docs/Quantiles/QuantilesPigUDFs.html">•Quantiles Sketch Pig UDFs</a></li>
<li><a href="/docs/Quantiles/QuantilesHiveUDFs.html">•Quantiles Sketch Hive UDFs</a></li>
</div>
<p id="quantiles-studies">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_studies">Quantiles Studies</a>
</p>
<div class="collapse" id="collapse_quantiles_studies">
<li><a href="/docs/QuantilesStudies/DruidApproxHistogramStudy.html">•Druid Approximate Histogram</a></li>
<li><a href="/docs/QuantilesStudies/MomentsSketchStudy.html">•Moments Sketch Study</a></li>
<li><a href="/docs/QuantilesStudies/QuantilesStreamAStudy.html">•Quantiles StreamA Study</a></li>
<li><a href="/docs/QuantilesStudies/ExactQuantiles.html">•Exact Quantiles for Studies</a></li>
</div>
<p id="quantiles-sketch-theory">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_sketch_theory">Quantiles Sketch Theory</a>
</p>
<div class="collapse" id="collapse_quantiles_sketch_theory">
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/Quantiles_KLL.pdf">•Optimal Quantile Approximation in Streams</a></li>
<li><a href="/docs/Quantiles/QuantilesReferences.html">•Quantiles References</a></li>
</div>
</div>
<p id="sampling">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sampling">Sampling</a>
</p>
<div class="collapse" id="collapse_sampling">
<li><a href="/docs/Sampling/ReservoirSampling.html">•Reservoir Sampling</a></li>
<li><a href="/docs/Sampling/ReservoirSamplingPerformance.html">•Reservoir Sampling Performance</a></li>
<li><a href="/docs/Sampling/VarOptSampling.html">•VarOpt Sampling</a></li>
<p id="sampling-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sampling_examples">Sampling Examples</a>
</p>
<div class="collapse" id="collapse_sampling_examples">
<li><a href="/docs/Sampling/ReservoirSamplingJava.html">•Reservoir Sampling Java Example</a></li>
<li><a href="/docs/Sampling/ReservoirSamplingPigUDFs.html">•Reservoir Sampling Pig UDFs</a></li>
<li><a href="/docs/Sampling/VarOptSamplingJava.html">•VarOpt Sampling Java Example</a></li>
<li><a href="/docs/Sampling/VarOptPigUDFs.html">•VarOpt Sampling Pig UDFs</a></li>
</div>
</div>
</div>
<p id="system-integrations">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_system_integrations">System Integrations</a>
</p>
<div class="collapse" id="collapse_system_integrations">
<li><a href="/docs/SystemIntegrations/ApacheDruidIntegration.html">•Using Sketches in ApacheDruid</a></li>
<li><a href="/docs/SystemIntegrations/ApacheHiveIntegration.html">•Using Sketches in Apache Hive</a></li>
<li><a href="/docs/SystemIntegrations/ApachePigIntegration.html">•Using Sketches in Apache Pig</a></li>
<li><a href="/docs/SystemIntegrations/PostgreSQLIntegration.html">•Using Sketches in PostgreSQL</a></li>
</div>
<p id="community">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_community">Community</a>
</p>
<div class="collapse" id="collapse_community">
<li><a href="/docs/Community/index.html">•Community</a></li>
<li><a href="/docs/Community/Downloads.html">•Downloads</a></li>
<li><a href="/docs/Community/NewCommitterProcess.html">•Committer Process</a></li>
<li><a href="/docs/Community/ReleaseProcessForCppComponents.html">•Release Process For CPP Components</a></li>
<li><a href="/docs/Community/ReleaseProcessForJavaComponents.html">•Release Process For Java Components</a></li>
<li><a href="/docs/Community/Transitioning.html">•Transitioning from prior GitHub Site</a></li>
</div>
<p id="research">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_research">Research</a>
</p>
<div class="collapse" id="collapse_research">
<li><a href="/docs/Community/Research.html">•Research</a></li>
</div>
</div>
<!-- End _includes/toc.html -->
<!-- Start _includes/tocScript.html -->
<script>
(function () {
var findLineItem = function (path) {
return document.querySelector(`#toc [href="${path}"]`);
};
function findNavItem(path) {
return document.querySelector(`.nav [href="${path}"]`);
}
var highlighLineItem = function (element) {
element.classList.add('highlight');
};
var checkHasClass = function (element, className) {
return element.className.split(' ').find(function (item) { return item === className || '' })
}
var findAllCollapseParents = function (element) {
var collapseMenus = [];
var elementPointer = element;
while (elementPointer !== document.body) {
if (checkHasClass(elementPointer, 'collapse')) {
collapseMenus.push(elementPointer);
}
elementPointer = elementPointer.parentElement
}
return collapseMenus
};
var openMenuItem = function (element) {
// $(element).collapse('show') would start a transition, adding `in` class instead.
element.classList.add('in');
};
var openAllFromList = function (elementList) {
elementList.forEach(openMenuItem);
};
var highlightAndOpenMenu = function () {
// Highlight & expand nav item in the TOC
var currentLineItem = findLineItem(document.location.pathname);
highlighLineItem(currentLineItem);
openAllFromList(findAllCollapseParents(currentLineItem));
// Highlight nav item in top navigation
highlighLineItem(findNavItem(document.location.pathname));
};
$(highlightAndOpenMenu);
}());
</script>
<!-- End _includes/tocScript.html -->
</div>
<!-- End ToC Block -->
<div class="col-md-9 doc-content">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<h1 id="sketches-by-component-repository">Sketches by <a href="https://github.com/apache?utf8=%E2%9C%93&amp;q=datasketches">Component Repository</a></h1>
<p>The DataSketches Library is organized into the following repository groups:</p>
<h2 id="java">Java</h2>
<h3 id="datasketches-java">datasketches-java</h3>
<p>This repository has the core-java sketching classes, which are leveraged by some of the other repositories. <br />
This repository has no external dependencies outside of the DataSketches/memory repository, Java and TestNG for unit tests.
This code is versioned and the latest release can be obtained from
<a href="https://datasketches.apache.org/docs/Community/Downloads.html">Downloads</a>.</p>
<p><b>High-level Repositories Structure</b></p>
<table>
<thead>
<tr>
<th>Sketches-core Packages.</th>
<th>Package Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>org.apache.datasketches</td>
<td>Common functions and utilities</td>
</tr>
<tr>
<td>org.apache.datasketches.cpc</td>
<td>New Unique Counting Sketch with better accuracy per size than HLL</td>
</tr>
<tr>
<td>org.apache.datasketches.fdt</td>
<td>Frequent Distinct Tuples Sketch.</td>
</tr>
<tr>
<td>org.apache.datasketches.frequencies</td>
<td>Frequent Item Sketches, for both longs and generics</td>
</tr>
<tr>
<td>org.apache.datasketches.hash</td>
<td>The 128-bit MurmurHash3 and adaptors</td>
</tr>
<tr>
<td>org.apache.datasketches.hll</td>
<td>Unique counting HLL sketches for both heap and off-heap.</td>
</tr>
<tr>
<td>org.apache.datasketches.hllmap</td>
<td>The (HLL) Unique Count Map Sketch</td>
</tr>
<tr>
<td>org.apache.datasketches.kll</td>
<td>Quantiles sketch with better accuracy per size than the standard quantiles sketch. Includes PMF, CDF functions, for floats, doubles. On-heap &amp; off-heap.</td>
</tr>
<tr>
<td>org.apache.datasketches.quantiles</td>
<td>Standard Quantiles sketch, plus PMF and CDF functions, for doubles and generics. On-heap &amp; off-heap.</td>
</tr>
<tr>
<td>org.apache.datasketches.req</td>
<td>Relative Error Quantiles (REQ) sketch, plus PMF and CDF functions for floats, on-heap. Extremely high accuracy for very high ranks (e.g., 99.999%ile), or very low ranks (e.g., .00001%ile.</td>
</tr>
<tr>
<td>org.apache.datasketches.sampling</td>
<td>Weighted and uniform reservoir sampling with generics</td>
</tr>
<tr>
<td>org.apache.datasketches.theta</td>
<td>Unique counting Theta Sketches for both on-heap &amp; off-heap</td>
</tr>
<tr>
<td>org.apache.datasketches.tuple</td>
<td>Tuple sketches for both primitives and generics</td>
</tr>
<tr>
<td>org.apache.datasketches.tuple.adouble</td>
<td>A Tuple sketch with a Summary of a single double</td>
</tr>
<tr>
<td>org.apache.datasketches.tuple.aninteger</td>
<td>A Tuple sketch with a Summary of a single integer</td>
</tr>
<tr>
<td>org.apache.datasketches.tuple.Strings</td>
<td>A Tuple sketch with a Summary of an array of Strings</td>
</tr>
</tbody>
</table>
<h3 id="datasketches-memory">datasketches-memory</h3>
<p>This code is versioned and the latest release can be obtained from
<a href="https://datasketches.apache.org/docs/Community/Downloads.html">Downloads</a>.</p>
<table>
<thead>
<tr>
<th>Memory Packages</th>
<th>Package Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>org.apache.datasketches.memory</td>
<td>Low level, high-performance Memory data-structure management primarily for off-heap.</td>
</tr>
</tbody>
</table>
<h3 id="datasketches-hive">datasketches-hive</h3>
<p>This repository contains Hive UDFs and UDAFs for use within Hadoop grid enviornments.
This code has dependencies on sketches-core as well as Hadoop and Hive.
Users of this code are advised to use Maven to bring in all the required dependencies.
This code is versioned and the latest release can be obtained from
<a href="https://datasketches.apache.org/docs/Community/Downloads.html">Downloads</a>.</p>
<table>
<thead>
<tr>
<th>Sketches-hive Packages</th>
<th>Package Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>org.apache.datasketches.hive.cpc</td>
<td>Hive UDF and UDAFs for CPC sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.hive.frequencies</td>
<td>Hive UDF and UDAFs for Frequent Items sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.hive.hll</td>
<td>Hive UDF and UDAFs for HLL sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.hive.kll</td>
<td>Hive UDF and UDAFs for KLL sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.hive.quantiles</td>
<td>Hive UDF and UDAFs for Quantiles sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.hive.theta</td>
<td>Hive UDF and UDAFs for Theta sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.hive.tuple</td>
<td>Hive UDF and UDAFs for Tuple sketches</td>
</tr>
</tbody>
</table>
<h3 id="datasketches-pig">datasketches-pig</h3>
<p>This repository contains Pig User Defined Functions (UDF) for use within Hadoop grid environments.
This code has dependencies on sketches-core as well as Hadoop and Pig.
Users of this code are advised to use Maven to bring in all the required dependencies.
This code is versioned and the latest release can be obtained from
<a href="https://datasketches.apache.org/docs/Community/Downloads.html">Downloads</a>.</p>
<table>
<thead>
<tr>
<th>Sketches-pig Packages</th>
<th>Package Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>org.apache.datasketches.pig.cpc</td>
<td>Pig UDFs for CPC sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.pig.frequencies</td>
<td>Pig UDFs for Frequent Items sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.pig.hash</td>
<td>Pig UDFs for MurmerHash3</td>
</tr>
<tr>
<td>org.apache.datasketches.pig.hll</td>
<td>Pig UDFs for HLL sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.pig.kll</td>
<td>Pig UDFs for KLL sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.pig.quantiles</td>
<td>Pig UDFs for Quantiles sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.pig.sampling.</td>
<td>Pig UDFs for Sampling sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.pig.theta</td>
<td>Pig UDFs for Theta sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.pig.tuple</td>
<td>Pig UDFs for Tuple sketches</td>
</tr>
</tbody>
</table>
<h3 id="datasketches-characterization">datasketches-characterization</h3>
<p>This relatively new repository is for Java and C++ code that we use to characterize the accuracy and speed performance of the sketches in
the library and is constantly being updated. Examples of the job command files used for various tests can be found in the src/main/resources directory.
Some of these tests can run for hours depending on its configuration. This component is not formally released and code must be obtained from
the <a href="https://github.com/apache/datasketches-characterization">GitHub site</a>.</p>
<table>
<thead>
<tr>
<th>Characterization Packages</th>
<th>Package Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>org.apache.datasketches.characterization</td>
<td>Common functions and utilities</td>
</tr>
<tr>
<td>org.apache.datasketches.characterization.concurrent</td>
<td>Concurrent Theta Sketch</td>
</tr>
<tr>
<td>org.apache.datasketches.characterization.cpc</td>
<td>Compressed Probabilistic Counting Sketch</td>
</tr>
<tr>
<td>org.apache.datasketches.characterization.fdt</td>
<td>Frequent Distinct Tuples Sketch</td>
</tr>
<tr>
<td>org.apache.datasketches.characterization.frequencies</td>
<td>Frequent Items Sketches</td>
</tr>
<tr>
<td>org.apache.datasketches.characterization.hash</td>
<td>Hash function performance</td>
</tr>
<tr>
<td>org.apache.datasketches.characterization.hll</td>
<td>HyperLogLog Sketcch</td>
</tr>
<tr>
<td>org.apache.datasketches.characterization.memory</td>
<td>Memory performance</td>
</tr>
<tr>
<td>org.apache.datasketches.characterization.quantiles</td>
<td>Quantiles performance</td>
</tr>
<tr>
<td>org.apache.datasketches.characterization.theta</td>
<td>Theta Sketch</td>
</tr>
<tr>
<td>org.apache.datasketches.characterization.uniquecount</td>
<td>Base Profiles for Unique Counting Sketches</td>
</tr>
</tbody>
</table>
<h3 id="datasketches-server">datasketches-server</h3>
<p>This is a new repository for our experimental docker/container server that enables easy access to the core sketches in the library via HTTP.
This component is not formally released and code must be obtained from
the <a href="https://github.com/apache/datasketches-server">GitHub site</a>.</p>
<h4 id="c-characterizations">C++ Characterizations</h4>
<ul>
<li>CPC</li>
<li>Frequent Items</li>
<li>HLL</li>
<li>KLL</li>
<li>Theta</li>
</ul>
<h3 id="datasketches-vector">datasketches-vector</h3>
<p>This component implements the <a href="/docs/Community/Research.html">Frequent Directions Algorithm</a> [GLP16]. It is still experimental in that the theoretical work has not yet supplied a suitable measure of error for production work. It can be used as is, but it will not go through a formal Apache Release until we can find a way to provide better error properties. It has a dependence on the Memory component.
This component is not formally released and code must be obtained from
the <a href="https://github.com/apache/datasketches-vector">GitHub site</a>.</p>
<h2 id="c-and-python">C++ and Python</h2>
<h3 id="datasketches-cpp">datasketches-cpp</h3>
<p>This is the evolving C++ implementations of the same sketches that are available in Java.
These implementations are <em>binary compatible</em> with their counterparts in Java.
In other words, a sketch created and stored in C++ can be opened and read in Java and visa-versa.
This code is versioned and the latest release can be obtained from
<a href="https://datasketches.apache.org/docs/Community/Downloads.html">Downloads</a>.</p>
<p>This site also has our <a href="https://github.com/apache/datasketches-cpp/tree/master/python">Python adaptors</a> that basically wrap the C++ implementations, making the high performance C++ implementations available from Python.</p>
<h3 id="datasketches-postgresql">datasketches-postgresql</h3>
<p>This site provides the postgres-specific adaptors that wrap the C++ implementations making
them available to the PostgreSQL database users. PostgreSQL users should download the PostgreSQL extension from <a href="https://pgxn.org/dist/datasketches/">pgxn.org</a>. For examples refer to the README on the component site. This code is versioned and the latest release can be obtained from
<a href="https://datasketches.apache.org/docs/Community/Downloads.html">Downloads</a>.</p>
</div> <!-- End content -->
</div> <!-- End row -->
</div> <!-- End Container -->
<!-- Start _include/page_footer.html -->
<footer class="ds-footer">
<div class="container">
<div class="text-center">
<p>
<div>Copyright © 2024 <a href="https://www.apache.org">Apache Software Foundation</a>,
Licensed under the Apache License, Version 2.0. All Rights Reserved.
| <a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy Policy</a><br/>
Apache DataSketches, Apache, the Apache feather logo, and the Apache DataSketches project logos are trademarks of The Apache Software Foundation.<br/>
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
</div>
</p>
</div>
</div>
</footer>
<!-- End _include/page_footer.html -->
</body>
</html>
<!-- End _layouts/doc_page.html-->