blob: f6007bc2507e1e4217b9fc1ff78a9166ab16aaa5 [file] [log] [blame]
<!DOCTYPE html>
<!-- Start _layouts/doc_page.html-->
<html lang="en">
<head>
<!-- Start _include/site_head.html -->
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="">
<meta name="author" content="datasketches">
<title>DataSketches | Apache Project Maturity Model Assessment for DataSketches (DRAFT)</title>
<link rel="shortcut icon" href="/img/favicon.png">
<!-- original source: https://maxcdn.bootstrapcdn.com/font-awesome/4.1.0/css/font-awesome.min.css -->
<link rel="stylesheet" href="/css/font-awesome.min.css">
<!-- original source: https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap.min.css -->
<link rel="stylesheet" href="/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/fonts.css" type="text/css">
<link rel="stylesheet" href="/css/main.css">
<link rel="stylesheet" href="/css/header.css">
<link rel="stylesheet" href="/css/footer.css">
<link rel="stylesheet" href="/css/syntax.css">
<link rel="stylesheet" href="/css/docs.css">
<script type="text/x-mathjax-config">
MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]},showMathMenu:false,showMathMenuMSIE:false,showProcessingMessages:false});
</script>
<!-- original source: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS_HTML-full -->
<script type="text/javascript" src="/js/MathJax.js?config=TeX-AMS_HTML"></script>
<!-- original source: https://code.jquery.com/jquery.min.js -->
<script src="/js/jquery.min.js"></script>
<!-- original source: https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/js/bootstrap.min.js -->
<script src="/js/bootstrap.min.js"></script> <!-- 3.2.0-->
<!-- End _include/site_head.html -->
</head>
<body>
<!-- Start _include/nav_bar.html -->
<div class="navbar navbar-inverse navbar-static-top ds-nav">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="/" style="padding-top: 0px; padding-bottom: 0px;">
<span class="ds-small-h-logo"></span></a>
</div>
<div class="navbar-collapse collapse">
<ul class="nav navbar-nav navbar-right">
<li>
<a href="/docs/Background/TheChallenge.html">
<span class="fa fa-info-circle"></span> DOCUMENTATION</a>
</li>
<li>
<a href="/docs/Community/Downloads.html">
<span class="fa fa-download"></span> DOWNLOAD</a>
</li>
<!--
<li>
<a href="/docs/Architecture/Components.html">
<span class="fa fa-github"></span> GITHUB</a>
</li>
-->
<li>
<a href="/docs/Community/Research.html">
<span class="fa fa-paper-plane"></span> RESEARCH</a>
</li>
<li>
<a href="/docs/Community/index.html" style="padding-top: 0; padding-bottom: 0;">
<img class="ds-small-man" src="/img/datasketches-ManWhite.svg"/>COMMUNITY</a>
</li>
<li>
<ul class="nav navbar-nav navbar-right ds-nav">
<li class="dropdown ds-nav" >
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false" style="padding-top: 0; padding-bottom: 0;"><img class="apache-logo" src="/img/feather.svg"/>Apache <span class="caret"></span></a>
<ul class="dropdown-menu ds-nav">
<li><a href="https://www.apache.org/" target="_blank">Foundation</a></li>
<li><a href="https://www.apache.org/events/current-event" target="_blank">Events</a></li>
<li><a href="https://www.apache.org/licenses/" target="_blank">License</a></li>
<li><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank">Privacy Policy</a></li>
<li><a href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li>
<li><a href="https://www.apache.org/security/" target="_blank">Security</a></li>
<li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Sponsorship</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
</div>
<!-- End _include/nav_bar.html -->
<!-- Start _include/javadocs.html -->
<div class="ds-header">
<div class="container">
<h4>API Snapshots:
<a href="https://apache.github.io/datasketches-java/4.2.0/">Java Core</a>,
<a href="https://apache.github.io/datasketches-cpp/5.0.0/">C++ Core</a>,
<a href="https://apache.github.io/datasketches-python/main/">Python</a>,
<a href="https://apache.github.io/datasketches-memory/master/">Memory</a>,
<a href="/api/pig/snapshot/apidocs/index.html">Pig</a>,
<a href="/api/hive/snapshot/apidocs/index.html">Hive</a>,
</h4>
</div>
</div>
<!-- End _include/javadocs.html -->
<div class="container">
<div class="row">
<!-- Start ToC Block -->
<div class="col-md-3">
<div class="searchbox" style="position:relative">
<gcse:searchbox-only></gcse:searchbox-only>
</div>
<!-- Start _includes/toc.html -->
<!-- Computer Generated File, Do Not Edit! -->
<link rel="stylesheet" href="/css/toc.css">
<div id="toc" class="nav toc hidden-print">
<p id="background">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_background">Background</a>
</p>
<div class="collapse" id="collapse_background">
<li><a href="/docs/Background/TheChallenge.html">•The Challenge</a></li>
<li><a href="/docs/Background/SketchOrigins.html">•Sketch Origins</a></li>
<li><a href="/docs/Background/SketchElements.html">•Sketch Elements</a></li>
<li><a href="/docs/Background/Presentations.html">•Presentations</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/DataSketches_deck.pdf">•Overview Slide Deck</a></li>
</div>
<p id="architecture-and-design">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_architecture_and_design">Architecture And Design</a>
</p>
<div class="collapse" id="collapse_architecture_and_design">
<li><a href="/docs/Architecture/MajorSketchFamilies.html">•The Major Sketch Families</a></li>
<li><a href="/docs/Architecture/LargeScale.html">•Large Scale Computing</a></li>
<li><a href="/docs/Architecture/KeyFeatures.html">•Key Features</a></li>
<li><a href="/docs/Architecture/SketchFeaturesMatrix.html">•Sketch Features Matrix</a></li>
<li><a href="/docs/Architecture/Components.html">•Components</a></li>
<li><a href="/docs/Architecture/SketchesByComponent.html">•Sketches by Component</a></li>
<li><a href="/docs/Architecture/SketchCriteria.html">•Sketch Criteria</a></li>
<p id="memory-component">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_memory_component">Memory Component</a>
</p>
<div class="collapse" id="collapse_memory_component">
<li><a href="/docs/Memory/MemoryComponent.html">•Memory Component</a></li>
<li><a href="/docs/Memory/MemoryPerformance.html">•Memory Component Performance</a></li>
</div>
<li><a href="/docs/Architecture/OrderSensitivity.html">•Notes on Order Sensitivity</a></li>
<li><a href="/docs/Architecture/Concurrency.html">•Notes on Concurrency</a></li>
</div>
<p id="sketch-families">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sketch_families">Sketch Families</a>
</p>
<div class="collapse" id="collapse_sketch_families">
<p id="distinct-counting">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_distinct_counting">Distinct Counting</a>
</p>
<div class="collapse" id="collapse_distinct_counting">
<li><a href="/docs/DistinctCountFeaturesMatrix.html">•Features Matrix</a></li>
<li><a href="/docs/DistinctCountMeritComparisons.html">•Figures-of-Merit Comparison</a></li>
<p id="cpc-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_cpc_sketches">CPC Sketches</a>
</p>
<div class="collapse" id="collapse_cpc_sketches">
<li><a href="/docs/CPC/CPC.html">•CPC Sketch</a></li>
<li><a href="/docs/CPC/CpcPerformance.html">•CPC Sketch Performance</a></li>
<p id="cpc-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_cpc_examples">CPC Examples</a>
</p>
<div class="collapse" id="collapse_cpc_examples">
<li><a href="/docs/CPC/CpcJavaExample.html">•CPC Sketch Java Example</a></li>
<li><a href="/docs/CPC/CpcCppExample.html">•CPC Sketch C++ Example</a></li>
<li><a href="/docs/CPC/CpcPigExample.html">•CPC Sketch Pig UDFs</a></li>
<li><a href="/docs/CPC/CpcHiveExample.html">•CPC Sketch Hive UDFs</a></li>
</div>
</div>
<p id="hyperloglog-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hyperloglog_sketches">HyperLogLog Sketches</a>
</p>
<div class="collapse" id="collapse_hyperloglog_sketches">
<li><a href="/docs/HLL/HLL.html">•HLL Sketch</a></li>
<li><a href="/docs/HLL/HllMap.html">•HLL Map Sketch</a></li>
<p id="hll-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hll_examples">HLL Examples</a>
</p>
<div class="collapse" id="collapse_hll_examples">
<li><a href="/docs/HLL/HllJavaExample.html">•HLL Sketch Java Example</a></li>
<li><a href="/docs/HLL/HllCppExample.html">•HLL Sketch C++ Example</a></li>
<li><a href="/docs/HLL/HllPigUDFs.html">•HLL Sketch Pig UDFs</a></li>
<li><a href="/docs/HLL/HllHiveUDFs.html">•HLL Sketch Hive UDFs</a></li>
</div>
<p id="hll-studies">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hll_studies">HLL Studies</a>
</p>
<div class="collapse" id="collapse_hll_studies">
<li><a href="/docs/HLL/HllPerformance.html">•HLL Sketch Performance</a></li>
<li><a href="/docs/HLL/Hll_vs_CS_Hllpp.html">•HLL vs Clearspring HLL++</a></li>
<li><a href="/docs/HLL/HllSketchVsDruidHyperLogLogCollector.html">•HLL Sketch vs Druid HyperLogLogCollector</a></li>
</div>
</div>
<p id="theta-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_sketches">Theta Sketches</a>
</p>
<div class="collapse" id="collapse_theta_sketches">
<li><a href="/docs/Theta/ThetaSketchFramework.html">•Theta Sketch Framework</a></li>
<p id="theta-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_examples">Theta Examples</a>
</p>
<div class="collapse" id="collapse_theta_examples">
<li><a href="/docs/Theta/ConcurrentThetaSketch.html">•Concurrent Theta Sketch</a></li>
<li><a href="/docs/Theta/ThetaJavaExample.html">•Theta Sketch Java Example</a></li>
<li><a href="/docs/Theta/ThetaSparkExample.html">•Theta Sketch Spark Example</a></li>
<li><a href="/docs/Theta/ThetaPigUDFs.html">•Theta Sketch Pig UDFs</a></li>
<li><a href="/docs/Theta/ThetaHiveUDFs.html">•Theta Sketch Hive UDFs</a></li>
</div>
<p id="kmv-tutorial">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_kmv_tutorial">KMV Tutorial</a>
</p>
<div class="collapse" id="collapse_kmv_tutorial">
<li><a href="/docs/Theta/InverseEstimate.html">•The Inverse Estimate</a></li>
<li><a href="/docs/Theta/KMVempty.html">•Empty Sketch</a></li>
<li><a href="/docs/Theta/KMVfirstEst.html">•First Estimator</a></li>
<li><a href="/docs/Theta/KMVbetterEst.html">•Better Estimator</a></li>
<li><a href="/docs/Theta/KMVrejection.html">•Rejection Rules</a></li>
<li><a href="/docs/Theta/KMVupdateVkth.html">•Update V(kth) Rule</a></li>
</div>
<p id="set-operations-and-p-sampling">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_set_operations_and_p-sampling">Set Operations and P-sampling</a>
</p>
<div class="collapse" id="collapse_set_operations_and_p-sampling">
<li><a href="/docs/Theta/ThetaSketchSetOps.html">•Set Operations</a></li>
<li><a href="/docs/Theta/ThetaSetOpsCornerCases.html">•Model & Test Set Operations</a></li>
<li><a href="/docs/Theta/ThetaPSampling.html"><i>p</i>-Sampling</a></li>
</div>
<p id="accuracy">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_accuracy">Accuracy</a>
</p>
<div class="collapse" id="collapse_accuracy">
<li><a href="/docs/Theta/ThetaAccuracy.html">•Basic Accuracy</a></li>
<li><a href="/docs/Theta/ThetaAccuracyPlots.html">•Accuracy Plots</a></li>
<li><a href="/docs/Theta/ThetaErrorTable.html">•Relative Error Table</a></li>
<li><a href="/docs/Theta/ThetaSketchSetOpsAccuracy.html">•SetOp Accuracy</a></li>
<li><a href="/docs/Theta/AccuracyOfDifferentKUnions.html">•Unions With Different k</a></li>
</div>
<p id="size">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_size">Size</a>
</p>
<div class="collapse" id="collapse_size">
<li><a href="/docs/Theta/ThetaSize.html">•Theta Sketch Size</a></li>
</div>
<p id="speed">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_speed">Speed</a>
</p>
<div class="collapse" id="collapse_speed">
<li><a href="/docs/Theta/ThetaUpdateSpeed.html">•Update Speed</a></li>
<li><a href="/docs/Theta/ThetaMergeSpeed.html">•Merge Speed</a></li>
</div>
<p id="theta-sketch-theory">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_sketch_theory">Theta Sketch Theory</a>
</p>
<div class="collapse" id="collapse_theta_sketch_theory">
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/ThetaSketchFramework.pdf">•Theta Sketch Framework (PDF)</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/ThetaSketchEquations.pdf">•Theta Sketch Equations (PDF)</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/DataSketches.pdf">•DataSketches (PDF)</a></li>
<li><a href="/docs/Theta/ThetaConfidenceIntervals.html">•Confidence Intervals Notes</a></li>
<li><a href="/docs/Theta/ThetaMergingAlgorithm.html">•Merging Algorithm Notes</a></li>
<li><a href="/docs/Theta/ThetaReferences.html">•Theta References</a></li>
</div>
</div>
<p id="tuple-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_tuple_sketches">Tuple Sketches</a>
</p>
<div class="collapse" id="collapse_tuple_sketches">
<li><a href="/docs/Tuple/TupleOverview.html">•Tuple Overview</a></li>
<p id="tuple-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_tuple_examples">Tuple Examples</a>
</p>
<div class="collapse" id="collapse_tuple_examples">
<li><a href="/docs/Tuple/TupleJavaExample.html">•Tuple Java Example</a></li>
<li><a href="/docs/Tuple/TupleEngagementExample.html">•Tuple Engagement Example</a></li>
<li><a href="/docs/Tuple/TuplePigUDFs.html">•Tuple Pig UDFs</a></li>
<li><a href="/docs/Tuple/TupleHiveUDFs.html">•Tuple Hive UDFs</a></li>
</div>
</div>
</div>
<p id="most-frequent">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_most_frequent">Most Frequent</a>
</p>
<div class="collapse" id="collapse_most_frequent">
<li><a href="/docs/Frequency/FrequencySketchesOverview.html">•Frequency Sketches Overview</a></li>
<p id="frequent-item-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_frequent_item_sketches">Frequent Item Sketches</a>
</p>
<div class="collapse" id="collapse_frequent_item_sketches">
<li><a href="/docs/Frequency/FrequentItemsOverview.html">•Frequent Items Overview</a></li>
<li><a href="/docs/Frequency/FrequentItemsErrorTable.html">•Frequent Items Error Table</a></li>
<li><a href="/docs/Frequency/FrequentItemsReferences.html">•Frequent Items References</a></li>
<li><a href="/docs/Frequency/FrequentItemsPerformance.html">•Frequent Items Performance</a></li>
<p id="most-frequent-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_most_frequent_examples">Most Frequent Examples</a>
</p>
<div class="collapse" id="collapse_most_frequent_examples">
<li><a href="/docs/Frequency/FrequentItemsJavaExample.html">•Frequent Items Java Example</a></li>
<li><a href="/docs/Frequency/FrequentItemsCppExample.html">•Frequent Items C++ Example</a></li>
<li><a href="/docs/Frequency/FrequentItemsPigUDFs.html">•Frequent Items Pig UDFs</a></li>
<li><a href="/docs/Frequency/FrequentItemsHiveUDFs.html">•Frequent Items Hive UDFs</a></li>
</div>
</div>
<p id="frequent-distinct-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_frequent_distinct_sketches">Frequent Distinct Sketches</a>
</p>
<div class="collapse" id="collapse_frequent_distinct_sketches">
<li><a href="/docs/Frequency/FrequentDistinctTuplesSketch.html">•Frequent Distinct Tuples Sketch</a></li>
</div>
</div>
<p id="quantiles-and-histograms">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_and_histograms">Quantiles And Histograms</a>
</p>
<div class="collapse" id="collapse_quantiles_and_histograms">
<li><a href="/docs/Quantiles/SketchingQuantilesAndRanksTutorial.html">•Quantiles and Ranks Tutorial</a></li>
<li><a href="/docs/Quantiles/QuantilesOverview.html">•Quantiles Overview</a></li>
<li><a href="/docs/KLL/KLLSketch.html">•KLL Floats sketch</a></li>
<li><a href="/docs/KLL/KLLAccuracyAndSize.html">•KLL Sketch Accuracy and Size</a></li>
<li><a href="/docs/REQ/ReqSketch.html">•REQ Floats sketch</a></li>
<li><a href="/docs/Quantiles/OrigQuantilesSketch.html">•Original QuantilesSketch</a></li>
<p id="quantiles-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_examples">Quantiles Examples</a>
</p>
<div class="collapse" id="collapse_quantiles_examples">
<li><a href="/docs/Quantiles/QuantilesJavaExample.html">•Quantiles Sketch Java Example</a></li>
<li><a href="/docs/KLL/KLLCppExample.html">•KLL Quantiles Sketch C++ Example</a></li>
<li><a href="/docs/Quantiles/QuantilesPigUDFs.html">•Quantiles Sketch Pig UDFs</a></li>
<li><a href="/docs/Quantiles/QuantilesHiveUDFs.html">•Quantiles Sketch Hive UDFs</a></li>
</div>
<p id="quantiles-studies">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_studies">Quantiles Studies</a>
</p>
<div class="collapse" id="collapse_quantiles_studies">
<li><a href="/docs/QuantilesStudies/DruidApproxHistogramStudy.html">•Druid Approximate Histogram</a></li>
<li><a href="/docs/QuantilesStudies/MomentsSketchStudy.html">•Moments Sketch Study</a></li>
<li><a href="/docs/QuantilesStudies/QuantilesStreamAStudy.html">•Quantiles StreamA Study</a></li>
<li><a href="/docs/QuantilesStudies/ExactQuantiles.html">•Exact Quantiles for Studies</a></li>
</div>
<p id="quantiles-sketch-theory">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_sketch_theory">Quantiles Sketch Theory</a>
</p>
<div class="collapse" id="collapse_quantiles_sketch_theory">
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/Quantiles_KLL.pdf">•Optimal Quantile Approximation in Streams</a></li>
<li><a href="/docs/Quantiles/QuantilesReferences.html">•Quantiles References</a></li>
</div>
</div>
<p id="sampling">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sampling">Sampling</a>
</p>
<div class="collapse" id="collapse_sampling">
<li><a href="/docs/Sampling/ReservoirSampling.html">•Reservoir Sampling</a></li>
<li><a href="/docs/Sampling/ReservoirSamplingPerformance.html">•Reservoir Sampling Performance</a></li>
<li><a href="/docs/Sampling/VarOptSampling.html">•VarOpt Sampling</a></li>
<p id="sampling-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sampling_examples">Sampling Examples</a>
</p>
<div class="collapse" id="collapse_sampling_examples">
<li><a href="/docs/Sampling/ReservoirSamplingJava.html">•Reservoir Sampling Java Example</a></li>
<li><a href="/docs/Sampling/ReservoirSamplingPigUDFs.html">•Reservoir Sampling Pig UDFs</a></li>
<li><a href="/docs/Sampling/VarOptSamplingJava.html">•VarOpt Sampling Java Example</a></li>
<li><a href="/docs/Sampling/VarOptPigUDFs.html">•VarOpt Sampling Pig UDFs</a></li>
</div>
</div>
</div>
<p id="system-integrations">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_system_integrations">System Integrations</a>
</p>
<div class="collapse" id="collapse_system_integrations">
<li><a href="/docs/SystemIntegrations/ApacheDruidIntegration.html">•Using Sketches in Apache Druid</a></li>
<li><a href="/docs/SystemIntegrations/ApacheHiveIntegration.html">•Using Sketches in Apache Hive</a></li>
<li><a href="/docs/SystemIntegrations/ApachePigIntegration.html">•Using Sketches in Apache Pig</a></li>
<li><a href="/docs/SystemIntegrations/PostgreSQLIntegration.html">•Using Sketches in PostgreSQL</a></li>
</div>
<p id="community">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_community">Community</a>
</p>
<div class="collapse" id="collapse_community">
<li><a href="/docs/Community/index.html">•Community</a></li>
<li><a href="/docs/Community/Downloads.html">•Downloads</a></li>
<li><a href="/docs/Community/NewCommitterProcess.html">•Committer Process</a></li>
<li><a href="/docs/Community/ReleaseProcessForCppComponents.html">•Release Process For CPP Components</a></li>
<li><a href="/docs/Community/ReleaseProcessForJavaComponents.html">•Release Process For Java Components</a></li>
<li><a href="/docs/Community/Transitioning.html">•Transitioning from prior GitHub Site</a></li>
</div>
<p id="research">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_research">Research</a>
</p>
<div class="collapse" id="collapse_research">
<li><a href="/docs/Community/Research.html">•Research</a></li>
</div>
</div>
<!-- End _includes/toc.html -->
<!-- Start _includes/tocScript.html -->
<script>
/**
 * Table-of-contents helper.
 *
 * Once the DOM is ready, finds the TOC link whose href equals the current
 * page path, marks it with the `highlight` class and expands every enclosing
 * `.collapse` section so the link is visible. The matching link in the top
 * navigation (if any) is highlighted as well.
 *
 * Pages that have no entry in the TOC or the navigation are left untouched;
 * a missing link must not abort the rest of the handler.
 */
(function () {
  // Returns the link inside #toc whose href attribute equals `path`, or null.
  var findLineItem = function (path) {
    return document.querySelector(`#toc [href="${path}"]`);
  };

  // Returns the first link inside any `.nav` element whose href attribute
  // equals `path`, or null.
  function findNavItem(path) {
    return document.querySelector(`.nav [href="${path}"]`);
  }

  // Adds the `highlight` class to `element`. A null/undefined element is
  // ignored because querySelector returns null when nothing matched.
  var highlighLineItem = function (element) {
    if (element) {
      element.classList.add('highlight');
    }
  };

  // True when `element` carries the class `className`.
  var checkHasClass = function (element, className) {
    return element.classList.contains(className);
  };

  // Collects `element` and its ancestors (innermost first) that have the
  // `collapse` class. The walk stops at document.body, or when it runs out
  // of parents (null start element, or an element outside of body).
  var findAllCollapseParents = function (element) {
    var collapseMenus = [];
    var elementPointer = element;
    while (elementPointer && elementPointer !== document.body) {
      if (checkHasClass(elementPointer, 'collapse')) {
        collapseMenus.push(elementPointer);
      }
      elementPointer = elementPointer.parentElement;
    }
    return collapseMenus;
  };

  var openMenuItem = function (element) {
    // $(element).collapse('show') would start a transition, adding `in` class instead.
    element.classList.add('in');
  };

  var openAllFromList = function (elementList) {
    elementList.forEach(openMenuItem);
  };

  var highlightAndOpenMenu = function () {
    var currentPath = document.location.pathname;

    // Highlight & expand nav item in the TOC (no-op when the page is not listed)
    var currentLineItem = findLineItem(currentPath);
    highlighLineItem(currentLineItem);
    openAllFromList(findAllCollapseParents(currentLineItem));

    // Highlight nav item in top navigation (no-op when the page is not listed)
    highlighLineItem(findNavItem(currentPath));
  };

  // jQuery shorthand: run the handler once the document is ready.
  $(highlightAndOpenMenu);
}());
</script>
<!-- End _includes/tocScript.html -->
</div>
<!-- End ToC Block -->
<div class="col-md-9 doc-content">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<h1 id="apache-project-maturity-model-assessment-for-datasketches-draft">Apache Project Maturity Model Assessment for DataSketches (DRAFT)</h1>
<h2 id="overview">Overview</h2>
<p>This is an assessment of the DataSketches podling’s maturity, meant to help inform the decision (of the mentors, community, Incubator PMC and ASF Board of Directors) to graduate it as a top-level Apache project.</p>
<p>It is based on the ASF project maturity model at <a href="https://community.apache.org/apache-way/apache-project-maturity-model.html">https://community.apache.org/apache-way/apache-project-maturity-model.html</a>.</p>
<h2 id="status-of-this-document">Status of this document</h2>
<p>There is a parallel <em>[DISCUSS] DataSketches Maturity</em> on the <a href="mailto:dev@datasketches.apache.org">dev@datasketches.apache.org</a> mail list to enable discussion of any issues. If there is disagreement on an issue, it will be marked here as under discussion.</p>
<h2 id="code">Code</h2>
<h3 id="cd10">CD10</h3>
<blockquote>
<p>The project produces Open Source software, for distribution to the public at no charge.</p>
</blockquote>
<h4 id="yes">Yes.</h4>
<ul>
<li>The project source code is licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, version 2.0</a>.</li>
</ul>
<h3 id="cd20">CD20</h3>
<blockquote>
<p>The project’s code is easily discoverable and publicly accessible.</p>
</blockquote>
<h4 id="yes-1">Yes.</h4>
<ul>
<li>See <a href="https://datasketches.apache.org/docs/Community/Downloads.html">Downloads</a>.</li>
</ul>
<h3 id="cd30">CD30</h3>
<blockquote>
<p>The code can be built in a reproducible way using widely available standard tools.</p>
</blockquote>
<h4 id="yes-2">Yes.</h4>
<ul>
<li>See the README document on each of the project’s repositories<sup>1</sup>. <a href="https://github.com/apache/incubator-datasketches-java/blob/master/README.md">For example</a>.</li>
</ul>
<h3 id="cd40">CD40</h3>
<blockquote>
<p>The full history of the project’s code is available via a source code control system, in a way that allows any released version to be recreated.</p>
</blockquote>
<h4 id="yes-3">Yes.</h4>
<ul>
<li>We use Git/GitHub for source code, documents, and the <a href="https://datasketches.apache.org">website</a>.</li>
<li>For all the Apache DataSketches repositories subject to being released<sup>1</sup>, releases are cut from the respective repository.</li>
<li>All releases are tagged and in separate, easy-to-locate branches.</li>
</ul>
<h3 id="cd50">CD50</h3>
<blockquote>
<p>The provenance of each line of code is established via the source code control system,
in a reliable way based on strong authentication of the committer.
When third-party contributions are committed, commit messages provide reliable
information about the code provenance.</p>
</blockquote>
<h4 id="yes-4">Yes.</h4>
<ul>
<li>The project uses Apache managed GitHub repositories, ensuring provenance of each line of code to a committer.</li>
<li>Third party contributions are accepted in accordance with the <a href="https://www.apache.org/legal/3party.html">Apache Third-Party Licensing Policy</a> only.</li>
</ul>
<h2 id="licenses-and-copyright">Licenses and Copyright</h2>
<h3 id="lc10">LC10</h3>
<blockquote>
<p>The code is released under the Apache License, version 2.0.</p>
</blockquote>
<h4 id="yes-5">Yes.</h4>
<ul>
<li>See for example, <a href="https://github.com/apache/incubator-datasketches-java/blob/master/LICENSE">LICENSE</a>, which has been accepted in multiple release cycles.</li>
<li>All source code files have license headers.</li>
<li>All releases pass the Apache <em>Release Audit Tool</em> according to the <a href="https://www.apache.org/legal/src-headers.html">ASF Source Header and Copyright Notice Policy</a>.</li>
</ul>
<h3 id="lc20">LC20</h3>
<blockquote>
<p>Libraries that are mandatory dependencies of the project’s code
do not create more restrictions than the Apache License does.</p>
</blockquote>
<h4 id="yes-6">Yes.</h4>
<ul>
<li>All code dependencies have been reviewed to contain approved licenses only.</li>
</ul>
<h3 id="lc30">LC30</h3>
<blockquote>
<p>The libraries mentioned in LC20 are available as Open Source software.</p>
</blockquote>
<h4 id="yes-7">Yes.</h4>
<ul>
<li>The references to the open-source libraries mentioned in LC20 can be found in the LICENSE file on each of the repositories subject to release<sup>1</sup>.</li>
</ul>
<h3 id="lc40">LC40</h3>
<blockquote>
<p>Committers are bound by an Individual Contributor Agreement
(the <a href="https://www.apache.org/licenses/icla.txt">Apache ICLA</a>) that
defines which code they are allowed to commit and how they need to identify code that is not their own.</p>
</blockquote>
<h4 id="yes-8">Yes.</h4>
<ul>
<li>The project uses GitHub repositories managed by Apache where write access requires an Apache account and an ICLA on file.</li>
</ul>
<h3 id="lc50">LC50</h3>
<blockquote>
<p>The copyright ownership of everything that the project produces is clearly defined and documented.</p>
</blockquote>
<ul>
<li>All files in the source repository have appropriate headers (See LC10).</li>
<li>Software Grant Agreements for the initial donations and Corporate CLAs have been filed.</li>
</ul>
<h2 id="releases">Releases</h2>
<h3 id="re10">RE10</h3>
<blockquote>
<p>Releases consist of source code, distributed using standard and open archive
formats that are expected to stay readable in the long term.</p>
</blockquote>
<h4 id="yes-9">Yes.</h4>
<ul>
<li>Source releases are distributed via <a href="http://ws.apache.org/mirrors.cgi">Apache Download Mirrors</a>
and linked from the website <a href="https://datasketches.apache.org/docs/Community/Downloads.html">Downloads</a> page.</li>
</ul>
<h3 id="re20">RE20</h3>
<blockquote>
<p>Releases are approved by the project’s PMC (see CS10), in order to make them
an act of the Foundation.</p>
</blockquote>
<h4 id="yes-10">Yes.</h4>
<ul>
<li>All incubating releases have been approved by the DataSketches community, PPMC, and the IPMC, all with at least 3 IPMC votes.</li>
</ul>
<h3 id="re30">RE30</h3>
<blockquote>
<p>Releases are signed and/or distributed along with digests that can be
reliably used to validate the downloaded archives.</p>
</blockquote>
<h4 id="yes-11">Yes.</h4>
<ul>
<li>All releases are signed, and KEYS files are provided on <a href="https://dist.apache.org">dist.apache.org</a>.</li>
</ul>
<h3 id="re40">RE40</h3>
<blockquote>
<p>Convenience binaries can be distributed alongside source code but they are not
Apache Releases – they are just a convenience provided with no guarantee.</p>
</blockquote>
<h4 id="yes-12">Yes.</h4>
<ul>
<li>We distribute Java jar file bundles via <a href="https://repository.apache.org">Nexus Repository Manager</a>. These jar files include source jars as well as compiled binaries of the source code.</li>
<li>However, we discovered that we need to have a copy of these jar files also on <a href="https://dist.apache.org">dist.apache.org</a>. This was an oversight. This has now been corrected on <a href="https://dist.apache.org">dist.apache.org</a> for all current DataSketches releases where applicable as follows:
<ul>
<li>datasketches-java 1.3.0-incubating</li>
<li>datasketches-hive 1.1.0-incubating</li>
<li>datasketches-pig 1.0.0-incubating</li>
<li>datasketches-memory 1.2.0-incubating</li>
</ul>
</li>
<li>This will also be corrected for all new releases going forward.</li>
<li>In the future, we may have needs for distributions through other venues, e.g., pgxn.org, pypi, and docker; some of these may be binaries. Any such external distributions will have copies on <a href="https://dist.apache.org">dist.apache.org</a>.</li>
</ul>
<h3 id="re50">RE50</h3>
<blockquote>
<p>The release process is documented and repeatable to the extent that
someone new to the project is able to independently generate the complete
set of artifacts required for a release.</p>
</blockquote>
<h4 id="yes-13">Yes.</h4>
<ul>
<li>All committers have access to <a href="https://dist.apache.org/repos/dist/dev/incubator/datasketches/scripts/">detailed release scripts</a>.</li>
<li>As of October 24, 2020, we have successfully completed 17 Apache releases since the start of our incubation.</li>
<li>We have 3 committers that have qualified to be <em>release managers</em> and have successfully performed releases.</li>
</ul>
<h2 id="quality">Quality</h2>
<h3 id="qu10">QU10</h3>
<blockquote>
<p>The project is open and honest about the quality of its code. Various levels
of quality and maturity for various modules are natural and acceptable
as long as they are clearly communicated.</p>
</blockquote>
<h4 id="yes-14">Yes.</h4>
<ul>
<li>Bugs, various deficiencies and documentation problems come to us from many different sources. Once these bugs are made known to us we record them using the GitHub issues lists of the relevant repository<sup>1</sup>.</li>
</ul>
<h3 id="qu20">QU20</h3>
<blockquote>
<p>The project puts a very high priority on producing secure software.</p>
</blockquote>
<h4 id="yes-15">Yes.</h4>
<ul>
<li>Security issues will be treated with the highest priority.</li>
<li>We will follow the guidelines proposed by <a href="https://cve.mitre.org/about/documents.html">CVE Documents and Guidance</a> should these issues arise.</li>
</ul>
<h3 id="qu30">QU30</h3>
<blockquote>
<p>The project provides a well-documented, secure and private channel to report security issues, along with a documented way of responding to them.</p>
</blockquote>
<h4 id="yes-16">Yes.</h4>
<ul>
<li>See <a href="https://datasketches.apache.org/docs/Community/index.html">Community page, “Reporting Security Issues”</a>.</li>
</ul>
<h3 id="qu40">QU40</h3>
<blockquote>
<p>The project puts a high priority on backwards compatibility and aims to document any
incompatible changes and provide tools and documentation to help users transition to new features.</p>
</blockquote>
<h4 id="yes-17">Yes.</h4>
<ul>
<li>We define two types of backward compatibility, API and Binary:
<ul>
<li><strong>API:</strong> To the greatest extent possible we try to maintain compatibility with older APIs. However, some API changes are inevitable. In these cases we deprecate the older API alongside the newer recommended API for at least one major release cycle, after which the older API may be removed. This is standard policy for most industry code bases.</li>
<li><strong>Binary:</strong> Our current codebase is able to read and process older binary representations of our sketches since about 2014. This is extremely important for our users and is very high priority as our code evolves.</li>
</ul>
</li>
</ul>
<h3 id="qu50">QU50</h3>
<blockquote>
<p>The project strives to respond to documented bug reports in a timely manner.</p>
</blockquote>
<h4 id="yes-18">Yes.</h4>
<ul>
<li>We respond very quickly to bug and problem reports and have received excellent feedback from our users about our quick response.</li>
</ul>
<h2 id="community">Community</h2>
<h3 id="co10">CO10</h3>
<blockquote>
<p>The project has a well-known homepage that points to all the information
required to operate according to this maturity model.</p>
</blockquote>
<h4 id="yes-19">Yes.</h4>
<ul>
<li>See <a href="https://datasketches.apache.org">datasketches.apache.org</a>.</li>
</ul>
<h3 id="co20">CO20</h3>
<blockquote>
<p>The community welcomes contributions from anyone who acts in good
faith and in a respectful manner and adds value to the project.</p>
</blockquote>
<h4 id="yes-20">Yes.</h4>
<ul>
<li>See <a href="https://datasketches.apache.org/docs/Community/index.html">Community, Contributing topic</a>.</li>
</ul>
<h3 id="co30">CO30</h3>
<blockquote>
<p>Contributions include not only source code, but also documentation, constructive bug
reports, constructive discussions, marketing and generally anything that adds value to the project.</p>
</blockquote>
<h4 id="yes-21">Yes.</h4>
<ul>
<li>See <a href="https://datasketches.apache.org/docs/Community/index.html">Community, Contributing topic</a>.</li>
</ul>
<h3 id="co40">CO40</h3>
<blockquote>
<p>The community strives to be meritocratic and over time aims to give more rights and
responsibilities to contributors who add value to the project.</p>
</blockquote>
<h4 id="yes-22">Yes.</h4>
<ul>
<li>We have elected three new committers and have more on the way. All of these elections are, and will continue to be, merit-based.</li>
</ul>
<h3 id="co50">CO50</h3>
<blockquote>
<p>The way in which contributors can be granted more rights such as commit
access or decision power is clearly documented and is the same for all contributors.</p>
</blockquote>
<h4 id="yes-23">Yes.</h4>
<ul>
<li>See <a href="https://datasketches.apache.org/docs/Community/index.html">Community, Contributing topic</a>.</li>
</ul>
<h3 id="co60">CO60</h3>
<blockquote>
<p>The community operates based on consensus of its members (see CS10) who
have decision power. Dictators, benevolent or not, are not welcome in
Apache projects.</p>
</blockquote>
<h4 id="yes-24">Yes.</h4>
<ul>
<li>We work hard to build consensus.</li>
</ul>
<h3 id="co70">CO70</h3>
<blockquote>
<p>The project strives to answer user questions in a timely manner.</p>
</blockquote>
<h4 id="yes-25">Yes.</h4>
<ul>
<li>We typically respond to issues within a few hours. These issues come to our attention through many different channels, including our dev@datasketches.apache.org and users@datasketches.apache.org mailing lists, our GitHub issues lists, and the Apache Slack channels.</li>
</ul>
<h2 id="consensus-building">Consensus Building</h2>
<h3 id="cs10">CS10</h3>
<blockquote>
<p>The project maintains a public list of its contributors who have decision
power – the project’s PMC (Project Management Committee) consists of
those contributors.</p>
</blockquote>
<h4 id="yes-26">Yes.</h4>
<ul>
<li>See <a href="https://incubator.apache.org/projects/datasketches.html">DataSketches Project Incubation Status</a>.</li>
</ul>
<h3 id="cs20">CS20</h3>
<blockquote>
<p>Decisions are made by consensus among PMC members
and are documented on the project’s main communications channel.
Community opinions are taken into account but the PMC has the final word, if needed.</p>
</blockquote>
<h4 id="yes-27">Yes.</h4>
<ul>
<li>All major project decisions are documented via our <a href="mailto:dev@datasketches.apache.org">dev@datasketches.apache.org</a> mail list.</li>
</ul>
<h3 id="cs30">CS30</h3>
<blockquote>
<p>Documented voting rules are used to build consensus when discussion is not sufficient.</p>
</blockquote>
<h4 id="yes-28">Yes.</h4>
<ul>
<li>The project uses the standard ASF voting rules. Voting rules are clearly stated before the voting starts for each individual vote.</li>
</ul>
<h3 id="cs40">CS40</h3>
<blockquote>
<p>In Apache projects, vetoes are only valid for code commits and are
justified by a technical explanation, as per the Apache voting rules
defined in CS30.</p>
</blockquote>
<h4 id="yes-29">Yes.</h4>
<ul>
<li>We have had only one instance of a “-1” vote from a PPMC member on a code release. The issue was fixed and resubmitted. We support this policy.</li>
</ul>
<h3 id="cs50">CS50</h3>
<blockquote>
<p>All “important” discussions happen asynchronously in written form on the
project’s main communications channel. Offline, face-to-face or private discussions
that affect the project are also documented on that channel.</p>
</blockquote>
<h4 id="yes-30">Yes.</h4>
<ul>
<li>The project has been making important decisions on the project mailing lists. Minor decisions may occasionally happen during code reviews, which are also asynchronous and in written form. Any synchronous discussions that result in major decisions for the project are documented on our project <a href="mailto:dev@datasketches.apache.org">dev@datasketches.apache.org</a> mailing list.</li>
</ul>
<h2 id="independence">Independence</h2>
<h3 id="in10">IN10</h3>
<blockquote>
<p>The project is independent from any corporate or organizational influence.</p>
</blockquote>
<h4 id="yes-31">Yes.</h4>
<ul>
<li>Our project has committers and contributors from Yahoo, Inc.; Hypercube, Inc.; Permutive, Inc. UK;
Tableau (Salesforce, Inc.); Georgetown University, Washington, D.C.; Warwick University, UK;
UC Berkeley; Apache Druid; and other researchers and engineers from around the world.</li>
</ul>
<h3 id="in20">IN20</h3>
<blockquote>
<p>Contributors act as themselves as opposed to representatives of a corporation or organization.</p>
</blockquote>
<h4 id="yes-32">Yes.</h4>
<ul>
<li>The committers and contributors act on their own initiative without representing a corporation or organization.</li>
</ul>
<hr />
<p><sup>1</sup> List of Source Repositories that have Apache Releases:</p>
<ul>
<li><a href="https://github.com/apache/incubator-datasketches-java">incubator-datasketches-java</a></li>
<li><a href="https://github.com/apache/incubator-datasketches-cpp">incubator-datasketches-cpp</a></li>
<li><a href="https://github.com/apache/incubator-datasketches-memory">incubator-datasketches-memory</a></li>
<li><a href="https://github.com/apache/incubator-datasketches-hive">incubator-datasketches-hive</a></li>
<li><a href="https://github.com/apache/incubator-datasketches-pig">incubator-datasketches-pig</a></li>
<li><a href="https://github.com/apache/incubator-datasketches-postgresql">incubator-datasketches-postgresql</a></li>
</ul>
</div> <!-- End content -->
</div> <!-- End row -->
</div> <!-- End Container -->
<!-- Start _include/page_footer.html -->
<footer class="ds-footer">
<div class="container">
<div class="text-center">
<div>Copyright © 2024 <a href="https://www.apache.org">Apache Software Foundation</a>,
Licensed under the Apache License, Version 2.0. All Rights Reserved.
| <a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy Policy</a><br/>
Apache DataSketches, Apache, the Apache feather logo, and the Apache DataSketches project logos are trademarks of The Apache Software Foundation.<br/>
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
</div>
</div>
</div>
</footer>
<!-- End _include/page_footer.html -->
</body>
</html>
<!-- End _layouts/doc_page.html-->