<!-- blob: 47efb52c51a2f29e3bf38aa8f5ac5a7974eefba0 [file] [log] [blame] -->
<!DOCTYPE html>
<!-- Start _layouts/doc_page.html-->
<html lang="en">
<head>
  <!-- Start _include/site_head.html -->
  <!-- Document metadata: charset first, then viewport, description, author and a page-specific title. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta name="description" content="Release process for the Apache DataSketches Java components.">
  <meta name="author" content="datasketches">
  <title>DataSketches | Release Process For Java Components</title>
  <link rel="shortcut icon" href="/img/favicon.png">

  <!-- Third-party styles: icon font, Bootstrap 3.2.0 and the Open Sans web fonts. -->
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.1.0/css/font-awesome.min.css">
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap.min.css">
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Open+Sans+Condensed:300,700,300italic|Open+Sans:300italic,400italic,600italic,400,300,600">

  <!-- Site styles; loaded after Bootstrap so they can override it. -->
  <link rel="stylesheet" href="/css/main.css">
  <link rel="stylesheet" href="/css/header.css">
  <link rel="stylesheet" href="/css/footer.css">
  <link rel="stylesheet" href="/css/syntax.css">
  <link rel="stylesheet" href="/css/docs.css">

  <!-- MathJax v2 with the same TeX-AMS_HTML-full configuration as before.
       The cdn.mathjax.org host was retired in 2017, so a pinned cdnjs release is used instead. -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS_HTML-full"></script>

  <!-- jQuery must load before bootstrap.min.js and before the inline ToC script, which calls $().
       The version is pinned explicitly; an unversioned URL gives no control over which release is served. -->
  <script src="https://code.jquery.com/jquery-1.11.1.min.js"></script>
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/js/bootstrap.min.js"></script>
  <!-- End _include/site_head.html -->
</head>
<body>
<!-- Start _include/nav_bar.html -->
<!-- Site-wide top navigation (Bootstrap 3 navbar): home logo, collapsible section links and the Apache dropdown. -->
<div class="navbar navbar-inverse navbar-static-top ds-nav" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <!-- Toggle button for the collapsible link list below (targets .navbar-collapse). -->
      <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
        <span class="sr-only">Toggle navigation</span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <!-- The logo span has no text content, so the link carries an explicit accessible name. -->
      <a href="/" style="padding-top: 0px; padding-bottom: 0px;" aria-label="DataSketches home">
        <span class="ds-small-h-logo"></span></a>
    </div>
    <div class="navbar-collapse collapse">
      <ul class="nav navbar-nav navbar-right">
        <!-- Icon-font spans are decorative (the visible label follows each one), so they are hidden from assistive technology. -->
        <li>
          <a href="/docs/Background/TheChallenge.html">
            <span class="fa fa-info-circle" aria-hidden="true"></span> DOCUMENTATION</a>
        </li>
        <li>
          <a href="/docs/Community/Downloads.html">
            <span class="fa fa-download" aria-hidden="true"></span> DOWNLOAD</a>
        </li>
        <!--
        <li>
        <a href="/docs/Architecture/Components.html">
        <span class="fa fa-github"></span> GITHUB</a>
        </li>
        -->
        <li>
          <a href="/docs/Community/Research.html">
            <span class="fa fa-paper-plane" aria-hidden="true"></span> RESEARCH</a>
        </li>
        <li>
          <a href="/docs/Community/index.html" style="padding-top: 0; padding-bottom: 0;">
            <img class="ds-small-man" src="/img/datasketches-ManWhite.svg" alt="">COMMUNITY</a>
        </li>
        <li>
          <ul class="nav navbar-nav navbar-right ds-nav">
            <li class="dropdown ds-nav">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false" style="padding-top: 0; padding-bottom: 0;"><img class="apache-logo" src="/img/feather.svg" alt="">Apache <span class="caret"></span></a>
              <!-- External Apache links open in a new tab; rel="noopener noreferrer" keeps the new page from reaching window.opener. -->
              <ul class="dropdown-menu ds-nav">
                <li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer">Foundation</a></li>
                <li><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer">Events</a></li>
                <li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer">License</a></li>
                <li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer">Thanks</a></li>
                <li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer">Security</a></li>
                <li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer">Sponsorship</a></li>
              </ul>
            </li>
          </ul>
        </li>
      </ul>
    </div>
  </div>
</div>
<!-- End _include/nav_bar.html -->
<!-- Start _include/javadocs.html -->
<div class="ds-header">
<div class="container">
<h4>API Snapshots:
<a href="https://apache.github.io/datasketches-java/4.1.0/">Java Core</a>,
<a href="https://apache.github.io/datasketches-cpp/master/">C++ Core</a>,
<a href="/api/memory/snapshot/apidocs/index.html">Memory</a>,
<a href="/api/pig/snapshot/apidocs/index.html">Pig</a>,
<a href="/api/hive/snapshot/apidocs/index.html">Hive</a>
</h4>
</div>
</div>
<!-- End _include/javadocs.html -->
<div class="container">
<div class="row">
<!-- Start ToC Block -->
<div class="col-md-3">
<div class="searchbox" style="position:relative">
<gcse:searchbox-only></gcse:searchbox-only>
</div>
<!-- Start _includes/toc.html -->
<!-- Computer Generated File, Do Not Edit! -->
<link rel="stylesheet" href="/css/toc.css">
<div id="toc" class="nav toc hidden-print">
<p id="background">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_background">Background</a>
</p>
<div class="collapse" id="collapse_background">
<li><a href="/docs/Background/TheChallenge.html">•The Challenge</a></li>
<li><a href="/docs/Background/SketchOrigins.html">•Sketch Origins</a></li>
<li><a href="/docs/Background/SketchElements.html">•Sketch Elements</a></li>
<li><a href="/docs/Background/Presentations.html">•Presentations</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/DataSketches_deck.pdf">•Overview Slide Deck</a></li>
</div>
<p id="architecture-and-design">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_architecture_and_design">Architecture And Design</a>
</p>
<div class="collapse" id="collapse_architecture_and_design">
<li><a href="/docs/Architecture/MajorSketchFamilies.html">•The Major Sketch Families</a></li>
<li><a href="/docs/Architecture/LargeScale.html">•Large Scale Computing</a></li>
<li><a href="/docs/Architecture/KeyFeatures.html">•Key Features</a></li>
<li><a href="/docs/Architecture/SketchFeaturesMatrix.html">•Sketch Features Matrix</a></li>
<li><a href="/docs/Architecture/Components.html">•Components</a></li>
<li><a href="/docs/Architecture/SketchesByComponent.html">•Sketches by Component</a></li>
<li><a href="/docs/Architecture/SketchCriteria.html">•Sketch Criteria</a></li>
<p id="memory-component">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_memory_component">Memory Component</a>
</p>
<div class="collapse" id="collapse_memory_component">
<li><a href="/docs/Memory/MemoryComponent.html">•Memory Component</a></li>
<li><a href="/docs/Memory/MemoryPerformance.html">•Memory Component Performance</a></li>
</div>
<li><a href="/docs/Architecture/OrderSensitivity.html">•Notes on Order Sensitivity</a></li>
<li><a href="/docs/Architecture/Concurrency.html">•Notes on Concurrency</a></li>
</div>
<p id="sketch-families">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sketch_families">Sketch Families</a>
</p>
<div class="collapse" id="collapse_sketch_families">
<p id="distinct-counting">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_distinct_counting">Distinct Counting</a>
</p>
<div class="collapse" id="collapse_distinct_counting">
<li><a href="/docs/DistinctCountFeaturesMatrix.html">•Features Matrix</a></li>
<li><a href="/docs/DistinctCountMeritComparisons.html">•Figures-of-Merit Comparison</a></li>
<p id="cpc-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_cpc_sketches">CPC Sketches</a>
</p>
<div class="collapse" id="collapse_cpc_sketches">
<li><a href="/docs/CPC/CPC.html">•CPC Sketch</a></li>
<li><a href="/docs/CPC/CpcPerformance.html">•CPC Sketch Performance</a></li>
<p id="cpc-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_cpc_examples">CPC Examples</a>
</p>
<div class="collapse" id="collapse_cpc_examples">
<li><a href="/docs/CPC/CpcJavaExample.html">•CPC Sketch Java Example</a></li>
<li><a href="/docs/CPC/CpcCppExample.html">•CPC Sketch C++ Example</a></li>
<li><a href="/docs/CPC/CpcPigExample.html">•CPC Sketch Pig UDFs</a></li>
<li><a href="/docs/CPC/CpcHiveExample.html">•CPC Sketch Hive UDFs</a></li>
</div>
</div>
<p id="hyperloglog-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hyperloglog_sketches">HyperLogLog Sketches</a>
</p>
<div class="collapse" id="collapse_hyperloglog_sketches">
<li><a href="/docs/HLL/HLL.html">•HLL Sketch</a></li>
<li><a href="/docs/HLL/HllMap.html">•HLL Map Sketch</a></li>
<p id="hll-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hll_examples">HLL Examples</a>
</p>
<div class="collapse" id="collapse_hll_examples">
<li><a href="/docs/HLL/HllJavaExample.html">•HLL Sketch Java Example</a></li>
<li><a href="/docs/HLL/HllCppExample.html">•HLL Sketch C++ Example</a></li>
<li><a href="/docs/HLL/HllPigUDFs.html">•HLL Sketch Pig UDFs</a></li>
<li><a href="/docs/HLL/HllHiveUDFs.html">•HLL Sketch Hive UDFs</a></li>
</div>
<p id="hll-studies">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_hll_studies">HLL Studies</a>
</p>
<div class="collapse" id="collapse_hll_studies">
<li><a href="/docs/HLL/HllPerformance.html">•HLL Sketch Performance</a></li>
<li><a href="/docs/HLL/Hll_vs_CS_Hllpp.html">•HLL vs Clearspring HLL++</a></li>
<li><a href="/docs/HLL/HllSketchVsDruidHyperLogLogCollector.html">•HLL Sketch vs Druid HyperLogLogCollector</a></li>
</div>
</div>
<p id="theta-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_sketches">Theta Sketches</a>
</p>
<div class="collapse" id="collapse_theta_sketches">
<li><a href="/docs/Theta/ThetaSketchFramework.html">•Theta Sketch Framework</a></li>
<p id="theta-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_examples">Theta Examples</a>
</p>
<div class="collapse" id="collapse_theta_examples">
<li><a href="/docs/Theta/ConcurrentThetaSketch.html">•Concurrent Theta Sketch</a></li>
<li><a href="/docs/Theta/ThetaJavaExample.html">•Theta Sketch Java Example</a></li>
<li><a href="/docs/Theta/ThetaSparkExample.html">•Theta Sketch Spark Example</a></li>
<li><a href="/docs/Theta/ThetaPigUDFs.html">•Theta Sketch Pig UDFs</a></li>
<li><a href="/docs/Theta/ThetaHiveUDFs.html">•Theta Sketch Hive UDFs</a></li>
</div>
<p id="kmv-tutorial">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_kmv_tutorial">KMV Tutorial</a>
</p>
<div class="collapse" id="collapse_kmv_tutorial">
<li><a href="/docs/Theta/InverseEstimate.html">•The Inverse Estimate</a></li>
<li><a href="/docs/Theta/KMVempty.html">•Empty Sketch</a></li>
<li><a href="/docs/Theta/KMVfirstEst.html">•First Estimator</a></li>
<li><a href="/docs/Theta/KMVbetterEst.html">•Better Estimator</a></li>
<li><a href="/docs/Theta/KMVrejection.html">•Rejection Rules</a></li>
<li><a href="/docs/Theta/KMVupdateVkth.html">•Update V(kth) Rule</a></li>
</div>
<p id="set-operations-and-p-sampling">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_set_operations_and_p-sampling">Set Operations and P-sampling</a>
</p>
<div class="collapse" id="collapse_set_operations_and_p-sampling">
<li><a href="/docs/Theta/ThetaSketchSetOps.html">•Set Operations</a></li>
<li><a href="/docs/Theta/ThetaSetOpsCornerCases.html">•Model &amp; Test Set Operations</a></li>
<li><a href="/docs/Theta/ThetaPSampling.html">•<i>p</i>-Sampling</a></li>
</div>
<p id="accuracy">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_accuracy">Accuracy</a>
</p>
<div class="collapse" id="collapse_accuracy">
<li><a href="/docs/Theta/ThetaAccuracy.html">•Basic Accuracy</a></li>
<li><a href="/docs/Theta/ThetaAccuracyPlots.html">•Accuracy Plots</a></li>
<li><a href="/docs/Theta/ThetaErrorTable.html">•Relative Error Table</a></li>
<li><a href="/docs/Theta/ThetaSketchSetOpsAccuracy.html">•SetOp Accuracy</a></li>
<li><a href="/docs/Theta/AccuracyOfDifferentKUnions.html">•Unions With Different k</a></li>
</div>
<p id="size">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_size">Size</a>
</p>
<div class="collapse" id="collapse_size">
<li><a href="/docs/Theta/ThetaSize.html">•Theta Sketch Size</a></li>
</div>
<p id="speed">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_speed">Speed</a>
</p>
<div class="collapse" id="collapse_speed">
<li><a href="/docs/Theta/ThetaUpdateSpeed.html">•Update Speed</a></li>
<li><a href="/docs/Theta/ThetaMergeSpeed.html">•Merge Speed</a></li>
</div>
<p id="theta-sketch-theory">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_theta_sketch_theory">Theta Sketch Theory</a>
</p>
<div class="collapse" id="collapse_theta_sketch_theory">
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/ThetaSketchFramework.pdf">•Theta Sketch Framework (PDF)</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/ThetaSketchEquations.pdf">•Theta Sketch Equations (PDF)</a></li>
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/DataSketches.pdf">•DataSketches (PDF)</a></li>
<li><a href="/docs/Theta/ThetaConfidenceIntervals.html">•Confidence Intervals Notes</a></li>
<li><a href="/docs/Theta/ThetaMergingAlgorithm.html">•Merging Algorithm Notes</a></li>
<li><a href="/docs/Theta/ThetaReferences.html">•Theta References</a></li>
</div>
</div>
<p id="tuple-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_tuple_sketches">Tuple Sketches</a>
</p>
<div class="collapse" id="collapse_tuple_sketches">
<li><a href="/docs/Tuple/TupleOverview.html">•Tuple Overview</a></li>
<p id="tuple-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_tuple_examples">Tuple Examples</a>
</p>
<div class="collapse" id="collapse_tuple_examples">
<li><a href="/docs/Tuple/TupleJavaExample.html">•Tuple Java Example</a></li>
<li><a href="/docs/Tuple/TupleEngagementExample.html">•Tuple Engagement Example</a></li>
<li><a href="/docs/Tuple/TuplePigUDFs.html">•Tuple Pig UDFs</a></li>
<li><a href="/docs/Tuple/TupleHiveUDFs.html">•Tuple Hive UDFs</a></li>
</div>
</div>
</div>
<p id="most-frequent">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_most_frequent">Most Frequent</a>
</p>
<div class="collapse" id="collapse_most_frequent">
<li><a href="/docs/Frequency/FrequencySketchesOverview.html">•Frequency Sketches Overview</a></li>
<p id="frequent-item-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_frequent_item_sketches">Frequent Item Sketches</a>
</p>
<div class="collapse" id="collapse_frequent_item_sketches">
<li><a href="/docs/Frequency/FrequentItemsOverview.html">•Frequent Items Overview</a></li>
<li><a href="/docs/Frequency/FrequentItemsErrorTable.html">•Frequent Items Error Table</a></li>
<li><a href="/docs/Frequency/FrequentItemsReferences.html">•Frequent Items References</a></li>
<li><a href="/docs/Frequency/FrequentItemsPerformance.html">•Frequent Items Performance</a></li>
<p id="most-frequent-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_most_frequent_examples">Most Frequent Examples</a>
</p>
<div class="collapse" id="collapse_most_frequent_examples">
<li><a href="/docs/Frequency/FrequentItemsJavaExample.html">•Frequent Items Java Example</a></li>
<li><a href="/docs/Frequency/FrequentItemsCppExample.html">•Frequent Items C++ Example</a></li>
<li><a href="/docs/Frequency/FrequentItemsPigUDFs.html">•Frequent Items Pig UDFs</a></li>
<li><a href="/docs/Frequency/FrequentItemsHiveUDFs.html">•Frequent Items Hive UDFs</a></li>
</div>
</div>
<p id="frequent-distinct-sketches">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_frequent_distinct_sketches">Frequent Distinct Sketches</a>
</p>
<div class="collapse" id="collapse_frequent_distinct_sketches">
<li><a href="/docs/Frequency/FrequentDistinctTuplesSketch.html">•Frequent Distinct Tuples Sketch</a></li>
</div>
</div>
<p id="quantiles-and-histograms">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_and_histograms">Quantiles And Histograms</a>
</p>
<div class="collapse" id="collapse_quantiles_and_histograms">
<li><a href="/docs/Quantiles/SketchingQuantilesAndRanksTutorial.html">•Quantiles and Ranks Tutorial</a></li>
<li><a href="/docs/Quantiles/QuantilesOverview.html">•Quantiles Overview</a></li>
<li><a href="/docs/KLL/KLLSketch.html">•KLL Floats sketch</a></li>
<li><a href="/docs/KLL/KLLAccuracyAndSize.html">•KLL Sketch Accuracy and Size</a></li>
<li><a href="/docs/REQ/ReqSketch.html">•REQ Floats sketch</a></li>
<li><a href="/docs/Quantiles/OrigQuantilesSketch.html">•Original QuantilesSketch</a></li>
<p id="quantiles-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_examples">Quantiles Examples</a>
</p>
<div class="collapse" id="collapse_quantiles_examples">
<li><a href="/docs/Quantiles/QuantilesJavaExample.html">•Quantiles Sketch Java Example</a></li>
<li><a href="/docs/KLL/KLLCppExample.html">•KLL Quantiles Sketch C++ Example</a></li>
<li><a href="/docs/Quantiles/QuantilesPigUDFs.html">•Quantiles Sketch Pig UDFs</a></li>
<li><a href="/docs/Quantiles/QuantilesHiveUDFs.html">•Quantiles Sketch Hive UDFs</a></li>
</div>
<p id="quantiles-studies">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_studies">Quantiles Studies</a>
</p>
<div class="collapse" id="collapse_quantiles_studies">
<li><a href="/docs/QuantilesStudies/KllSketchVsTDigest.html">•KLL sketch vs t-digest</a></li>
<li><a href="/docs/QuantilesStudies/DruidApproxHistogramStudy.html">•Druid Approximate Histogram</a></li>
<li><a href="/docs/QuantilesStudies/MomentsSketchStudy.html">•Moments Sketch Study</a></li>
<li><a href="/docs/QuantilesStudies/QuantilesStreamAStudy.html">•Quantiles StreamA Study</a></li>
<li><a href="/docs/QuantilesStudies/ExactQuantiles.html">•Exact Quantiles for Studies</a></li>
</div>
<p id="quantiles-sketch-theory">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_quantiles_sketch_theory">Quantiles Sketch Theory</a>
</p>
<div class="collapse" id="collapse_quantiles_sketch_theory">
<li><a href="https://github.com/apache/datasketches-website/tree/master/docs/pdf/Quantiles_KLL.pdf">•Optimal Quantile Approximation in Streams</a></li>
<li><a href="/docs/Quantiles/QuantilesReferences.html">•Quantiles References</a></li>
</div>
</div>
<p id="sampling">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sampling">Sampling</a>
</p>
<div class="collapse" id="collapse_sampling">
<li><a href="/docs/Sampling/ReservoirSampling.html">•Reservoir Sampling</a></li>
<li><a href="/docs/Sampling/ReservoirSamplingPerformance.html">•Reservoir Sampling Performance</a></li>
<li><a href="/docs/Sampling/VarOptSampling.html">•VarOpt Sampling</a></li>
<p id="sampling-examples">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_sampling_examples">Sampling Examples</a>
</p>
<div class="collapse" id="collapse_sampling_examples">
<li><a href="/docs/Sampling/ReservoirSamplingJava.html">•Reservoir Sampling Java Example</a></li>
<li><a href="/docs/Sampling/ReservoirSamplingPigUDFs.html">•Reservoir Sampling Pig UDFs</a></li>
<li><a href="/docs/Sampling/VarOptSamplingJava.html">•VarOpt Sampling Java Example</a></li>
<li><a href="/docs/Sampling/VarOptPigUDFs.html">•VarOpt Sampling Pig UDFs</a></li>
</div>
</div>
</div>
<p id="system-integrations">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_system_integrations">System Integrations</a>
</p>
<div class="collapse" id="collapse_system_integrations">
<li><a href="/docs/SystemIntegrations/ApacheDruidIntegration.html">•Using Sketches in Apache Druid</a></li>
<li><a href="/docs/SystemIntegrations/ApacheHiveIntegration.html">•Using Sketches in Apache Hive</a></li>
<li><a href="/docs/SystemIntegrations/ApachePigIntegration.html">•Using Sketches in Apache Pig</a></li>
<li><a href="/docs/SystemIntegrations/PostgreSQLIntegration.html">•Using Sketches in PostgreSQL</a></li>
</div>
<p id="community">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_community">Community</a>
</p>
<div class="collapse" id="collapse_community">
<li><a href="/docs/Community/index.html">•Community</a></li>
<li><a href="/docs/Community/Downloads.html">•Downloads</a></li>
<li><a href="/docs/Community/NewCommitterProcess.html">•Committer Process</a></li>
<li><a href="/docs/Community/ReleaseProcessForCppComponents.html">•Release Process For CPP Components</a></li>
<li><a href="/docs/Community/ReleaseProcessForJavaComponents.html">•Release Process For Java Components</a></li>
<li><a href="/docs/Community/Transitioning.html">•Transitioning from prior GitHub Site</a></li>
</div>
<p id="research">
<a data-toggle="collapse" class="menu collapsed" href="#collapse_research">Research</a>
</p>
<div class="collapse" id="collapse_research">
<li><a href="/docs/Community/Research.html">•Research</a></li>
</div>
</div>
<!-- End _includes/toc.html -->
<!-- Start _includes/tocScript.html -->
<script>
/**
 * Table-of-contents bootstrap.
 *
 * Once the DOM is ready, finds the ToC link whose href equals the current
 * page path, marks it with the `highlight` class, and expands every
 * enclosing `.collapse` section so the entry is visible. The first matching
 * link under any `.nav` element is highlighted as well.
 *
 * Pages that are not listed in the ToC or the navigation are left untouched
 * instead of raising a TypeError on a null element.
 */
(function () {
  /**
   * Returns the first element inside #toc whose href attribute equals
   * `path`, or null when there is none.
   */
  var findLineItem = function (path) {
    return document.querySelector(`#toc [href="${path}"]`);
  };

  /**
   * Returns the first element inside any `.nav` container whose href
   * attribute equals `path`, or null when there is none.
   */
  function findNavItem(path) {
    return document.querySelector(`.nav [href="${path}"]`);
  }

  /** Adds the `highlight` class to `element`. */
  var highlightLineItem = function (element) {
    element.classList.add('highlight');
  };

  /** Returns true when `element` carries the class `className`. */
  var checkHasClass = function (element, className) {
    return element.classList.contains(className);
  };

  /**
   * Collects `element` and each of its ancestors below <body> that has the
   * `collapse` class, innermost first.
   */
  var findAllCollapseParents = function (element) {
    var collapseMenus = [];
    var elementPointer = element;
    // Also stop on null: an element outside <body> never reaches document.body.
    while (elementPointer && elementPointer !== document.body) {
      if (checkHasClass(elementPointer, 'collapse')) {
        collapseMenus.push(elementPointer);
      }
      elementPointer = elementPointer.parentElement;
    }
    return collapseMenus;
  };

  /** Expands a single collapsible section without animation. */
  var openMenuItem = function (element) {
    // $(element).collapse('show') would start a transition, adding `in` class instead.
    element.classList.add('in');
  };

  /** Expands every collapsible section in `elementList`. */
  var openAllFromList = function (elementList) {
    elementList.forEach(openMenuItem);
  };

  /** Highlights the current page in the ToC and navigation and opens its ToC branch. */
  var highlightAndOpenMenu = function () {
    var currentPath = document.location.pathname;

    // Highlight & expand nav item in the TOC
    var currentLineItem = findLineItem(currentPath);
    if (currentLineItem) {
      highlightLineItem(currentLineItem);
      openAllFromList(findAllCollapseParents(currentLineItem));
    }

    // Highlight nav item in top navigation
    var currentNavItem = findNavItem(currentPath);
    if (currentNavItem) {
      highlightLineItem(currentNavItem);
    }
  };

  // jQuery shorthand for "run when the DOM is ready".
  $(highlightAndOpenMenu);
}());
</script>
<!-- End _includes/tocScript.html -->
</div>
<!-- End ToC Block -->
<div class="col-md-9 doc-content">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<h1 id="release-process-for-java-components">Release Process For Java Components</h1>
<p><strong>NOTES:</strong></p>
<ul>
<li>This process covers major and minor releases only. Bug-fix releases, which increment the third digit, are performed on an A.B.X branch and not on master, but otherwise are similar.</li>
<li>Some of these operations can be performed either on the Command-Line or in your IDE, whatever you prefer.</li>
</ul>
<h2 id="preparation">Preparation</h2>
<ul>
<li>Confirm correctness for
<ul>
<li>LICENSE</li>
<li>NOTICE – check for copyright dates</li>
<li>README.md</li>
<li>.asf.yaml</li>
<li>.travis.yml (if used)</li>
<li>.gitattributes – used to exclude files from release zip, assumes .gitignore</li>
<li>.github/workflows</li>
<li>.gitignore – used to exclude files from origin</li>
<li>pom.xml / apache-rat-plugin – checks for license headers, assumes .gitignore</li>
<li>pom.xml</li>
</ul>
</li>
<li>From Command Line or IDE:
<ul>
<li>Run Unit tests</li>
<li>Run Code Coverage &gt; 90%</li>
<li>Run SpotBugs checks (is it properly configured?)</li>
<li>Run Checkstyle (is it properly configured?)</li>
<li>Confirm that all <strong>temporary</strong> branches are checked into master and/or deleted, both local and remote.</li>
<li>Confirm any new bug fixes have corresponding tests</li>
</ul>
</li>
<li>From Command Line at Component root:
<ul>
<li>To confirm <em>gpg-agent</em> is running type:
<ul>
<li><code class="highlighter-rouge">eval $(gpg-agent --daemon)</code>
<ul>
<li>if it is not running it will start it and you will see something like:</li>
<li>
<pre>9566 ?? 0:30.33 gpg-agent --daemon<br />20315 ttys000 0:00:00 grep gpg</pre>
</li>
<li>if it is already running you will see something like:</li>
<li><code class="highlighter-rouge">gpg-agent: a gpg-agent is already running - not starting a new one</code></li>
</ul>
</li>
</ul>
</li>
<li>Confirm GitHub repository is current and git status is clean:
<ul>
<li><code class="highlighter-rouge">git status</code> # should return:</li>
<li>“On branch master, your branch is up to date with ‘origin/master’, nothing to commit, working tree clean.”</li>
</ul>
</li>
<li>At major version releases, search for deprecated code and remove at <strong>Major Versions</strong> only.
<ul>
<li><code class="highlighter-rouge">find . -name "*.java" -type f -print | xargs grep -i -n -s -A0 "deprecated"</code></li>
<li><strong>Note:</strong> When first marking a segment of code deprecated, please add the current version number. This will make it easier to know when to remove the deprecated code.</li>
</ul>
</li>
<li>Check Maven Plugin, Dependency, Property Versions of the POM:
<ul>
<li><code class="highlighter-rouge">mvn versions:display-plugin-updates</code></li>
<li><code class="highlighter-rouge">mvn versions:display-dependency-updates</code></li>
<li><code class="highlighter-rouge">mvn versions:display-property-updates</code></li>
</ul>
</li>
<li>Maven Tests:
<ul>
<li><code class="highlighter-rouge">mvn apache-rat:check</code></li>
<li><code class="highlighter-rouge">mvn clean test</code></li>
<li><code class="highlighter-rouge">mvn clean test -P strict</code></li>
<li><code class="highlighter-rouge">mvn clean javadoc:javadoc</code></li>
<li><code class="highlighter-rouge">mvn clean install -DskipTests=true</code></li>
<li>Check that the /target/ directory has 5 jars: (may need to refresh)
<ul>
<li>datasketches-&lt;component&gt;-SNAPSHOT-javadoc.jar</li>
<li>datasketches-&lt;component&gt;-SNAPSHOT-sources.jar</li>
<li>datasketches-&lt;component&gt;-SNAPSHOT-test-sources.jar</li>
<li>datasketches-&lt;component&gt;-SNAPSHOT-tests.jar</li>
<li>datasketches-&lt;component&gt;-SNAPSHOT.jar</li>
</ul>
</li>
<li>Check your local Maven repository
<ul>
<li><em>~/.m2/repository/org/apache/datasketches/datasketches-&lt;component&gt;/A.B.0-SNAPSHOT/</em></li>
<li>It should have 5 new jars and a .pom file.</li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
</ul>
<h2 id="create-permanent-release-branch--pom-version-preparation">Create Permanent Release Branch &amp; POM Version Preparation</h2>
<ul>
<li>Assume current master POM version = A.B.0-SNAPSHOT</li>
<li>From IDE or Command Line:
<ul>
<li>Switch from Master to new <strong>Permanent Branch</strong>: “A.B.X”</li>
<li>Edit pom.xml version to A.B.0 (remove -SNAPSHOT, do not change A or B)</li>
<li>Commit the change. <strong>DO NOT PUSH!</strong></li>
<li>Create Annotated TAG: A.B.0-RC1 (or RCn)</li>
<li>Write down the Git hash, for example: 40c6f4f</li>
<li>Now Push Branch “A.B.X” with edited pom.xml to origin</li>
<li><strong>DO NOT MERGE THIS PERMANENT BRANCH INTO MASTER</strong></li>
</ul>
</li>
<li>From IDE or Command-line:
<ul>
<li>Do explicit push of tags on branch “A.B.X” to origin:
<ul>
<li><code class="highlighter-rouge">git push origin --tags</code></li>
</ul>
</li>
</ul>
</li>
<li>From a web browser at origin web site: github.com/apache/datasketches-&lt;component&gt;
<ul>
<li>Select the A.B.X branch</li>
<li>Confirm that the tag: A.B.0-RC1 exists and that the tag is on the latest commit and with the correct Git hash.</li>
<li><strong>DO NOT CREATE PR OR MERGE THIS PERMANENT BRANCH INTO MASTER</strong></li>
</ul>
</li>
<li>From IDE or Command Line:
<ul>
<li>Confirm that the tag A.B.0-RC1 and the branch A.B.X, and HEAD coincide with the correct Git hash.</li>
<li>Confirm that there are no unstaged or staged changes.</li>
<li>Return to master branch</li>
<li>Edit master pom.xml to A’.B’.0-SNAPSHOT where A’ or B’ will be incremented by 1. (Bug fix releases will change the 3rd digit)</li>
<li>Commit and Push this change to origin/master with the comment “Release Process: Change pom version to A’.B’.0-SNAPSHOT.”</li>
<li>Create a tag A’.B’.0-SNAPSHOT on master at the HEAD.</li>
<li>Push the tag to origin: <code class="highlighter-rouge">git push origin --tags</code></li>
<li>Return to release branch A.B.X</li>
<li>You may minimize your IDE, pointing at the release branch.</li>
</ul>
</li>
</ul>
<h2 id="create-andor-checkout-local-distdev-directories-on-your-system">Create and/or Checkout Local <em>dist/dev</em> directories on your system</h2>
<ul>
<li>If you have not already, on your system create the two directory structures that mirror the dist.apache.org/repos/ directories:
<ul>
<li><code class="highlighter-rouge">mkdir -p dist/dev/datasketches/</code></li>
<li><code class="highlighter-rouge">mkdir -p dist/release/datasketches/</code></li>
</ul>
</li>
<li>Checkout both “dev” and “release” directories
<ul>
<li>Open a terminal in the dist/dev/datasketches directory and do a checkout:
<ul>
<li><code class="highlighter-rouge">svn co https://dist.apache.org/repos/dist/dev/datasketches/ .</code> #Note the DOT</li>
<li><code class="highlighter-rouge">svn status</code> # make sure it is clean</li>
</ul>
</li>
<li>Open a terminal in the dist/release/datasketches directory and do a checkout:
<ul>
<li><code class="highlighter-rouge">svn co https://dist.apache.org/repos/dist/release/datasketches/ .</code> #Note the DOT</li>
<li><code class="highlighter-rouge">svn status</code> # make sure it is clean</li>
</ul>
</li>
</ul>
</li>
</ul>
<h2 id="create-the-candidate-apache-release-distribution-on-distdev">Create the Candidate Apache Release Distribution on <em>dist/dev</em></h2>
<h3 id="create-primary-zip-files--signatures">Create primary zip files &amp; signatures</h3>
<ul>
<li>You will need the following arguments:
<ul>
<li>Absolute path of target project.basedir on your system</li>
<li>Project.artifactId : datasketches-&lt;component&gt; where component is e.g., java, pig, hive,…</li>
<li>GitHub Tag: A.B.0-RC1 (or RCn)</li>
<li>Have your GPG passphrase handy – you have only a few seconds to enter it!</li>
</ul>
</li>
<li>Start a new terminal in the above dist/dev/datasketches/scripts directory on your system:
<ul>
<li>To confirm <em>gpg-agent</em> is running type:
<ul>
<li><code class="highlighter-rouge">eval $(gpg-agent --daemon)</code>
<ul>
<li>if it is not running it will start it and you will see something like:</li>
<li>
<pre>9566 ?? 0:30.33 gpg-agent --daemon<br />20315 ttys000 0:00:00 grep gpg</pre>
</li>
<li>if it is already running you will see something like:</li>
<li><code class="highlighter-rouge">gpg-agent: a gpg-agent is already running - not starting a new one</code></li>
</ul>
</li>
</ul>
</li>
<li>Run something like:
<ul>
<li><code class="highlighter-rouge">./bashDeployToDist.sh /Users/&lt;name&gt;/dev/git/Apache/datasketches-&lt;component&gt; datasketches-&lt;component&gt; A.B.0-RC1</code></li>
<li>Follow the instructions.</li>
<li>NOTE: if you get the error “gpg: signing failed: No pinentry”:
<ul>
<li>open .gnupg/gpg-agent.conf</li>
<li>change to: pinentry-program <em>/usr/local/bin/pinentry-tty</em></li>
<li>reload the gpg agent in the terminal: <code class="highlighter-rouge">gpg-connect-agent reloadagent /bye</code></li>
<li>restart the <em>./bashDeployToDist</em> script</li>
</ul>
</li>
<li>Close the terminal</li>
</ul>
</li>
</ul>
</li>
<li>Check this web URL ~ <em>https://dist.apache.org/repos/dist/dev/datasketches/&lt;component&gt;/A.B.0-RC1/</em>
<ul>
<li>There should be 3 files: *-src.zip, *-src.zip.asc, *-src.zip.sha512</li>
<li>Copy the URL for later.</li>
</ul>
</li>
</ul>
<h3 id="java-push-jars-to-nexus-maven-central-staging">Java: Push Jars to Nexus (Maven Central) Staging</h3>
<ul>
<li>Return to original terminal at the project.basedir, still in the A.B.X branch.</li>
<li>If starting new terminal make sure GPG is running:
<ul>
<li>To confirm <em>gpg-agent</em> is running type:
<ul>
<li><code class="highlighter-rouge">eval $(gpg-agent --daemon)</code>
<ul>
<li>if it is not running it will start it and you will see something like:</li>
<li>
<pre>9566 ?? 0:30.33 gpg-agent --daemon<br />20315 ttys000 0:00:00 grep gpg</pre>
</li>
<li>if it is already running you will see something like:</li>
<li><code class="highlighter-rouge">gpg-agent: a gpg-agent is already running - not starting a new one</code></li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li><code class="highlighter-rouge">git status</code> # make sure you are still on the release branch: A.B.X</li>
<li>TRIAL-RUN:
<ul>
<li><strong>Have your GPG passphrase handy – you have only a few seconds to enter it!</strong></li>
<li><code class="highlighter-rouge">mvn clean install -Pnexus-jars -DskipTests=true</code>
<ul>
<li>Check target/ that jars &amp; pom have .asc signatures</li>
</ul>
</li>
</ul>
</li>
<li>DEPLOY
<ul>
<li><strong>Have your GPG passphrase handy – you have only a few seconds to enter it, but it may be automatic!</strong></li>
<li><code class="highlighter-rouge">mvn clean deploy -Pnexus-jars -DskipTests=true</code>
<ul>
<li>Login to <a href="https://repository.apache.org/">repository.apache.org</a> / Staging Repositories for orgapachedatasketches-XXXX</li>
<li>Click Content and search to the end. Each jar &amp; pom should have .asc, .md5, .sha1 signatures</li>
<li>[CLOSE] the Staging Repository with a comment: “&lt;component&gt; A.B.0”</li>
<li>Confirm its existence under Repositories/Staging web-site URL (in the summary window)</li>
<li>Grab its URL while there. You will need it for the Vote Letter.</li>
<li>Check your local Maven repository
<ul>
<li><em>~/.m2/repository/org/apache/datasketches/datasketches-&lt;component&gt;/A.B.0/</em></li>
<li>It should have 5 new jars and a .pom file each with .asc, .md5, and .sha1 signatures</li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
</ul>
<h3 id="create-copy-of-external-artifact-distributions">Create Copy of External Artifact Distributions</h3>
<h4 id="java">JAVA</h4>
<ul>
<li>Place copies of the artifact jars deployed to Nexus under a “maven” directory. For example see <a href="https://dist.apache.org/repos/dist/dev/datasketches/memory/1.3.0-RC1/">https://dist.apache.org/repos/dist/dev/datasketches/memory/1.3.0-RC1/</a></li>
<li>Note that the <code class="highlighter-rouge">jar</code> files with their <code class="highlighter-rouge">asc</code>, <code class="highlighter-rouge">md5</code> and <code class="highlighter-rouge">sha1</code> signatures are all together in the local .m2 repository</li>
<li>Add a <code class="highlighter-rouge">maven</code> directory under the <code class="highlighter-rouge">dist/dev/datasketches/&lt;component&gt;/A.B.0/</code></li>
<li>Bulk copy the <code class="highlighter-rouge">jar, asc, md5</code> and <code class="highlighter-rouge">sha1</code> files into the <code class="highlighter-rouge">maven</code> directory.</li>
<li>Do: <code class="highlighter-rouge">svn status</code> # check to see if it is ready to add</li>
<li>Do: <code class="highlighter-rouge">svn add . --force</code></li>
<li>Do: <code class="highlighter-rouge">svn ci -m "add nexus jars to dist/dev/datasketches"</code></li>
</ul>
<h4 id="non-java">Non-Java</h4>
<ul>
<li>For external artifacts such as Python or Docker the subdirectory name should be relevant to the type.</li>
<li>These must be signed with GPG (.asc) and SHA512 (.sha512)</li>
</ul>
<h2 id="prepare--send-vote-letter-to-dev">Prepare &amp; Send [VOTE] Letter to dev@</h2>
<ul>
<li>See VoteTemplates directory for a recent example</li>
<li>If vote is not successful, fix the problem and repeat above steps.</li>
<li>After a successful vote return to <strong>this point</strong> and continue …</li>
</ul>
<h2 id="prepare--send-vote-result-letter-to-dev">Prepare &amp; Send [VOTE-RESULT] Letter to dev@</h2>
<ul>
<li>See VoteTemplates directory for a recent example</li>
<li>Declare that the vote is closed.</li>
<li>Summarize vote results</li>
</ul>
<h2 id="finalize-the-release">Finalize the Release</h2>
<h3 id="copy-files-from-distdev-to-distrelease">Copy files from <em>dist/dev</em> to <em>dist/release</em></h3>
<ul>
<li>In local <em>dist/<strong>dev</strong>/datasketches/</em>
<ul>
<li>Open Terminal #1
<ul>
<li>Confirm you are in the <code class="highlighter-rouge">/dev/</code> directory: <code class="highlighter-rouge">pwd</code></li>
<li>Perform SVN Checkout:
<ul>
<li><code class="highlighter-rouge">svn co https://dist.apache.org/repos/dist/dev/datasketches/ .</code> #note dot at end</li>
<li><code class="highlighter-rouge">svn status</code> #make sure checkout is clean</li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li>In local <em>dist/<strong>release</strong>/datasketches/</em>
<ul>
<li>Open Terminal #2
<ul>
<li>Confirm you are in the <code class="highlighter-rouge">/release/</code> directory: <code class="highlighter-rouge">pwd</code></li>
<li>Perform SVN Checkout:
<ul>
<li><code class="highlighter-rouge">svn co https://dist.apache.org/repos/dist/release/datasketches/ .</code> #note dot at end</li>
<li><code class="highlighter-rouge">svn status</code> #make sure checkout is clean</li>
</ul>
</li>
<li>Create new version directory under appropriate component directory:
<ul>
<li><code class="highlighter-rouge">mkdir -p &lt;component&gt;/A.B.0</code></li>
</ul>
</li>
</ul>
</li>
<li>Using local file system copy files
<ul>
<li>From … /dist/dev/datasketches/&lt;component&gt;/version-RCnn/*</li>
<li>To … /dist/release/datasketches/&lt;component&gt;/version (no RCnn)/*</li>
<li>Make sure to move External Artifact Distributions from <em>dist/dev</em> to <em>dist/release</em></li>
</ul>
</li>
<li>Using Terminal #2 at … /dist/release/datasketches directory:
<ul>
<li><code class="highlighter-rouge">svn add . --force</code></li>
<li><code class="highlighter-rouge">svn ci -m "Release A.B.0"</code></li>
<li>Remove the prior release…</li>
<li><code class="highlighter-rouge">svn remove &lt;component&gt;/X.Y.0</code></li>
<li><code class="highlighter-rouge">svn ci -m "Remove Prior release"</code></li>
<li><code class="highlighter-rouge">svn status</code> # should be empty</li>
</ul>
</li>
<li>Using local file system
<ul>
<li>Delete the prior X.Y.0 directory if necessary.</li>
</ul>
</li>
</ul>
</li>
</ul>
<h3 id="java-release-jars-on-nexus-staging">Java: Release Jars on Nexus Staging</h3>
<ul>
<li>On Nexus <a href="https://repository.apache.org/">repository.apache.org</a> click on Staging Repositories</li>
<li>Select “orgapachedatasketches-XXXX” (If more than one make sure you select the right one!)</li>
<li>At the top of the window, select “Release”</li>
<li>Confirm that the artifacts have moved to the “Releases” repository under “Repositories”
<ul>
<li>Browse to <em>Releases/org/apache/datasketches/…</em></li>
</ul>
</li>
</ul>
<h3 id="java-drop-any-previous-release-candidates-that-were-not-used">Java: Drop any previous Release Candidates that were not used.</h3>
<ul>
<li>On Nexus <a href="https://repository.apache.org/">repository.apache.org</a> click on Staging Repositories</li>
<li>Select “orgapachedatasketches-XXXX” (If more than one make sure you select the right one!)</li>
<li>At the top of the window, select “Drop”</li>
</ul>
<h3 id="if-necessary-update-branch-master-from-branch-abx">If necessary, update branch <em>master</em> from branch <em>A.B.X</em></h3>
<p>If you have gone through more than one Release Candidate, you may have changes that need to be
reflected in the master. Use the <strong>git cherry-pick</strong> command for this.</p>
<h2 id="finalize-release-documentation">Finalize Release Documentation</h2>
<h3 id="update-apache-reporter">Update Apache Reporter</h3>
<ul>
<li>Because of the commit to the <code class="highlighter-rouge">dist/release</code> branch, you should get an automated email requesting you to update the Apache database about the release. The email should point you to the <a href="https://reporter.apache.org/addrelease.html?datasketches">Apache Committee Report Helper</a>. You can choose to go there directly without waiting for the notice; there is only one box to fill out.</li>
<li>Update the full name of the component release. For example: <code class="highlighter-rouge">Apache datasketches-memory-1.3.0</code></li>
</ul>
<h3 id="create--document-release-tag-on-github">Create &amp; Document Release Tag on GitHub</h3>
<ul>
<li>Open your IDE and switch to the recently created Release Branch A.B.X</li>
<li>Find the recently created A.B.0-RCn tag in that branch</li>
<li>At that same GitHub ID hash, create a new tag A.B.0 (without the RCn).</li>
<li>From the Command Line: Push the new tag to origin:
<ul>
<li><code class="highlighter-rouge">git push origin --tags</code></li>
</ul>
</li>
<li>On the GitHub component site document the release</li>
</ul>
<h3 id="update-website-downloadsmd-latest-source-zip-files-table">Update Website Downloads.md “Latest Source Zip Files” Table</h3>
<ul>
<li>This script assumes that the remote <em>…/dist/release/datasketches/…</em> directories are up-to-date with no old releases.</li>
<li>Start a new terminal in the <em>../dist/dev/datasketches/scripts</em> directory on your system:</li>
<li>Make sure your local website directory is pointing to master and up-to-date.</li>
<li>Run the following with the argument specifying the location of your local website directory:
<ul>
<li><code class="highlighter-rouge">./createDownloadsInclude.sh /Users/&lt;name&gt;/ ... /datasketches-website</code></li>
</ul>
</li>
<li>When this is done, be sure to commit the changes to the website.</li>
</ul>
<h3 id="update-javadocs-or-equivalent-on-website">Update Javadocs (or Equivalent) on Website</h3>
<h3 id="update-website-documentation-if-new-functionality">Update Website Documentation (if new functionality)</h3>
<h3 id="prepare-announce-letter-to-dev">Prepare Announce Letter to dev@</h3>
<ul>
<li>ASF requests that you wait 24 hours before publishing the Announce letter, to allow propagation to the mirrors.</li>
<li>Use recent template</li>
<li>Summarize vote results</li>
</ul>
<h2 id="update-these-instructions">Update These Instructions</h2>
<ul>
<li>If you have updated this file or any of the scripts, please update this file on the <a href="https://datasketches.apache.org/docs/Community/ReleaseProcessForJavaComponents.html">website</a> and dist/dev/datasketches for the scripts.</li>
</ul>
</div> <!-- End content -->
</div> <!-- End row -->
</div> <!-- End Container -->
<!-- Start _include/page_footer.html -->
<footer class="ds-footer">
<div class="container">
<div class="text-center">
<div>Copyright © 2020 <a href="https://www.apache.org">Apache Software Foundation</a>,
Licensed under the Apache License, Version 2.0. All Rights Reserved.<br/>
Apache DataSketches, Apache, the Apache feather logo, and the Apache DataSketches project logos are trademarks of The Apache Software Foundation.<br/>
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
</div>
</div>
</div>
</footer>
<!-- End _include/page_footer.html -->
</body>
</html>
<!-- End _layouts/doc_page.html-->