|  | <!DOCTYPE html> | 
|  | <html lang="en"> | 
|  | <head> | 
|  | <meta charset="UTF-8" /> | 
|  | <meta name="viewport" content="width=device-width, initial-scale=1.0"> | 
|  | <meta name="description" content="Apache Druid"> | 
|  | <meta name="keywords" content="druid,kafka,database,analytics,streaming,real-time,real time,apache,open source"> | 
|  | <meta name="author" content="Apache Software Foundation"> | 
|  |  | 
|  | <title>Druid | DataSketches HLL Sketch module</title> | 
|  |  | 
|  | <link rel="alternate" type="application/atom+xml" href="/feed"> | 
|  | <link rel="shortcut icon" href="/img/favicon.png"> | 
|  |  | 
|  | <link rel="stylesheet" href="/assets/css/font-awesome-5.css"> | 
|  |  | 
|  | <link href='//fonts.googleapis.com/css?family=Open+Sans+Condensed:300,700,300italic|Open+Sans:300italic,400italic,600italic,400,300,600,700' rel='stylesheet' type='text/css'> | 
|  |  | 
|  | <link rel="stylesheet" href="/css/bootstrap-pure.css?v=1.1"> | 
|  | <link rel="stylesheet" href="/css/base.css?v=1.1"> | 
|  | <link rel="stylesheet" href="/css/header.css?v=1.1"> | 
|  | <link rel="stylesheet" href="/css/footer.css?v=1.1"> | 
|  | <link rel="stylesheet" href="/css/syntax.css?v=1.1"> | 
|  | <link rel="stylesheet" href="/css/docs.css?v=1.1"> | 
|  |  | 
|  | <script> | 
|  | (function() { | 
|  | var cx = '000162378814775985090:molvbm0vggm'; | 
|  | var gcse = document.createElement('script'); | 
|  | gcse.type = 'text/javascript'; | 
|  | gcse.async = true; | 
|  | gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') + | 
|  | '//cse.google.com/cse.js?cx=' + cx; | 
|  | var s = document.getElementsByTagName('script')[0]; | 
|  | s.parentNode.insertBefore(gcse, s); | 
|  | })(); | 
|  | </script> | 
|  |  | 
|  |  | 
|  | </head> | 
|  |  | 
|  | <body> | 
|  | <!-- Start page_header include --> | 
|  | <script src="//ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script> | 
|  |  | 
|  | <div class="top-navigator"> | 
|  | <div class="container"> | 
|  | <div class="left-cont"> | 
|  | <a class="logo" href="/"><span class="druid-logo"></span></a> | 
|  | </div> | 
|  | <div class="right-cont"> | 
|  | <ul class="links"> | 
|  | <li class=""><a href="/technology">Technology</a></li> | 
|  | <li class=""><a href="/use-cases">Use Cases</a></li> | 
|  | <li class=""><a href="/druid-powered">Powered By</a></li> | 
|  | <li class=""><a href="/docs/latest/design/">Docs</a></li> | 
|  | <li class=""><a href="/community/">Community</a></li> | 
|  | <li class="header-dropdown"> | 
|  | <a>Apache</a> | 
|  | <div class="header-dropdown-menu"> | 
|  | <a href="https://www.apache.org/" target="_blank">Foundation</a> | 
|  | <a href="https://www.apache.org/events/current-event" target="_blank">Events</a> | 
|  | <a href="https://www.apache.org/licenses/" target="_blank">License</a> | 
|  | <a href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a> | 
|  | <a href="https://www.apache.org/security/" target="_blank">Security</a> | 
|  | <a href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Sponsorship</a> | 
|  | </div> | 
|  | </li> | 
|  | <li class=" button-link"><a href="/downloads.html">Download</a></li> | 
|  | </ul> | 
|  | </div> | 
|  | </div> | 
|  | <div class="action-button menu-icon"> | 
|  | <span class="fa fa-bars"></span> MENU | 
|  | </div> | 
|  | <div class="action-button menu-icon-close"> | 
|  | <span class="fa fa-times"></span> MENU | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  | <script type="text/javascript"> | 
|  | var $menu = $('.right-cont'); | 
|  | var $menuIcon = $('.menu-icon'); | 
|  | var $menuIconClose = $('.menu-icon-close'); | 
|  |  | 
|  | function showMenu() { | 
|  | $menu.fadeIn(100); | 
|  | $menuIcon.fadeOut(100); | 
|  | $menuIconClose.fadeIn(100); | 
|  | } | 
|  |  | 
|  | $menuIcon.click(showMenu); | 
|  |  | 
|  | function hideMenu() { | 
|  | $menu.fadeOut(100); | 
|  | $menuIconClose.fadeOut(100); | 
|  | $menuIcon.fadeIn(100); | 
|  | } | 
|  |  | 
|  | $menuIconClose.click(hideMenu); | 
|  |  | 
|  | $(window).resize(function() { | 
|  | if ($(window).width() >= 840) { | 
|  | $menu.fadeIn(100); | 
|  | $menuIcon.fadeOut(100); | 
|  | $menuIconClose.fadeOut(100); | 
|  | } | 
|  | else { | 
|  | $menu.fadeOut(100); | 
|  | $menuIcon.fadeIn(100); | 
|  | $menuIconClose.fadeOut(100); | 
|  | } | 
|  | }); | 
|  | </script> | 
|  |  | 
|  | <!-- Stop page_header include --> | 
|  |  | 
|  |  | 
|  | <div class="container doc-container"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <p> Looking for the <a href="/docs/26.0.0/">latest stable documentation</a>?</p> | 
|  |  | 
|  |  | 
|  | <div class="row"> | 
|  | <div class="col-md-9 doc-content"> | 
|  | <p> | 
|  | <a class="btn btn-default btn-xs visible-xs-inline-block visible-sm-inline-block" href="#toc">Table of Contents</a> | 
|  | </p> | 
|  | <!-- | 
|  | ~ Licensed to the Apache Software Foundation (ASF) under one | 
|  | ~ or more contributor license agreements.  See the NOTICE file | 
|  | ~ distributed with this work for additional information | 
|  | ~ regarding copyright ownership.  The ASF licenses this file | 
|  | ~ to you under the Apache License, Version 2.0 (the | 
|  | ~ "License"); you may not use this file except in compliance | 
|  | ~ with the License.  You may obtain a copy of the License at | 
|  | ~ | 
|  | ~   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | ~ | 
|  | ~ Unless required by applicable law or agreed to in writing, | 
|  | ~ software distributed under the License is distributed on an | 
|  | ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | ~ KIND, either express or implied.  See the License for the | 
|  | ~ specific language governing permissions and limitations | 
|  | ~ under the License. | 
|  | --> | 
|  |  | 
|  | <h1 id="datasketches-hll-sketch-module">DataSketches HLL Sketch module</h1> | 
|  |  | 
|  | <p>This module provides Apache Druid (incubating) aggregators for distinct counting based on HLL sketch from <a href="http://datasketches.github.io/">datasketches</a> library. At ingestion time, this aggregator creates the HLL sketch objects to be stored in Druid segments. At query time, sketches are read and merged together. In the end, by default, you receive the estimate of the number of distinct values presented to the sketch. Also, you can use post aggregator to produce a union of sketch columns in the same row. | 
|  | You can use the HLL sketch aggregator on columns of any identifiers. It will return estimated cardinality of the column.</p> | 
|  |  | 
|  | <p>To use this aggregator, make sure you <a href="../../operations/including-extensions.html">include</a> the extension in your config file:</p> | 
|  | <div class="highlight"><pre><code class="language-text" data-lang="text"><span></span>druid.extensions.loadList=["druid-datasketches"] | 
|  | </code></pre></div> | 
|  | <h3 id="aggregators">Aggregators</h3> | 
|  | <div class="highlight"><pre><code class="language-text" data-lang="text"><span></span>{ | 
|  | "type" : "HLLSketchBuild", | 
|  | "name" : <output name>, | 
|  | "fieldName" : <metric name>, | 
|  | "lgK" : <size and accuracy parameter>, | 
|  | "tgtHllType" : <target HLL type> | 
|  | } | 
|  | </code></pre></div><div class="highlight"><pre><code class="language-text" data-lang="text"><span></span>{ | 
|  | "type" : "HLLSketchMerge", | 
|  | "name" : <output name>, | 
|  | "fieldName" : <metric name>, | 
|  | "lgK" : <size and accuracy parameter>, | 
|  | "tgtHllType" : <target HLL type> | 
|  | } | 
|  | </code></pre></div> | 
|  | <table><thead> | 
|  | <tr> | 
|  | <th>property</th> | 
|  | <th>description</th> | 
|  | <th>required?</th> | 
|  | </tr> | 
|  | </thead><tbody> | 
|  | <tr> | 
|  | <td>type</td> | 
|  | <td>This String should be "HLLSketchBuild" or "HLLSketchMerge"</td> | 
|  | <td>yes</td> | 
|  | </tr> | 
|  | <tr> | 
|  | <td>name</td> | 
|  | <td>A String for the output (result) name of the calculation.</td> | 
|  | <td>yes</td> | 
|  | </tr> | 
|  | <tr> | 
|  | <td>fieldName</td> | 
|  | <td>A String for the name of the input field.</td> | 
|  | <td>yes</td> | 
|  | </tr> | 
|  | <tr> | 
|  | <td>lgK</td> | 
|  | <td>log2 of K that is the number of buckets in the sketch, parameter that controls the size and the accuracy. Must be a power of 2 from 4 to 21 inclusively.</td> | 
|  | <td>no, defaults to 12</td> | 
|  | </tr> | 
|  | <tr> | 
|  | <td>tgtHllType</td> | 
|  | <td>The type of the target HLL sketch. Must be "HLL_4", "HLL_6" or "HLL_8"</td> | 
|  | <td>no, defaults to "HLL_4"</td> | 
|  | </tr> | 
|  | </tbody></table> | 
|  |  | 
|  | <h3 id="post-aggregators">Post Aggregators</h3> | 
|  |  | 
|  | <h4 id="estimate-with-bounds">Estimate with bounds</h4> | 
|  | <div class="highlight"><pre><code class="language-text" data-lang="text"><span></span>{ | 
|  | "type"  : "HLLSketchEstimateWithBounds", | 
|  | "name": <output name>, | 
|  | "field"  : <post aggregator that returns an HLL Sketch>, | 
|  | "numStdDev" : <number of standard deviations: 1 (default), 2 or 3> | 
|  | } | 
|  | </code></pre></div> | 
|  | <h4 id="union">Union</h4> | 
|  | <div class="highlight"><pre><code class="language-text" data-lang="text"><span></span>{ | 
|  | "type"  : "HLLSketchUnion", | 
|  | "name": <output name>, | 
|  | "fields"  : <array of post aggregators that return HLL sketches>, | 
|  | "lgK": <log2 of K for the target sketch>, | 
|  | "tgtHllType" : <target HLL type> | 
|  | } | 
|  | </code></pre></div> | 
|  | <h4 id="sketch-to-string">Sketch to string</h4> | 
|  |  | 
|  | <p>Human-readable sketch summary for debugging</p> | 
|  | <div class="highlight"><pre><code class="language-text" data-lang="text"><span></span>{ | 
|  | "type"  : "HLLSketchToString", | 
|  | "name": <output name>, | 
|  | "field"  : <post aggregator that returns an HLL Sketch> | 
|  | } | 
|  | </code></pre></div> | 
|  | </div> | 
|  | <div class="col-md-3"> | 
|  | <div class="searchbox"> | 
|  | <gcse:searchbox-only></gcse:searchbox-only> | 
|  | </div> | 
|  | <div id="toc" class="nav toc hidden-print"> | 
|  | </div> | 
|  | </div> | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  | <!-- Start page_footer include --> | 
|  | <footer class="druid-footer"> | 
|  | <div class="container"> | 
|  | <div class="text-center"> | 
|  | <p> | 
|  | <a href="/technology">Technology</a> ·  | 
|  | <a href="/use-cases">Use Cases</a> ·  | 
|  | <a href="/druid-powered">Powered by Druid</a> ·  | 
|  | <a href="/docs/latest/">Docs</a> ·  | 
|  | <a href="/community/">Community</a> ·  | 
|  | <a href="/downloads.html">Download</a> ·  | 
|  | <a href="/faq">FAQ</a> | 
|  | </p> | 
|  | </div> | 
|  | <div class="text-center"> | 
|  | <a title="Join the user group" href="https://groups.google.com/forum/#!forum/druid-user" target="_blank"><span class="fa fa-comments"></span></a> ·  | 
|  | <a title="Follow Druid" href="https://twitter.com/druidio" target="_blank"><span class="fab fa-twitter"></span></a> ·  | 
|  | <a title="GitHub" href="https://github.com/apache/druid" target="_blank"><span class="fab fa-github"></span></a> | 
|  | </div> | 
|  | <div class="text-center license"> | 
|  | Copyright © 2020 <a href="https://www.apache.org/" target="_blank">Apache Software Foundation</a>.<br> | 
|  | Except where otherwise noted, licensed under <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0</a>.<br> | 
|  | Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries. | 
|  | </div> | 
|  | </div> | 
|  | </footer> | 
|  |  | 
|  | <script async src="https://www.googletagmanager.com/gtag/js?id=UA-131010415-1"></script> | 
|  | <script> | 
|  | window.dataLayer = window.dataLayer || []; | 
|  | function gtag(){dataLayer.push(arguments);} | 
|  | gtag('js', new Date()); | 
|  | gtag('config', 'UA-131010415-1'); | 
|  | </script> | 
|  | <script> | 
|  | function trackDownload(type, url) { | 
|  | ga('send', 'event', 'download', type, url); | 
|  | } | 
|  | </script> | 
|  | <script src="//code.jquery.com/jquery.min.js"></script> | 
|  | <script src="//maxcdn.bootstrapcdn.com/bootstrap/3.2.0/js/bootstrap.min.js"></script> | 
|  | <script src="/assets/js/druid.js"></script> | 
|  | <!-- stop page_footer include --> | 
|  |  | 
|  |  | 
|  | <script> | 
|  | $(function() { | 
|  | $(".toc").load("/docs/0.14.0-incubating/toc.html"); | 
|  |  | 
|  | // There is no way to tell when .gsc-input will be async loaded into the page so just try to set a placeholder until it works | 
|  | var tries = 0; | 
|  | var timer = setInterval(function() { | 
|  | tries++; | 
|  | if (tries > 300) clearInterval(timer); | 
|  | var searchInput = $('input.gsc-input'); | 
|  | if (searchInput.length) { | 
|  | searchInput.attr('placeholder', 'Search'); | 
|  | clearInterval(timer); | 
|  | } | 
|  | }, 100); | 
|  | }); | 
|  | </script> | 
|  | </body> | 
|  | </html> |