// blob: cf9685f1d5ee8ffe97bf850521c1a4475937ffc4 [file] [log] [blame]
"use strict";
/*
 * Webpack chunk 6228.
 *
 * Registers two modules on the shared `self.webpackChunk` queue:
 *   - 15680: the MDX element runtime (exports `xA` and `yg`).
 *   - 82854: the compiled "DDSketches for Approximate Quantiles module"
 *            documentation page (metadata exports plus the default component).
 *
 * NOTE(review): this looks like build output (presumably Docusaurus + MDX v1);
 * lasting content fixes belong in the Markdown source named in `metadata.source`.
 */
(self.webpackChunk = self.webpackChunk || []).push([
  [6228],
  {
    /**
     * Module 15680 - MDX element runtime.
     *
     * Exports:
     *   xA - provider component that publishes a component mapping via context.
     *   yg - createElement wrapper that routes string / `mdxType` elements
     *        through MDXCreateElement so the mapping can override them.
     */
    15680: (__unused_module, __webpack_exports__, __webpack_require__) => {
      __webpack_require__.d(__webpack_exports__, {
        xA: () => MDXProvider,
        yg: () => createElement,
      });

      // Module 96540 supplies createContext/useContext/createElement/forwardRef/
      // Fragment (presumably React).
      const React = __webpack_require__(96540);

      /**
       * Sets `obj[key] = value`. When the key already exists (own or inherited)
       * it is redefined as an enumerable, configurable, writable data property.
       * Returns `obj`.
       */
      function defineProperty(obj, key, value) {
        if (key in obj) {
          Object.defineProperty(obj, key, {
            value: value,
            enumerable: true,
            configurable: true,
            writable: true,
          });
        } else {
          obj[key] = value;
        }
        return obj;
      }

      /**
       * Returns the own string keys of `object` followed by its own symbol keys.
       * With `enumerableOnly`, non-enumerable symbols are dropped.
       */
      function ownKeys(object, enumerableOnly) {
        const keys = Object.keys(object);
        if (Object.getOwnPropertySymbols) {
          let symbols = Object.getOwnPropertySymbols(object);
          if (enumerableOnly) {
            symbols = symbols.filter(
              (symbol) => Object.getOwnPropertyDescriptor(object, symbol).enumerable
            );
          }
          keys.push.apply(keys, symbols);
        }
        return keys;
      }

      /**
       * Copies every source into `target` and returns `target`.
       *
       * Sources at odd argument positions (1st, 3rd, ...) are copied by value,
       * enumerable keys only; sources at even positions are copied by property
       * descriptor. Nullish sources are treated as `{}`.
       */
      function objectSpread(target, ...sources) {
        sources.forEach((rawSource, sourceIndex) => {
          const source = rawSource != null ? rawSource : {};
          // `target` is argument 0, so the first source sits at position 1.
          const position = sourceIndex + 1;
          if (position % 2) {
            ownKeys(Object(source), true).forEach((key) => {
              defineProperty(target, key, source[key]);
            });
          } else if (Object.getOwnPropertyDescriptors) {
            Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
          } else {
            ownKeys(Object(source)).forEach((key) => {
              Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
            });
          }
        });
        return target;
      }

      /**
       * Returns a shallow copy of `source` without the keys listed in `excluded`.
       * Symbol keys are copied only when enumerable. A nullish `source` gives `{}`.
       */
      function objectWithoutProperties(source, excluded) {
        if (source == null) {
          return {};
        }
        const target = {};
        Object.keys(source).forEach((key) => {
          if (excluded.indexOf(key) < 0) {
            target[key] = source[key];
          }
        });
        if (Object.getOwnPropertySymbols) {
          Object.getOwnPropertySymbols(source).forEach((symbol) => {
            if (excluded.indexOf(symbol) >= 0) {
              return;
            }
            if (!Object.prototype.propertyIsEnumerable.call(source, symbol)) {
              return;
            }
            target[symbol] = source[symbol];
          });
        }
        return target;
      }

      // Context carrying the component mapping; defaults to an empty mapping.
      const MDXContext = React.createContext({});

      /**
       * Resolves the effective component mapping.
       *
       * @param {object|function|undefined} components - extra mapping, or a
       *   function that receives the context mapping and returns the new one.
       * @returns {object} the context mapping, optionally merged/overridden.
       */
      const useMDXComponents = function (components) {
        const contextComponents = React.useContext(MDXContext);
        if (!components) {
          return contextComponents;
        }
        return typeof components === "function"
          ? components(contextComponents)
          : objectSpread(objectSpread({}, contextComponents), components);
      };

      /** Provider that exposes `props.components` (merged with any outer mapping) to descendants. */
      const MDXProvider = function (props) {
        const allComponents = useMDXComponents(props.components);
        return React.createElement(MDXContext.Provider, { value: allComponents }, props.children);
      };

      // Prop name under which the element's MDX type is stored.
      const MDX_TYPE_KEY = "mdxType";

      // Fallbacks used when the mapping has no entry for an MDX type.
      const DEFAULT_COMPONENTS = {
        inlineCode: "code",
        wrapper: function (props) {
          return React.createElement(React.Fragment, {}, props.children);
        },
      };

      /**
       * Renders one MDX element. The component is looked up, in order, as
       * "<parentName>.<mdxType>" in the mapping, "<mdxType>" in the mapping,
       * the defaults above, and finally the element's original type.
       */
      const MDXCreateElement = React.forwardRef(function (props, ref) {
        const propComponents = props.components;
        const mdxType = props.mdxType;
        const originalType = props.originalType;
        const parentName = props.parentName;
        const passThroughProps = objectWithoutProperties(props, [
          "components",
          "mdxType",
          "originalType",
          "parentName",
        ]);
        const components = useMDXComponents(propComponents);
        const Component =
          components[`${parentName}.${mdxType}`] ||
          components[mdxType] ||
          DEFAULT_COMPONENTS[mdxType] ||
          originalType;

        if (propComponents) {
          // Forward the explicit mapping so the rendered component can use it.
          return React.createElement(
            Component,
            objectSpread(objectSpread({ ref: ref }, passThroughProps), {}, { components: propComponents })
          );
        }
        return React.createElement(Component, objectSpread({ ref: ref }, passThroughProps));
      });

      /**
       * Drop-in replacement for the underlying createElement.
       *
       * String types and elements carrying `props.mdxType` are wrapped in
       * MDXCreateElement (recording `originalType` and `mdxType`); every other
       * call is forwarded unchanged.
       *
       * @param {*} type - element type.
       * @param {object|null} props - element props; further arguments are children.
       */
      function createElement(type, props) {
        const callArgs = arguments;
        const mdxType = props && props.mdxType;

        if (typeof type === "string" || mdxType) {
          const argCount = callArgs.length;
          const wrappedArgs = new Array(argCount);
          wrappedArgs[0] = MDXCreateElement;

          const wrappedProps = {};
          for (const key in props) {
            // Explicit prototype lookup: does not depend on a global
            // `hasOwnProperty` binding being present in the host environment.
            if (Object.prototype.hasOwnProperty.call(props, key)) {
              wrappedProps[key] = props[key];
            }
          }
          wrappedProps.originalType = type;
          wrappedProps[MDX_TYPE_KEY] = typeof type === "string" ? type : mdxType;
          wrappedArgs[1] = wrappedProps;

          for (let index = 2; index < argCount; index++) {
            wrappedArgs[index] = callArgs[index];
          }
          return React.createElement.apply(null, wrappedArgs);
        }
        return React.createElement.apply(null, callArgs);
      }

      MDXCreateElement.displayName = "MDXCreateElement";
    },

    /**
     * Module 82854 - compiled documentation page
     * "DDSketches for Approximate Quantiles module".
     *
     * Exports: assets, contentTitle, default (the page component),
     * frontMatter, metadata, toc.
     */
    82854: (__unused_module, __webpack_exports__, __webpack_require__) => {
      __webpack_require__.r(__webpack_exports__);
      __webpack_require__.d(__webpack_exports__, {
        assets: () => assets,
        contentTitle: () => contentTitle,
        default: () => MDXContent,
        frontMatter: () => frontMatter,
        metadata: () => metadata,
        toc: () => toc,
      });

      // NOTE(review): 58168 / 98587 are defined elsewhere; from their use below
      // they behave like an "extends" (merge) helper and an "object without
      // properties" helper - confirm against the runtime chunk.
      const extendsHelper = __webpack_require__(58168);
      const withoutPropertiesHelper = __webpack_require__(98587);
      __webpack_require__(96540); // required only for its evaluation; result unused here
      const mdx = __webpack_require__(15680);

      // Props consumed by MDXContent itself and not forwarded with the rest.
      const EXCLUDED_PROPS = ["components"];

      const frontMatter = {
        id: "ddsketch-quantiles",
        title: "DDSketches for Approximate Quantiles module",
      };

      const contentTitle = undefined;

      const metadata = {
        unversionedId: "development/extensions-contrib/ddsketch-quantiles",
        id: "development/extensions-contrib/ddsketch-quantiles",
        title: "DDSketches for Approximate Quantiles module",
        // Uses the page's opening sentence; the generated value was the literal
        // start of an HTML comment ("<!--"), which is not a usable description.
        description:
          "This module provides aggregators for approximate quantile queries using the DDSketch library.",
        source: "@site/docs/latest/development/extensions-contrib/ddsketch-quantiles.md",
        sourceDirName: "development/extensions-contrib",
        slug: "/development/extensions-contrib/ddsketch-quantiles",
        permalink: "/docs/latest/development/extensions-contrib/ddsketch-quantiles",
        draft: false,
        tags: [],
        version: "current",
        frontMatter: {
          id: "ddsketch-quantiles",
          title: "DDSketches for Approximate Quantiles module",
        },
      };

      const assets = {};

      const toc = [
        { value: "Aggregator", id: "aggregator", level: 3 },
        { value: "Post Aggregators", id: "post-aggregators", level: 3 },
        { value: "quantilesFromDDSketch", id: "quantilesfromddsketch", level: 4 },
        { value: "quantileFromDDSketch", id: "quantilefromddsketch", level: 4 },
        { value: "Example", id: "example", level: 3 },
      ];

      const layoutProps = { toc: toc };
      const LAYOUT_WRAPPER = "wrapper";

      // ---- Code samples shown on the page -------------------------------------

      const LOAD_LIST_SAMPLE = 'druid.extensions.loadList=["druid-ddsketch", ...]\n';

      const AGGREGATOR_SPEC = [
        "{",
        ' "type" : "ddSketch",',
        ' "name" : <output_name>,',
        ' "fieldName" : <input_name>,',
        ' "relativeError" : <double(0, 1)>,',
        ' "numBins": <int>',
        " }",
        "",
      ].join("\n");

      const QUANTILES_POST_AGGREGATOR_SPEC = [
        "{",
        ' "type" : "quantilesFromDDSketch",',
        ' "name" : <output_name>,',
        ' "field" : <reference to DDSketch>,',
        ' "fractions" : <array of doubles in [0,1]>',
        "}",
        "",
      ].join("\n");

      const QUANTILE_POST_AGGREGATOR_SPEC = [
        "{",
        ' "type" : "quantileFromDDSketch",',
        ' "name" : <output_name>,',
        ' "field" : <reference to DDSketch>,',
        ' "fraction" : <double [0,1]>',
        "}",
        "",
      ].join("\n");

      // The two examples below are tagged as JSON, so they carry no trailing
      // commas: JSON does not allow them and a strict parser rejects them.
      const INGEST_AGGREGATOR_EXAMPLE = [
        "{",
        ' "type": "ddSketch",',
        ' "name": "sketch",',
        ' "fieldName": "value",',
        ' "relativeError": 0.01,',
        ' "numBins": 1000',
        "}",
        "",
      ].join("\n");

      const QUERY_EXAMPLE = [
        "{",
        ' "aggregations": [{',
        ' "type": "ddSketch",',
        ' "name": "sketch",',
        ' "fieldName": "sketch"',
        " }],",
        ' "postAggregations": [',
        " {",
        ' "type": "quantilesFromDDSketch",',
        ' "name": "quantiles",',
        ' "fractions": [0.5, 0.75, 0.9, 0.99],',
        ' "field": {',
        ' "type": "fieldAccess",',
        ' "fieldName": "sketch"',
        " }",
        " }]",
        "}",
        "",
      ].join("\n");

      // ---- Property tables: [property, description, required?] ---------------

      const OUTPUT_NAME_DESCRIPTION = "A String for the output (result) name of the calculation.";

      const AGGREGATOR_PROPERTIES = [
        ["type", 'Must be "ddSketch"', "yes"],
        ["name", OUTPUT_NAME_DESCRIPTION, "yes"],
        [
          "fieldName",
          "A String for the name of the input field (can contain sketches or raw numeric values).",
          "yes",
        ],
        [
          "relativeError",
          "Describes the precision in which to store the sketch. Must be a number between 0 and 1.",
          "no, defaults to 0.01 (1% error)",
        ],
        [
          "numBins",
          "Total number of bins the sketch is allowed to use to describe the distribution. This has a direct impact on max memory used. The more total bins available, the larger the range of accurate quantiles. With relative accuracy of 2%, only 275 bins are required to cover values between 1 millisecond and 1 minute. 800 bins are required to cover values between 1 nanosecond and 1 day.",
          "no, defaults to 1000",
        ],
      ];

      const QUANTILES_PROPERTIES = [
        ["type", 'Must be "quantilesFromDDSketch"', "yes"],
        ["name", OUTPUT_NAME_DESCRIPTION, "yes"],
        ["field", "A computed ddSketch.", "yes"],
        ["fractions", "Array of doubles from 0 to 1 of the quantiles to compute", "yes"],
      ];

      const QUANTILE_PROPERTIES = [
        ["type", 'Must be "quantileFromDDSketch"', "yes"],
        ["name", OUTPUT_NAME_DESCRIPTION, "yes"],
        ["field", "A computed ddSketch.", "yes"],
        ["fraction", "A double from 0 to 1 of the quantile to compute", "yes"],
      ];

      // ---- Element builders ---------------------------------------------------

      /** Creates an element through the MDX runtime's `yg` export (called unbound). */
      const h = (type, props, ...children) => (0, mdx.yg)(type, props, ...children);

      /** Inline code span inside a paragraph. */
      const inlineCode = (text) => h("inlineCode", { parentName: "p" }, text);

      /** Hyperlink inside a paragraph. */
      const link = (href, text) => h("a", { parentName: "p", href: href }, text);

      /** Preformatted code block; `className` is omitted when not given. */
      const codeBlock = (source, className) =>
        h(
          "pre",
          null,
          h("code", className ? { parentName: "pre", className: className } : { parentName: "pre" }, source)
        );

      const headerCell = (label) => h("th", { parentName: "tr", align: null }, label);
      const bodyCell = (content) => h("td", { parentName: "tr", align: null }, content);

      /** Three-column "property / description / required?" table. */
      const propertyTable = (rows) =>
        h(
          "table",
          null,
          h(
            "thead",
            { parentName: "table" },
            h(
              "tr",
              { parentName: "thead" },
              headerCell("property"),
              headerCell("description"),
              headerCell("required?")
            )
          ),
          h(
            "tbody",
            { parentName: "table" },
            ...rows.map((row) =>
              h("tr", { parentName: "tbody" }, bodyCell(row[0]), bodyCell(row[1]), bodyCell(row[2]))
            )
          )
        );

      /**
       * Page component. `props.components` is handed to the layout wrapper as
       * the component mapping; every other prop is forwarded to the wrapper.
       */
      function MDXContent(props) {
        const components = props.components;
        const forwardedProps = (0, withoutPropertiesHelper.A)(props, EXCLUDED_PROPS);

        return h(
          LAYOUT_WRAPPER,
          (0, extendsHelper.A)({}, layoutProps, forwardedProps, {
            components: components,
            mdxType: "MDXLayout",
          }),
          h(
            "p",
            null,
            "This module provides aggregators for approximate quantile queries using the ",
            link("https://github.com/datadog/sketches-java", "DDSketch"),
            " library. The DDSketch library provides a fast, and fully-mergeable quantile sketch with relative error. If the true quantile is 100, a sketch with relative error of 1% guarantees a quantile value between 101 and 99. This is important and highly valuable behavior for long tail distributions. The best use case for these sketches is for accurately describing the upper quantiles of long tailed distributions such as network latencies."
          ),
          h(
            "p",
            null,
            "To use this Apache Druid extension, ",
            link("/docs/latest/configuration/extensions#loading-extensions", "include"),
            " in the extensions load list."
          ),
          codeBlock(LOAD_LIST_SAMPLE),
          h("h3", { id: "aggregator" }, "Aggregator"),
          h(
            "p",
            null,
            "The result of the aggregation is a DDSketch that is the union of all sketches either built from raw data or read from the segments. The single number that is returned represents the total number of included data points. The default aggregator type of ",
            inlineCode("ddSketch"),
            " uses the collapsingLowestDense strategy for storing and merging sketch. This means that in favor of keeping the highest values represented at the highest accuracy, the sketch will collapse and merge lower, smaller values in the sketch. Collapsed bins will lose accuracy guarantees. The default number of bins is 1000. Sketches can only be merged when using the same relativeError values."
          ),
          h(
            "p",
            null,
            "The ",
            inlineCode("ddSketch"),
            " aggregator operates over raw data and precomputed sketches."
          ),
          codeBlock(AGGREGATOR_SPEC, "language-json"),
          propertyTable(AGGREGATOR_PROPERTIES),
          h("h3", { id: "post-aggregators" }, "Post Aggregators"),
          h(
            "p",
            null,
            "To compute approximate quantiles, use ",
            inlineCode("quantilesFromDDSketch"),
            " to query for a set of quantiles or ",
            inlineCode("quantileFromDDSketch"),
            " to query for a single quantile. Call these post-aggregators on the sketches created by the ",
            inlineCode("ddSketch"),
            " aggregators."
          ),
          h("h4", { id: "quantilesfromddsketch" }, "quantilesFromDDSketch"),
          h("p", null, "Use ", inlineCode("quantilesFromDDSketch"), " to fetch multiple quantiles."),
          codeBlock(QUANTILES_POST_AGGREGATOR_SPEC, "language-json"),
          propertyTable(QUANTILES_PROPERTIES),
          h("h4", { id: "quantilefromddsketch" }, "quantileFromDDSketch"),
          h("p", null, "Use ", inlineCode("quantileFromDDSketch"), " to fetch a single quantile."),
          codeBlock(QUANTILE_POST_AGGREGATOR_SPEC, "language-json"),
          propertyTable(QUANTILE_PROPERTIES),
          h("h3", { id: "example" }, "Example"),
          h(
            "p",
            null,
            "As an example of a query with sketches pre-aggregated at ingestion time, one could set up the following aggregator at ingest:"
          ),
          codeBlock(INGEST_AGGREGATOR_EXAMPLE, "language-json"),
          h(
            "p",
            null,
            "Compute quantiles from the pre-aggregated sketches using the following aggregator and post-aggregator."
          ),
          codeBlock(QUERY_EXAMPLE, "language-json")
        );
      }

      MDXContent.isMDXComponent = true;
    },
  },
]);