assets/js/f2fce74c.73e5a2d0.js - druid-website - Git at Google

 /*
  * Generated webpack chunk 9498 (minified build output; edit the source markdown, not this file).
  * The single statement below pushes two modules onto self.webpackChunk:
  *
  * Module 15680 - MDX element-creation runtime. Exports: xA -> u, yg -> h.
  *   n  = module 96540 (presumably React: createContext/useContext/createElement/forwardRef/Fragment are used).
  *   i(e,t,a)  sets e[t]=a; uses Object.defineProperty (enumerable/configurable/writable) when t is already in e.
  *   r(e,t)    returns Object.keys(e) plus own symbol keys (only enumerable symbols when t is truthy).
  *   o(e,...)  merges every following argument into e and returns e (object-spread helper).
  *   l(e,t)    returns a copy of e without the keys listed in array t (enumerable symbol keys included).
  *   s         context object created with {} as default value.
  *   g(e)      reads context s; if e is a function returns e(context), else context merged with e; returns context when e is falsy.
  *   u(e)      renders s.Provider with value g(e.components) around e.children.
  *   m         the prop name "mdxType"; p = fallback components (inlineCode -> "code", wrapper -> Fragment around children).
  *   d         forwardRef component (displayName "MDXCreateElement"); resolves the element type as
  *             components["<parentName>.<mdxType>"] || components[mdxType] || p[mdxType] || originalType.
  *   h(e,t)    createElement wrapper: when e is a string or t.mdxType is set, it renders d with a copy of t's
  *             own properties plus originalType=e and mdxType; otherwise it forwards its arguments to n.createElement.
  *
  * Module 40425 - compiled page "Approximate Histogram aggregators"
  *   (source: @site/docs/latest/development/extensions-core/approximate-histograms.md).
  *   Exports: assets (u), contentTitle (s, undefined), default (h), frontMatter (l), metadata (g), toc (m).
  *   n = module 58168 and i = module 98587; from the call sites n.A looks like an object-assign helper and
  *   i.A like an "object without keys" helper - TODO confirm against those modules.
  *   h(e) strips "components" from its props and renders the whole page through yg with mdxType "MDXLayout";
  *   h.isMDXComponent is set to true.
  *
  * NOTE(review): in this copy the bundle is hard-wrapped, and most wraps fall inside string literals, so the
  *   wrapped text will not parse as-is - confirm against the upstream single-line artifact.
  * NOTE(review): the "type" property row reads "Must `fixedBucketsHistogram`." (missing "be"), and the
  *   quantile/quantiles headings are level 4 while the other post-aggregator headings are level 3 -
  *   looks unintended; fix in the source markdown if confirmed.
  */
 "use strict";(self.webpackChunk=self.webpackChunk||[]).push([[9498],{/* module 15680: MDX createElement runtime (exports xA, yg) */15680:(e,t,a)=>{a.d(t,{xA:()=>u,yg:()=>h});var n=a(96540);function i(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t<arguments.length;t++){var a=null!=arguments[t]?arguments[t]:{};t%2?r(Object(a),!0).forEach((function(t){i(e,t,a[t])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(a)):r(Object(a)).forEach((function(t){Object.defineProperty(e,t,Object.getOwnPropertyDescriptor(a,t))}))}return e}function l(e,t){if(null==e)return{};var a,n,i=function(e,t){if(null==e)return{};var a,n,i={},r=Object.keys(e);for(n=0;n<r.length;n++)a=r[n],t.indexOf(a)>=0||(i[a]=e[a]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n<r.length;n++)a=r[n],t.indexOf(a)>=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(i[a]=e[a])}return i}var s=n.createContext({}),g=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},u=function(e){var t=g(e.components);return n.createElement(s.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var a=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),m=g(a),d=i,h=m["".concat(s,".").concat(d)]||m[d]||p[d]||r;return a?n.createElement(h,o(o({ref:t},u),{},{components:a})):n.createElement(h,o({ref:t},u))}));function h(e,t){var a=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=a.length,o=new Array(r);o[0]=d;var l={};for(var s in 
t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[m]="string"==typeof e?e:i,o[1]=l;for(var g=2;g<r;g++)o[g]=a[g];return n.createElement.apply(null,o)}return n.createElement.apply(null,a)}d.displayName="MDXCreateElement"},/* module 40425: compiled "Approximate Histogram aggregators" page (metadata, toc, default component) */40425:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>u,contentTitle:()=>s,default:()=>h,frontMatter:()=>l,metadata:()=>g,toc:()=>m});var n=a(58168),i=a(98587),r=(a(96540),a(15680)),o=["components"],l={id:"approximate-histograms",title:"Approximate Histogram aggregators"},s=void 0,g={unversionedId:"development/extensions-core/approximate-histograms",id:"development/extensions-core/approximate-histograms",title:"Approximate Histogram aggregators",description:"\x3c!--",source:"@site/docs/latest/development/extensions-core/approximate-histograms.md",sourceDirName:"development/extensions-core",slug:"/development/extensions-core/approximate-histograms",permalink:"/docs/latest/development/extensions-core/approximate-histograms",draft:!1,tags:[],version:"current",frontMatter:{id:"approximate-histograms",title:"Approximate Histogram aggregators"}},u={},m=[{value:"Approximate Histogram aggregator (Deprecated)",id:"approximate-histogram-aggregator-deprecated",level:2},{value:"Creating approximate histogram sketches at ingestion time",id:"creating-approximate-histogram-sketches-at-ingestion-time",level:3},{value:"Fixed Buckets Histogram",id:"fixed-buckets-histogram",level:2},{value:"When to use",id:"when-to-use",level:3},{value:"Properties",id:"properties",level:3},{value:"Outlier handling modes",id:"outlier-handling-modes",level:3},{value:"Output fields",id:"output-fields",level:3},{value:"Ingesting existing histograms",id:"ingesting-existing-histograms",level:3},{value:"Serialization formats",id:"serialization-formats",level:3},{value:"Full serialization format",id:"full-serialization-format",level:4},{value:"Sparse serialization format",id:"sparse-serialization-format",level:4},{value:"Combining histograms with different bucketing 
schemes",id:"combining-histograms-with-different-bucketing-schemes",level:3},{value:"Null handling",id:"null-handling",level:3},{value:"Histogram post-aggregators",id:"histogram-post-aggregators",level:2},{value:"Equal buckets post-aggregator",id:"equal-buckets-post-aggregator",level:3},{value:"Buckets post-aggregator",id:"buckets-post-aggregator",level:3},{value:"Custom buckets post-aggregator",id:"custom-buckets-post-aggregator",level:3},{value:"min post-aggregator",id:"min-post-aggregator",level:3},{value:"max post-aggregator",id:"max-post-aggregator",level:3},{value:"quantile post-aggregator",id:"quantile-post-aggregator",level:4},{value:"quantiles post-aggregator",id:"quantiles-post-aggregator",level:4}],p={toc:m},d="wrapper";function h(e){var t=e.components,a=(0,i.A)(e,o);return(0,r.yg)(d,(0,n.A)({},p,a,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("p",null,"To use this Apache Druid extension, ",(0,r.yg)("a",{parentName:"p",href:"/docs/latest/configuration/extensions#loading-extensions"},"include")," ",(0,r.yg)("inlineCode",{parentName:"p"},"druid-histogram")," in the extensions load list."),(0,r.yg)("p",null,"The ",(0,r.yg)("inlineCode",{parentName:"p"},"druid-histogram")," extension provides an approximate histogram aggregator and a fixed buckets histogram aggregator."),(0,r.yg)("a",{name:"approximate-histogram-aggregator"}),(0,r.yg)("h2",{id:"approximate-histogram-aggregator-deprecated"},"Approximate Histogram aggregator (Deprecated)"),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"}," The Approximate Histogram aggregator is deprecated. 
Please use ",(0,r.yg)("a",{parentName:"p",href:"/docs/latest/development/extensions-core/datasketches-quantiles"},"DataSketches Quantiles")," instead which provides a superior distribution-independent algorithm with formal error guarantees.")),(0,r.yg)("p",null,"This aggregator is based on\n",(0,r.yg)("a",{parentName:"p",href:"http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf"},"http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf"),"\nto compute approximate histograms, with the following modifications:"),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},"some tradeoffs in accuracy were made in the interest of speed (see below)"),(0,r.yg)("li",{parentName:"ul"},"the sketch maintains the exact original data as long as the number of\ndistinct data points is fewer than the resolutions (number of centroids),\nincreasing accuracy when there are few data points, or when dealing with\ndiscrete data points. You can find some of the details in ",(0,r.yg)("a",{parentName:"li",href:"https://metamarkets.com/2013/histograms/"},"this post"),".")),(0,r.yg)("p",null,"Here are a few things to note before using approximate histograms:"),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},"As indicated in the original paper, there are no formal error bounds on the\napproximation. In practice, the approximation gets worse if the distribution\nis skewed."),(0,r.yg)("li",{parentName:"ul"},"The algorithm is order-dependent, so results can vary for the same query, due\nto variations in the order in which results are merged."),(0,r.yg)("li",{parentName:"ul"},"In general, the algorithm only works well if the data that comes is randomly\ndistributed (i.e. if data points end up sorted in a column, approximation\nwill be horrible)"),(0,r.yg)("li",{parentName:"ul"},"We traded accuracy for aggregation speed, taking some shortcuts when adding\nhistograms together, which can lead to pathological cases if your data is\nordered in some way, or if your distribution has long tails. 
It should be\ncheaper to increase the resolution of the sketch to get the accuracy you need.")),(0,r.yg)("p",null,"That being said, those sketches can be useful to get a first order approximation\nwhen averages are not good enough. Assuming most rows in your segment store\nfewer data points than the resolution of histogram, you should be able to use\nthem for monitoring purposes and detect meaningful variations with a few\nhundred centroids. To get good accuracy readings on 95th percentiles with\nmillions of rows of data, you may want to use several thousand centroids,\nespecially with long tails, since that's where the approximation will be worse."),(0,r.yg)("h3",{id:"creating-approximate-histogram-sketches-at-ingestion-time"},"Creating approximate histogram sketches at ingestion time"),(0,r.yg)("p",null,'To use this feature, an "approxHistogram" or "approxHistogramFold" aggregator must be included at\nindexing time. The ingestion aggregator can only apply to numeric values. If you use "approxHistogram"\nthen any input rows missing the value will be considered to have a value of 0, while with "approxHistogramFold"\nsuch rows will be ignored.'),(0,r.yg)("p",null,'To query for results, an "approxHistogramFold" aggregator must be included in the\nquery.'),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{\n  "type" : "approxHistogram or approxHistogramFold (at ingestion time), approxHistogramFold (at query time)",\n  "name" : <output_name>,\n  "fieldName" : <metric_name>,\n  "resolution" : <integer>,\n  "numBuckets" : <integer>,\n  "lowerLimit" : <float>,\n  "upperLimit" : 
<float>\n}\n')),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Property"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Default"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"resolution")),(0,r.yg)("td",{parentName:"tr",align:null},"Number of centroids (data points) to store. The higher the resolution, the more accurate results are, but the slower the computation will be."),(0,r.yg)("td",{parentName:"tr",align:null},"50")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"numBuckets")),(0,r.yg)("td",{parentName:"tr",align:null},"Number of output buckets for the resulting histogram. Bucket intervals are dynamic, based on the range of the underlying data. Use a post-aggregator to have finer control over the bucketing scheme"),(0,r.yg)("td",{parentName:"tr",align:null},"7")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"lowerLimit"),"/",(0,r.yg)("inlineCode",{parentName:"td"},"upperLimit")),(0,r.yg)("td",{parentName:"tr",align:null},"Restrict the approximation to the given range. The values outside this range will be aggregated into two centroids. Counts of values outside this range are still maintained."),(0,r.yg)("td",{parentName:"tr",align:null},"-INF/+INF")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"finalizeAsBase64Binary")),(0,r.yg)("td",{parentName:"tr",align:null},"If true, the finalized aggregator value will be a Base64-encoded byte array containing the serialized form of the histogram. 
If false, the finalized aggregator value will be a JSON representation of the histogram."),(0,r.yg)("td",{parentName:"tr",align:null},"false")))),(0,r.yg)("h2",{id:"fixed-buckets-histogram"},"Fixed Buckets Histogram"),(0,r.yg)("p",null,"The fixed buckets histogram aggregator builds a histogram on a numeric column, with evenly-sized buckets across a specified value range. Values outside of the range are handled based on a user-specified outlier handling mode."),(0,r.yg)("p",null,"This histogram supports the min/max/quantiles post-aggregators but does not support the bucketing post-aggregators."),(0,r.yg)("h3",{id:"when-to-use"},"When to use"),(0,r.yg)("p",null,"The accuracy/usefulness of the fixed buckets histogram is extremely data-dependent; it is provided to support special use cases where the user has a great deal of prior information about the data being aggregated and knows that a fixed buckets implementation is suitable."),(0,r.yg)("p",null,"For general histogram and quantile use cases, the ",(0,r.yg)("a",{parentName:"p",href:"/docs/latest/development/extensions-core/datasketches-quantiles"},"DataSketches Quantiles Sketch")," extension is recommended."),(0,r.yg)("h3",{id:"properties"},"Properties"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Property"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Default"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"type")),(0,r.yg)("td",{parentName:"tr",align:null},"Type of the aggregator. 
Must ",(0,r.yg)("inlineCode",{parentName:"td"},"fixedBucketsHistogram"),"."),(0,r.yg)("td",{parentName:"tr",align:null},"No default, must be specified")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"name")),(0,r.yg)("td",{parentName:"tr",align:null},"Column name for the aggregator."),(0,r.yg)("td",{parentName:"tr",align:null},"No default, must be specified")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"fieldName")),(0,r.yg)("td",{parentName:"tr",align:null},"Column name of the input to the aggregator."),(0,r.yg)("td",{parentName:"tr",align:null},"No default, must be specified")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"lowerLimit")),(0,r.yg)("td",{parentName:"tr",align:null},"Lower limit of the histogram."),(0,r.yg)("td",{parentName:"tr",align:null},"No default, must be specified")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"upperLimit")),(0,r.yg)("td",{parentName:"tr",align:null},"Upper limit of the histogram."),(0,r.yg)("td",{parentName:"tr",align:null},"No default, must be specified")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"numBuckets")),(0,r.yg)("td",{parentName:"tr",align:null},"Number of buckets for the histogram. The range ","[lowerLimit, upperLimit]"," will be divided into ",(0,r.yg)("inlineCode",{parentName:"td"},"numBuckets")," intervals of equal size."),(0,r.yg)("td",{parentName:"tr",align:null},"10")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"outlierHandlingMode")),(0,r.yg)("td",{parentName:"tr",align:null},"Specifies how values outside of ","[lowerLimit, upperLimit]",' will be handled. 
Supported modes are "ignore", "overflow", and "clip". See ',(0,r.yg)("a",{parentName:"td",href:"#outlier-handling-modes"},"outlier handling modes")," for more details."),(0,r.yg)("td",{parentName:"tr",align:null},"No default, must be specified")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"finalizeAsBase64Binary")),(0,r.yg)("td",{parentName:"tr",align:null},"If true, the finalized aggregator value will be a Base64-encoded byte array containing the ",(0,r.yg)("a",{parentName:"td",href:"#serialization-formats"},"serialized form")," of the histogram. If false, the finalized aggregator value will be a JSON representation of the histogram."),(0,r.yg)("td",{parentName:"tr",align:null},"false")))),(0,r.yg)("p",null,"An example aggregator spec is shown below:"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{\n  "type" : "fixedBucketsHistogram",\n  "name" : <output_name>,\n  "fieldName" : <metric_name>,\n  "numBuckets" : <integer>,\n  "lowerLimit" : <double>,\n  "upperLimit" : <double>,\n  "outlierHandlingMode": <mode>\n}\n')),(0,r.yg)("h3",{id:"outlier-handling-modes"},"Outlier handling modes"),(0,r.yg)("p",null,"The outlier handling mode specifies what should be done with values outside of the histogram's range. 
There are three supported modes:"),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"ignore"),": Throw away outlier values."),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"overflow"),": A count of outlier values will be tracked by the histogram, available in the ",(0,r.yg)("inlineCode",{parentName:"li"},"lowerOutlierCount")," and ",(0,r.yg)("inlineCode",{parentName:"li"},"upperOutlierCount")," fields."),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"clip"),": Outlier values will be clipped to the ",(0,r.yg)("inlineCode",{parentName:"li"},"lowerLimit")," or the ",(0,r.yg)("inlineCode",{parentName:"li"},"upperLimit")," and included in the histogram.")),(0,r.yg)("p",null,"If you don't care about outliers, ",(0,r.yg)("inlineCode",{parentName:"p"},"ignore")," is the cheapest option performance-wise. There is currently no difference in storage size among the modes."),(0,r.yg)("h3",{id:"output-fields"},"Output fields"),(0,r.yg)("p",null,"The histogram aggregator's output object has the following fields:"),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"lowerLimit"),": Lower limit of the histogram"),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"upperLimit"),": Upper limit of the histogram"),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"numBuckets"),": Number of histogram buckets"),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"outlierHandlingMode"),": Outlier handling mode"),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"count"),": Total number of values contained in the histogram, excluding outliers"),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"lowerOutlierCount"),": Count of outlier values below ",(0,r.yg)("inlineCode",{parentName:"li"},"lowerLimit"),". 
Only used if the outlier mode is ",(0,r.yg)("inlineCode",{parentName:"li"},"overflow"),"."),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"upperOutlierCount"),": Count of outlier values above ",(0,r.yg)("inlineCode",{parentName:"li"},"upperLimit"),". Only used if the outlier mode is ",(0,r.yg)("inlineCode",{parentName:"li"},"overflow"),"."),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"missingValueCount"),": Count of null values seen by the histogram."),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"max"),": Max value seen by the histogram. This does not include outlier values."),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"min"),": Min value seen by the histogram. This does not include outlier values."),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("inlineCode",{parentName:"li"},"histogram"),": An array of longs with size ",(0,r.yg)("inlineCode",{parentName:"li"},"numBuckets"),", containing the bucket counts")),(0,r.yg)("h3",{id:"ingesting-existing-histograms"},"Ingesting existing histograms"),(0,r.yg)("p",null,'It is also possible to ingest existing fixed buckets histograms. The input must be a Base64 string encoding a byte array that contains a serialized histogram object. Both "full" and "sparse" formats can be used. 
Please see ',(0,r.yg)("a",{parentName:"p",href:"#serialization-formats"},"Serialization formats")," below for details."),(0,r.yg)("h3",{id:"serialization-formats"},"Serialization formats"),(0,r.yg)("h4",{id:"full-serialization-format"},"Full serialization format"),(0,r.yg)("p",null,"This format includes the full histogram bucket count array in the serialization format."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre"},"byte: serialization version, must be 0x01\nbyte: encoding mode, 0x01 for full\ndouble: lowerLimit\ndouble: upperLimit\nint: numBuckets\nbyte: outlier handling mode (0x00 for `ignore`, 0x01 for `overflow`, and 0x02 for `clip`)\nlong: count, total number of values contained in the histogram, excluding outliers\nlong: lowerOutlierCount\nlong: upperOutlierCount\nlong: missingValueCount\ndouble: max\ndouble: min\narray of longs: bucket counts for the histogram\n")),(0,r.yg)("h4",{id:"sparse-serialization-format"},"Sparse serialization format"),(0,r.yg)("p",null,"This format represents the histogram bucket counts as (bucketNum, count) pairs. 
This serialization format is used when less than half of the histogram's buckets have values."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre"},"byte: serialization version, must be 0x01\nbyte: encoding mode, 0x02 for sparse\ndouble: lowerLimit\ndouble: upperLimit\nint: numBuckets\nbyte: outlier handling mode (0x00 for `ignore`, 0x01 for `overflow`, and 0x02 for `clip`)\nlong: count, total number of values contained in the histogram, excluding outliers\nlong: lowerOutlierCount\nlong: upperOutlierCount\nlong: missingValueCount\ndouble: max\ndouble: min\nint: number of following (bucketNum, count) pairs\nsequence of (int, long) pairs:\n  int: bucket number\n  count: bucket count\n")),(0,r.yg)("h3",{id:"combining-histograms-with-different-bucketing-schemes"},"Combining histograms with different bucketing schemes"),(0,r.yg)("p",null,"It is possible to combine two histograms with different bucketing schemes (lowerLimit, upperLimit, numBuckets) together."),(0,r.yg)("p",null,'The bucketing scheme of the "left hand" histogram will be preserved (i.e., when running a query, the bucketing schemes specified in the query\'s histogram aggregators will be preserved).'),(0,r.yg)("p",null,'When merging, we assume that values are evenly distributed within the buckets of the "right hand" histogram.'),(0,r.yg)("p",null,"When the right-hand histogram contains outliers (when using ",(0,r.yg)("inlineCode",{parentName:"p"},"overflow")," mode), we assume that all of the outliers counted in the right-hand histogram will be outliers in the left-hand histogram as well."),(0,r.yg)("p",null,"For performance and accuracy reasons, we recommend avoiding aggregation of histograms with different bucketing schemes if possible."),(0,r.yg)("h3",{id:"null-handling"},"Null handling"),(0,r.yg)("p",null,"If ",(0,r.yg)("inlineCode",{parentName:"p"},"druid.generic.useDefaultValueForNull")," is false, null values will be tracked in the ",(0,r.yg)("inlineCode",{parentName:"p"},"missingValueCount")," 
field of the histogram."),(0,r.yg)("p",null,"If ",(0,r.yg)("inlineCode",{parentName:"p"},"druid.generic.useDefaultValueForNull")," is true, null values will be added to the histogram as the default 0.0 value."),(0,r.yg)("h2",{id:"histogram-post-aggregators"},"Histogram post-aggregators"),(0,r.yg)("p",null,"Post-aggregators are used to transform opaque approximate histogram sketches\ninto bucketed histogram representations, as well as to compute various\ndistribution metrics such as quantiles, min, and max."),(0,r.yg)("h3",{id:"equal-buckets-post-aggregator"},"Equal buckets post-aggregator"),(0,r.yg)("p",null,"Computes a visual representation of the approximate histogram with a given number of equal-sized bins.\nBucket intervals are based on the range of the underlying data. This aggregator is not supported for the fixed buckets histogram."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{\n  "type": "equalBuckets",\n  "name": "<output_name>",\n  "fieldName": "<aggregator_name>",\n  "numBuckets": <count>\n}\n')),(0,r.yg)("h3",{id:"buckets-post-aggregator"},"Buckets post-aggregator"),(0,r.yg)("p",null,"Computes a visual representation given an initial breakpoint, offset, and a bucket size."),(0,r.yg)("p",null,"Bucket size determines the width of the binning interval."),(0,r.yg)("p",null,"Offset determines the value on which those interval bins align."),(0,r.yg)("p",null,"This aggregator is not supported for the fixed buckets histogram."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{\n  "type": "buckets",\n  "name": "<output_name>",\n  "fieldName": "<aggregator_name>",\n  "bucketSize": <bucket_size>,\n  "offset": <offset>\n}\n')),(0,r.yg)("h3",{id:"custom-buckets-post-aggregator"},"Custom buckets post-aggregator"),(0,r.yg)("p",null,"Computes a visual representation of the approximate histogram with bins laid out according to the given breaks."),(0,r.yg)("p",null,"This aggregator is not supported 
for the fixed buckets histogram."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{ "type" : "customBuckets", "name" : <output_name>, "fieldName" : <aggregator_name>,\n  "breaks" : [ <value>, <value>, ... ] }\n')),(0,r.yg)("h3",{id:"min-post-aggregator"},"min post-aggregator"),(0,r.yg)("p",null,"Returns the minimum value of the underlying approximate or fixed buckets histogram aggregator"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{ "type" : "min", "name" : <output_name>, "fieldName" : <aggregator_name> }\n')),(0,r.yg)("h3",{id:"max-post-aggregator"},"max post-aggregator"),(0,r.yg)("p",null,"Returns the maximum value of the underlying approximate or fixed buckets histogram aggregator"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{ "type" : "max", "name" : <output_name>, "fieldName" : <aggregator_name> }\n')),(0,r.yg)("h4",{id:"quantile-post-aggregator"},"quantile post-aggregator"),(0,r.yg)("p",null,"Computes a single quantile based on the underlying approximate or fixed buckets histogram aggregator"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{ "type" : "quantile", "name" : <output_name>, "fieldName" : <aggregator_name>,\n  "probability" : <quantile> }\n')),(0,r.yg)("h4",{id:"quantiles-post-aggregator"},"quantiles post-aggregator"),(0,r.yg)("p",null,"Computes an array of quantiles based on the underlying approximate or fixed buckets histogram aggregator"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'{ "type" : "quantiles", "name" : <output_name>, "fieldName" : <aggregator_name>,\n  "probabilities" : [ <quantile>, <quantile>, ... ] }\n')))}h.isMDXComponent=!0}}]);