| "use strict";(self.webpackChunk=self.webpackChunk||[]).push([[5837],{28453:(e,t,n)=>{n.d(t,{R:()=>a,x:()=>i});var s=n(96540);const r={},o=s.createContext(r);function a(e){const t=s.useContext(o);return s.useMemo((function(){return"function"==typeof e?e(t):{...t,...e}}),[t,e])}function i(e){let t;return t=e.disableParentContext?"function"==typeof e.components?e.components(r):e.components||r:a(e.components),s.createElement(o.Provider,{value:t},e.children)}},78208:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>c,contentTitle:()=>i,default:()=>h,frontMatter:()=>a,metadata:()=>s,toc:()=>l});const s=JSON.parse('{"id":"development/extensions-contrib/momentsketch-quantiles","title":"Moment Sketches for Approximate Quantiles module","description":"\x3c!--","source":"@site/docs/33.0.0/development/extensions-contrib/momentsketch-quantiles.md","sourceDirName":"development/extensions-contrib","slug":"/development/extensions-contrib/momentsketch-quantiles","permalink":"/docs/33.0.0/development/extensions-contrib/momentsketch-quantiles","draft":false,"unlisted":false,"tags":[],"version":"current","frontMatter":{"id":"momentsketch-quantiles","title":"Moment Sketches for Approximate Quantiles module"}}');var r=n(74848),o=n(28453);const a={id:"momentsketch-quantiles",title:"Moment Sketches for Approximate Quantiles module"},i=void 0,c={},l=[{value:"Aggregator",id:"aggregator",level:3},{value:"Post Aggregators",id:"post-aggregators",level:3},{value:"Example",id:"example",level:3}];function d(e){const t={a:"a",code:"code",h3:"h3",p:"p",pre:"pre",table:"table",tbody:"tbody",td:"td",th:"th",thead:"thead",tr:"tr",...(0,o.R)(),...e.components};return(0,r.jsxs)(r.Fragment,{children:[(0,r.jsxs)(t.p,{children:["This module provides aggregators for approximate quantile queries using the ",(0,r.jsx)(t.a,{href:"https://github.com/stanford-futuredata/momentsketch",children:"momentsketch"})," library.\nThe momentsketch provides coarse quantile estimates with less space and aggregation time overheads than traditional sketches, approaching the performance of counts and sums by reconstructing distributions from computed statistics."]}),"\n",(0,r.jsxs)(t.p,{children:["To use this Apache Druid extension, ",(0,r.jsx)(t.a,{href:"/docs/33.0.0/configuration/extensions#loading-extensions",children:"include"})," in the extensions load list."]}),"\n",(0,r.jsx)(t.h3,{id:"aggregator",children:"Aggregator"}),"\n",(0,r.jsx)(t.p,{children:"The result of the aggregation is a momentsketch that is the union of all sketches either built from raw data or read from the segments."}),"\n",(0,r.jsxs)(t.p,{children:["The ",(0,r.jsx)(t.code,{children:"momentSketch"})," aggregator operates over raw data while the ",(0,r.jsx)(t.code,{children:"momentSketchMerge"})," aggregator should be used when aggregating precomputed sketches."]}),"\n",(0,r.jsx)(t.pre,{children:(0,r.jsx)(t.code,{className:"language-json",children:'{\n "type" : <aggregator_type>,\n "name" : <output_name>,\n "fieldName" : <input_name>,\n "k" : <int>,\n "compress" : <boolean>\n }\n'})}),"\n",(0,r.jsxs)(t.table,{children:[(0,r.jsx)(t.thead,{children:(0,r.jsxs)(t.tr,{children:[(0,r.jsx)(t.th,{children:"property"}),(0,r.jsx)(t.th,{children:"description"}),(0,r.jsx)(t.th,{children:"required?"})]})}),(0,r.jsxs)(t.tbody,{children:[(0,r.jsxs)(t.tr,{children:[(0,r.jsx)(t.td,{children:"type"}),(0,r.jsx)(t.td,{children:'Type of aggregator desired. Either "momentSketch" or "momentSketchMerge"'}),(0,r.jsx)(t.td,{children:"yes"})]}),(0,r.jsxs)(t.tr,{children:[(0,r.jsx)(t.td,{children:"name"}),(0,r.jsx)(t.td,{children:"A String for the output (result) name of the calculation."}),(0,r.jsx)(t.td,{children:"yes"})]}),(0,r.jsxs)(t.tr,{children:[(0,r.jsx)(t.td,{children:"fieldName"}),(0,r.jsx)(t.td,{children:"A String for the name of the input field (can contain sketches or raw numeric values)."}),(0,r.jsx)(t.td,{children:"yes"})]}),(0,r.jsxs)(t.tr,{children:[(0,r.jsx)(t.td,{children:"k"}),(0,r.jsx)(t.td,{children:"Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Usable range is generally [3,15]"}),(0,r.jsx)(t.td,{children:"no, defaults to 13."})]}),(0,r.jsxs)(t.tr,{children:[(0,r.jsx)(t.td,{children:"compress"}),(0,r.jsx)(t.td,{children:"Flag for whether the aggregator compresses numeric values using arcsinh. Can improve robustness to skewed and long-tailed distributions, but reduces accuracy slightly on more uniform distributions."}),(0,r.jsx)(t.td,{children:"no, defaults to true"})]})]})]}),"\n",(0,r.jsx)(t.h3,{id:"post-aggregators",children:"Post Aggregators"}),"\n",(0,r.jsxs)(t.p,{children:["Users can query for a set of quantiles using the ",(0,r.jsx)(t.code,{children:"momentSketchSolveQuantiles"})," post-aggregator on the sketches created by the ",(0,r.jsx)(t.code,{children:"momentSketch"})," or ",(0,r.jsx)(t.code,{children:"momentSketchMerge"})," aggregators."]}),"\n",(0,r.jsx)(t.pre,{children:(0,r.jsx)(t.code,{className:"language-json",children:'{\n "type" : "momentSketchSolveQuantiles",\n "name" : <output_name>,\n "field" : <reference to moment sketch>,\n "fractions" : <array of doubles in [0,1]>\n}\n'})}),"\n",(0,r.jsx)(t.p,{children:"Users can also query for the min/max of a distribution:"}),"\n",(0,r.jsx)(t.pre,{children:(0,r.jsx)(t.code,{className:"language-json",children:'{\n "type" : "momentSketchMin" | "momentSketchMax",\n "name" : <output_name>,\n "field" : <reference to moment sketch>,\n}\n'})}),"\n",(0,r.jsx)(t.h3,{id:"example",children:"Example"}),"\n",(0,r.jsx)(t.p,{children:"As an example of a query with sketches pre-aggregated at ingestion time, one could set up the following aggregator at ingest:"}),"\n",(0,r.jsx)(t.pre,{children:(0,r.jsx)(t.code,{className:"language-json",children:'{\n "type": "momentSketch",\n "name": "sketch",\n "fieldName": "value",\n "k": 10,\n "compress": true,\n}\n'})}),"\n",(0,r.jsx)(t.p,{children:"and make queries using the following aggregator + post-aggregator:"}),"\n",(0,r.jsx)(t.pre,{children:(0,r.jsx)(t.code,{className:"language-json",children:'{\n "aggregations": [{\n "type": "momentSketchMerge",\n "name": "sketch",\n "fieldName": "sketch",\n "k": 10,\n "compress": true\n }],\n "postAggregations": [\n {\n "type": "momentSketchSolveQuantiles",\n "name": "quantiles",\n "fractions": [0.1, 0.5, 0.9],\n "field": {\n "type": "fieldAccess",\n "fieldName": "sketch"\n }\n },\n {\n "type": "momentSketchMin",\n "name": "min",\n "field": {\n "type": "fieldAccess",\n "fieldName": "sketch"\n }\n }]\n}\n'})})]})}function h(e={}){const{wrapper:t}={...(0,o.R)(),...e.components};return t?(0,r.jsx)(t,{...e,children:(0,r.jsx)(d,{...e})}):d(e)}}}]); |