blob: 948bed92ef8a117059bdf7185f5e4a673324f676 [file] [log] [blame]
"use strict";
/**
 * Webpack chunk 4724: registers two module factories on the shared
 * `webpackChunk` array.
 *
 *   - 3905:  a small MDX element runtime exporting `Zo` (components provider)
 *            and `kt` (createElement wrapper).
 *   - 29414: the compiled page "Druid schema model"
 *            (source: @site/docs/27.0.0/ingestion/schema-model.md).
 *
 * Everything lives inside an IIFE so that no top-level bindings leak into the
 * global scope shared by other chunk scripts. The registry object is `self`
 * when it exists, otherwise `globalThis`.
 */
((root) => {
  (root.webpackChunk = root.webpackChunk || []).push([
    [4724],
    {
      /**
       * MDX element runtime.
       *
       * Module 67294 is used through createContext/useContext/createElement/
       * forwardRef/Fragment -- presumably React; confirm against the bundle.
       */
      3905: (module, exports, webpackRequire) => {
        webpackRequire.d(exports, {
          Zo: () => MDXProvider,
          kt: () => createMdxElement,
        });

        const React = webpackRequire(67294);

        /** Sets `target[key]`, redefining the property when the key already exists. */
        const defineEnumerable = (target, key, value) => {
          if (key in target) {
            Object.defineProperty(target, key, {
              value,
              enumerable: true,
              configurable: true,
              writable: true,
            });
          } else {
            target[key] = value;
          }
          return target;
        };

        /** Own string keys plus own symbols (optionally only enumerable symbols). */
        const ownKeys = (object, enumerableOnly) => {
          const keys = Object.keys(object);
          if (Object.getOwnPropertySymbols) {
            let symbols = Object.getOwnPropertySymbols(object);
            if (enumerableOnly) {
              symbols = symbols.filter(
                (symbol) => Object.getOwnPropertyDescriptor(object, symbol).enumerable
              );
            }
            keys.push.apply(keys, symbols);
          }
          return keys;
        };

        /**
         * Copies every following argument onto `target` and returns `target`.
         * Odd-positioned sources are copied by value; even-positioned sources
         * are copied by property descriptor.
         */
        function mergeProps(target) {
          for (let index = 1; index < arguments.length; index++) {
            const source = arguments[index] != null ? arguments[index] : {};
            if (index % 2) {
              ownKeys(Object(source), true).forEach((key) => {
                defineEnumerable(target, key, source[key]);
              });
            } else if (Object.getOwnPropertyDescriptors) {
              Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
            } else {
              ownKeys(Object(source)).forEach((key) => {
                Object.defineProperty(
                  target,
                  key,
                  Object.getOwnPropertyDescriptor(source, key)
                );
              });
            }
          }
          return target;
        }

        /** Shallow copy of `source` without the keys listed in `excludedKeys`. */
        const omitProps = (source, excludedKeys) => {
          if (source == null) {
            return {};
          }
          const result = {};
          Object.keys(source).forEach((key) => {
            if (excludedKeys.indexOf(key) < 0) {
              result[key] = source[key];
            }
          });
          if (Object.getOwnPropertySymbols) {
            Object.getOwnPropertySymbols(source).forEach((symbol) => {
              if (
                excludedKeys.indexOf(symbol) < 0 &&
                Object.prototype.propertyIsEnumerable.call(source, symbol)
              ) {
                result[symbol] = source[symbol];
              }
            });
          }
          return result;
        };

        const MDXContext = React.createContext({});

        /**
         * Resolves the component map: the context value, optionally replaced by
         * the result of a function or extended by an object of overrides.
         */
        const useMDXComponents = (components) => {
          const contextComponents = React.useContext(MDXContext);
          if (!components) {
            return contextComponents;
          }
          return typeof components === "function"
            ? components(contextComponents)
            : mergeProps(mergeProps({}, contextComponents), components);
        };

        /** Provider that publishes the resolved component map to descendants. */
        function MDXProvider(props) {
          const allComponents = useMDXComponents(props.components);
          return React.createElement(
            MDXContext.Provider,
            { value: allComponents },
            props.children
          );
        }

        const TYPE_PROP_NAME = "mdxType";

        // Fallbacks used when neither props nor context supply a component.
        const DEFAULT_COMPONENTS = {
          inlineCode: "code",
          wrapper: function (props) {
            return React.createElement(React.Fragment, {}, props.children);
          },
        };

        // Picks the concrete component for an MDX node. Lookup order:
        // "<parentName>.<mdxType>", then "<mdxType>", then the defaults above,
        // then the original element type.
        const MDXCreateElement = React.forwardRef(function (props, ref) {
          const propComponents = props.components;
          const mdxType = props.mdxType;
          const originalType = props.originalType;
          const parentName = props.parentName;
          const passThrough = omitProps(props, [
            "components",
            "mdxType",
            "originalType",
            "parentName",
          ]);
          const components = useMDXComponents(propComponents);
          const Component =
            components["".concat(parentName, ".").concat(mdxType)] ||
            components[mdxType] ||
            DEFAULT_COMPONENTS[mdxType] ||
            originalType;

          if (propComponents) {
            return React.createElement(
              Component,
              mergeProps(mergeProps({ ref: ref }, passThrough), {}, {
                components: propComponents,
              })
            );
          }
          return React.createElement(Component, mergeProps({ ref: ref }, passThrough));
        });

        /**
         * createElement wrapper. String types, and any type whose props carry
         * `mdxType`, are routed through MDXCreateElement with `originalType`
         * and `mdxType` recorded on the props; everything else is forwarded
         * unchanged.
         *
         * `arguments` is kept (rather than rest parameters) so the exact
         * argument list is forwarded and the function's arity stays 2.
         */
        function createMdxElement(type, props) {
          const args = arguments;
          const mdxType = props && props.mdxType;

          if (typeof type === "string" || mdxType) {
            const argCount = args.length;
            const createElementArgs = new Array(argCount);
            createElementArgs[0] = MDXCreateElement;

            const wrappedProps = {};
            for (const key in props) {
              // Explicit prototype lookup instead of relying on a global
              // `hasOwnProperty` binding.
              if (Object.prototype.hasOwnProperty.call(props, key)) {
                wrappedProps[key] = props[key];
              }
            }
            wrappedProps.originalType = type;
            wrappedProps[TYPE_PROP_NAME] = typeof type === "string" ? type : mdxType;
            createElementArgs[1] = wrappedProps;

            for (let i = 2; i < argCount; i++) {
              createElementArgs[i] = args[i];
            }
            return React.createElement.apply(null, createElementArgs);
          }
          return React.createElement.apply(null, args);
        }

        MDXCreateElement.displayName = "MDXCreateElement";
      },

      /**
       * Compiled MDX page: "Druid schema model".
       * Exports: assets, contentTitle, default (the page component),
       * frontMatter, metadata, toc.
       */
      29414: (module, exports, webpackRequire) => {
        webpackRequire.r(exports);
        webpackRequire.d(exports, {
          assets: () => assets,
          contentTitle: () => contentTitle,
          default: () => MDXContent,
          frontMatter: () => frontMatter,
          metadata: () => metadata,
          toc: () => toc,
        });

        // Load order is kept as in the generated chunk.
        // NOTE(review): 87462 / 63366 look like "extends" and
        // "object-without-properties" helpers judging by how `.Z` is called
        // below -- confirm against the bundle.
        const extendsHelper = webpackRequire(87462);
        const omitHelper = webpackRequire(63366);
        webpackRequire(67294); // loaded for its side effects only; the export is unused here
        const mdxRuntime = webpackRequire(3905);

        const EXCLUDED_PROPS = ["components"];

        const frontMatter = {
          id: "schema-model",
          title: "Druid schema model",
          sidebar_label: "Schema model",
          description: "Introduces concepts of datasources, primary timestamp, dimensions, and metrics.",
        };

        const contentTitle = undefined;

        const metadata = {
          unversionedId: "ingestion/schema-model",
          id: "ingestion/schema-model",
          title: "Druid schema model",
          description: "Introduces concepts of datasources, primary timestamp, dimensions, and metrics.",
          source: "@site/docs/27.0.0/ingestion/schema-model.md",
          sourceDirName: "ingestion",
          slug: "/ingestion/schema-model",
          permalink: "/docs/27.0.0/ingestion/schema-model",
          draft: false,
          tags: [],
          version: "current",
          frontMatter: {
            id: "schema-model",
            title: "Druid schema model",
            sidebar_label: "Schema model",
            description: "Introduces concepts of datasources, primary timestamp, dimensions, and metrics.",
          },
          sidebar: "docs",
          previous: { title: "Input sources", permalink: "/docs/27.0.0/ingestion/input-sources" },
          next: { title: "Rollup", permalink: "/docs/27.0.0/ingestion/rollup" },
        };

        const assets = {};

        const toc = [
          { value: "Primary timestamp", id: "primary-timestamp", level: 2 },
          { value: "Dimensions", id: "dimensions", level: 2 },
          { value: "Metrics", id: "metrics", level: 2 },
        ];

        const layoutProps = { toc: toc };
        const MDX_LAYOUT = "wrapper";

        /**
         * Page component. `props.components` is handed to the layout wrapper;
         * all remaining props are spread onto the wrapper as well.
         */
        function MDXContent(props) {
          const components = props.components;
          // Helpers are detached before calling so they run without a `this`,
          // exactly like the generated `(0, helper.Z)(...)` calls.
          const omit = omitHelper.Z;
          const extend = extendsHelper.Z;
          const mdx = mdxRuntime.kt;
          const rest = omit(props, EXCLUDED_PROPS);

          return mdx(
            MDX_LAYOUT,
            extend({}, layoutProps, rest, { components: components, mdxType: "MDXLayout" }),
            mdx(
              "p",
              null,
              "Druid stores data in datasources, which are similar to tables in a traditional relational database management system (RDBMS). Druid's data model shares similarities with both relational and timeseries data models."
            ),
            mdx("h2", { id: "primary-timestamp" }, "Primary timestamp"),
            mdx(
              "p",
              null,
              "Druid schemas must always include a primary timestamp. Druid uses the primary timestamp to ",
              mdx("a", { parentName: "p", href: "/docs/27.0.0/ingestion/partitioning" }, "partition and sort"),
              " your data. Druid uses the primary timestamp to rapidly identify and retrieve data within the time range of queries. Druid also uses the primary timestamp column\nfor time-based ",
              mdx("a", { parentName: "p", href: "/docs/27.0.0/data-management/" }, "data management operations"),
              " such as dropping time chunks, overwriting time chunks, and time-based retention rules."
            ),
            mdx(
              "p",
              null,
              "Druid parses the primary timestamp based on the ",
              mdx(
                "a",
                { parentName: "p", href: "/docs/27.0.0/ingestion/ingestion-spec#timestampspec" },
                mdx("inlineCode", { parentName: "a" }, "timestampSpec")
              ),
              " configuration at ingestion time. Regardless of the source field for the primary timestamp, Druid always stores the timestamp in the ",
              mdx("inlineCode", { parentName: "p" }, "__time"),
              " column in your Druid datasource."
            ),
            mdx(
              "p",
              null,
              "You can control other important operations that are based on the primary timestamp in the\n",
              mdx(
                "a",
                { parentName: "p", href: "/docs/27.0.0/ingestion/ingestion-spec#granularityspec" },
                mdx("inlineCode", { parentName: "a" }, "granularitySpec")
              ),
              ". If you have more than one timestamp column, you can store the others as\n",
              mdx(
                "a",
                { parentName: "p", href: "/docs/27.0.0/ingestion/schema-design#secondary-timestamps" },
                "secondary timestamps"
              ),
              "."
            ),
            mdx("h2", { id: "dimensions" }, "Dimensions"),
            mdx(
              "p",
              null,
              'Dimensions are columns that Druid stores "as-is". You can use dimensions for any purpose. For example, you can group, filter, or apply aggregators to dimensions at query time when necessary.'
            ),
            mdx(
              "p",
              null,
              "If you disable ",
              mdx("a", { parentName: "p", href: "/docs/27.0.0/ingestion/rollup" }, "rollup"),
              ", then Druid treats the set of\ndimensions like a set of columns to ingest. The dimensions behave exactly as you would expect from any database that does not support a rollup feature."
            ),
            mdx(
              "p",
              null,
              "At ingestion time, you configure dimensions in the ",
              mdx(
                "a",
                { parentName: "p", href: "/docs/27.0.0/ingestion/ingestion-spec#dimensionsspec" },
                mdx("inlineCode", { parentName: "a" }, "dimensionsSpec")
              ),
              "."
            ),
            mdx("h2", { id: "metrics" }, "Metrics"),
            mdx(
              "p",
              null,
              "Metrics are columns that Druid stores in an aggregated form. Metrics are most useful when you enable ",
              mdx("a", { parentName: "p", href: "/docs/27.0.0/ingestion/rollup" }, "rollup"),
              ". If you specify a metric, you can apply an aggregation function to each row during ingestion. This\nhas the following benefits:"
            ),
            // NOTE(review): this paragraph sits between "has the following
            // benefits:" and the list it introduces, and ends with an
            // unmatched double quote. It is reproduced verbatim here; the fix
            // belongs in the Markdown source this chunk was generated from.
            mdx(
              "p",
              null,
              'Rollup is a form of aggregation that collapses dimensions while aggregating the values in the metrics, that is, it collapses rows but retains its summary information."'
            ),
            mdx(
              "ul",
              null,
              mdx(
                "li",
                { parentName: "ul" },
                mdx(
                  "p",
                  { parentName: "li" },
                  mdx("a", { parentName: "p", href: "/docs/27.0.0/ingestion/rollup" }, "Rollup"),
                  " is a form of aggregation that combines multiple rows with the same timestamp value and dimension values. For example, the ",
                  mdx("a", { parentName: "p", href: "/docs/27.0.0/tutorials/tutorial-rollup" }, "rollup tutorial"),
                  " demonstrates using rollup to collapse netflow data to a single row per ",
                  mdx("inlineCode", { parentName: "p" }, "(minute, srcIP, dstIP)"),
                  " tuple, while retaining aggregate information about total packet and byte counts."
                )
              ),
              mdx(
                "li",
                { parentName: "ul" },
                mdx(
                  "p",
                  { parentName: "li" },
                  "Druid can compute some aggregators, especially approximate ones, more quickly at query time if they are partially computed at ingestion time, including data that has not been rolled up."
                ),
                mdx(
                  "p",
                  { parentName: "li" },
                  "At ingestion time, you configure Metrics in the ",
                  mdx(
                    "a",
                    { parentName: "p", href: "/docs/27.0.0/ingestion/ingestion-spec#metricsspec" },
                    mdx("inlineCode", { parentName: "a" }, "metricsSpec")
                  ),
                  "."
                )
              )
            )
          );
        }

        MDXContent.isMDXComponent = true;
      },
    },
  ]);
})(typeof self !== "undefined" ? self : globalThis);