blob: d10fb8316a91bf7c6ad1260585ad0b930faccb0e [file] [log] [blame]
"use strict";
/*
 * Webpack chunk 7678.
 *
 * Registers two modules on the shared `self.webpackChunk` queue:
 *   - 3905:  a small MDX/React runtime that exports `Zo` (a context provider)
 *            and `kt` (an MDX-aware wrapper around React.createElement).
 *   - 21657: the compiled "Druid schema model" documentation page
 *            (front matter, metadata, table of contents and the page component).
 *
 * Every module factory receives webpack's (module, exports, require) triple.
 */
(self.webpackChunk = self.webpackChunk || []).push([
  [7678],
  {
    /* ------------------------------------------------------------------ *
     * Module 3905: MDX runtime (exports `Zo` and `kt`).
     * ------------------------------------------------------------------ */
    3905: (module, exports, require) => {
      require.d(exports, { Zo: () => MDXProvider, kt: () => createElement });

      var React = require(67294);

      /**
       * Sets `obj[key] = value`. When the key is already visible on the
       * object (own or inherited) it is redefined as an enumerable,
       * configurable, writable own data property instead of being assigned.
       * Returns `obj`.
       */
      function defineProperty(obj, key, value) {
        if (key in obj) {
          Object.defineProperty(obj, key, {
            value: value,
            enumerable: true,
            configurable: true,
            writable: true,
          });
        } else {
          obj[key] = value;
        }
        return obj;
      }

      /**
       * Lists the own string keys of `object` followed by its own symbol keys.
       * With `enumerableOnly` set, non-enumerable symbols are filtered out.
       */
      function ownKeys(object, enumerableOnly) {
        var keys = Object.keys(object);
        if (Object.getOwnPropertySymbols) {
          var symbols = Object.getOwnPropertySymbols(object);
          if (enumerableOnly) {
            symbols = symbols.filter(function (symbol) {
              return Object.getOwnPropertyDescriptor(object, symbol).enumerable;
            });
          }
          keys.push.apply(keys, symbols);
        }
        return keys;
      }

      /**
       * Object-spread helper: merges every argument after `target` into
       * `target` and returns it. Odd-positioned sources are copied by value;
       * even-positioned sources are copied by property descriptor.
       */
      function objectSpread(target) {
        for (var index = 1; index < arguments.length; index++) {
          var source = null != arguments[index] ? arguments[index] : {};
          if (index % 2) {
            ownKeys(Object(source), true).forEach(function (key) {
              defineProperty(target, key, source[key]);
            });
          } else if (Object.getOwnPropertyDescriptors) {
            Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
          } else {
            ownKeys(Object(source)).forEach(function (key) {
              Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
            });
          }
        }
        return target;
      }

      /**
       * Returns a shallow copy of `source` without the keys listed in
       * `excluded`. Own enumerable symbol keys are copied as well.
       * A null/undefined `source` yields an empty object.
       */
      function objectWithoutProperties(source, excluded) {
        if (null == source) return {};

        var key;
        var index;

        // First pass: own enumerable string keys.
        var target = (function (src, skip) {
          if (null == src) return {};
          var name;
          var position;
          var copy = {};
          var names = Object.keys(src);
          for (position = 0; position < names.length; position++) {
            name = names[position];
            if (skip.indexOf(name) >= 0) continue;
            copy[name] = src[name];
          }
          return copy;
        })(source, excluded);

        // Second pass: own enumerable symbol keys.
        if (Object.getOwnPropertySymbols) {
          var symbols = Object.getOwnPropertySymbols(source);
          for (index = 0; index < symbols.length; index++) {
            key = symbols[index];
            if (excluded.indexOf(key) >= 0) continue;
            if (!Object.prototype.propertyIsEnumerable.call(source, key)) continue;
            target[key] = source[key];
          }
        }
        return target;
      }

      // Context that carries the component mapping; defaults to an empty map.
      var MDXContext = React.createContext({});

      /**
       * Resolves the effective component mapping: the mapping from context,
       * optionally overridden by `components`, which may be either an object
       * (merged on top of the context mapping) or a function that receives
       * the context mapping and returns the mapping to use.
       */
      var useMDXComponents = function (components) {
        var fromContext = React.useContext(MDXContext);
        var resolved = fromContext;
        if (components) {
          resolved =
            "function" == typeof components
              ? components(fromContext)
              : objectSpread(objectSpread({}, fromContext), components);
        }
        return resolved;
      };

      /** Provider (exported as `Zo`) that publishes the resolved mapping to its children. */
      var MDXProvider = function (props) {
        var resolved = useMDXComponents(props.components);
        return React.createElement(MDXContext.Provider, { value: resolved }, props.children);
      };

      // Name of the prop that records which MDX element type is being rendered.
      var MDX_TYPE_PROP = "mdxType";

      // Fallback renderers used when the mapping has no entry for a type.
      var DEFAULT_COMPONENTS = {
        inlineCode: "code",
        wrapper: function (props) {
          var children = props.children;
          return React.createElement(React.Fragment, {}, children);
        },
      };

      /**
       * Ref-forwarding component that selects what to render for an MDX
       * element. Lookup order: "<parentName>.<mdxType>" in the mapping,
       * then "<mdxType>" in the mapping, then the defaults above, then the
       * original element type.
       */
      var MDXCreateElement = React.forwardRef(function (props, ref) {
        var propComponents = props.components;
        var mdxType = props.mdxType;
        var originalType = props.originalType;
        var parentName = props.parentName;
        var rest = objectWithoutProperties(props, [
          "components",
          "mdxType",
          "originalType",
          "parentName",
        ]);
        var mapping = useMDXComponents(propComponents);
        var type = mdxType;
        var Component =
          mapping["".concat(parentName, ".").concat(type)] ||
          mapping[type] ||
          DEFAULT_COMPONENTS[type] ||
          originalType;

        // `components` is only forwarded when the caller supplied it.
        return propComponents
          ? React.createElement(
              Component,
              objectSpread(objectSpread({ ref: ref }, rest), {}, { components: propComponents })
            )
          : React.createElement(Component, objectSpread({ ref: ref }, rest));
      });

      /**
       * MDX-aware createElement (exported as `kt`).
       *
       * When `type` is a string, or `props.mdxType` is truthy, the element is
       * rendered through MDXCreateElement with `originalType` and `mdxType`
       * added to a copy of the props. Otherwise the call is passed straight
       * to React.createElement with the original arguments.
       */
      function createElement(type, props) {
        var args = arguments;
        var mdxType = props && props.mdxType;

        if ("string" == typeof type || mdxType) {
          var argCount = args.length;
          var forwarded = new Array(argCount);
          forwarded[0] = MDXCreateElement;

          var nextProps = {};
          for (var key in props) {
            // Copy own properties only; call through Object.prototype so the
            // check does not depend on a global `hasOwnProperty` binding.
            if (Object.prototype.hasOwnProperty.call(props, key)) {
              nextProps[key] = props[key];
            }
          }
          nextProps.originalType = type;
          nextProps[MDX_TYPE_PROP] = "string" == typeof type ? type : mdxType;
          forwarded[1] = nextProps;

          // Children (arguments 2..n) are forwarded unchanged.
          for (var position = 2; position < argCount; position++) {
            forwarded[position] = args[position];
          }
          return React.createElement.apply(null, forwarded);
        }
        return React.createElement.apply(null, args);
      }

      MDXCreateElement.displayName = "MDXCreateElement";
    },

    /* ------------------------------------------------------------------ *
     * Module 21657: compiled page "Druid schema model".
     * Exports: assets, contentTitle, default, frontMatter, metadata, toc.
     * ------------------------------------------------------------------ */
    21657: (module, exports, require) => {
      require.r(exports);
      require.d(exports, {
        assets: () => assets,
        contentTitle: () => contentTitle,
        default: () => MDXContent,
        frontMatter: () => frontMatter,
        metadata: () => metadata,
        toc: () => toc,
      });

      // Helper modules; `.Z` on each is called below as an Object.assign-like
      // merge (87462) and a key-omitting copy (63366) respectively.
      var extendsHelper = require(87462);
      var omitHelper = require(63366);
      require(67294); // loaded for its side effects only; the binding is unused here
      var mdx = require(3905);

      // Props that are pulled out of the page component's props before spreading.
      var EXCLUDED_PROPS = ["components"];

      var frontMatter = {
        id: "schema-model",
        title: "Druid schema model",
        sidebar_label: "Schema model",
        description: "Introduces concepts of datasources, primary timestamp, dimensions, and metrics.",
      };

      var contentTitle = void 0;

      var metadata = {
        unversionedId: "ingestion/schema-model",
        id: "ingestion/schema-model",
        title: "Druid schema model",
        description: "Introduces concepts of datasources, primary timestamp, dimensions, and metrics.",
        source: "@site/docs/latest/ingestion/schema-model.md",
        sourceDirName: "ingestion",
        slug: "/ingestion/schema-model",
        permalink: "/docs/latest/ingestion/schema-model",
        draft: false,
        tags: [],
        version: "current",
        frontMatter: {
          id: "schema-model",
          title: "Druid schema model",
          sidebar_label: "Schema model",
          description: "Introduces concepts of datasources, primary timestamp, dimensions, and metrics.",
        },
        sidebar: "docs",
        previous: { title: "Input sources", permalink: "/docs/latest/ingestion/input-sources" },
        next: { title: "Rollup", permalink: "/docs/latest/ingestion/rollup" },
      };

      var assets = {};

      var toc = [
        { value: "Primary timestamp", id: "primary-timestamp", level: 2 },
        { value: "Dimensions", id: "dimensions", level: 2 },
        { value: "Metrics", id: "metrics", level: 2 },
      ];

      var layoutProps = { toc: toc };
      var LAYOUT_TYPE = "wrapper";

      /**
       * Page component (default export). Renders the document body inside the
       * "wrapper" layout element, passing along `toc`, any extra props, and
       * the caller's `components` mapping.
       */
      function MDXContent(props) {
        var components = props.components;
        var rest = (0, omitHelper.Z)(props, EXCLUDED_PROPS);
        // Read the export once per render; invoked as a plain function.
        var el = mdx.kt;

        return el(
          LAYOUT_TYPE,
          (0, extendsHelper.Z)({}, layoutProps, rest, { components: components, mdxType: "MDXLayout" }),

          el(
            "p",
            null,
            "Druid stores data in datasources, which are similar to tables in a traditional relational database management system (RDBMS). Druid's data model shares similarities with both relational and timeseries data models."
          ),

          /* ---- Primary timestamp ---- */
          el("h2", { id: "primary-timestamp" }, "Primary timestamp"),
          el(
            "p",
            null,
            "Druid schemas must always include a primary timestamp. Druid uses the primary timestamp to ",
            el("a", { parentName: "p", href: "/docs/latest/ingestion/partitioning" }, "partition and sort"),
            " your data. Druid uses the primary timestamp to rapidly identify and retrieve data within the time range of queries. Druid also uses the primary timestamp column\nfor time-based ",
            el("a", { parentName: "p", href: "/docs/latest/data-management/" }, "data management operations"),
            " such as dropping time chunks, overwriting time chunks, and time-based retention rules."
          ),
          el(
            "p",
            null,
            "Druid parses the primary timestamp based on the ",
            el(
              "a",
              { parentName: "p", href: "/docs/latest/ingestion/ingestion-spec#timestampspec" },
              el("inlineCode", { parentName: "a" }, "timestampSpec")
            ),
            " configuration at ingestion time. Regardless of the source field for the primary timestamp, Druid always stores the timestamp in the ",
            el("inlineCode", { parentName: "p" }, "__time"),
            " column in your Druid datasource."
          ),
          el(
            "p",
            null,
            "You can control other important operations that are based on the primary timestamp in the\n",
            el(
              "a",
              { parentName: "p", href: "/docs/latest/ingestion/ingestion-spec#granularityspec" },
              el("inlineCode", { parentName: "a" }, "granularitySpec")
            ),
            ". If you have more than one timestamp column, you can store the others as\n",
            el(
              "a",
              { parentName: "p", href: "/docs/latest/ingestion/schema-design#secondary-timestamps" },
              "secondary timestamps"
            ),
            "."
          ),

          /* ---- Dimensions ---- */
          el("h2", { id: "dimensions" }, "Dimensions"),
          el(
            "p",
            null,
            'Dimensions are columns that Druid stores "as-is". You can use dimensions for any purpose. For example, you can group, filter, or apply aggregators to dimensions at query time when necessary.'
          ),
          el(
            "p",
            null,
            "If you disable ",
            el("a", { parentName: "p", href: "/docs/latest/ingestion/rollup" }, "rollup"),
            ", then Druid treats the set of\ndimensions like a set of columns to ingest. The dimensions behave exactly as you would expect from any database that does not support a rollup feature."
          ),
          el(
            "p",
            null,
            "At ingestion time, you configure dimensions in the ",
            el(
              "a",
              { parentName: "p", href: "/docs/latest/ingestion/ingestion-spec#dimensionsspec" },
              el("inlineCode", { parentName: "a" }, "dimensionsSpec")
            ),
            "."
          ),

          /* ---- Metrics ---- */
          el("h2", { id: "metrics" }, "Metrics"),
          el(
            "p",
            null,
            "Metrics are columns that Druid stores in an aggregated form. Metrics are most useful when you enable ",
            el("a", { parentName: "p", href: "/docs/latest/ingestion/rollup" }, "rollup"),
            ". If you specify a metric, you can apply an aggregation function to each row during ingestion. This\nhas the following benefits:"
          ),
          el(
            "p",
            null,
            "Rollup is a form of aggregation that collapses dimensions while aggregating the values in the metrics, that is, it collapses rows but retains its summary information."
          ),
          el(
            "ul",
            null,
            el(
              "li",
              { parentName: "ul" },
              el(
                "p",
                { parentName: "li" },
                el("a", { parentName: "p", href: "/docs/latest/ingestion/rollup" }, "Rollup"),
                " is a form of aggregation that combines multiple rows with the same timestamp value and dimension values. For example, the ",
                el("a", { parentName: "p", href: "/docs/latest/tutorials/tutorial-rollup" }, "rollup tutorial"),
                " demonstrates using rollup to collapse netflow data to a single row per ",
                el("inlineCode", { parentName: "p" }, "(minute, srcIP, dstIP)"),
                " tuple, while retaining aggregate information about total packet and byte counts."
              )
            ),
            el(
              "li",
              { parentName: "ul" },
              el(
                "p",
                { parentName: "li" },
                "Druid can compute some aggregators, especially approximate ones, more quickly at query time if they are partially computed at ingestion time, including data that has not been rolled up."
              ),
              el(
                "p",
                { parentName: "li" },
                "At ingestion time, you configure Metrics in the ",
                el(
                  "a",
                  { parentName: "p", href: "/docs/latest/ingestion/ingestion-spec#metricsspec" },
                  el("inlineCode", { parentName: "a" }, "metricsSpec")
                ),
                "."
              )
            )
          )
        );
      }

      MDXContent.isMDXComponent = true;
    },
  },
]);