/*
 * Webpack chunk 4343.
 *
 * Pushes a [chunkIds, moduleMap] pair onto `self.webpackChunk`. The module map
 * contains:
 *   - six image-asset modules (7228, 23242, 36110, 53825, 78451, 90117), each
 *     exporting `A` = public path + hashed PNG file name;
 *   - module 28453, a components context (hook `R`, provider `x`);
 *   - module 77513, the compiled "Configure data retention" tutorial page.
 *
 * NOTE(review): this looks like generated build output (the page metadata names
 * `@site/docs/latest/tutorials/tutorial-retention.md` as its source) — content
 * changes presumably belong in that Markdown file, not here; confirm.
 *
 * Every module factory receives (module, exports, webpackRequire). Only
 * `webpackRequire` members used below are `.d` (define export getters),
 * `.r` (called once on the page module's exports), `.p` (public path prefix)
 * and the function itself (load a module by id).
 */
"use strict";
(self.webpackChunk = self.webpackChunk || []).push([
  [4343],
  {
    // Image asset: tutorial-retention-03 (used as the "Rule configuration" screenshot).
    7228: (module, exports, webpackRequire) => {
      webpackRequire.d(exports, { A: () => assetUrl });
      const assetUrl =
        webpackRequire.p + "assets/images/tutorial-retention-03-4082b62d45d1ffdf102f31209ab141f2.png";
    },

    // Image asset: tutorial-retention-05 (used as the "New rules" screenshot).
    23242: (module, exports, webpackRequire) => {
      webpackRequire.d(exports, { A: () => assetUrl });
      const assetUrl =
        webpackRequire.p + "assets/images/tutorial-retention-05-54500f6be8508fb4ddf4468830bd94cd.png";
    },

    // Components context: exports a hook (`R`) and a provider component (`x`).
    // NOTE(review): the shape matches @mdx-js/react's useMDXComponents /
    // MDXProvider, and module 96540 is presumably React — confirm against the
    // bundle's module table.
    28453: (module, exports, webpackRequire) => {
      webpackRequire.d(exports, {
        R: () => useMergedComponents,
        x: () => ComponentsProvider,
      });

      const React = webpackRequire(96540);
      const emptyComponents = {};
      const ComponentsContext = React.createContext(emptyComponents);

      /**
       * Returns the components from context combined with `components`.
       *
       * @param {object|Function} [components] - Either an object that is spread
       *   over the context value, or a function that receives the context value
       *   and returns the final components object.
       * @returns {object} The memoized combined components object.
       */
      function useMergedComponents(components) {
        const contextComponents = React.useContext(ComponentsContext);
        return React.useMemo(
          () =>
            typeof components === "function"
              ? components(contextComponents)
              : { ...contextComponents, ...components },
          [contextComponents, components],
        );
      }

      /**
       * Provider that publishes a components object to its children.
       *
       * With `disableParentContext` set, the parent context is ignored and
       * `props.components` (called with the empty default if it is a function)
       * is used directly; otherwise it is merged with the parent context via
       * `useMergedComponents`.
       *
       * @param {object} props - `components`, `disableParentContext`, `children`.
       * @returns {object} The element created for the context provider.
       */
      function ComponentsProvider(props) {
        let allComponents;
        if (props.disableParentContext) {
          allComponents =
            typeof props.components === "function"
              ? props.components(emptyComponents)
              : props.components || emptyComponents;
        } else {
          // The hook is only reached on this branch, exactly as in the
          // original conditional expression.
          allComponents = useMergedComponents(props.components);
        }
        return React.createElement(
          ComponentsContext.Provider,
          { value: allComponents },
          props.children,
        );
      }
    },

    // Image asset: tutorial-retention-01 (used as the "Summary" screenshot).
    36110: (module, exports, webpackRequire) => {
      webpackRequire.d(exports, { A: () => assetUrl });
      const assetUrl =
        webpackRequire.p + "assets/images/tutorial-retention-01-09f120b6b0b2bc901f08801e508e778c.png";
    },

    // Image asset: tutorial-retention-06 (used as the "New segments" screenshot).
    53825: (module, exports, webpackRequire) => {
      webpackRequire.d(exports, { A: () => assetUrl });
      const assetUrl =
        webpackRequire.p + "assets/images/tutorial-retention-06-8c3d36ef058fbe029fe3ef423d2341de.png";
    },

    // The tutorial page itself: metadata, front matter, table of contents and
    // the default-exported content component.
    77513: (module, exports, webpackRequire) => {
      webpackRequire.r(exports);
      webpackRequire.d(exports, {
        assets: () => assets,
        contentTitle: () => contentTitle,
        default: () => MDXContent,
        frontMatter: () => frontMatter,
        metadata: () => metadata,
        toc: () => toc,
      });

      // Kept as a JSON string so the payload stays byte-for-byte what the
      // build emitted. `\x3c` is the JS escape for "<", so the parsed
      // description is "<!--".
      // NOTE(review): a description of "<!--" looks like a leading HTML comment
      // in the Markdown source was picked up as the summary — confirm upstream.
      const metadata = JSON.parse(
        '{"id":"tutorials/tutorial-retention","title":"Configure data retention","description":"\x3c!--","source":"@site/docs/latest/tutorials/tutorial-retention.md","sourceDirName":"tutorials","slug":"/tutorials/tutorial-retention","permalink":"/docs/latest/tutorials/tutorial-retention","draft":false,"unlisted":false,"tags":[],"version":"current","frontMatter":{"id":"tutorial-retention","title":"Configure data retention","sidebar_label":"Configure data retention"},"sidebar":"docs","previous":{"title":"Convert ingestion spec to SQL","permalink":"/docs/latest/tutorials/tutorial-msq-convert-spec"},"next":{"title":"Append data","permalink":"/docs/latest/tutorials/tutorial-append-data"}}',
      );

      // NOTE(review): 74848 is presumably react/jsx-runtime (it supplies
      // `jsx`, `jsxs` and `Fragment`) — confirm.
      const jsxRuntime = webpackRequire(74848);
      const componentsContext = webpackRequire(28453);

      const frontMatter = {
        id: "tutorial-retention",
        title: "Configure data retention",
        sidebar_label: "Configure data retention",
      };
      const contentTitle = undefined;
      const assets = {};
      const toc = [
        { value: "Load the example data", id: "load-the-example-data", level: 2 },
        { value: "Set retention rules", id: "set-retention-rules", level: 2 },
        { value: "Further reading", id: "further-reading", level: 2 },
      ];

      /**
       * Builds the element tree for the tutorial body.
       *
       * Tag names default to plain HTML tag strings and can be overridden first
       * by the components context and then by `props.components`.
       *
       * @param {object} props - Props passed to the page; only `components` is read.
       * @returns {object} A fragment containing the whole page content.
       */
      function createMdxContent(props) {
        const tags = {
          a: "a",
          code: "code",
          h2: "h2",
          img: "img",
          li: "li",
          ol: "ol",
          p: "p",
          pre: "pre",
          ul: "ul",
          // `(0, fn)()` calls the export without a `this` binding, as the
          // bundler emitted it.
          ...(0, componentsContext.R)(),
          ...props.components,
        };
        // Pulled into locals so each call below is also made without `this`.
        const { jsx, jsxs, Fragment } = jsxRuntime;

        return jsxs(Fragment, {
          children: [
            jsx(tags.p, {
              children:
                "This tutorial demonstrates how to configure retention rules on a datasource to set the time intervals of data that will be retained or dropped.",
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "For this tutorial, we'll assume you've already downloaded Apache Druid as described in\nthe ",
                jsx(tags.a, { href: "/docs/latest/tutorials/", children: "single-machine quickstart" }),
                " and have it running on your local machine.",
              ],
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "It will also be helpful to have finished ",
                jsx(tags.a, { href: "/docs/latest/tutorials/tutorial-batch", children: "Load a file" }),
                " and ",
                jsx(tags.a, { href: "/docs/latest/tutorials/tutorial-query", children: "Query data" }),
                " tutorials.",
              ],
            }),
            "\n",
            jsx(tags.h2, { id: "load-the-example-data", children: "Load the example data" }),
            "\n",
            jsx(tags.p, {
              children:
                "For this tutorial, we'll be using the Wikipedia edits sample data, with an ingestion task spec that will create a separate segment for each hour in the input data.",
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "The ingestion spec can be found at ",
                jsx(tags.code, { children: "quickstart/tutorial/retention-index.json" }),
                ". Let's submit that spec, which will create a datasource called ",
                jsx(tags.code, { children: "retention-tutorial" }),
                ":",
              ],
            }),
            "\n",
            jsx(tags.pre, {
              children: jsx(tags.code, {
                className: "language-bash",
                children:
                  "bin/post-index-task --file quickstart/tutorial/retention-index.json --url http://localhost:8081\n",
              }),
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "After the ingestion completes, go to ",
                jsx(tags.a, {
                  href: "http://localhost:8888/unified-console.html#datasources",
                  children: "http://localhost:8888/unified-console.html#datasources",
                }),
                " in a browser to access the web console's datasource view.",
              ],
            }),
            "\n",
            jsx(tags.p, {
              children:
                "This view shows the available datasources and a summary of the retention rules for each datasource:",
            }),
            "\n",
            jsx(tags.p, {
              children: jsx(tags.img, {
                alt: "Summary",
                src: webpackRequire(36110).A + "",
                title: "Summary",
                width: "1136",
                height: "218",
              }),
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "Currently there are no rules set for the ",
                jsx(tags.code, { children: "retention-tutorial" }),
                " datasource. Note that there are default rules for the cluster: load forever with 2 replicas in ",
                jsx(tags.code, { children: "_default_tier" }),
                ".",
              ],
            }),
            "\n",
            jsx(tags.p, {
              children:
                "This means that all data will be loaded regardless of timestamp, and each segment will be replicated to two Historical processes in the default tier.",
            }),
            "\n",
            jsx(tags.p, {
              children: "In this tutorial, we will ignore the tiering and redundancy concepts for now.",
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "Let's view the segments for the ",
                jsx(tags.code, { children: "retention-tutorial" }),
                ' datasource by clicking the "24 Segments" link next to "Fully Available".',
              ],
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "The segments view (",
                jsx(tags.a, {
                  href: "http://localhost:8888/unified-console.html#segments",
                  children: "http://localhost:8888/unified-console.html#segments",
                }),
                ") provides information about what segments a datasource contains. The page shows that there are 24 segments, each one containing data for a specific hour of 2015-09-12:",
              ],
            }),
            "\n",
            jsx(tags.p, {
              children: jsx(tags.img, {
                alt: "Original segments",
                src: webpackRequire(90117).A + "",
                title: "Original segments",
                width: "1971",
                height: "944",
              }),
            }),
            "\n",
            jsx(tags.h2, { id: "set-retention-rules", children: "Set retention rules" }),
            "\n",
            jsx(tags.p, {
              children:
                "Suppose we want to drop data for the first 12 hours of 2015-09-12 and keep data for the later 12 hours of 2015-09-12.",
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "Go to the ",
                jsx(tags.a, {
                  href: "http://localhost:8888/unified-console.html#datasources",
                  children: "datasources view",
                }),
                " and click the blue pencil icon next to ",
                jsx(tags.code, { children: "Cluster default: loadForever" }),
                " for the ",
                jsx(tags.code, { children: "retention-tutorial" }),
                " datasource.",
              ],
            }),
            "\n",
            jsx(tags.p, { children: "A rule configuration window will appear:" }),
            "\n",
            jsx(tags.p, {
              children: jsx(tags.img, {
                alt: "Rule configuration",
                src: webpackRequire(7228).A + "",
                title: "Rule configuration",
                width: "792",
                height: "307",
              }),
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "Now click the ",
                jsx(tags.code, { children: "+ New rule" }),
                " button twice.",
              ],
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "In the upper rule box, select ",
                jsx(tags.code, { children: "Load" }),
                " and ",
                jsx(tags.code, { children: "by interval" }),
                ", and then enter ",
                jsx(tags.code, { children: "2015-09-12T12:00:00.000Z/2015-09-13T00:00:00.000Z" }),
                " in field next to ",
                jsx(tags.code, { children: "by interval" }),
                ". Replicas can remain at 2 in the ",
                jsx(tags.code, { children: "_default_tier" }),
                ".",
              ],
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "In the lower rule box, select ",
                jsx(tags.code, { children: "Drop" }),
                " and ",
                jsx(tags.code, { children: "forever" }),
                ".",
              ],
            }),
            "\n",
            jsx(tags.p, { children: "The rules should look like this:" }),
            "\n",
            jsx(tags.p, {
              children: jsx(tags.img, {
                alt: "Set rules",
                src: webpackRequire(78451).A + "",
                title: "Set rules",
                width: "787",
                height: "560",
              }),
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "Now click ",
                jsx(tags.code, { children: "Next" }),
                ". The rule configuration process will ask for a user name and comment, for change logging purposes. You can enter ",
                jsx(tags.code, { children: "tutorial" }),
                " for both.",
              ],
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "Now click ",
                jsx(tags.code, { children: "Save" }),
                ". You can see the new rules in the datasources view:",
              ],
            }),
            "\n",
            jsx(tags.p, {
              children: jsx(tags.img, {
                alt: "New rules",
                src: webpackRequire(23242).A + "",
                title: "New rules",
                width: "1968",
                height: "218",
              }),
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "Give the cluster a few minutes to apply the rule change, and go to the ",
                jsx(tags.a, {
                  href: "http://localhost:8888/unified-console.html#segments",
                  children: "segments view",
                }),
                " in the web console.\nThe segments for the first 12 hours of 2015-09-12 are now gone:",
              ],
            }),
            "\n",
            jsx(tags.p, {
              children: jsx(tags.img, {
                alt: "New segments",
                src: webpackRequire(53825).A + "",
                title: "New segments",
                width: "1973",
                height: "567",
              }),
            }),
            "\n",
            jsx(tags.p, { children: "The resulting retention rule chain is the following:" }),
            "\n",
            jsxs(tags.ol, {
              children: [
                "\n",
                jsxs(tags.li, {
                  children: [
                    "\n",
                    jsx(tags.p, { children: "loadByInterval 2015-09-12T12/2015-09-13 (12 hours)" }),
                    "\n",
                  ],
                }),
                "\n",
                jsxs(tags.li, {
                  children: ["\n", jsx(tags.p, { children: "dropForever" }), "\n"],
                }),
                "\n",
                jsxs(tags.li, {
                  children: ["\n", jsx(tags.p, { children: "loadForever (default rule)" }), "\n"],
                }),
                "\n",
              ],
            }),
            "\n",
            jsx(tags.p, {
              children:
                "The rule chain is evaluated from top to bottom, with the default rule chain always added at the bottom.",
            }),
            "\n",
            jsx(tags.p, {
              children:
                "The tutorial rule chain we just created loads data if it is within the specified 12 hour interval.",
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "If data is not within the 12 hour interval, the rule chain evaluates ",
                jsx(tags.code, { children: "dropForever" }),
                " next, which will drop any data.",
              ],
            }),
            "\n",
            jsxs(tags.p, {
              children: [
                "The ",
                jsx(tags.code, { children: "dropForever" }),
                " terminates the rule chain, effectively overriding the default ",
                jsx(tags.code, { children: "loadForever" }),
                " rule, which will never be reached in this rule chain.",
              ],
            }),
            "\n",
            jsx(tags.p, {
              children: "Note that in this tutorial we defined a load rule on a specific interval.",
            }),
            "\n",
            jsx(tags.p, {
              children:
                "If instead you want to retain data based on how old it is (e.g., retain data that ranges from 3 months in the past to the present time), you would define a Period load rule instead.",
            }),
            "\n",
            jsx(tags.h2, { id: "further-reading", children: "Further reading" }),
            "\n",
            jsxs(tags.ul, {
              children: [
                "\n",
                jsx(tags.li, {
                  children: jsx(tags.a, {
                    href: "/docs/latest/operations/rule-configuration",
                    children: "Load rules",
                  }),
                }),
                "\n",
              ],
            }),
          ],
        });
      }

      /**
       * Default export: renders the tutorial, optionally inside a `wrapper`
       * component taken from the components context or `props.components`.
       *
       * @param {object} [props={}] - Page props, forwarded to the content and
       *   to the wrapper when one is present.
       * @returns {object} The wrapped content element, or the bare content
       *   tree when no wrapper is configured.
       */
      function MDXContent(props = {}) {
        const { wrapper: Wrapper } = {
          ...(0, componentsContext.R)(),
          ...props.components,
        };
        if (!Wrapper) {
          return createMdxContent(props);
        }
        const { jsx } = jsxRuntime;
        return jsx(Wrapper, {
          ...props,
          children: jsx(createMdxContent, { ...props }),
        });
      }
    },

    // Image asset: tutorial-retention-04 (used as the "Set rules" screenshot).
    78451: (module, exports, webpackRequire) => {
      webpackRequire.d(exports, { A: () => assetUrl });
      const assetUrl =
        webpackRequire.p + "assets/images/tutorial-retention-04-b0841ac2bfbb857680b28192306b1931.png";
    },

    // Image asset: tutorial-retention-02 (used as the "Original segments" screenshot).
    90117: (module, exports, webpackRequire) => {
      webpackRequire.d(exports, { A: () => assetUrl });
      const assetUrl =
        webpackRequire.p + "assets/images/tutorial-retention-02-c388e3c07c39dc0f5ff33b78b68db565.png";
    },
  },
]);