// blob: 65474d8dfb46da26c875bf315ba9b03f56b314a6 [file] [log] [blame]
"use strict";
/*
 * Webpack chunk 5752.
 *
 * Registers two module factories on the shared `self.webpackChunk` queue:
 *
 *   15680 - a small MDX/React runtime: a components context, a provider
 *           (exported as `xA`) and an element factory (exported as `yg`).
 *           NOTE(review): this looks like the compiled `@mdx-js/react` v1
 *           runtime (see the "MDXCreateElement" display name) - confirm.
 *   29272 - the compiled "Data deletion" documentation page: front matter,
 *           page metadata, table of contents and the page component.
 *
 * This file is build output. Text fixes made here should also be applied to
 * the page's markdown source (`metadata.source` below), otherwise the next
 * build will overwrite them.
 */
(self.webpackChunk = self.webpackChunk || []).push([
  [5752],
  {
    /**
     * MDX runtime helpers.
     *
     * Exports:
     *   xA(props)                     - provider that publishes `props.components`
     *                                   (merged with the surrounding context) to descendants.
     *   yg(type, props, ...children)  - `React.createElement` wrapper that routes string
     *                                   types / `mdxType`-tagged props through MDXCreateElement.
     */
    15680: (__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
      __webpack_require__.d(__webpack_exports__, {
        xA: () => MDXProvider,
        yg: () => createMdxElement,
      });

      const React = __webpack_require__(96540);

      /** Sets `target[key] = value`; redefines the property as a plain data property if `key` is already visible on `target`. */
      function defineEnumerable(target, key, value) {
        if (key in target) {
          Object.defineProperty(target, key, {
            value,
            enumerable: true,
            configurable: true,
            writable: true,
          });
        } else {
          target[key] = value;
        }
        return target;
      }

      /** Own string keys plus own symbol keys of `object`; symbols are limited to enumerable ones when `enumerableOnly` is truthy. */
      function ownKeys(object, enumerableOnly) {
        const keys = Object.keys(object);
        if (Object.getOwnPropertySymbols) {
          let symbols = Object.getOwnPropertySymbols(object);
          if (enumerableOnly) {
            symbols = symbols.filter(
              (symbol) => Object.getOwnPropertyDescriptor(object, symbol).enumerable
            );
          }
          keys.push.apply(keys, symbols);
        }
        return keys;
      }

      /**
       * Copies the properties of every following argument onto `target` and returns `target`.
       * Arguments at odd positions are copied by value; arguments at even positions are
       * copied by property descriptor. Callers rely on that parity, so positions matter.
       */
      function objectSpread(target) {
        for (let index = 1; index < arguments.length; index++) {
          const source = arguments[index] != null ? arguments[index] : {};
          if (index % 2) {
            ownKeys(Object(source), true).forEach((key) => {
              defineEnumerable(target, key, source[key]);
            });
          } else if (Object.getOwnPropertyDescriptors) {
            Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
          } else {
            ownKeys(Object(source)).forEach((key) => {
              Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
            });
          }
        }
        return target;
      }

      /** Shallow copy of `source` without the keys listed in `excluded`; returns `{}` for null/undefined. */
      function omitProps(source, excluded) {
        if (source == null) {
          return {};
        }
        const result = {};
        for (const key of Object.keys(source)) {
          if (excluded.indexOf(key) < 0) {
            result[key] = source[key];
          }
        }
        if (Object.getOwnPropertySymbols) {
          for (const symbol of Object.getOwnPropertySymbols(source)) {
            if (
              excluded.indexOf(symbol) < 0 &&
              Object.prototype.propertyIsEnumerable.call(source, symbol)
            ) {
              result[symbol] = source[symbol];
            }
          }
        }
        return result;
      }

      const MDXContext = React.createContext({});

      /**
       * Resolves the component map in effect: the context value, optionally combined with
       * `components` (a function receives the context value; an object is merged over it).
       */
      function useMDXComponents(components) {
        const contextComponents = React.useContext(MDXContext);
        let resolved = contextComponents;
        if (components) {
          resolved =
            typeof components === "function"
              ? components(contextComponents)
              : objectSpread(objectSpread({}, contextComponents), components);
        }
        return resolved;
      }

      /** Provider component: publishes the resolved component map to `props.children`. */
      function MDXProvider(props) {
        const resolved = useMDXComponents(props.components);
        return React.createElement(MDXContext.Provider, { value: resolved }, props.children);
      }

      // Prop under which the element factory records the logical MDX type.
      const MDX_TYPE_KEY = "mdxType";

      // Fallback renderers used when neither context nor props supply a component.
      const DEFAULT_COMPONENTS = {
        inlineCode: "code",
        wrapper: function (props) {
          return React.createElement(React.Fragment, {}, props.children);
        },
      };

      /**
       * Picks the concrete component for an MDX element. Lookup order:
       * "<parentName>.<mdxType>" in the component map, then "<mdxType>", then the
       * defaults above, then the original type passed to the factory.
       */
      const MDXCreateElement = React.forwardRef(function (props, ref) {
        const { components, mdxType, originalType, parentName } = props;
        const forwardedProps = omitProps(props, [
          "components",
          "mdxType",
          "originalType",
          "parentName",
        ]);
        const resolved = useMDXComponents(components);
        const Component =
          resolved[`${parentName}.${mdxType}`] ||
          resolved[mdxType] ||
          DEFAULT_COMPONENTS[mdxType] ||
          originalType;

        if (components) {
          // The empty object keeps `{ components }` at an odd argument position
          // (copied by value) - see objectSpread.
          return React.createElement(
            Component,
            objectSpread(objectSpread({ ref }, forwardedProps), {}, { components })
          );
        }
        return React.createElement(Component, objectSpread({ ref }, forwardedProps));
      });

      /**
       * Element factory used by compiled MDX.
       *
       * When `type` is a string, or `props.mdxType` is set, the element is created as
       * MDXCreateElement with `originalType` and `mdxType` added to a copy of the own
       * props; otherwise the call is forwarded to `React.createElement` untouched.
       *
       * @param {*} type - element type (tag name or component).
       * @param {?Object} props - element props; may be null.
       * @returns {*} whatever `React.createElement` returns.
       */
      function createMdxElement(type, props) {
        // `arguments` (not rest params) so the exact argument count is forwarded.
        const callArgs = arguments;
        const mdxType = props && props.mdxType;

        if (typeof type === "string" || mdxType) {
          const argCount = callArgs.length;
          const elementArgs = new Array(argCount);
          elementArgs[0] = MDXCreateElement;

          const wrappedProps = {};
          for (const key in props) {
            // Explicit prototype lookup instead of relying on a global `hasOwnProperty`.
            if (Object.prototype.hasOwnProperty.call(props, key)) {
              wrappedProps[key] = props[key];
            }
          }
          wrappedProps.originalType = type;
          wrappedProps[MDX_TYPE_KEY] = typeof type === "string" ? type : mdxType;
          elementArgs[1] = wrappedProps;

          for (let index = 2; index < argCount; index++) {
            elementArgs[index] = callArgs[index];
          }
          return React.createElement.apply(null, elementArgs);
        }
        return React.createElement.apply(null, callArgs);
      }

      MDXCreateElement.displayName = "MDXCreateElement";
    },

    /**
     * Compiled documentation page "Data deletion".
     *
     * Exports: assets, contentTitle, default (the page component), frontMatter,
     * metadata, toc.
     */
    29272: (__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
      __webpack_require__.r(__webpack_exports__);
      __webpack_require__.d(__webpack_exports__, {
        assets: () => assets,
        contentTitle: () => contentTitle,
        default: () => MDXContent,
        frontMatter: () => frontMatter,
        metadata: () => metadata,
        toc: () => toc,
      });

      // NOTE(review): from the call sites below, 58168 behaves like an "extends"/assign
      // helper and 98587 like an "object without properties" helper - confirm.
      const mergeHelper = __webpack_require__(58168);
      const omitHelper = __webpack_require__(98587);
      __webpack_require__(96540); // required for load order only; the result is unused here
      const mdx = __webpack_require__(15680);

      const OMITTED_PROPS = ["components"];

      const frontMatter = { id: "delete", title: "Data deletion" };
      const contentTitle = undefined;
      const metadata = {
        unversionedId: "data-management/delete",
        id: "data-management/delete",
        title: "Data deletion",
        // NOTE(review): the description is an HTML comment opener - presumably picked up
        // from a comment at the top of the markdown source; verify in the docs build.
        description: "\x3c!--",
        source: "@site/docs/latest/data-management/delete.md",
        sourceDirName: "data-management",
        slug: "/data-management/delete",
        permalink: "/docs/latest/data-management/delete",
        draft: false,
        tags: [],
        version: "current",
        frontMatter: { id: "delete", title: "Data deletion" },
        sidebar: "docs",
        previous: { title: "Data updates", permalink: "/docs/latest/data-management/update" },
        next: { title: "Schema changes", permalink: "/docs/latest/data-management/schema-changes" },
      };
      const assets = {};
      const toc = [
        { value: "By time range, manually", id: "by-time-range-manually", level: 2 },
        { value: "By time range, automatically", id: "by-time-range-automatically", level: 2 },
        { value: "Specific records", id: "specific-records", level: 2 },
        { value: "Entire table", id: "entire-table", level: 2 },
        { value: "Permanently (<code>kill</code> task)", id: "permanently-kill-task", level: 2 },
      ];
      const layoutProps = { toc };
      const LAYOUT_TYPE = "wrapper";

      /**
       * Page component. Renders the document body inside the "wrapper" layout element,
       * forwarding every prop except `components`, which is passed on explicitly.
       *
       * @param {Object} props - component props; `props.components` is the MDX component map.
       * @returns {*} the element tree produced by the MDX element factory.
       */
      function MDXContent(props) {
        const components = props.components;
        const passthroughProps = (0, omitHelper.A)(props, OMITTED_PROPS);
        const h = mdx.yg;

        return h(
          LAYOUT_TYPE,
          (0, mergeHelper.A)({}, layoutProps, passthroughProps, {
            components: components,
            mdxType: "MDXLayout",
          }),

          // ---- By time range, manually ----
          h("h2", { id: "by-time-range-manually" }, "By time range, manually"),
          h(
            "p",
            null,
            "Apache Druid stores data ",
            h("a", { parentName: "p", href: "/docs/latest/design/storage" }, "partitioned by time chunk"),
            " and supports\ndeleting data for time chunks by dropping segments. This is a fast, metadata-only operation."
          ),
          h("p", null, "Deletion by time range happens in two steps:"),
          h(
            "ol",
            null,
            h(
              "li",
              { parentName: "ol" },
              "Segments to be deleted must first be marked as ",
              h("a", { parentName: "li", href: "/docs/latest/design/storage#segment-lifecycle" }, '"unused"'),
              ". This can\nhappen when a segment is dropped by a ",
              h("a", { parentName: "li", href: "/docs/latest/operations/rule-configuration" }, "drop rule"),
              " or when you manually mark a\nsegment unused through the Coordinator API or web console. This is a soft delete: the data is not available for\nquerying, but the segment files remain in deep storage, and the segment records remain in the metadata store."
            ),
            h(
              "li",
              { parentName: "ol" },
              'Once a segment is marked "unused", you can use a ',
              h(
                "a",
                { parentName: "li", href: "#kill-task" },
                h("inlineCode", { parentName: "a" }, "kill"),
                " task"
              ),
              " to permanently delete the segment file from\ndeep storage and remove its record from the metadata store. This is a hard delete: the data is unrecoverable unless\nyou have a backup."
            )
          ),
          h(
            "p",
            null,
            "For documentation on disabling segments using the Coordinator API, see the\n",
            h(
              "a",
              { parentName: "p", href: "/docs/latest/api-reference/legacy-metadata-api#datasources" },
              "Legacy metadata API reference"
            ),
            "."
          ),
          h(
            "p",
            null,
            "A data deletion tutorial is available at ",
            h("a", { parentName: "p", href: "/docs/latest/tutorials/tutorial-delete-data" }, "Tutorial: Deleting data"),
            "."
          ),

          // ---- By time range, automatically ----
          h("h2", { id: "by-time-range-automatically" }, "By time range, automatically"),
          h(
            "p",
            null,
            "Druid supports ",
            h("a", { parentName: "p", href: "/docs/latest/operations/rule-configuration" }, "load and drop rules"),
            ", which are used to define intervals of time\nwhere data should be preserved, and intervals where data should be discarded. Data that falls under a drop rule is\nmarked unused, in the same manner as if you ",
            h("a", { parentName: "p", href: "#by-time-range-manually" }, "manually mark that time range unused"),
            ". This is a\nfast, metadata-only operation."
          ),
          h(
            "p",
            null,
            "Data that is dropped in this way is marked unused, but remains in deep storage. To permanently delete it, use a\n",
            h(
              "a",
              { parentName: "p", href: "#kill-task" },
              h("inlineCode", { parentName: "a" }, "kill"),
              " task"
            ),
            "."
          ),

          // ---- Specific records ----
          h("h2", { id: "specific-records" }, "Specific records"),
          h(
            "p",
            null,
            "Druid supports deleting specific records using ",
            h("a", { parentName: "p", href: "/docs/latest/data-management/update#reindex" }, "reindexing"),
            " with a filter. The filter specifies which\ndata remains after reindexing, so it must be the inverse of the data you want to delete. Because segments must be\nrewritten to delete data in this way, it can be a time-consuming operation."
          ),
          h(
            "p",
            null,
            "For example, to delete records where ",
            h("inlineCode", { parentName: "p" }, "userName"),
            " is ",
            h("inlineCode", { parentName: "p" }, "'bob'"),
            " with native batch indexing, use a\n",
            h(
              "a",
              { parentName: "p", href: "/docs/latest/ingestion/ingestion-spec#transformspec" },
              h("inlineCode", { parentName: "a" }, "transformSpec")
            ),
            " with filter ",
            h(
              "inlineCode",
              { parentName: "p" },
              '{"type": "not", "field": {"type":\n"selector", "dimension": "userName", "value": "bob"}}'
            ),
            "."
          ),
          h(
            "p",
            null,
            "To delete the same records using SQL, use ",
            h("a", { parentName: "p", href: "/docs/latest/multi-stage-query/concepts#replace" }, "REPLACE"),
            " with ",
            h("inlineCode", { parentName: "p" }, "WHERE userName <> 'bob'"),
            "."
          ),
          h(
            "p",
            null,
            "To reindex using ",
            h("a", { parentName: "p", href: "/docs/latest/ingestion/native-batch" }, "native batch"),
            ", use the ",
            h(
              "a",
              { parentName: "p", href: "/docs/latest/ingestion/input-sources#druid-input-source" },
              h("inlineCode", { parentName: "a" }, "druid"),
              " input\nsource"
            ),
            ". If needed,\n",
            h(
              "a",
              { parentName: "p", href: "/docs/latest/ingestion/ingestion-spec#transformspec" },
              h("inlineCode", { parentName: "a" }, "transformSpec")
            ),
            " can be used to filter or modify data during the\nreindexing job. To reindex with SQL, use ",
            h(
              "a",
              { parentName: "p", href: "/docs/latest/multi-stage-query/reference#replace" },
              h("inlineCode", { parentName: "a" }, "REPLACE <table> OVERWRITE")
            ),
            "\nwith ",
            h("inlineCode", { parentName: "p" }, "SELECT ... FROM <table>"),
            ". (Druid does not have ",
            h("inlineCode", { parentName: "p" }, "UPDATE"),
            " or ",
            h("inlineCode", { parentName: "p" }, "ALTER TABLE"),
            " statements.) Any SQL SELECT query can be\nused to filter, modify, or enrich the data during the reindexing job."
          ),
          h(
            "p",
            null,
            "Data that is deleted in this way is marked unused, but remains in deep storage. To permanently delete it, use a ",
            h(
              "a",
              { parentName: "p", href: "#kill-task" },
              h("inlineCode", { parentName: "a" }, "kill"),
              "\ntask"
            ),
            "."
          ),

          // ---- Entire table ----
          h("h2", { id: "entire-table" }, "Entire table"),
          h(
            "p",
            null,
            "Deleting an entire table works the same way as ",
            h("a", { parentName: "p", href: "#by-time-range-manually" }, "deleting part of a table by time range"),
            ". First,\nmark all segments unused using the Coordinator API or web console. Then, optionally, delete it permanently using a\n",
            h(
              "a",
              { parentName: "p", href: "#kill-task" },
              h("inlineCode", { parentName: "a" }, "kill"),
              " task"
            ),
            "."
          ),

          // ---- Permanently (kill task) ----
          // Named anchor: the "#kill-task" links above point here.
          h("a", { name: "kill-task" }),
          h(
            "h2",
            { id: "permanently-kill-task" },
            "Permanently (",
            h("inlineCode", { parentName: "h2" }, "kill"),
            " task)"
          ),
          h(
            "p",
            null,
            "Data that has been overwritten or soft-deleted still remains as segments that have been marked unused. You can use a\n",
            h("inlineCode", { parentName: "p" }, "kill"),
            " task to permanently delete this data."
          ),
          h("p", null, "The available grammar is:"),
          h(
            "pre",
            null,
            h(
              "code",
              { parentName: "pre", className: "language-json" },
              '{\n "type": "kill",\n "id": <task_id>,\n "dataSource": <task_datasource>,\n "interval" : <all_unused_segments_in_this_interval_will_die!>,\n "context": <task_context>,\n "batchSize": <optional_batch_size>,\n "limit": <optional_maximum_number_of_segments_to_delete>,\n "maxUsedStatusLastUpdatedTime": <optional_maximum_timestamp_when_segments_were_marked_as_unused>\n}\n'
            )
          ),
          h("p", null, "Some of the parameters used in the task payload are further explained below:"),
          h(
            "table",
            null,
            h(
              "thead",
              { parentName: "table" },
              h(
                "tr",
                { parentName: "thead" },
                h("th", { parentName: "tr", align: null }, "Parameter"),
                h("th", { parentName: "tr", align: null }, "Default"),
                h("th", { parentName: "tr", align: null }, "Explanation")
              )
            ),
            h(
              "tbody",
              { parentName: "table" },
              h(
                "tr",
                { parentName: "tbody" },
                h("td", { parentName: "tr", align: null }, h("inlineCode", { parentName: "td" }, "batchSize")),
                h("td", { parentName: "tr", align: null }, "100"),
                h(
                  "td",
                  { parentName: "tr", align: null },
                  "Maximum number of segments that are deleted in one kill batch. Some operations on the Overlord may get stuck while a ",
                  h("inlineCode", { parentName: "td" }, "kill"),
                  " task is in progress due to concurrency constraints (such as in ",
                  h("inlineCode", { parentName: "td" }, "TaskLockbox"),
                  "). Thus, a ",
                  h("inlineCode", { parentName: "td" }, "kill"),
                  " task splits the list of unused segments to be deleted into smaller batches to yield the Overlord resources intermittently to other task operations."
                )
              ),
              h(
                "tr",
                { parentName: "tbody" },
                h("td", { parentName: "tr", align: null }, h("inlineCode", { parentName: "td" }, "limit")),
                h("td", { parentName: "tr", align: null }, "null (no limit)"),
                h("td", { parentName: "tr", align: null }, "Maximum number of segments for the kill task to delete.")
              ),
              h(
                "tr",
                { parentName: "tbody" },
                h(
                  "td",
                  { parentName: "tr", align: null },
                  h("inlineCode", { parentName: "td" }, "maxUsedStatusLastUpdatedTime")
                ),
                h("td", { parentName: "tr", align: null }, "null (no cutoff)"),
                h(
                  "td",
                  { parentName: "tr", align: null },
                  "Maximum timestamp used as a cutoff to include unused segments. The kill task only considers segments which lie in the specified ",
                  h("inlineCode", { parentName: "td" }, "interval"),
                  " and were marked as unused no later than this time. The default behavior is to kill all unused segments in the ",
                  h("inlineCode", { parentName: "td" }, "interval"),
                  " regardless of when they were marked as unused."
                )
              )
            )
          ),
          h(
            "p",
            null,
            h("strong", { parentName: "p" }, "WARNING:"),
            " The ",
            h("inlineCode", { parentName: "p" }, "kill"),
            " task permanently removes all information about the affected segments from the metadata store and\ndeep storage. This operation cannot be undone."
          )
        );
      }

      MDXContent.isMDXComponent = true;
    },
  },
]);