"use strict";
/*
 * Webpack chunk 8289 (generated Docusaurus build output).
 *
 * Registers four modules on the shared `self.webpackChunk` array:
 *   10528 - compiled MDX page "Export query results" (Apache Druid tutorial)
 *   28453 - MDX components context helpers (exports `R` and `x`)
 *   65537 - <Tabs> component (export `A`)
 *   79329 - <TabItem> component (export `A`)
 *
 * Every module factory is called as (module, exports, require); in the minified
 * code those are `e`, `r` and `t`. `t.d` defines getter exports and `t.r` marks
 * the exports object as an ES module (both are supplied by the webpack runtime,
 * which is not part of this file).
 *
 * NOTE(review): this file is a build artifact. The text corrections made here
 * (parenthesised GCS EXTERN example, completed "list the contents of" sentence)
 * should also be applied to the markdown source named in the page metadata,
 * otherwise the next build will overwrite them.
 */
(self.webpackChunk=self.webpackChunk||[]).push([[8289],{

  // ---------------------------------------------------------------------------
  // 10528: tutorial page. Exports assets, contentTitle, default (the page
  // component), frontMatter, metadata and toc.
  // ---------------------------------------------------------------------------
  10528:(e,r,t)=>{
    t.r(r),t.d(r,{assets:()=>d,contentTitle:()=>u,default:()=>p,frontMatter:()=>l,metadata:()=>n,toc:()=>c});

    // Page metadata (id, permalink, previous/next links, ...).
    const n=JSON.parse('{"id":"tutorials/tutorial-extern","title":"Export query results","description":"How to use EXTERN to export query results.","source":"@site/docs/33.0.0/tutorials/tutorial-extern.md","sourceDirName":"tutorials","slug":"/tutorials/tutorial-extern","permalink":"/docs/33.0.0/tutorials/tutorial-extern","draft":false,"unlisted":false,"tags":[],"version":"current","frontMatter":{"id":"tutorial-extern","title":"Export query results","sidebar_label":"Export results","description":"How to use EXTERN to export query results."},"sidebar":"docs","previous":{"title":"Query for latest data","permalink":"/docs/33.0.0/tutorials/tutorial-latest-by"},"next":{"title":"Theta sketches tutorial","permalink":"/docs/33.0.0/tutorials/tutorial-sketches-theta"}}');

    // o: JSX runtime (jsx / jsxs / Fragment); s: module 28453; a: Tabs (65537); i: TabItem (79329).
    var o=t(74848),s=t(28453),a=t(65537),i=t(79329);

    // l: front matter, u: content title (none), d: assets (none), c: table of contents.
    const l={id:"tutorial-extern",title:"Export query results",sidebar_label:"Export results",description:"How to use EXTERN to export query results."},u=void 0,d={},c=[{value:"Prerequisites",id:"prerequisites",level:2},{value:"Export query results to the local file system",id:"export-query-results-to-the-local-file-system",level:2},{value:"Configure Druid local export directory",id:"configure-druid-local-export-directory",level:3},{value:"Start Druid and load sample data",id:"start-druid-and-load-sample-data",level:3},{value:"Query to export data",id:"query-to-export-data",level:3},{value:"Export query results to cloud storage",id:"export-query-results-to-cloud-storage",level:2},{value:"Learn more",id:"learn-more",level:2}];

    /**
     * Builds the page body.
     *
     * Element types default to plain tag names and can be overridden first by
     * the components returned from `s.R()` and then by `e.components`.
     *
     * @param {object} e - props; only `e.components` is read here.
     * @returns the element tree produced by the JSX runtime.
     */
    function h(e){
      const r={a:"a",code:"code",h2:"h2",h3:"h3",li:"li",ol:"ol",p:"p",pre:"pre",strong:"strong",ul:"ul",...(0,s.R)(),...e.components};
      return(0,o.jsxs)(o.Fragment,{children:[
        (0,o.jsxs)(r.p,{children:["This tutorial demonstrates how to use the Apache Druid\xae SQL ",(0,o.jsx)(r.a,{href:"/docs/33.0.0/multi-stage-query/reference#extern-function",children:"EXTERN"})," function to export data."]}),
        "\n",
        (0,o.jsx)(r.h2,{id:"prerequisites",children:"Prerequisites"}),
        "\n",
        (0,o.jsxs)(r.p,{children:["Before you follow the steps in this tutorial, download Druid as described in the ",(0,o.jsx)(r.a,{href:"/docs/33.0.0/tutorials/",children:"Local quickstart"}),".\nDon't start Druid, you'll do that as part of the tutorial."]}),
        "\n",
        (0,o.jsxs)(r.p,{children:["You should be familiar with ingesting and querying data in Druid.\nIf you haven't already, go through the ",(0,o.jsx)(r.a,{href:"/docs/33.0.0/tutorials/tutorial-query",children:"Query data"})," tutorial first."]}),
        "\n",
        (0,o.jsx)(r.h2,{id:"export-query-results-to-the-local-file-system",children:"Export query results to the local file system"}),
        "\n",
        (0,o.jsx)(r.p,{children:"This example demonstrates how to configure Druid to export data to the local file system.\nWhile you can use this approach to learn about EXTERN syntax for exporting data, it's not suitable for production scenarios."}),
        "\n",
        (0,o.jsx)(r.h3,{id:"configure-druid-local-export-directory",children:"Configure Druid local export directory"}),
        "\n",
        (0,o.jsxs)(r.p,{children:["The following commands set the base path for the Druid exports to ",(0,o.jsx)(r.code,{children:"/tmp/druid/"}),".\nIf the account running Druid doesn't have access to ",(0,o.jsx)(r.code,{children:"/tmp/druid/"}),", change the path.\nFor example: ",(0,o.jsx)(r.code,{children:"/Users/Example/druid"}),".\nIf you change the path in this step, use the updated path in all subsequent steps."]}),
        "\n",
        (0,o.jsx)(r.p,{children:"From the root of the Druid distribution, run the following:"}),
        "\n",
        (0,o.jsx)(r.pre,{children:(0,o.jsx)(r.code,{className:"language-bash",children:"export export_path=\"/tmp/druid\"\nsed -i -e $'$a\\\\\\n\\\\\\n\\\\\\n#\\\\\\n###Local export\\\\\\n#\\\\\\ndruid.export.storage.baseDir='$export_path' conf/druid/auto/_common/common.runtime.properties\n"})}),
        "\n",
        (0,o.jsxs)(r.p,{children:["This adds the following section to the Druid ",(0,o.jsx)(r.code,{children:"common.runtime.properties"})," configuration file located in ",(0,o.jsx)(r.code,{children:"conf/druid/auto/_common"}),":"]}),
        "\n",
        (0,o.jsx)(r.pre,{children:(0,o.jsx)(r.code,{children:"#\n###Local export\n#\ndruid.export.storage.baseDir=/tmp/druid/\n"})}),
        "\n",
        (0,o.jsx)(r.h3,{id:"start-druid-and-load-sample-data",children:"Start Druid and load sample data"}),
        "\n",
        (0,o.jsxs)(r.ol,{children:[
          "\n",
          (0,o.jsxs)(r.li,{children:["\n",(0,o.jsx)(r.p,{children:"From the root of the Druid distribution, launch Druid as follows:"}),"\n",(0,o.jsx)(r.pre,{children:(0,o.jsx)(r.code,{className:"language-bash",children:"./bin/start-druid\n"})}),"\n"]}),
          "\n",
          (0,o.jsxs)(r.li,{children:["\n",(0,o.jsxs)(r.p,{children:["After Druid starts, open ",(0,o.jsx)(r.a,{href:"http://localhost:8888/",children:"http://localhost:8888/"})," in your browser to access the Web Console."]}),"\n"]}),
          "\n",
          (0,o.jsxs)(r.li,{children:[
            "\n",
            (0,o.jsxs)(r.p,{children:["From the ",(0,o.jsx)(r.a,{href:"http://localhost:8888/unified-console.html#workbench",children:"Query view"}),", run the following command to load the Wikipedia example data set:"]}),
            "\n",
            (0,o.jsx)(r.pre,{children:(0,o.jsx)(r.code,{className:"language-sql",children:'REPLACE INTO "wikipedia" OVERWRITE ALL\nWITH "ext" AS (\n SELECT *\n FROM TABLE(\n EXTERN(\n \'{"type":"http","uris":["https://druid.apache.org/data/wikipedia.json.gz"]}\',\n \'{"type":"json"}\'\n )\n ) EXTEND ("isRobot" VARCHAR, "channel" VARCHAR, "timestamp" VARCHAR, "flags" VARCHAR, "isUnpatrolled" VARCHAR, "page" VARCHAR, "diffUrl" VARCHAR, "added" BIGINT, "comment" VARCHAR, "commentLength" BIGINT, "isNew" VARCHAR, "isMinor" VARCHAR, "delta" BIGINT, "isAnonymous" VARCHAR, "user" VARCHAR, "deltaBucket" BIGINT, "deleted" BIGINT, "namespace" VARCHAR, "cityName" VARCHAR, "countryName" VARCHAR, "regionIsoCode" VARCHAR, "metroCode" BIGINT, "countryIsoCode" VARCHAR, "regionName" VARCHAR)\n)\nSELECT\n TIME_PARSE("timestamp") AS "__time",\n "isRobot",\n "channel",\n "flags",\n "isUnpatrolled",\n "page",\n "diffUrl",\n "added",\n "comment",\n "commentLength",\n "isNew",\n "isMinor",\n "delta",\n "isAnonymous",\n "user",\n "deltaBucket",\n "deleted",\n "namespace",\n "cityName",\n "countryName",\n "regionIsoCode",\n "metroCode",\n "countryIsoCode",\n "regionName"\nFROM "ext"\nPARTITIONED BY DAY\n'})}),
            "\n"
          ]}),
          "\n"
        ]}),
        "\n",
        (0,o.jsx)(r.h3,{id:"query-to-export-data",children:"Query to export data"}),
        "\n",
        (0,o.jsxs)(r.p,{children:["Open a new tab and run the following query to export query results to the path:\n",(0,o.jsx)(r.code,{children:"/tmp/druid/wiki_example"}),".\nThe path must be a subdirectory of the ",(0,o.jsx)(r.code,{children:"druid.export.storage.baseDir"}),"."]}),
        "\n",
        (0,o.jsx)(r.pre,{children:(0,o.jsx)(r.code,{className:"language-sql",children:'INSERT INTO\n EXTERN(\n local(exportPath => \'/tmp/druid/wiki_example\')\n )\nAS CSV\nSELECT "channel",\n SUM("delta") AS "changes"\nFROM "wikipedia"\nGROUP BY 1\nLIMIT 10\n'})}),
        "\n",
        // Text fix: the sentence introducing the `ls` command used to stop after "the contents of".
        (0,o.jsxs)(r.p,{children:["Druid exports the results of the query to the ",(0,o.jsx)(r.code,{children:"/tmp/druid/wiki_example"})," directory.\nRun the following command to list the contents of the directory:"]}),
        "\n",
        (0,o.jsx)(r.pre,{children:(0,o.jsx)(r.code,{className:"language-bash",children:"ls /tmp/druid/wiki_example\n"})}),
        "\n",
        (0,o.jsx)(r.p,{children:"The results are a CSV file export of the data and a directory."}),
        "\n",
        (0,o.jsx)(r.h2,{id:"export-query-results-to-cloud-storage",children:"Export query results to cloud storage"}),
        "\n",
        (0,o.jsx)(r.p,{children:"The steps to export to cloud storage are similar to exporting to the local file system.\nDruid supports Amazon S3 or Google Cloud Storage (GCS) as cloud storage destinations."}),
        "\n",
        (0,o.jsxs)(r.ol,{children:[
          "\n",
          (0,o.jsxs)(r.li,{children:[
            "\n",
            (0,o.jsxs)(r.p,{children:["Enable the extension for your cloud storage destination. See ",(0,o.jsx)(r.a,{href:"/docs/33.0.0/configuration/extensions#loading-core-extensions",children:"Loading core extensions"}),"."]}),
            "\n",
            (0,o.jsxs)(r.ul,{children:[
              "\n",
              (0,o.jsxs)(r.li,{children:[(0,o.jsx)(r.strong,{children:"Amazon S3"}),": ",(0,o.jsx)(r.code,{children:"druid-s3-extensions"})]}),
              "\n",
              (0,o.jsxs)(r.li,{children:[(0,o.jsx)(r.strong,{children:"GCS"}),": ",(0,o.jsx)(r.code,{children:"google-extensions"}),"\nSee ",(0,o.jsx)(r.a,{href:"/docs/33.0.0/configuration/extensions#loading-core-extensions",children:"Loading core extensions"})," for more information."]}),
              "\n"
            ]}),
            "\n"
          ]}),
          "\n",
          (0,o.jsxs)(r.li,{children:[
            "\n",
            (0,o.jsxs)(r.p,{children:["Configure the additional properties for your cloud storage destination. Replace ",(0,o.jsx)(r.code,{children:"{CLOUD}"})," with ",(0,o.jsx)(r.code,{children:"s3"})," or ",(0,o.jsx)(r.code,{children:"google"})," accordingly:"]}),
            "\n",
            (0,o.jsxs)(r.ul,{children:[
              "\n",
              (0,o.jsxs)(r.li,{children:[(0,o.jsx)(r.code,{children:"druid.export.storage.{CLOUD}.tempLocalDir"}),": Local temporary directory where the query engine stages files to export."]}),
              "\n",
              (0,o.jsxs)(r.li,{children:[(0,o.jsx)(r.code,{children:"druid.export.storage.{CLOUD}.allowedExportPaths"}),": S3 or GS prefixes allowed as Druid export locations. For example ",(0,o.jsx)(r.code,{children:'[\\"s3://bucket1/export/\\",\\"s3://bucket2/export/\\"]'})," or ",(0,o.jsx)(r.code,{children:'[\\"gs://bucket1/export/\\", \\"gs://bucket2/export/\\"]'}),"."]}),
              "\n",
              (0,o.jsxs)(r.li,{children:[(0,o.jsx)(r.code,{children:"druid.export.storage.{CLOUD}.maxRetry"}),": Maximum number of times to attempt cloud API calls to avoid failures from transient errors."]}),
              "\n",
              (0,o.jsxs)(r.li,{children:[(0,o.jsx)(r.code,{children:"druid.export.storage.s3.chunkSize"}),": Maximum size of individual data chunks to store in the temporary directory."]}),
              "\n"
            ]}),
            "\n"
          ]}),
          "\n",
          (0,o.jsxs)(r.li,{children:["\n",(0,o.jsxs)(r.p,{children:["Verify the instance role has the correct permissions to the bucket and folders: read, write, create, and delete. See ",(0,o.jsx)(r.a,{href:"/docs/33.0.0/multi-stage-query/security#permissions-for-durable-storage",children:"Permissions for durable storage"}),"."]}),"\n"]}),
          "\n",
          (0,o.jsxs)(r.li,{children:[
            "\n",
            (0,o.jsx)(r.p,{children:"Use the query syntax for your cloud storage type. For example:"}),
            "\n",
            (0,o.jsxs)(a.A,{children:[
              (0,o.jsx)(i.A,{value:"1",label:"S3",children:(0,o.jsx)(r.pre,{children:(0,o.jsx)(r.code,{className:"language-sql",children:'INSERT INTO\nEXTERN(\n s3(bucket => \'your_bucket\', prefix => \'prefix/to/files\'))\nAS CSV\nSELECT "channel",\nSUM("delta") AS "changes"\nFROM "wikipedia"\nGROUP BY 1\nLIMIT 10\n'})})}),
              // Text fix: the GCS example was missing the parentheses around the
              // EXTERN destination; it now mirrors the S3 example above.
              (0,o.jsx)(i.A,{value:"2",label:"GCS",children:(0,o.jsx)(r.pre,{children:(0,o.jsx)(r.code,{className:"language-sql",children:'INSERT INTO\nEXTERN(\n google(bucket => \'your_bucket\', prefix => \'prefix/to/files\'))\nAS CSV\nSELECT "channel",\nSUM("delta") AS "changes"\nFROM "wikipedia"\nGROUP BY 1\nLIMIT 10\n'})})})
            ]}),
            "\n"
          ]}),
          "\n",
          (0,o.jsxs)(r.li,{children:["\n",(0,o.jsxs)(r.p,{children:["When querying, use the ",(0,o.jsx)(r.code,{children:"rowsPerPage"})," query context parameter to restrict the output file size. While it's possible to add a very large LIMIT at the end of your query to force Druid to create a single file, we don't recommend this technique."]}),"\n"]}),
          "\n"
        ]}),
        "\n",
        (0,o.jsx)(r.h2,{id:"learn-more",children:"Learn more"}),
        "\n",
        (0,o.jsx)(r.p,{children:"See the following topics for more information:"}),
        "\n",
        (0,o.jsxs)(r.ul,{children:[
          "\n",
          (0,o.jsxs)(r.li,{children:[(0,o.jsx)(r.a,{href:"/docs/33.0.0/multi-stage-query/reference#extern-to-export-to-a-destination",children:"Export to a destination"})," for a reference of the EXTERN."]}),
          "\n",
          (0,o.jsxs)(r.li,{children:[(0,o.jsx)(r.a,{href:"/docs/33.0.0/multi-stage-query/security#permissions-for-durable-storage",children:"SQL-based ingestion security"})," for cloud permission requirements for MSQ."]}),
          "\n"
        ]})
      ]})
    }

    /**
     * Default export: the page component. When a `wrapper` component is
     * available (from `s.R()` or `e.components`) the page body is rendered
     * inside it; otherwise the body is returned directly.
     *
     * @param {object} [e={}] - props forwarded to the wrapper and to the body.
     */
    function p(e={}){
      const{wrapper:r}={...(0,s.R)(),...e.components};
      return r?(0,o.jsx)(r,{...e,children:(0,o.jsx)(h,{...e})}):h(e)
    }
  },

  // ---------------------------------------------------------------------------
  // 28453: components context. Exports `R` (hook) and `x` (provider).
  // Module 96540 appears to be React (createContext / useContext / useMemo /
  // createElement are read from it) - confirm against the runtime chunk.
  // ---------------------------------------------------------------------------
  28453:(e,r,t)=>{
    t.d(r,{R:()=>a,x:()=>i});
    var n=t(96540);
    // o: default (empty) components map; s: the context holding the current map.
    const o={},s=n.createContext(o);

    /**
     * Returns the components map from context combined with `e`.
     * If `e` is a function it receives the context value and its result is
     * used; otherwise `e` is spread over the context value.
     */
    function a(e){
      const r=n.useContext(s);
      return n.useMemo((function(){return"function"==typeof e?e(r):{...r,...e}}),[r,e])
    }

    /**
     * Provider component. With `disableParentContext` the value is taken from
     * `e.components` alone (called with the empty default map when it is a
     * function); otherwise it is merged with the parent context via `a`.
     */
    function i(e){
      let r;
      return r=e.disableParentContext?"function"==typeof e.components?e.components(o):e.components||o:a(e.components),n.createElement(s.Provider,{value:r},e.children)
    }
  },

  // ---------------------------------------------------------------------------
  // 65537: <Tabs> component (export `A`). The helper modules required below are
  // defined in other chunks; only the members used here are known.
  // ---------------------------------------------------------------------------
  65537:(e,r,t)=>{
    t.d(r,{A:()=>A});
    var n=t(96540),o=t(18215),s=t(65627),a=t(56347),i=t(50372),l=t(30604),u=t(11861),d=t(78749);

    /**
     * Normalises <Tabs> children: drops "\n" text nodes and falsy entries and
     * throws when a remaining child is not a valid element whose props contain
     * a "value" key.
     *
     * @returns {Array} the accepted children (empty array when there are none).
     */
    function c(e){var r,t;return null!=(r=null==(t=n.Children.toArray(e).filter((function(e){return"\n"!==e})).map((function(e){if(!e||(0,n.isValidElement)(e)&&((r=e.props)&&"object"==typeof r&&"value"in r))return e;var r;throw new Error("Docusaurus error: Bad <Tabs> child <"+("string"==typeof e.type?e.type:e.type.name)+'>: all children of the <Tabs> component should be <TabItem>, and every <TabItem> should have a unique "value" prop.')})))?void 0:t.filter(Boolean))?r:[]}

    /**
     * Memoised list of tab descriptors ({value, label, attributes, default}).
     * Uses `e.values` when provided, otherwise derives the list from
     * `e.children`. Throws when `u.XI` reports entries with equal `value`.
     */
    function h(e){
      var r=e.values,t=e.children;
      return(0,n.useMemo)((function(){
        // The expression must stay on the same line as `return`: a line break
        // there would make the inner function return undefined.
        var e=null!=r?r:function(e){return c(e).map((function(e){var r=e.props;return{value:r.value,label:r.label,attributes:r.attributes,default:r.default}}))}(t);
        return function(e){var r=(0,u.XI)(e,(function(e,r){return e.value===r.value}));if(r.length>0)throw new Error('Docusaurus error: Duplicate values "'+r.map((function(e){return e.value})).join(", ")+'" found in <Tabs>. Every value needs to be unique.')}(e),e
      }),[r,t])
    }

    /** True when `e.value` equals the `value` of one of `e.tabValues`. */
    function p(e){var r=e.value;return e.tabValues.some((function(e){return e.value===r}))}

    /**
     * Query-string support. Resolves the search-param name from `queryString`
     * (an explicit string, or `groupId` when `queryString` is true) and returns
     * [value read through `l.aZ`, setter that rewrites `location.search` via
     * the history object from `a.W6()`]. Throws when `queryString` is true and
     * no `groupId` is given.
     */
    function x(e){var r=e.queryString,t=void 0!==r&&r,o=e.groupId,s=(0,a.W6)(),i=function(e){var r=e.queryString,t=void 0!==r&&r,n=e.groupId;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!n)throw new Error('Docusaurus error: The <Tabs> component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return null!=n?n:null}({queryString:t,groupId:o});return[(0,l.aZ)(i),(0,n.useCallback)((function(e){if(i){var r=new URLSearchParams(s.location.search);r.set(i,e),s.replace(Object.assign({},s.location,{search:r.toString()}))}}),[i,s])]}

    /**
     * State hook for <Tabs>. Returns {selectedValue, selectValue, tabValues}.
     *
     * The initial selection is `defaultValue` (which must match a tab), else
     * the tab flagged `default`, else the first tab. A valid value coming from
     * the query string or from the "docusaurus.tab.<groupId>" storage slot
     * (`d.Dv`) replaces the selection through the effect registered with `i.A`.
     * `selectValue` throws on an unknown value and otherwise updates the local
     * state, the query string and the storage slot.
     */
    function f(e){var r,t,o,s,a=e.defaultValue,l=e.queryString,u=void 0!==l&&l,c=e.groupId,f=h(e),m=(0,n.useState)((function(){return function(e){var r,t=e.defaultValue,n=e.tabValues;if(0===n.length)throw new Error("Docusaurus error: the <Tabs> component requires at least one <TabItem> children component");if(t){if(!p({value:t,tabValues:n}))throw new Error('Docusaurus error: The <Tabs> has a defaultValue "'+t+'" but none of its children has the corresponding value. Available values are: '+n.map((function(e){return e.value})).join(", ")+". If you intend to show no default tab, use defaultValue={null} instead.");return t}var o=null!=(r=n.find((function(e){return e.default})))?r:n[0];if(!o)throw new Error("Unexpected error: 0 tabValues");return o.value}({defaultValue:a,tabValues:f})})),g=m[0],j=m[1],b=x({queryString:u,groupId:c}),y=b[0],v=b[1],A=(r=function(e){return e?"docusaurus.tab."+e:null}({groupId:c}.groupId),t=(0,d.Dv)(r),o=t[0],s=t[1],[o,(0,n.useCallback)((function(e){r&&s.set(e)}),[r,s])]),R=A[0],E=A[1],T=function(){var e=null!=y?y:R;return p({value:e,tabValues:f})?e:null}();return(0,i.A)((function(){T&&j(T)}),[T]),{selectedValue:g,selectValue:(0,n.useCallback)((function(e){if(!p({value:e,tabValues:f}))throw new Error("Can't select invalid tab value="+e);j(e),v(e),E(e)}),[v,E,f]),tabValues:f}}

    var m=t(9136);
    // CSS module class names.
    const g={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};
    var j=t(74848);

    /**
     * Tab header list: a <ul role="tablist"> with one <li role="tab"> per tab.
     * Click and Enter select a tab; ArrowRight / ArrowLeft move focus to the
     * next / previous tab, wrapping around at both ends.
     */
    function b(e){var r=e.className,t=e.block,n=e.selectedValue,a=e.selectValue,i=e.tabValues,l=[],u=(0,s.a_)().blockElementScrollPositionUntilNextRender,d=function(e){var r=e.currentTarget,t=l.indexOf(r),o=i[t].value;o!==n&&(u(r),a(o))},c=function(e){var r,t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":var n,o=l.indexOf(e.currentTarget)+1;t=null!=(n=l[o])?n:l[0];break;case"ArrowLeft":var s,a=l.indexOf(e.currentTarget)-1;t=null!=(s=l[a])?s:l[l.length-1]}null==(r=t)||r.focus()};return(0,j.jsx)("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":t},r),children:i.map((function(e){var r=e.value,t=e.label,s=e.attributes;return(0,j.jsx)("li",Object.assign({role:"tab",tabIndex:n===r?0:-1,"aria-selected":n===r,ref:function(e){l.push(e)},onKeyDown:c,onClick:d},s,{className:(0,o.A)("tabs__item",g.tabItem,null==s?void 0:s.className,{"tabs__item--active":n===r}),children:null!=t?t:r}),r)}))})}

    /**
     * Tab panels. With `lazy`, only the selected child is rendered (or null if
     * none matches); otherwise every child is rendered and the non-selected
     * ones receive `hidden: true`.
     */
    function y(e){
      var r=e.lazy,t=e.children,s=e.selectedValue,a=(Array.isArray(t)?t:[t]).filter(Boolean);
      if(r){
        var i=a.find((function(e){return e.props.value===s}));
        // Keep the conditional on the same line as `return` (see note in `h`).
        return i?(0,n.cloneElement)(i,{className:(0,o.A)("margin-top--md",i.props.className)}):null
      }
      return(0,j.jsx)("div",{className:"margin-top--md",children:a.map((function(e,r){return(0,n.cloneElement)(e,{key:r,hidden:e.props.value!==s})}))})
    }

    /** Container combining the tab header list (`b`) and the panels (`y`). */
    function v(e){var r=f(e);return(0,j.jsxs)("div",{className:(0,o.A)("tabs-container",g.tabList),children:[(0,j.jsx)(b,Object.assign({},r,e)),(0,j.jsx)(y,Object.assign({},r,e))]})}

    /**
     * Exported <Tabs> component. Children are normalised with `c`, and the
     * element is keyed by the stringified result of `m.A()`.
     * NOTE(review): `m.A` is presumably a browser/hydration flag hook, so the
     * key change would remount the tabs on the client - confirm.
     */
    function A(e){var r=(0,m.A)();return(0,j.jsx)(v,Object.assign({},e,{children:c(e.children)}),String(r))}
  },

  // ---------------------------------------------------------------------------
  // 79329: <TabItem> component (export `A`).
  // ---------------------------------------------------------------------------
  79329:(e,r,t)=>{
    t.d(r,{A:()=>a});
    t(96540);
    var n=t(18215);
    // CSS module class name.
    const o={tabItem:"tabItem_Ymn6"};
    var s=t(74848);

    /** Renders a <div role="tabpanel"> around `children`, forwarding `hidden` and `className`. */
    function a(e){var r=e.children,t=e.hidden,a=e.className;return(0,s.jsx)("div",{role:"tabpanel",className:(0,n.A)(o.tabItem,a),hidden:t,children:r})}
  }

}]);