---
id: thrift
title: "Thrift"
---

To use this Apache Druid extension, [include](/docs/29.0.0/configuration/extensions#loading-extensions) `druid-thrift-extensions` in the extensions load list.

This extension enables Druid to ingest Thrift compact data online (`ByteBuffer`) and offline (SequenceFile of type `<Writable, BytesWritable>` or LzoThriftBlock files).

If you want to use a different version of Thrift, change the dependency in the pom and compile the extension yourself.

## LZO Support

If you plan to read LZO-compressed Thrift files, you will need to download version 0.4.19 of the [hadoop-lzo JAR](https://mvnrepository.com/artifact/com.hadoop.gplcompression/hadoop-lzo/0.4.19) and place it in your `extensions/druid-thrift-extensions` directory.

## Thrift Parser

| Field | Type | Description | Required |
|-------|------|-------------|----------|
| type | String | This should say `thrift`. | yes |
| parseSpec | JSON Object | Specifies the timestamp and dimensions of the data. Should be a JSON parseSpec. | yes |
| thriftJar | String | Path of the Thrift JAR. If not provided, Druid tries to find the Thrift class on the classpath. For batch ingestion, upload the Thrift JAR to HDFS first and configure `jobProperties` with `"tmpjars": "/path/to/your/thrift.jar"`. | no |
| thriftClass | String | Class name of the Thrift object to parse. | yes |
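For reference, a minimal sketch of a parser object using the field names from the table above; the jar path, class name, and `parseSpec` contents are placeholders:

```json
{
  "type": "thrift",
  "thriftJar": "/path/to/your/thrift.jar",
  "thriftClass": "org.example.YourThriftClass",
  "parseSpec": {
    "format": "json",
    "timestampSpec": { "column": "timestamp", "format": "auto" },
    "dimensionsSpec": { "dimensions": [] }
  }
}
```

Note that the batch ingestion example below specifies the jar with `jarPath` instead.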
- Batch ingestion example - `inputFormat` and `tmpjars` should be set.

This is for batch ingestion using the HadoopDruidIndexer. The `inputFormat` of `inputSpec` in `ioConfig` can be either `org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat` or `com.twitter.elephantbird.mapreduce.input.LzoThriftBlockInputFormat`. Be careful: when `LzoThriftBlockInputFormat` is used, the Thrift class must be provided twice.

```json
{
  "type": "index_hadoop",
  "spec": {
    "dataSchema": {
      "dataSource": "book",
      "parser": {
        "type": "thrift",
        "jarPath": "book.jar",
        "thriftClass": "org.apache.druid.data.input.thrift.Book",
        "protocol": "compact",
        "parseSpec": {
          "format": "json",
          ...
        }
      },
      "metricsSpec": [],
      "granularitySpec": {}
    },
    "ioConfig": {
      "type": "hadoop",
      "inputSpec": {
        "type": "static",
        "inputFormat": "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
        // "inputFormat": "com.twitter.elephantbird.mapreduce.input.LzoThriftBlockInputFormat",
        "paths": "/user/to/some/book.seq"
      }
    },
    "tuningConfig": {
      "type": "hadoop",
      "jobProperties": {
        "tmpjars": "/user/h_user_profile/du00/druid/test/book.jar",
        // "elephantbird.class.for.MultiInputFormat" : "${YOUR_THRIFT_CLASS_NAME}"
      }
    }
  }
}
```
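In the LZO case, "provided twice" means setting the class both as `thriftClass` in the parser and as the `elephantbird.class.for.MultiInputFormat` entry in `jobProperties`, as the commented lines in the example above indicate.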