blob: a825199b338af7b0ebf6fe612ee77cc5819f7efd [file] [log] [blame]
"use strict";
/*
 * Webpack chunk 389.
 *
 * Registers two module factories on the shared `self.webpackChunk` queue:
 *   - 15680: MDX runtime helpers (component context, provider and the
 *            `createElement` wrapper used by compiled MDX pages).
 *   - 65665: the compiled "Apache Avro" documentation page
 *            (source: @site/docs/29.0.0/development/extensions-core/avro.md).
 *
 * Each factory receives (module, exports, require) from the webpack runtime.
 */
(self.webpackChunk = self.webpackChunk || []).push([
  [389],
  {
    /* ------------------------------------------------------------------ *
     * Module 15680 - MDX runtime helpers.
     * Exports: `xA` (provider component) and `yg` (createElement wrapper).
     * ------------------------------------------------------------------ */
    15680: (_module, exports, webpackRequire) => {
      webpackRequire.d(exports, {
        xA: () => MDXProvider,
        yg: () => createElement,
      });

      // NOTE(review): module 96540 is presumably React - it is used here for
      // createContext / useContext / createElement / forwardRef / Fragment.
      const React = webpackRequire(96540);

      /**
       * Sets `target[key] = value`. When the key already exists (own or
       * inherited) it is redefined as a plain enumerable/configurable/writable
       * data property; otherwise a normal assignment is used.
       * Returns `target`.
       */
      function setProperty(target, key, value) {
        if (key in target) {
          Object.defineProperty(target, key, {
            value,
            enumerable: true,
            configurable: true,
            writable: true,
          });
        } else {
          target[key] = value;
        }
        return target;
      }

      /**
       * Lists the enumerable string keys of `object` followed by its own
       * symbols. When `enumerableOnly` is truthy, non-enumerable symbols are
       * dropped.
       */
      function ownKeys(object, enumerableOnly) {
        const keys = Object.keys(object);
        if (Object.getOwnPropertySymbols) {
          let symbols = Object.getOwnPropertySymbols(object);
          if (enumerableOnly) {
            symbols = symbols.filter(
              (symbol) => Object.getOwnPropertyDescriptor(object, symbol).enumerable
            );
          }
          keys.push(...symbols);
        }
        return keys;
      }

      /**
       * Merges every additional argument into `target` and returns `target`.
       * Odd-positioned sources are copied value by value; even-positioned
       * sources are copied by property descriptor. Null/undefined sources are
       * treated as empty objects.
       */
      function mergeInto(target) {
        for (let position = 1; position < arguments.length; position++) {
          const source = arguments[position] != null ? arguments[position] : {};
          if (position % 2) {
            ownKeys(Object(source), true).forEach((key) => {
              setProperty(target, key, source[key]);
            });
          } else if (Object.getOwnPropertyDescriptors) {
            Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
          } else {
            ownKeys(Object(source)).forEach((key) => {
              Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
            });
          }
        }
        return target;
      }

      /**
       * Returns a shallow copy of `source` without the keys listed in
       * `excluded`. Enumerable string keys and enumerable own symbols are
       * copied. A null/undefined `source` yields an empty object.
       */
      function omitKeys(source, excluded) {
        if (source == null) {
          return {};
        }
        const result = {};
        for (const key of Object.keys(source)) {
          if (excluded.indexOf(key) < 0) {
            result[key] = source[key];
          }
        }
        if (Object.getOwnPropertySymbols) {
          for (const symbol of Object.getOwnPropertySymbols(source)) {
            if (excluded.indexOf(symbol) >= 0) {
              continue;
            }
            if (Object.prototype.propertyIsEnumerable.call(source, symbol)) {
              result[symbol] = source[symbol];
            }
          }
        }
        return result;
      }

      // Context carrying the component map shared with nested MDX content.
      const MDXContext = React.createContext({});

      /**
       * Resolves the effective component map: the context value as-is when
       * `components` is falsy, the result of calling `components(context)`
       * when it is a function, otherwise the context value overlaid with
       * `components`.
       */
      const useMDXComponents = (components) => {
        const inherited = React.useContext(MDXContext);
        if (!components) {
          return inherited;
        }
        return typeof components === "function"
          ? components(inherited)
          : mergeInto(mergeInto({}, inherited), components);
      };

      /** Provider that publishes the resolved component map to its children. */
      const MDXProvider = (props) => {
        const resolved = useMDXComponents(props.components);
        return React.createElement(MDXContext.Provider, { value: resolved }, props.children);
      };

      // Prop name under which the MDX element type is stored.
      const MDX_TYPE_KEY = "mdxType";

      // Fallbacks used when neither context nor props supply a component.
      const DEFAULT_COMPONENTS = {
        inlineCode: "code",
        wrapper: ({ children }) => React.createElement(React.Fragment, {}, children),
      };

      /**
       * Ref-forwarding component that picks the concrete component to render.
       * Lookup order: "<parentName>.<mdxType>" in the resolved map, then
       * "<mdxType>" in the resolved map, then the defaults above, and finally
       * `originalType`.
       */
      const MDXCreateElement = React.forwardRef((props, ref) => {
        const { components, mdxType, originalType, parentName } = props;
        const forwardedProps = omitKeys(props, [
          "components",
          "mdxType",
          "originalType",
          "parentName",
        ]);
        const resolved = useMDXComponents(components);
        const Component =
          resolved[`${parentName}.${mdxType}`] ||
          resolved[mdxType] ||
          DEFAULT_COMPONENTS[mdxType] ||
          originalType;

        if (components) {
          return React.createElement(
            Component,
            mergeInto(mergeInto({ ref }, forwardedProps), {}, { components })
          );
        }
        return React.createElement(Component, mergeInto({ ref }, forwardedProps));
      });

      /**
       * Drop-in replacement for React.createElement. Calls whose `type` is a
       * string, or whose props carry an `mdxType`, are routed through
       * MDXCreateElement (with `originalType` and `mdxType` recorded in the
       * props); every other call is passed to React.createElement unchanged.
       */
      function createElement(type, props) {
        const callArgs = arguments;
        const mdxType = props && props.mdxType;

        if (typeof type !== "string" && !mdxType) {
          return React.createElement.apply(null, callArgs);
        }

        const argCount = callArgs.length;
        const forwarded = new Array(argCount);
        forwarded[0] = MDXCreateElement;

        const wrappedProps = {};
        for (const key in props) {
          // Bare `hasOwnProperty` is resolved through the global object.
          if (hasOwnProperty.call(props, key)) {
            wrappedProps[key] = props[key];
          }
        }
        wrappedProps.originalType = type;
        wrappedProps[MDX_TYPE_KEY] = typeof type === "string" ? type : mdxType;
        forwarded[1] = wrappedProps;

        // Children (arguments 2..n) are forwarded untouched.
        for (let index = 2; index < argCount; index++) {
          forwarded[index] = callArgs[index];
        }
        return React.createElement.apply(null, forwarded);
      }

      MDXCreateElement.displayName = "MDXCreateElement";
    },

    /* ------------------------------------------------------------------ *
     * Module 65665 - compiled "Apache Avro" documentation page.
     * Exports: assets, contentTitle, default (page component), frontMatter,
     * metadata and toc.
     * ------------------------------------------------------------------ */
    65665: (_module, exports, webpackRequire) => {
      webpackRequire.r(exports);
      webpackRequire.d(exports, {
        assets: () => assets,
        contentTitle: () => contentTitle,
        default: () => MDXContent,
        frontMatter: () => frontMatter,
        metadata: () => metadata,
        toc: () => toc,
      });

      // NOTE(review): 58168 / 98587 look like object-merge and key-omission
      // helpers (each exposes `.A`); confirm against the runtime chunk.
      const mergeHelper = webpackRequire(58168);
      const omitHelper = webpackRequire(98587);
      webpackRequire(96540); // required for its side effects only; result unused
      const mdx = webpackRequire(15680);

      // Props that must not be forwarded to the layout wrapper.
      const EXCLUDED_PROPS = ["components"];

      const frontMatter = { id: "avro", title: "Apache Avro" };
      const contentTitle = undefined;

      const metadata = {
        unversionedId: "development/extensions-core/avro",
        id: "development/extensions-core/avro",
        title: "Apache Avro",
        description: "\x3c!--",
        source: "@site/docs/29.0.0/development/extensions-core/avro.md",
        sourceDirName: "development/extensions-core",
        slug: "/development/extensions-core/avro",
        permalink: "/docs/29.0.0/development/extensions-core/avro",
        draft: false,
        tags: [],
        version: "current",
        frontMatter: { id: "avro", title: "Apache Avro" },
      };

      const assets = {};

      // Table of contents: one entry per heading rendered below.
      const toc = [
        { value: "Load the Avro extension", id: "load-the-avro-extension", level: 2 },
        { value: "Avro types", id: "avro-types", level: 2 },
        { value: "Unions", id: "unions", level: 3 },
        { value: "Binary types", id: "binary-types", level: 3 },
        { value: "Enums", id: "enums", level: 3 },
        { value: "Complex types", id: "complex-types", level: 3 },
        { value: "Logical types", id: "logical-types", level: 3 },
      ];

      const layoutProps = { toc };
      const WRAPPER_TYPE = "wrapper";

      /**
       * Page component. Renders the document body inside the "wrapper"
       * element, forwarding every prop except `components` (which is passed
       * through explicitly) together with the table of contents.
       */
      function MDXContent(props) {
        const components = props.components;
        const remainingProps = (0, omitHelper.A)(props, EXCLUDED_PROPS);
        const h = mdx.yg;

        // Small builders for the two inline elements used throughout the page.
        const code = (parentName, text) => h("inlineCode", { parentName }, text);
        const link = (parentName, href, text) => h("a", { parentName, href }, text);

        return h(
          WRAPPER_TYPE,
          (0, mergeHelper.A)({}, layoutProps, remainingProps, {
            components,
            mdxType: "MDXLayout",
          }),
          h(
            "p",
            null,
            "This Apache Druid extension enables Druid to ingest and parse the Apache Avro data format as follows:"
          ),
          h(
            "ul",
            null,
            h(
              "li",
              { parentName: "ul" },
              link("li", "/docs/29.0.0/ingestion/data-formats#avro-stream", "Avro stream input format"),
              " for Kafka and Kinesis."
            ),
            h(
              "li",
              { parentName: "ul" },
              link("li", "/docs/29.0.0/ingestion/data-formats#avro-ocf", "Avro OCF input format"),
              " for native batch ingestion."
            ),
            h(
              "li",
              { parentName: "ul" },
              link("li", "/docs/29.0.0/ingestion/data-formats#avro-hadoop-parser", "Avro Hadoop Parser"),
              "."
            )
          ),
          h(
            "p",
            null,
            "The ",
            link("p", "/docs/29.0.0/ingestion/data-formats#avro-stream-parser", "Avro Stream Parser"),
            " is deprecated."
          ),
          h("h2", { id: "load-the-avro-extension" }, "Load the Avro extension"),
          h(
            "p",
            null,
            "To use the Avro extension, add the ",
            code("p", "druid-avro-extensions"),
            " to the list of loaded extensions. See ",
            link("p", "/docs/29.0.0/configuration/extensions#loading-extensions", "Loading extensions"),
            " for more information."
          ),
          h("h2", { id: "avro-types" }, "Avro types"),
          h(
            "p",
            null,
            "Druid supports most Avro types natively. This section describes some exceptions."
          ),
          h("h3", { id: "unions" }, "Unions"),
          h("p", null, "Druid has two modes for supporting ", code("p", "union"), " types."),
          h(
            "p",
            null,
            "The default mode treats unions as a single value regardless of the type of data populating the union."
          ),
          h(
            "p",
            null,
            "If you want to operate on individual members of a union, set ",
            code("p", "extractUnionsByType"),
            " on the Avro parser. This configuration expands union values into nested objects according to the following rules:"
          ),
          h(
            "ul",
            null,
            h(
              "li",
              { parentName: "ul" },
              "Primitive types and unnamed complex types are keyed by their type name, such as ",
              code("li", "int"),
              " and ",
              code("li", "string"),
              "."
            ),
            h(
              "li",
              { parentName: "ul" },
              "Complex named types are keyed by their names, this includes ",
              code("li", "record"),
              ", ",
              code("li", "fixed"),
              ", and ",
              code("li", "enum"),
              "."
            ),
            h(
              "li",
              { parentName: "ul" },
              "The Avro null type is elided as its value can only ever be null."
            )
          ),
          h(
            "p",
            null,
            "This is safe because an Avro union can only contain a single member of each unnamed type and duplicates of the same named type are not allowed. For example, only a single array is allowed, multiple records (or other named types) are allowed as long as each has a unique name."
          ),
          h(
            "p",
            null,
            "You can then access the members of the union with a ",
            link("p", "/docs/29.0.0/ingestion/data-formats#flattenspec", "flattenSpec"),
            " like you would for other nested types."
          ),
          h("h3", { id: "binary-types" }, "Binary types"),
          h(
            "p",
            null,
            "The extension returns ",
            code("p", "bytes"),
            " and ",
            code("p", "fixed"),
            " Avro types as base64 encoded strings by default. To decode these types as UTF-8 strings, enable the ",
            code("p", "binaryAsString"),
            " option on the Avro parser."
          ),
          h("h3", { id: "enums" }, "Enums"),
          h(
            "p",
            null,
            "The extension returns ",
            code("p", "enum"),
            " types as ",
            code("p", "string"),
            " of the enum symbol."
          ),
          h("h3", { id: "complex-types" }, "Complex types"),
          h(
            "p",
            null,
            "You can ingest ",
            code("p", "record"),
            " and ",
            code("p", "map"),
            " types representing nested data with a ",
            link("p", "/docs/29.0.0/ingestion/data-formats#flattenspec", "flattenSpec"),
            " on the parser."
          ),
          h("h3", { id: "logical-types" }, "Logical types"),
          h(
            "p",
            null,
            "Druid does not currently support Avro logical types. It ignores them and handles fields according to the underlying primitive type."
          )
        );
      }

      MDXContent.isMDXComponent = true;
    },
  },
]);