blob: 72523b526716f4649cb9364b93443e505eaff52c [file] [log] [blame]
"use strict";
// Webpack chunk 7034. Registers two module factories on the shared
// `self.webpackChunk` queue:
//   15680 - MDX runtime helpers (exports `xA`, a components provider, and
//           `yg`, an element factory that routes through the provider).
//   49511 - the compiled MDX page "Apache Avro"
//           (@site/docs/latest/development/extensions-core/avro.md).
// This is build output: the module ids and export keys are a contract with
// the other chunks of the same build and must not be changed by hand.
(self.webpackChunk = self.webpackChunk || []).push([
  [7034],
  {
    15680: (__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
      __webpack_require__.d(__webpack_exports__, {
        xA: () => MDXProvider,
        yg: () => createMdxElement,
      });

      // Module 96540 is used here for createContext / useContext /
      // createElement / forwardRef / Fragment - presumably React.
      const React = __webpack_require__(96540);

      /**
       * Sets `target[key] = value`, forcing a plain enumerable/configurable/
       * writable data property when `key` is already visible on `target`.
       * @returns the same `target` object.
       */
      function defineOwn(target, key, value) {
        if (key in target) {
          Object.defineProperty(target, key, {
            value: value,
            enumerable: true,
            configurable: true,
            writable: true,
          });
        } else {
          target[key] = value;
        }
        return target;
      }

      /**
       * Lists the own string keys of `source` followed by its own symbol keys.
       * When `enumerableOnly` is truthy, non-enumerable symbols are dropped.
       */
      function ownKeys(source, enumerableOnly) {
        const keys = Object.keys(source);
        if (Object.getOwnPropertySymbols) {
          let symbols = Object.getOwnPropertySymbols(source);
          if (enumerableOnly) {
            symbols = symbols.filter(
              (symbol) => Object.getOwnPropertyDescriptor(source, symbol).enumerable,
            );
          }
          keys.push(...symbols);
        }
        return keys;
      }

      /**
       * Copies properties from each source onto `target` and returns `target`.
       * Sources at even positions (1st, 3rd, ...) are copied by value for
       * enumerable keys only; sources at odd positions are copied by property
       * descriptor. Null/undefined sources are treated as `{}`.
       */
      function mergeInto(target, ...sources) {
        sources.forEach((rawSource, position) => {
          const source = rawSource != null ? rawSource : {};
          if (position % 2 === 0) {
            ownKeys(Object(source), true).forEach((key) => {
              defineOwn(target, key, source[key]);
            });
          } else if (Object.getOwnPropertyDescriptors) {
            Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
          } else {
            ownKeys(Object(source)).forEach((key) => {
              Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
            });
          }
        });
        return target;
      }

      /**
       * Returns a new object holding the enumerable own properties of
       * `source` (string and symbol keys) whose key is not in `excluded`.
       * A null/undefined `source` yields `{}`.
       */
      function omitKeys(source, excluded) {
        if (source == null) {
          return {};
        }
        const kept = {};
        for (const key of Object.keys(source)) {
          if (excluded.indexOf(key) < 0) {
            kept[key] = source[key];
          }
        }
        if (Object.getOwnPropertySymbols) {
          for (const symbol of Object.getOwnPropertySymbols(source)) {
            if (
              excluded.indexOf(symbol) < 0 &&
              Object.prototype.propertyIsEnumerable.call(source, symbol)
            ) {
              kept[symbol] = source[symbol];
            }
          }
        }
        return kept;
      }

      const MDXContext = React.createContext({});

      /**
       * Resolves the component map: the context value, optionally replaced by
       * `components(contextValue)` when `components` is a function, or merged
       * with `components` (which wins on conflicts) when it is an object.
       */
      const useMDXComponents = (components) => {
        const fromContext = React.useContext(MDXContext);
        if (!components) {
          return fromContext;
        }
        return typeof components === "function"
          ? components(fromContext)
          : mergeInto(mergeInto({}, fromContext), components);
      };

      // `var`, not `const`: the export getter for `xA` is registered before
      // this line runs and must read `undefined` rather than throw if hit early.
      var MDXProvider = function (props) {
        const resolved = useMDXComponents(props.components);
        return React.createElement(MDXContext.Provider, { value: resolved }, props.children);
      };

      const TYPE_PROP_NAME = "mdxType";

      // Fallbacks used when neither the provider nor the element supplies a
      // component for the requested type.
      const DEFAULTS = {
        inlineCode: "code",
        wrapper: ({ children }) => React.createElement(React.Fragment, {}, children),
      };

      // The render callback is passed inline and anonymous on purpose, as in
      // the original; the display name is assigned at the end of the module.
      const MDXCreateElement = React.forwardRef((props, ref) => {
        const {
          components: componentsProp,
          mdxType,
          originalType,
          parentName,
        } = props;
        const passthrough = omitKeys(props, [
          "components",
          "mdxType",
          "originalType",
          "parentName",
        ]);
        const components = useMDXComponents(componentsProp);
        // Lookup order: "parent.type", then "type", then defaults, then the
        // original element type.
        const Component =
          components[`${parentName}.${mdxType}`] ||
          components[mdxType] ||
          DEFAULTS[mdxType] ||
          originalType;

        if (componentsProp) {
          return React.createElement(
            Component,
            mergeInto(mergeInto({ ref: ref }, passthrough), {}, { components: componentsProp }),
          );
        }
        return React.createElement(Component, mergeInto({ ref: ref }, passthrough));
      });

      /**
       * createElement wrapper. When `type` is a string or `props.mdxType` is
       * set, the element is rendered through MDXCreateElement with
       * `originalType` and `mdxType` added to a copy of the props; otherwise
       * the arguments are forwarded to React.createElement untouched.
       */
      function createMdxElement(type, props) {
        // `arguments` is kept so the exact argument count is forwarded.
        const callArgs = arguments;
        const mdxType = props && props.mdxType;
        const typeIsString = typeof type === "string";

        if (!typeIsString && !mdxType) {
          return React.createElement.apply(null, callArgs);
        }

        const argCount = callArgs.length;
        const forwarded = new Array(argCount);
        forwarded[0] = MDXCreateElement;

        const wrappedProps = {};
        for (const key in props) {
          // NOTE(review): this is the bare global `hasOwnProperty`, exactly as
          // in the original build output - not Object.prototype.hasOwnProperty.
          if (hasOwnProperty.call(props, key)) {
            wrappedProps[key] = props[key];
          }
        }
        wrappedProps.originalType = type;
        wrappedProps[TYPE_PROP_NAME] = typeIsString ? type : mdxType;
        forwarded[1] = wrappedProps;

        for (let index = 2; index < argCount; index++) {
          forwarded[index] = callArgs[index];
        }
        return React.createElement.apply(null, forwarded);
      }

      MDXCreateElement.displayName = "MDXCreateElement";
    },

    49511: (__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
      __webpack_require__.r(__webpack_exports__);
      __webpack_require__.d(__webpack_exports__, {
        assets: () => assets,
        contentTitle: () => contentTitle,
        default: () => MDXContent,
        frontMatter: () => frontMatter,
        metadata: () => metadata,
        toc: () => toc,
      });

      // Require order matters for module initialisation and matches the
      // original: 58168, 98587, 96540 (side effect only), 15680.
      // 58168's `A` is called like an object-assign helper and 98587's `A`
      // like an omit-keys helper - TODO confirm against those modules.
      const extendsHelper = __webpack_require__(58168);
      const omitHelper = __webpack_require__(98587);
      __webpack_require__(96540);
      const mdxRuntime = __webpack_require__(15680);

      const EXCLUDED_PROPS = ["components"];

      // Exported bindings stay `var` so the getters registered above never
      // observe a temporal dead zone.
      var frontMatter = { id: "avro", title: "Apache Avro" };
      var contentTitle = undefined;
      var metadata = {
        unversionedId: "development/extensions-core/avro",
        id: "development/extensions-core/avro",
        title: "Apache Avro",
        description: "\x3c!--",
        source: "@site/docs/latest/development/extensions-core/avro.md",
        sourceDirName: "development/extensions-core",
        slug: "/development/extensions-core/avro",
        permalink: "/docs/latest/development/extensions-core/avro",
        draft: false,
        tags: [],
        version: "current",
        frontMatter: { id: "avro", title: "Apache Avro" },
      };
      var assets = {};
      var toc = [
        { value: "Load the Avro extension", id: "load-the-avro-extension", level: 2 },
        { value: "Avro types", id: "avro-types", level: 2 },
        { value: "Unions", id: "unions", level: 3 },
        { value: "Binary types", id: "binary-types", level: 3 },
        { value: "Enums", id: "enums", level: 3 },
        { value: "Complex types", id: "complex-types", level: 3 },
        { value: "Logical types", id: "logical-types", level: 3 },
      ];

      const layoutProps = { toc: toc };
      const LAYOUT_TYPE = "wrapper";

      // Thin element builders over the `yg` factory from module 15680. Each
      // call creates its element immediately, so the creation order is the
      // same as in the original nested expression.
      const el = (...args) => (0, mdxRuntime.yg)(...args);
      const paragraph = (...children) => el("p", null, ...children);
      const bulletList = (...items) => el("ul", null, ...items);
      const listItem = (...children) => el("li", { parentName: "ul" }, ...children);
      const heading = (tag, id, text) => el(tag, { id }, text);
      const inlineCode = (parentName, text) => el("inlineCode", { parentName }, text);
      const link = (parentName, href, text) => el("a", { parentName, href }, text);

      /**
       * Page component for the "Apache Avro" extension docs. `components` is
       * forwarded to the layout wrapper; every other prop is spread onto it
       * after the layout props (`toc`).
       */
      function MDXContent(props) {
        const components = props.components;
        const rest = (0, omitHelper.A)(props, EXCLUDED_PROPS);

        return el(
          LAYOUT_TYPE,
          (0, extendsHelper.A)({}, layoutProps, rest, {
            components: components,
            mdxType: "MDXLayout",
          }),
          paragraph(
            "This Apache Druid extension enables Druid to ingest and parse the Apache Avro data format as follows:",
          ),
          bulletList(
            listItem(
              link("li", "/docs/latest/ingestion/data-formats#avro-stream", "Avro stream input format"),
              " for Kafka and Kinesis.",
            ),
            listItem(
              link("li", "/docs/latest/ingestion/data-formats#avro-ocf", "Avro OCF input format"),
              " for native batch ingestion.",
            ),
            listItem(
              link("li", "/docs/latest/ingestion/data-formats#avro-hadoop-parser", "Avro Hadoop Parser"),
              ".",
            ),
          ),
          paragraph(
            "The ",
            link("p", "/docs/latest/ingestion/data-formats#avro-stream-parser", "Avro Stream Parser"),
            " is deprecated.",
          ),
          heading("h2", "load-the-avro-extension", "Load the Avro extension"),
          paragraph(
            "To use the Avro extension, add the ",
            inlineCode("p", "druid-avro-extensions"),
            " to the list of loaded extensions. See ",
            link("p", "/docs/latest/configuration/extensions#loading-extensions", "Loading extensions"),
            " for more information.",
          ),
          heading("h2", "avro-types", "Avro types"),
          paragraph(
            "Druid supports most Avro types natively. This section describes some exceptions.",
          ),
          heading("h3", "unions", "Unions"),
          paragraph(
            "Druid has two modes for supporting ",
            inlineCode("p", "union"),
            " types.",
          ),
          paragraph(
            "The default mode treats unions as a single value regardless of the type of data populating the union.",
          ),
          paragraph(
            "If you want to operate on individual members of a union, set ",
            inlineCode("p", "extractUnionsByType"),
            " on the Avro parser. This configuration expands union values into nested objects according to the following rules:",
          ),
          bulletList(
            listItem(
              "Primitive types and unnamed complex types are keyed by their type name, such as ",
              inlineCode("li", "int"),
              " and ",
              inlineCode("li", "string"),
              ".",
            ),
            listItem(
              "Complex named types are keyed by their names, this includes ",
              inlineCode("li", "record"),
              ", ",
              inlineCode("li", "fixed"),
              ", and ",
              inlineCode("li", "enum"),
              ".",
            ),
            listItem("The Avro null type is elided as its value can only ever be null."),
          ),
          paragraph(
            "This is safe because an Avro union can only contain a single member of each unnamed type and duplicates of the same named type are not allowed. For example, only a single array is allowed, multiple records (or other named types) are allowed as long as each has a unique name.",
          ),
          paragraph(
            "You can then access the members of the union with a ",
            link("p", "/docs/latest/ingestion/data-formats#flattenspec", "flattenSpec"),
            " like you would for other nested types.",
          ),
          heading("h3", "binary-types", "Binary types"),
          paragraph(
            "The extension returns ",
            inlineCode("p", "bytes"),
            " and ",
            inlineCode("p", "fixed"),
            " Avro types as base64 encoded strings by default. To decode these types as UTF-8 strings, enable the ",
            inlineCode("p", "binaryAsString"),
            " option on the Avro parser.",
          ),
          heading("h3", "enums", "Enums"),
          paragraph(
            "The extension returns ",
            inlineCode("p", "enum"),
            " types as ",
            inlineCode("p", "string"),
            " of the enum symbol.",
          ),
          heading("h3", "complex-types", "Complex types"),
          paragraph(
            "You can ingest ",
            inlineCode("p", "record"),
            " and ",
            inlineCode("p", "map"),
            " types representing nested data with a ",
            link("p", "/docs/latest/ingestion/data-formats#flattenspec", "flattenSpec"),
            " on the parser.",
          ),
          heading("h3", "logical-types", "Logical types"),
          paragraph(
            "Druid does not currently support Avro logical types. It ignores them and handles fields according to the underlying primitive type.",
          ),
        );
      }

      MDXContent.isMDXComponent = true;
    },
  },
]);