// blob: 09f2008e181fd4558bf3fdd07457550fd9e91d3a [file] [log] [blame]
"use strict";(self.webpackChunk=self.webpackChunk||[]).push([[868],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>f});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t<arguments.length;t++){var n=null!=arguments[t]?arguments[t]:{};t%2?o(Object(n),!0).forEach((function(t){r(e,t,n[t])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(n)):o(Object(n)).forEach((function(t){Object.defineProperty(e,t,Object.getOwnPropertyDescriptor(n,t))}))}return e}function s(e,t){if(null==e)return{};var n,a,r=function(e,t){if(null==e)return{};var n,a,r={},o=Object.keys(e);for(a=0;a<o.length;a++)n=o[a],t.indexOf(n)>=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a<o.length;a++)n=o[a],t.indexOf(n)>=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var l=a.createContext({}),p=function(e){var t=a.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},d=function(e){var t=p(e.components);return a.createElement(l.Provider,{value:t},e.children)},u="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),u=p(n),m=r,f=u["".concat(l,".").concat(m)]||u[m]||c[m]||o;return n?a.createElement(f,i(i({ref:t},d),{},{components:n})):a.createElement(f,i({ref:t},d))}));function f(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var s={};for(var l in 
t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s[u]="string"==typeof e?e:r,i[1]=s;for(var p=2;p<o;p++)i[p]=n[p];return a.createElement.apply(null,i)}return a.createElement.apply(null,n)}m.displayName="MDXCreateElement"},17744:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>d,contentTitle:()=>l,default:()=>f,frontMatter:()=>s,metadata:()=>p,toc:()=>u});var a=n(87462),r=n(63366),o=(n(67294),n(3905)),i=["components"],s={id:"avro",title:"Apache Avro"},l=void 0,p={unversionedId:"development/extensions-core/avro",id:"development/extensions-core/avro",title:"Apache Avro",description:"\x3c!--",source:"@site/docs/latest/development/extensions-core/avro.md",sourceDirName:"development/extensions-core",slug:"/development/extensions-core/avro",permalink:"/docs/latest/development/extensions-core/avro",draft:!1,tags:[],version:"current",frontMatter:{id:"avro",title:"Apache Avro"}},d={},u=[{value:"Load the Avro extension",id:"load-the-avro-extension",level:2},{value:"Avro types",id:"avro-types",level:2},{value:"Unions",id:"unions",level:3},{value:"Binary types",id:"binary-types",level:3},{value:"Enums",id:"enums",level:3},{value:"Complex types",id:"complex-types",level:3},{value:"Logical types",id:"logical-types",level:3}],c={toc:u},m="wrapper";function f(e){var t=e.components,n=(0,r.Z)(e,i);return(0,o.kt)(m,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,o.kt)("p",null,"This Apache Druid extension enables Druid to ingest and parse the Apache Avro data format as follows:"),(0,o.kt)("ul",null,(0,o.kt)("li",{parentName:"ul"},(0,o.kt)("a",{parentName:"li",href:"/docs/latest/ingestion/data-formats#avro-stream"},"Avro stream input format")," for Kafka and Kinesis."),(0,o.kt)("li",{parentName:"ul"},(0,o.kt)("a",{parentName:"li",href:"/docs/latest/ingestion/data-formats#avro-ocf"},"Avro OCF input format")," for native batch ingestion."),(0,o.kt)("li",{parentName:"ul"},(0,o.kt)("a",{parentName:"li",href:"/docs/latest/ingestion/data-formats#avro-hadoop-parser"},"Avro Hadoop 
Parser"),".")),(0,o.kt)("p",null,"The ",(0,o.kt)("a",{parentName:"p",href:"/docs/latest/ingestion/data-formats#avro-stream-parser"},"Avro Stream Parser")," is deprecated."),(0,o.kt)("h2",{id:"load-the-avro-extension"},"Load the Avro extension"),(0,o.kt)("p",null,"To use the Avro extension, add the ",(0,o.kt)("inlineCode",{parentName:"p"},"druid-avro-extensions")," to the list of loaded extensions. See ",(0,o.kt)("a",{parentName:"p",href:"/docs/latest/configuration/extensions#loading-extensions"},"Loading extensions")," for more information."),(0,o.kt)("h2",{id:"avro-types"},"Avro types"),(0,o.kt)("p",null,"Druid supports most Avro types natively. This section describes some exceptions."),(0,o.kt)("h3",{id:"unions"},"Unions"),(0,o.kt)("p",null,"Druid has two modes for supporting ",(0,o.kt)("inlineCode",{parentName:"p"},"union")," types."),(0,o.kt)("p",null,"The default mode treats unions as a single value regardless of the type of data populating the union."),(0,o.kt)("p",null,"If you want to operate on individual members of a union, set ",(0,o.kt)("inlineCode",{parentName:"p"},"extractUnionsByType")," on the Avro parser. 
This configuration expands union values into nested objects according to the following rules:"),(0,o.kt)("ul",null,(0,o.kt)("li",{parentName:"ul"},"Primitive types and unnamed complex types are keyed by their type name, such as ",(0,o.kt)("inlineCode",{parentName:"li"},"int")," and ",(0,o.kt)("inlineCode",{parentName:"li"},"string"),"."),(0,o.kt)("li",{parentName:"ul"},"Complex named types are keyed by their names, this includes ",(0,o.kt)("inlineCode",{parentName:"li"},"record"),", ",(0,o.kt)("inlineCode",{parentName:"li"},"fixed"),", and ",(0,o.kt)("inlineCode",{parentName:"li"},"enum"),"."),(0,o.kt)("li",{parentName:"ul"},"The Avro null type is elided as its value can only ever be null.")),(0,o.kt)("p",null,"This is safe because an Avro union can only contain a single member of each unnamed type and duplicates of the same named type are not allowed. For example, only a single array is allowed, multiple records (or other named types) are allowed as long as each has a unique name."),(0,o.kt)("p",null,"You can then access the members of the union with a ",(0,o.kt)("a",{parentName:"p",href:"/docs/latest/ingestion/data-formats#flattenspec"},"flattenSpec")," like you would for other nested types."),(0,o.kt)("h3",{id:"binary-types"},"Binary types"),(0,o.kt)("p",null,"The extension returns ",(0,o.kt)("inlineCode",{parentName:"p"},"bytes")," and ",(0,o.kt)("inlineCode",{parentName:"p"},"fixed")," Avro types as base64 encoded strings by default. 
To decode these types as UTF-8 strings, enable the ",(0,o.kt)("inlineCode",{parentName:"p"},"binaryAsString")," option on the Avro parser."),(0,o.kt)("h3",{id:"enums"},"Enums"),(0,o.kt)("p",null,"The extension returns ",(0,o.kt)("inlineCode",{parentName:"p"},"enum")," types as ",(0,o.kt)("inlineCode",{parentName:"p"},"string")," of the enum symbol."),(0,o.kt)("h3",{id:"complex-types"},"Complex types"),(0,o.kt)("p",null,"You can ingest ",(0,o.kt)("inlineCode",{parentName:"p"},"record")," and ",(0,o.kt)("inlineCode",{parentName:"p"},"map")," types representing nested data with a ",(0,o.kt)("a",{parentName:"p",href:"/docs/latest/ingestion/data-formats#flattenspec"},"flattenSpec")," on the parser."),(0,o.kt)("h3",{id:"logical-types"},"Logical types"),(0,o.kt)("p",null,"Druid does not currently support Avro logical types. It ignores them and handles fields according to the underlying primitive type."))}f.isMDXComponent=!0}}]);