// blob: 036b5873e5e4f33f54c897599a4e0035c319226b [file] [log] [blame]
"use strict";(self.webpackChunk=self.webpackChunk||[]).push([[7034],{19315:(e,n,s)=>{s.r(n),s.d(n,{assets:()=>d,contentTitle:()=>a,default:()=>h,frontMatter:()=>i,metadata:()=>t,toc:()=>l});const t=JSON.parse('{"id":"development/extensions-core/avro","title":"Apache Avro","description":"\x3c!--","source":"@site/docs/latest/development/extensions-core/avro.md","sourceDirName":"development/extensions-core","slug":"/development/extensions-core/avro","permalink":"/docs/latest/development/extensions-core/avro","draft":false,"unlisted":false,"tags":[],"version":"current","frontMatter":{"id":"avro","title":"Apache Avro"}}');var o=s(74848),r=s(28453);const i={id:"avro",title:"Apache Avro"},a=void 0,d={},l=[{value:"Load the Avro extension",id:"load-the-avro-extension",level:2},{value:"Avro types",id:"avro-types",level:2},{value:"Unions",id:"unions",level:3},{value:"Binary types",id:"binary-types",level:3},{value:"Enums",id:"enums",level:3},{value:"Complex types",id:"complex-types",level:3},{value:"Logical types",id:"logical-types",level:3}];function c(e){const n={a:"a",code:"code",h2:"h2",h3:"h3",li:"li",p:"p",ul:"ul",...(0,r.R)(),...e.components};return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)(n.p,{children:"This Apache Druid extension enables Druid to ingest and parse the Apache Avro data format as follows:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:[(0,o.jsx)(n.a,{href:"/docs/latest/ingestion/data-formats#avro-stream",children:"Avro stream input format"})," for Kafka and Kinesis."]}),"\n",(0,o.jsxs)(n.li,{children:[(0,o.jsx)(n.a,{href:"/docs/latest/ingestion/data-formats#avro-ocf",children:"Avro OCF input format"})," for native batch ingestion."]}),"\n",(0,o.jsxs)(n.li,{children:[(0,o.jsx)(n.a,{href:"/docs/latest/ingestion/data-formats#avro-hadoop-parser",children:"Avro Hadoop Parser"}),"."]}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["The ",(0,o.jsx)(n.a,{href:"/docs/latest/ingestion/data-formats#avro-stream-parser",children:"Avro Stream Parser"})," 
is deprecated."]}),"\n",(0,o.jsx)(n.h2,{id:"load-the-avro-extension",children:"Load the Avro extension"}),"\n",(0,o.jsxs)(n.p,{children:["To use the Avro extension, add the ",(0,o.jsx)(n.code,{children:"druid-avro-extensions"})," to the list of loaded extensions. See ",(0,o.jsx)(n.a,{href:"/docs/latest/configuration/extensions#loading-extensions",children:"Loading extensions"})," for more information."]}),"\n",(0,o.jsx)(n.h2,{id:"avro-types",children:"Avro types"}),"\n",(0,o.jsx)(n.p,{children:"Druid supports most Avro types natively. This section describes some exceptions."}),"\n",(0,o.jsx)(n.h3,{id:"unions",children:"Unions"}),"\n",(0,o.jsxs)(n.p,{children:["Druid has two modes for supporting ",(0,o.jsx)(n.code,{children:"union"})," types."]}),"\n",(0,o.jsx)(n.p,{children:"The default mode treats unions as a single value regardless of the type of data populating the union."}),"\n",(0,o.jsxs)(n.p,{children:["If you want to operate on individual members of a union, set ",(0,o.jsx)(n.code,{children:"extractUnionsByType"})," on the Avro parser. This configuration expands union values into nested objects according to the following rules:"]}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsxs)(n.li,{children:["Primitive types and unnamed complex types are keyed by their type name, such as ",(0,o.jsx)(n.code,{children:"int"})," and ",(0,o.jsx)(n.code,{children:"string"}),"."]}),"\n",(0,o.jsxs)(n.li,{children:["Complex named types are keyed by their names, this includes ",(0,o.jsx)(n.code,{children:"record"}),", ",(0,o.jsx)(n.code,{children:"fixed"}),", and ",(0,o.jsx)(n.code,{children:"enum"}),"."]}),"\n",(0,o.jsx)(n.li,{children:"The Avro null type is elided as its value can only ever be null."}),"\n"]}),"\n",(0,o.jsx)(n.p,{children:"This is safe because an Avro union can only contain a single member of each unnamed type and duplicates of the same named type are not allowed. 
For example, only a single array is allowed, multiple records (or other named types) are allowed as long as each has a unique name."}),"\n",(0,o.jsxs)(n.p,{children:["You can then access the members of the union with a ",(0,o.jsx)(n.a,{href:"/docs/latest/ingestion/data-formats#flattenspec",children:"flattenSpec"})," like you would for other nested types."]}),"\n",(0,o.jsx)(n.h3,{id:"binary-types",children:"Binary types"}),"\n",(0,o.jsxs)(n.p,{children:["The extension returns ",(0,o.jsx)(n.code,{children:"bytes"})," and ",(0,o.jsx)(n.code,{children:"fixed"})," Avro types as base64 encoded strings by default. To decode these types as UTF-8 strings, enable the ",(0,o.jsx)(n.code,{children:"binaryAsString"})," option on the Avro parser."]}),"\n",(0,o.jsx)(n.h3,{id:"enums",children:"Enums"}),"\n",(0,o.jsxs)(n.p,{children:["The extension returns ",(0,o.jsx)(n.code,{children:"enum"})," types as ",(0,o.jsx)(n.code,{children:"string"})," of the enum symbol."]}),"\n",(0,o.jsx)(n.h3,{id:"complex-types",children:"Complex types"}),"\n",(0,o.jsxs)(n.p,{children:["You can ingest ",(0,o.jsx)(n.code,{children:"record"})," and ",(0,o.jsx)(n.code,{children:"map"})," types representing nested data with a ",(0,o.jsx)(n.a,{href:"/docs/latest/ingestion/data-formats#flattenspec",children:"flattenSpec"})," on the parser."]}),"\n",(0,o.jsx)(n.h3,{id:"logical-types",children:"Logical types"}),"\n",(0,o.jsx)(n.p,{children:"Druid does not currently support Avro logical types. 
It ignores them and handles fields according to the underlying primitive type."})]})}function h(e={}){const{wrapper:n}={...(0,r.R)(),...e.components};return n?(0,o.jsx)(n,{...e,children:(0,o.jsx)(c,{...e})}):c(e)}},28453:(e,n,s)=>{s.d(n,{R:()=>i,x:()=>a});var t=s(96540);const o={},r=t.createContext(o);function i(e){const n=t.useContext(r);return t.useMemo((function(){return"function"==typeof e?e(n):{...n,...e}}),[n,e])}function a(e){let n;return n=e.disableParentContext?"function"==typeof e.components?e.components(o):e.components||o:i(e.components),t.createElement(r.Provider,{value:n},e.children)}}}]);