// blob: 2d226e629d30b2cc58401d159d3b50605d73186b [file] [log] [blame]
/*
 * Generated webpack chunk (id 87827) registered on the global
 * `self.webpackChunkdoris_website` array. It looks like minified build output
 * (presumably produced by an MDX/Docusaurus build -- confirm), so content
 * fixes belong in the source document rather than in this file.
 *
 * Module 15680 -- MDX element helpers built on module 296540 (used here for
 * createContext/useContext/createElement/forwardRef/Fragment; presumably React):
 *   - export `xA`: provider component that merges `props.components` into the
 *     components context and renders `props.children` inside it.
 *   - export `yg`: createElement wrapper. When the type is a string or the
 *     props carry `mdxType`, it renders through the "MDXCreateElement"
 *     forwardRef component, which resolves the element from the merged
 *     components ("<parentName>.<mdxType>", then "<mdxType>"), then the
 *     built-in defaults (`inlineCode` -> "code", `wrapper` -> Fragment), and
 *     finally falls back to the original type. Otherwise it forwards its
 *     arguments to createElement unchanged.
 *
 * Module 105796 -- compiled page "Alibaba Cloud DLF" (metadata.source is
 * "@site/versioned_docs/version-2.1/lakehouse/datalake-analytics/dlf.md"):
 *   - exports `frontMatter`, `contentTitle` (undefined), `metadata`, `assets`
 *     (empty object), `toc` (five heading entries) and `default`.
 *   - the default export renders the page body under the "wrapper" element
 *     with mdxType "MDXLayout" and is flagged with `isMDXComponent = true`.
 *
 * NOTE(review): in this copy some physical line breaks fall inside string
 * literals; presumably a line-wrapping artifact -- confirm against the built file.
 */
"use strict";(self.webpackChunkdoris_website=self.webpackChunkdoris_website||[]).push([[87827],{15680:(e,a,n)=>{n.d(a,{xA:()=>c,yg:()=>y});var t=n(296540);function l(e,a,n){return a in e?Object.defineProperty(e,a,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[a]=n,e}function o(e,a){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(e);a&&(t=t.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),n.push.apply(n,t)}return n}function i(e){for(var a=1;a<arguments.length;a++){var n=null!=arguments[a]?arguments[a]:{};a%2?o(Object(n),!0).forEach((function(a){l(e,a,n[a])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(n)):o(Object(n)).forEach((function(a){Object.defineProperty(e,a,Object.getOwnPropertyDescriptor(n,a))}))}return e}function r(e,a){if(null==e)return{};var n,t,l=function(e,a){if(null==e)return{};var n,t,l={},o=Object.keys(e);for(t=0;t<o.length;t++)n=o[t],a.indexOf(n)>=0||(l[n]=e[n]);return l}(e,a);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(t=0;t<o.length;t++)n=o[t],a.indexOf(n)>=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var d=t.createContext({}),s=function(e){var a=t.useContext(d),n=a;return e&&(n="function"==typeof e?e(a):i(i({},a),e)),n},c=function(e){var a=s(e.components);return t.createElement(d.Provider,{value:a},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var a=e.children;return t.createElement(t.Fragment,{},a)}},g=t.forwardRef((function(e,a){var n=e.components,l=e.mdxType,o=e.originalType,d=e.parentName,c=r(e,["components","mdxType","originalType","parentName"]),p=s(n),g=l,y=p["".concat(d,".").concat(g)]||p[g]||u[g]||o;return n?t.createElement(y,i(i({ref:a},c),{},{components:n})):t.createElement(y,i({ref:a},c))}));function y(e,a){var n=arguments,l=a&&a.mdxType;if("string"==typeof e||l){var o=n.length,i=new Array(o);i[0]=g;var r={};for(var d in 
a)hasOwnProperty.call(a,d)&&(r[d]=a[d]);r.originalType=e,r[p]="string"==typeof e?e:l,i[1]=r;for(var s=2;s<o;s++)i[s]=n[s];return t.createElement.apply(null,i)}return t.createElement.apply(null,n)}g.displayName="MDXCreateElement"},105796:(e,a,n)=>{n.r(a),n.d(a,{assets:()=>d,contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>r,toc:()=>s});var t=n(58168),l=(n(296540),n(15680));const o={title:"Alibaba Cloud DLF",language:"en"},i=void 0,r={unversionedId:"lakehouse/datalake-analytics/dlf",id:"version-2.1/lakehouse/datalake-analytics/dlf",title:"Alibaba Cloud DLF",description:"\x3c!--",source:"@site/versioned_docs/version-2.1/lakehouse/datalake-analytics/dlf.md",sourceDirName:"lakehouse/datalake-analytics",slug:"/lakehouse/datalake-analytics/dlf",permalink:"/docs/lakehouse/datalake-analytics/dlf",draft:!1,tags:[],version:"2.1",frontMatter:{title:"Alibaba Cloud DLF",language:"en"},sidebar:"docs",previous:{title:"Paimon",permalink:"/docs/lakehouse/datalake-analytics/paimon"},next:{title:"Hive",permalink:"/docs/lakehouse/datalake-building/hive-build"}},d={},s=[{value:"Connect to DLF",id:"connect-to-dlf",level:2},{value:"Create a DLF Catalog.",id:"create-a-dlf-catalog",level:3},{value:"Use OSS-HDFS as the datasource",id:"use-oss-hdfs-as-the-datasource",level:3},{value:"DLF Iceberg Catalog",id:"dlf-iceberg-catalog",level:3},{value:"Column type mapping",id:"column-type-mapping",level:2}],c={toc:s},p="wrapper";function u(e){let{components:a,...n}=e;return(0,l.yg)(p,(0,t.A)({},c,n,{components:a,mdxType:"MDXLayout"}),(0,l.yg)("h1",{id:"alibaba-cloud-dlf"},"Alibaba Cloud DLF"),(0,l.yg)("p",null,"Data Lake Formation (DLF) is the unified metadata management service of Alibaba Cloud. 
It is compatible with the Hive Metastore protocol."),(0,l.yg)("blockquote",null,(0,l.yg)("p",{parentName:"blockquote"},(0,l.yg)("a",{parentName:"p",href:"https://www.alibabacloud.com/product/datalake-formation"},"What is DLF"))),(0,l.yg)("p",null,"Doris can access DLF the same way as it accesses Hive Metastore."),(0,l.yg)("h2",{id:"connect-to-dlf"},"Connect to DLF"),(0,l.yg)("h3",{id:"create-a-dlf-catalog"},"Create a DLF Catalog."),(0,l.yg)("pre",null,(0,l.yg)("code",{parentName:"pre",className:"language-sql"},'CREATE CATALOG dlf PROPERTIES (\n "type"="hms",\n "hive.metastore.type" = "dlf",\n "dlf.proxy.mode" = "DLF_ONLY",\n "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",\n "dlf.region" = "cn-beijing",\n "dlf.uid" = "uid",\n "dlf.catalog.id" = "catalog_id", //optional\n "dlf.access_key" = "ak",\n "dlf.secret_key" = "sk"\n);\n')),(0,l.yg)("p",null,(0,l.yg)("inlineCode",{parentName:"p"},"type")," should always be ",(0,l.yg)("inlineCode",{parentName:"p"},"hms"),". If you need to access Alibaba Cloud OSS on the public network, can add ",(0,l.yg)("inlineCode",{parentName:"p"},'"dlf.access.public"="true"'),"."),(0,l.yg)("ul",null,(0,l.yg)("li",{parentName:"ul"},(0,l.yg)("inlineCode",{parentName:"li"},"dlf.endpoint"),": DLF Endpoint. See ",(0,l.yg)("a",{parentName:"li",href:"https://www.alibabacloud.com/help/en/data-lake-formation/latest/regions-and-endpoints"},"Regions and Endpoints of DLF"),"."),(0,l.yg)("li",{parentName:"ul"},(0,l.yg)("inlineCode",{parentName:"li"},"dlf.region"),": DLF Region. See ",(0,l.yg)("a",{parentName:"li",href:"https://www.alibabacloud.com/help/en/data-lake-formation/latest/regions-and-endpoints"},"Regions and Endpoints of DLF"),"."),(0,l.yg)("li",{parentName:"ul"},(0,l.yg)("inlineCode",{parentName:"li"},"dlf.uid"),': Alibaba Cloud account. You can find the "Account ID" in the upper right corner on the Alibaba Cloud console.'),(0,l.yg)("li",{parentName:"ul"},(0,l.yg)("inlineCode",{parentName:"li"},"dlf.catalog.id"),": Optional. 
Used to specify the dlf catalog, if not specified, the default Catalog ID will be used."),(0,l.yg)("li",{parentName:"ul"},(0,l.yg)("inlineCode",{parentName:"li"},"dlf.access_key"),"\uff1aAccessKey, which you can create and manage on the ",(0,l.yg)("a",{parentName:"li",href:"https://ram.console.aliyun.com/manage/ak"},"Alibaba Cloud console"),"."),(0,l.yg)("li",{parentName:"ul"},(0,l.yg)("inlineCode",{parentName:"li"},"dlf.secret_key"),"\uff1aSecretKey, which you can create and manage on the ",(0,l.yg)("a",{parentName:"li",href:"https://ram.console.aliyun.com/manage/ak"},"Alibaba Cloud console"),".")),(0,l.yg)("p",null,"Other configuration items are fixed and require no modifications."),(0,l.yg)("p",null,"After the above steps, you can access metadata in DLF the same way as you access Hive MetaStore."),(0,l.yg)("p",null,"Doris supports accessing Hive/Iceberg/Hudi metadata in DLF."),(0,l.yg)("h3",{id:"use-oss-hdfs-as-the-datasource"},"Use OSS-HDFS as the datasource"),(0,l.yg)("ol",null,(0,l.yg)("li",{parentName:"ol"},(0,l.yg)("p",{parentName:"li"},"Enable OSS-HDFS. ",(0,l.yg)("a",{parentName:"p",href:"https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-hdfsnew"},"Grant access to OSS or OSS-HDFS"))),(0,l.yg)("li",{parentName:"ol"},(0,l.yg)("p",{parentName:"li"},"Download the SDK. ",(0,l.yg)("a",{parentName:"p",href:"https://github.com/aliyun/alibabacloud-jindodata/blob/master/docs/user/5.x/5.0.0-beta7/jindodata_download.md"},"JindoData SDK"),". 
If the Jindo SDK directory already exists on the cluster, skip this step.")),(0,l.yg)("li",{parentName:"ol"},(0,l.yg)("p",{parentName:"li"},"Decompress the jindosdk.tar.gz or locate the Jindo SDK directory on the cluster, and then enter its lib directory and put ",(0,l.yg)("inlineCode",{parentName:"p"},"jindo-core.jar, jindo-sdk.jar")," to both ",(0,l.yg)("inlineCode",{parentName:"p"},"${DORIS_HOME}/fe/lib")," and ",(0,l.yg)("inlineCode",{parentName:"p"},"${DORIS_HOME}/be/lib/java_extensions/preload-extensions"),".")),(0,l.yg)("li",{parentName:"ol"},(0,l.yg)("p",{parentName:"li"},"Create DLF Catalog, set ",(0,l.yg)("inlineCode",{parentName:"p"},"oss.hdfs.enabled")," as ",(0,l.yg)("inlineCode",{parentName:"p"},"true"),"\uff1a"),(0,l.yg)("pre",{parentName:"li"},(0,l.yg)("code",{parentName:"pre",className:"language-sql"},'CREATE CATALOG dlf_oss_hdfs PROPERTIES (\n "type"="hms",\n "hive.metastore.type" = "dlf",\n "dlf.proxy.mode" = "DLF_ONLY",\n "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",\n "dlf.region" = "cn-beijing",\n "dlf.uid" = "uid",\n "dlf.catalog.id" = "catalog_id", //optional\n "dlf.access_key" = "ak",\n "dlf.secret_key" = "sk",\n "oss.hdfs.enabled" = "true"\n);\n'))),(0,l.yg)("li",{parentName:"ol"},(0,l.yg)("p",{parentName:"li"},"When the Jindo SDK version is inconsistent with the version used on the EMR cluster, will reported ",(0,l.yg)("inlineCode",{parentName:"p"},"Plugin not found")," and the Jindo SDK needs to be replaced with the corresponding version."))),(0,l.yg)("h3",{id:"dlf-iceberg-catalog"},"DLF Iceberg Catalog"),(0,l.yg)("pre",null,(0,l.yg)("code",{parentName:"pre",className:"language-sql"},'CREATE CATALOG dlf_iceberg PROPERTIES (\n "type"="iceberg",\n "iceberg.catalog.type" = "dlf",\n "dlf.proxy.mode" = "DLF_ONLY",\n "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",\n "dlf.region" = "cn-beijing",\n "dlf.uid" = "uid",\n "dlf.catalog.id" = "catalog_id", //optional\n "dlf.access_key" = "ak",\n "dlf.secret_key" = 
"sk"\n);\n')),(0,l.yg)("h2",{id:"column-type-mapping"},"Column type mapping"),(0,l.yg)("p",null,"Consistent with Hive Catalog, please refer to the ",(0,l.yg)("strong",{parentName:"p"},"column type mapping")," section in ",(0,l.yg)("a",{parentName:"p",href:"/docs/lakehouse/datalake-analytics/hive"},"Hive Catalog"),"."))}u.isMDXComponent=!0}}]);