blob: 89d726d23a80e1febe771826546be1f01f5a364c [file] [log] [blame]
"use strict";(self.webpackChunkdoris_website=self.webpackChunkdoris_website||[]).push([[20542],{15680:(e,a,n)=>{n.d(a,{xA:()=>d,yg:()=>u});var r=n(296540);function p(e,a,n){return a in e?Object.defineProperty(e,a,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[a]=n,e}function l(e,a){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);a&&(r=r.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var a=1;a<arguments.length;a++){var n=null!=arguments[a]?arguments[a]:{};a%2?l(Object(n),!0).forEach((function(a){p(e,a,n[a])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(n)):l(Object(n)).forEach((function(a){Object.defineProperty(e,a,Object.getOwnPropertyDescriptor(n,a))}))}return e}function o(e,a){if(null==e)return{};var n,r,p=function(e,a){if(null==e)return{};var n,r,p={},l=Object.keys(e);for(r=0;r<l.length;r++)n=l[r],a.indexOf(n)>=0||(p[n]=e[n]);return p}(e,a);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r<l.length;r++)n=l[r],a.indexOf(n)>=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(p[n]=e[n])}return p}var t=r.createContext({}),s=function(e){var a=r.useContext(t),n=a;return e&&(n="function"==typeof e?e(a):i(i({},a),e)),n},d=function(e){var a=s(e.components);return r.createElement(t.Provider,{value:a},e.children)},m="mdxType",y={inlineCode:"code",wrapper:function(e){var a=e.children;return r.createElement(r.Fragment,{},a)}},g=r.forwardRef((function(e,a){var n=e.components,p=e.mdxType,l=e.originalType,t=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),m=s(n),g=p,u=m["".concat(t,".").concat(g)]||m[g]||y[g]||l;return n?r.createElement(u,i(i({ref:a},d),{},{components:n})):r.createElement(u,i({ref:a},d))}));function u(e,a){var n=arguments,p=a&&a.mdxType;if("string"==typeof e||p){var l=n.length,i=new Array(l);i[0]=g;var o={};for(var t in a)hasOwnProperty.call(a,t)&&(o[t]=a[t]);o.originalType=e,o[m]="string"==typeof e?e:p,i[1]=o;for(var s=2;s<l;s++)i[s]=n[s];return r.createElement.apply(null,i)}return r.createElement.apply(null,n)}g.displayName="MDXCreateElement"},897363:(e,a,n)=>{n.r(a),n.d(a,{assets:()=>t,contentTitle:()=>i,default:()=>y,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var r=n(58168),p=(n(296540),n(15680));const l={title:"Spark Load",language:"zh-CN"},i=void 0,o={unversionedId:"data-operate/import/import-way/spark-load-manual",id:"version-1.2/data-operate/import/import-way/spark-load-manual",title:"Spark Load",description:"\x3c!--",source:"@site/i18n/zh-CN/docusaurus-plugin-content-docs/version-1.2/data-operate/import/import-way/spark-load-manual.md",sourceDirName:"data-operate/import/import-way",slug:"/data-operate/import/import-way/spark-load-manual",permalink:"/zh-CN/docs/1.2/data-operate/import/import-way/spark-load-manual",draft:!1,tags:[],version:"1.2",frontMatter:{title:"Spark Load",language:"zh-CN"},sidebar:"docs",previous:{title:"Routine Load",permalink:"/zh-CN/docs/1.2/data-operate/import/import-way/routine-load-manual"},next:{title:"Stream load",permalink:"/zh-CN/docs/1.2/data-operate/import/import-way/stream-load-manual"}},t={},s=[{value:"\u9002\u7528\u573a\u666f",id:"\u9002\u7528\u573a\u666f",level:2},{value:"\u57fa\u672c\u539f\u7406",id:"\u57fa\u672c\u539f\u7406",level:2},{value:"\u57fa\u672c\u6d41\u7a0b",id:"\u57fa\u672c\u6d41\u7a0b",level:3},{value:"\u5168\u5c40\u5b57\u5178",id:"\u5168\u5c40\u5b57\u5178",level:2},{value:"\u9002\u7528\u573a\u666f",id:"\u9002\u7528\u573a\u666f-1",level:3},{value:"\u6784\u5efa\u6d41\u7a0b",id:"\u6784\u5efa\u6d41\u7a0b",level:3},{value:"\u6570\u636e\u9884\u5904\u7406\uff08DPP\uff09",id:"\u6570\u636e\u9884\u5904\u7406dpp",level:2},{value:"\u57fa\u672c\u6d41\u7a0b",id:"\u57fa\u672c\u6d41\u7a0b-1",level:3},{value:"Hive Bitmap UDF",id:"hive-bitmap-udf",level:2},{value:"\u57fa\u672c\u64cd\u4f5c",id:"\u57fa\u672c\u64cd\u4f5c",level:2},{value:"\u914d\u7f6e ETL \u96c6\u7fa4",id:"\u914d\u7f6e-etl-\u96c6\u7fa4",level:3},{value:"\u914d\u7f6e Spark \u5ba2\u6237\u7aef",id:"\u914d\u7f6e-spark-\u5ba2\u6237\u7aef",level:3},{value:"\u914d\u7f6e Yarn \u5ba2\u6237\u7aef",id:"\u914d\u7f6e-yarn-\u5ba2\u6237\u7aef",level:3},{value:"\u521b\u5efa\u5bfc\u5165",id:"\u521b\u5efa\u5bfc\u5165",level:3},{value:"\u67e5\u770b\u5bfc\u5165",id:"\u67e5\u770b\u5bfc\u5165",level:3},{value:"\u67e5\u770b Spark Launcher \u63d0\u4ea4\u65e5\u5fd7",id:"\u67e5\u770b-spark-launcher-\u63d0\u4ea4\u65e5\u5fd7",level:3},{value:"\u53d6\u6d88\u5bfc\u5165",id:"\u53d6\u6d88\u5bfc\u5165",level:3},{value:"\u76f8\u5173\u7cfb\u7edf\u914d\u7f6e",id:"\u76f8\u5173\u7cfb\u7edf\u914d\u7f6e",level:2},{value:"FE \u914d\u7f6e",id:"fe-\u914d\u7f6e",level:3},{value:"\u6700\u4f73\u5b9e\u8df5",id:"\u6700\u4f73\u5b9e\u8df5",level:2},{value:"\u5e94\u7528\u573a\u666f",id:"\u5e94\u7528\u573a\u666f",level:3},{value:"\u5e38\u89c1\u95ee\u9898",id:"\u5e38\u89c1\u95ee\u9898",level:2},{value:"\u66f4\u591a\u5e2e\u52a9",id:"\u66f4\u591a\u5e2e\u52a9",level:2}],d={toc:s},m="wrapper";function y(e){let{components:a,...n}=e;return(0,p.yg)(m,(0,r.A)({},d,n,{components:a,mdxType:"MDXLayout"}),(0,p.yg)("h1",{id:"spark-load"},"Spark Load"),(0,p.yg)("p",null,"Spark Load \u901a\u8fc7\u5916\u90e8\u7684 Spark \u8d44\u6e90\u5b9e\u73b0\u5bf9\u5bfc\u5165\u6570\u636e\u7684\u9884\u5904\u7406\uff0c\u63d0\u9ad8 Doris \u5927\u6570\u636e\u91cf\u7684\u5bfc\u5165\u6027\u80fd\u5e76\u4e14\u8282\u7701 Doris \u96c6\u7fa4\u7684\u8ba1\u7b97\u8d44\u6e90\u3002\u4e3b\u8981\u7528\u4e8e\u521d\u6b21\u8fc1\u79fb\uff0c\u5927\u6570\u636e\u91cf\u5bfc\u5165 Doris \u7684\u573a\u666f\u3002"),(0,p.yg)("p",null,"Spark Load \u662f\u5229\u7528\u4e86 Spark \u96c6\u7fa4\u7684\u8d44\u6e90\u5bf9\u8981\u5bfc\u5165\u7684\u6570\u636e\u7684\u8fdb\u884c\u4e86\u6392\u5e8f\uff0cDoris BE \u76f4\u63a5\u5199\u6587\u4ef6\uff0c\u8fd9\u6837\u80fd\u5927\u5927\u964d\u4f4e Doris \u96c6\u7fa4\u7684\u8d44\u6e90\u4f7f\u7528\uff0c\u5bf9\u4e8e\u5386\u53f2\u6d77\u91cf\u6570\u636e\u8fc1\u79fb\u964d\u4f4e Doris \u96c6\u7fa4\u8d44\u6e90\u4f7f\u7528\u53ca\u8d1f\u8f7d\u6709\u5f88\u597d\u7684\u6548\u679c\u3002"),(0,p.yg)("p",null,"\u5982\u679c\u7528\u6237\u5728\u6ca1\u6709 Spark \u96c6\u7fa4\u8fd9\u79cd\u8d44\u6e90\u7684\u60c5\u51b5\u4e0b\uff0c\u53c8\u60f3\u65b9\u4fbf\u3001\u5feb\u901f\u7684\u5b8c\u6210\u5916\u90e8\u5b58\u50a8\u5386\u53f2\u6570\u636e\u7684\u8fc1\u79fb\uff0c\u53ef\u4ee5\u4f7f\u7528 ",(0,p.yg)("a",{parentName:"p",href:"/zh-CN/docs/1.2/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD"},"Broker Load")," \u3002\u76f8\u5bf9 Spark Load \u5bfc\u5165\uff0cBroker Load \u5bf9 Doris \u96c6\u7fa4\u7684\u8d44\u6e90\u5360\u7528\u4f1a\u66f4\u9ad8\u3002"),(0,p.yg)("p",null,"Spark Load \u662f\u4e00\u79cd\u5f02\u6b65\u5bfc\u5165\u65b9\u5f0f\uff0c\u7528\u6237\u9700\u8981\u901a\u8fc7 MySQL \u534f\u8bae\u521b\u5efa Spark \u7c7b\u578b\u5bfc\u5165\u4efb\u52a1\uff0c\u5e76\u901a\u8fc7 ",(0,p.yg)("inlineCode",{parentName:"p"},"SHOW LOAD")," \u67e5\u770b\u5bfc\u5165\u7ed3\u679c\u3002"),(0,p.yg)("h2",{id:"\u9002\u7528\u573a\u666f"},"\u9002\u7528\u573a\u666f"),(0,p.yg)("ul",null,(0,p.yg)("li",{parentName:"ul"},"\u6e90\u6570\u636e\u5728 Spark \u53ef\u4ee5\u8bbf\u95ee\u7684\u5b58\u50a8\u7cfb\u7edf\u4e2d\uff0c\u5982 HDFS\u3002"),(0,p.yg)("li",{parentName:"ul"},"\u6570\u636e\u91cf\u5728 \u51e0\u5341 GB \u5230 TB \u7ea7\u522b\u3002")),(0,p.yg)("h2",{id:"\u57fa\u672c\u539f\u7406"},"\u57fa\u672c\u539f\u7406"),(0,p.yg)("h3",{id:"\u57fa\u672c\u6d41\u7a0b"},"\u57fa\u672c\u6d41\u7a0b"),(0,p.yg)("p",null,"\u7528\u6237\u901a\u8fc7 MySQL \u5ba2\u6237\u7aef\u63d0\u4ea4 Spark \u7c7b\u578b\u5bfc\u5165\u4efb\u52a1\uff0cFE \u8bb0\u5f55\u5143\u6570\u636e\u5e76\u8fd4\u56de\u7528\u6237\u63d0\u4ea4\u6210\u529f\u3002"),(0,p.yg)("p",null,"Spark Load \u4efb\u52a1\u7684\u6267\u884c\u4e3b\u8981\u5206\u4e3a\u4ee5\u4e0b 5 \u4e2a\u9636\u6bb5\u3002"),(0,p.yg)("ol",null,(0,p.yg)("li",{parentName:"ol"},"FE \u8c03\u5ea6\u63d0\u4ea4 ETL \u4efb\u52a1\u5230 Spark \u96c6\u7fa4\u6267\u884c\u3002"),(0,p.yg)("li",{parentName:"ol"},"Spark \u96c6\u7fa4\u6267\u884c ETL \u5b8c\u6210\u5bf9\u5bfc\u5165\u6570\u636e\u7684\u9884\u5904\u7406\uff0c\u5305\u62ec\u5168\u5c40\u5b57\u5178\u6784\u5efa\uff08 Bitmap \u7c7b\u578b\uff09\u3001\u5206\u533a\u3001\u6392\u5e8f\u3001\u805a\u5408\u7b49\u3002"),(0,p.yg)("li",{parentName:"ol"},"ETL \u4efb\u52a1\u5b8c\u6210\u540e\uff0cFE \u83b7\u53d6\u9884\u5904\u7406\u8fc7\u7684\u6bcf\u4e2a\u5206\u7247\u7684\u6570\u636e\u8def\u5f84\uff0c\u5e76\u8c03\u5ea6\u76f8\u5173\u7684 BE \u6267\u884c Push \u4efb\u52a1\u3002"),(0,p.yg)("li",{parentName:"ol"},"BE \u901a\u8fc7 Broker \u8bfb\u53d6\u6570\u636e\uff0c\u8f6c\u5316\u4e3a Doris \u5e95\u5c42\u5b58\u50a8\u683c\u5f0f\u3002"),(0,p.yg)("li",{parentName:"ol"},"FE \u8c03\u5ea6\u751f\u6548\u7248\u672c\uff0c\u5b8c\u6210\u5bfc\u5165\u4efb\u52a1\u3002")),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-text"}," +\n | 0. User create spark load job\n +----v----+\n | FE |---------------------------------+\n +----+----+ |\n | 3. FE send push tasks |\n | 5. FE publish version |\n +------------+------------+ |\n | | | |\n+---v---+ +---v---+ +---v---+ |\n| BE | | BE | | BE | |1. FE submit Spark ETL job\n+---^---+ +---^---+ +---^---+ |\n |4. BE push with broker | |\n+---+---+ +---+---+ +---+---+ |\n|Broker | |Broker | |Broker | |\n+---^---+ +---^---+ +---^---+ |\n | | | |\n+---+------------+------------+---+ 2.ETL +-------------v---------------+\n| HDFS +-------\x3e Spark cluster |\n| <-------+ |\n+---------------------------------+ +-----------------------------+\n")),(0,p.yg)("h2",{id:"\u5168\u5c40\u5b57\u5178"},"\u5168\u5c40\u5b57\u5178"),(0,p.yg)("h3",{id:"\u9002\u7528\u573a\u666f-1"},"\u9002\u7528\u573a\u666f"),(0,p.yg)("p",null,"\u76ee\u524d Doris \u4e2d Bitmap \u5217\u662f\u4f7f\u7528\u7c7b\u5e93 ",(0,p.yg)("inlineCode",{parentName:"p"},"Roaringbitmap")," \u5b9e\u73b0\u7684\uff0c\u800c ",(0,p.yg)("inlineCode",{parentName:"p"},"Roaringbitmap")," \u7684\u8f93\u5165\u6570\u636e\u7c7b\u578b\u53ea\u80fd\u662f\u6574\u578b\uff0c\u56e0\u6b64\u5982\u679c\u8981\u5728\u5bfc\u5165\u6d41\u7a0b\u4e2d\u5b9e\u73b0\u5bf9\u4e8e Bitmap \u5217\u7684\u9884\u8ba1\u7b97\uff0c\u90a3\u4e48\u5c31\u9700\u8981\u5c06\u8f93\u5165\u6570\u636e\u7684\u7c7b\u578b\u8f6c\u6362\u6210\u6574\u578b\u3002"),(0,p.yg)("p",null,"\u5728 Doris \u73b0\u6709\u7684\u5bfc\u5165\u6d41\u7a0b\u4e2d\uff0c\u5168\u5c40\u5b57\u5178\u7684\u6570\u636e\u7ed3\u6784\u662f\u57fa\u4e8e Hive \u8868\u5b9e\u73b0\u7684\uff0c\u4fdd\u5b58\u4e86\u539f\u59cb\u503c\u5230\u7f16\u7801\u503c\u7684\u6620\u5c04\u3002"),(0,p.yg)("h3",{id:"\u6784\u5efa\u6d41\u7a0b"},"\u6784\u5efa\u6d41\u7a0b"),(0,p.yg)("ol",null,(0,p.yg)("li",{parentName:"ol"},"\u8bfb\u53d6\u4e0a\u6e38\u6570\u636e\u6e90\u7684\u6570\u636e\uff0c\u751f\u6210\u4e00\u5f20 Hive \u4e34\u65f6\u8868\uff0c\u8bb0\u4e3a ",(0,p.yg)("inlineCode",{parentName:"li"},"hive_table"),"\u3002"),(0,p.yg)("li",{parentName:"ol"},"\u4ece ",(0,p.yg)("inlineCode",{parentName:"li"},"hive_table "),"\u4e2d\u62bd\u53d6\u5f85\u53bb\u91cd\u5b57\u6bb5\u7684\u53bb\u91cd\u503c\uff0c\u751f\u6210\u4e00\u5f20\u65b0\u7684 Hive \u8868\uff0c\u8bb0\u4e3a ",(0,p.yg)("inlineCode",{parentName:"li"},"distinct_value_table"),"\u3002"),(0,p.yg)("li",{parentName:"ol"},"\u65b0\u5efa\u4e00\u5f20\u5168\u5c40\u5b57\u5178\u8868\uff0c\u8bb0\u4e3a ",(0,p.yg)("inlineCode",{parentName:"li"},"dict_table")," \uff0c\u4e00\u5217\u4e3a\u539f\u59cb\u503c\uff0c\u4e00\u5217\u4e3a\u7f16\u7801\u540e\u7684\u503c\u3002"),(0,p.yg)("li",{parentName:"ol"},"\u5c06 ",(0,p.yg)("inlineCode",{parentName:"li"},"distinct_value_table")," \u4e0e ",(0,p.yg)("inlineCode",{parentName:"li"},"dict_table")," \u505a Left Join\uff0c\u8ba1\u7b97\u51fa\u65b0\u589e\u7684\u53bb\u91cd\u503c\u96c6\u5408\uff0c\u7136\u540e\u5bf9\u8fd9\u4e2a\u96c6\u5408\u4f7f\u7528\u7a97\u53e3\u51fd\u6570\u8fdb\u884c\u7f16\u7801\uff0c\u6b64\u65f6\u53bb\u91cd\u5217\u539f\u59cb\u503c\u5c31\u591a\u4e86\u4e00\u5217\u7f16\u7801\u540e\u7684\u503c\uff0c\u6700\u540e\u5c06\u8fd9\u4e24\u5217\u7684\u6570\u636e\u5199\u56de ",(0,p.yg)("inlineCode",{parentName:"li"},"dict_table"),"\u3002"),(0,p.yg)("li",{parentName:"ol"},"\u5c06 ",(0,p.yg)("inlineCode",{parentName:"li"},"dict_table "),"\u4e0e ",(0,p.yg)("inlineCode",{parentName:"li"},"hive_table")," \u8fdb\u884c Join\uff0c\u5b8c\u6210 ",(0,p.yg)("inlineCode",{parentName:"li"},"hive_table")," \u4e2d\u539f\u59cb\u503c\u66ff\u6362\u6210\u6574\u578b\u7f16\u7801\u503c\u7684\u5de5\u4f5c\u3002"),(0,p.yg)("li",{parentName:"ol"},(0,p.yg)("inlineCode",{parentName:"li"},"hive_table "),"\u4f1a\u88ab\u4e0b\u4e00\u6b65\u6570\u636e\u9884\u5904\u7406\u7684\u6d41\u7a0b\u6240\u8bfb\u53d6\uff0c\u7ecf\u8fc7\u8ba1\u7b97\u540e\u5bfc\u5165\u5230 Doris \u4e2d\u3002")),(0,p.yg)("h2",{id:"\u6570\u636e\u9884\u5904\u7406dpp"},"\u6570\u636e\u9884\u5904\u7406\uff08DPP\uff09"),(0,p.yg)("h3",{id:"\u57fa\u672c\u6d41\u7a0b-1"},"\u57fa\u672c\u6d41\u7a0b"),(0,p.yg)("ol",null,(0,p.yg)("li",{parentName:"ol"},"\u4ece\u6570\u636e\u6e90\u8bfb\u53d6\u6570\u636e\uff0c\u4e0a\u6e38\u6570\u636e\u6e90\u53ef\u4ee5\u662f HDFS \u6587\u4ef6\uff0c\u4e5f\u53ef\u4ee5\u662f Hive \u8868\u3002"),(0,p.yg)("li",{parentName:"ol"},"\u5bf9\u8bfb\u53d6\u5230\u7684\u6570\u636e\u8fdb\u884c\u5b57\u6bb5\u6620\u5c04\uff0c\u8868\u8fbe\u5f0f\u8ba1\u7b97\u4ee5\u53ca\u6839\u636e\u5206\u533a\u4fe1\u606f\u751f\u6210\u5206\u6876\u5b57\u6bb5 ",(0,p.yg)("inlineCode",{parentName:"li"},"bucket_id"),"\u3002"),(0,p.yg)("li",{parentName:"ol"},"\u6839\u636e Doris \u8868\u7684 Rollup \u5143\u6570\u636e\u751f\u6210 RollupTree\u3002"),(0,p.yg)("li",{parentName:"ol"},"\u904d\u5386 RollupTree\uff0c\u8fdb\u884c\u5206\u5c42\u7684\u805a\u5408\u64cd\u4f5c\uff0c\u4e0b\u4e00\u4e2a\u5c42\u7ea7\u7684 Rollup \u53ef\u4ee5\u7531\u4e0a\u4e00\u4e2a\u5c42\u7684 Rollup \u8ba1\u7b97\u5f97\u6765\u3002"),(0,p.yg)("li",{parentName:"ol"},"\u6bcf\u6b21\u5b8c\u6210\u805a\u5408\u8ba1\u7b97\u540e\uff0c\u4f1a\u5bf9\u6570\u636e\u6839\u636e ",(0,p.yg)("inlineCode",{parentName:"li"},"bucket_id "),"\u8fdb\u884c\u5206\u6876\u7136\u540e\u5199\u5165 HDFS \u4e2d\u3002"),(0,p.yg)("li",{parentName:"ol"},"\u540e\u7eed Broker \u4f1a\u62c9\u53d6 HDFS \u4e2d\u7684\u6587\u4ef6\u7136\u540e\u5bfc\u5165 Doris Be \u4e2d\u3002")),(0,p.yg)("h2",{id:"hive-bitmap-udf"},"Hive Bitmap UDF"),(0,p.yg)("p",null,"Spark \u652f\u6301\u5c06 Hive \u751f\u6210\u7684 Bitmap \u6570\u636e\u76f4\u63a5\u5bfc\u5165\u5230 Doris\u3002\u8be6\u89c1 ",(0,p.yg)("a",{parentName:"p",href:"/zh-CN/docs/1.2/ecosystem/hive-bitmap-udf"},"hive-bitmap-udf \u6587\u6863")),(0,p.yg)("h2",{id:"\u57fa\u672c\u64cd\u4f5c"},"\u57fa\u672c\u64cd\u4f5c"),(0,p.yg)("h3",{id:"\u914d\u7f6e-etl-\u96c6\u7fa4"},"\u914d\u7f6e ETL \u96c6\u7fa4"),(0,p.yg)("p",null,"Spark \u4f5c\u4e3a\u4e00\u79cd\u5916\u90e8\u8ba1\u7b97\u8d44\u6e90\u5728 Doris \u4e2d\u7528\u6765\u5b8c\u6210 ETL \u5de5\u4f5c\uff0c\u672a\u6765\u53ef\u80fd\u8fd8\u6709\u5176\u4ed6\u7684\u5916\u90e8\u8d44\u6e90\u4f1a\u52a0\u5165\u5230 Doris \u4e2d\u4f7f\u7528\uff0c\u5982 Spark/GPU \u7528\u4e8e\u67e5\u8be2\uff0cHDFS/S3 \u7528\u4e8e\u5916\u90e8\u5b58\u50a8\uff0cMapReduce \u7528\u4e8e ETL \u7b49\uff0c\u56e0\u6b64\u6211\u4eec\u5f15\u5165 Resource Management \u6765\u7ba1\u7406 Doris \u4f7f\u7528\u7684\u8fd9\u4e9b\u5916\u90e8\u8d44\u6e90\u3002"),(0,p.yg)("p",null,"\u63d0\u4ea4 Spark \u5bfc\u5165\u4efb\u52a1\u4e4b\u524d\uff0c\u9700\u8981\u914d\u7f6e\u6267\u884c ETL \u4efb\u52a1\u7684 Spark \u96c6\u7fa4\u3002"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},'-- create spark resource\nCREATE EXTERNAL RESOURCE resource_name\nPROPERTIES\n(\n type = spark,\n spark_conf_key = spark_conf_value,\n working_dir = path,\n broker = broker_name,\n broker.property_key = property_value,\n broker.hadoop.security.authentication = kerberos,\n broker.kerberos_principal = doris@YOUR.COM,\n broker.kerberos_keytab = /home/doris/my.keytab\n broker.kerberos_keytab_content = ASDOWHDLAWIDJHWLDKSALDJSDIWALD\n)\n\n-- drop spark resource\nDROP RESOURCE resource_name\n\n-- show resources\nSHOW RESOURCES\nSHOW PROC "/resources"\n\n-- privileges\nGRANT USAGE_PRIV ON RESOURCE resource_name TO user_identity\nGRANT USAGE_PRIV ON RESOURCE resource_name TO ROLE role_name\n\nREVOKE USAGE_PRIV ON RESOURCE resource_name FROM user_identity\nREVOKE USAGE_PRIV ON RESOURCE resource_name FROM ROLE role_name\n')),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"\u521b\u5efa\u8d44\u6e90")),(0,p.yg)("p",null,(0,p.yg)("inlineCode",{parentName:"p"},"resource_name")," \u4e3a Doris \u4e2d\u914d\u7f6e\u7684 Spark \u8d44\u6e90\u7684\u540d\u5b57\u3002"),(0,p.yg)("p",null,(0,p.yg)("inlineCode",{parentName:"p"},"PROPERTIES")," \u662f Spark \u8d44\u6e90\u76f8\u5173\u53c2\u6570\uff0c\u5982\u4e0b\uff1a"),(0,p.yg)("ul",null,(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"type"),"\uff1a\u8d44\u6e90\u7c7b\u578b\uff0c\u5fc5\u586b\uff0c\u76ee\u524d\u4ec5\u652f\u6301 Spark\u3002"),(0,p.yg)("li",{parentName:"ul"},"Spark \u76f8\u5173\u53c2\u6570\u5982\u4e0b\uff1a",(0,p.yg)("ul",{parentName:"li"},(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.master"),": \u5fc5\u586b\uff0c\u76ee\u524d\u652f\u6301 Yarn\uff0cSpark://host:port\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.submit.deployMode"),": Spark \u7a0b\u5e8f\u7684\u90e8\u7f72\u6a21\u5f0f\uff0c\u5fc5\u586b\uff0c\u652f\u6301 Cluster\u3001Client \u4e24\u79cd\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.fs.defaultFS"),": Master \u4e3a Yarn \u65f6\u5fc5\u586b\u3002"))),(0,p.yg)("li",{parentName:"ul"},"YARN RM \u76f8\u5173\u53c2\u6570\u5982\u4e0b\uff1a",(0,p.yg)("ul",{parentName:"li"},(0,p.yg)("li",{parentName:"ul"},"\u5982\u679c Spark \u4e3a\u5355\u70b9 RM\uff0c\u5219\u9700\u8981\u914d\u7f6e",(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.yarn.resourcemanager.address"),"\uff0c\u8868\u793a\u5355\u70b9 ResourceManager \u5730\u5740\u3002"),(0,p.yg)("li",{parentName:"ul"},"\u5982\u679c Spark \u4e3a RM-HA\uff0c\u5219\u9700\u8981\u914d\u7f6e\uff08\u5176\u4e2d hostname \u548c address \u4efb\u9009\u4e00\u4e2a\u914d\u7f6e\uff09\uff1a",(0,p.yg)("ul",{parentName:"li"},(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.yarn.resourcemanager.ha.enabled"),": ResourceManager \u542f\u7528 HA\uff0c\u8bbe\u7f6e\u4e3a True\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.yarn.resourcemanager.ha.rm-ids"),": ResourceManager \u903b\u8f91 ID \u5217\u8868\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.yarn.resourcemanager.hostname.rm-id"),": \u5bf9\u4e8e\u6bcf\u4e2a rm-id\uff0c\u6307\u5b9a ResourceManager \u5bf9\u5e94\u7684\u4e3b\u673a\u540d\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.yarn.resourcemanager.address.rm-id"),": \u5bf9\u4e8e\u6bcf\u4e2a rm-id\uff0c\u6307\u5b9a host:port \u4ee5\u4f9b\u5ba2\u6237\u7aef\u63d0\u4ea4\u4f5c\u4e1a\u3002"))))),(0,p.yg)("li",{parentName:"ul"},"HDFS HA \u76f8\u5173\u53c2\u6570\u5982\u4e0b\uff1a",(0,p.yg)("ul",{parentName:"li"},(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.fs.defaultFS"),", HDFS \u5ba2\u6237\u7aef\u9ed8\u8ba4\u8def\u5f84\u524d\u7f00"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.dfs.nameservices"),", HDFS \u96c6\u7fa4\u903b\u8f91\u540d\u79f0"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.dfs.ha.namenodes.nameservices01")," , nameservice \u4e2d\u6bcf\u4e2a NameNode \u7684\u552f\u4e00\u6807\u8bc6\u7b26"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.dfs.namenode.rpc-address.nameservices01.mynamenode1"),", \u6bcf\u4e2a NameNode \u7684\u5b8c\u5168\u9650\u5b9a\u7684 RPC \u5730\u5740"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.dfs.namenode.rpc-address.nameservices01.mynamenode2"),", \u6bcf\u4e2a NameNode \u7684\u5b8c\u5168\u9650\u5b9a\u7684 RPC \u5730\u5740"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"spark.hadoop.dfs.client.failover.proxy.provider")," = ",(0,p.yg)("inlineCode",{parentName:"li"},"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"),", \u8bbe\u7f6e\u5b9e\u73b0\u7c7b"))),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"working_dir"),": ETL \u4f7f\u7528\u7684\u76ee\u5f55\u3002Spark \u4f5c\u4e3a ETL \u8d44\u6e90\u4f7f\u7528\u65f6\u5fc5\u586b\u3002\u4f8b\u5982\uff1ahdfs://host:port/tmp/doris\u3002",(0,p.yg)("ul",{parentName:"li"},(0,p.yg)("li",{parentName:"ul"},"\u5176\u4ed6\u53c2\u6570\u4e3a\u53ef\u9009\uff0c\u53c2\u8003 ",(0,p.yg)("a",{parentName:"li",href:"http://spark.apache.org/docs/latest/configuration.html"},"http://spark.apache.org/docs/latest/configuration.html")))),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"working_dir"),": ETL \u4f7f\u7528\u7684\u76ee\u5f55\u3002Spark \u4f5c\u4e3a ETL \u8d44\u6e90\u4f7f\u7528\u65f6\u5fc5\u586b\u3002\u4f8b\u5982\uff1ahdfs://host:port/tmp/doris\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"broker.hadoop.security.authentication"),"\uff1a\u6307\u5b9a\u8ba4\u8bc1\u65b9\u5f0f\u4e3a Kerberos\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"broker.kerberos_principal"),"\uff1a\u6307\u5b9a Kerberos \u7684 Principal\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"broker.kerberos_keytab"),"\uff1a\u6307\u5b9a Kerberos \u7684 Keytab \u6587\u4ef6\u8def\u5f84\u3002\u8be5\u6587\u4ef6\u5fc5\u987b\u4e3a Broker \u8fdb\u7a0b\u6240\u5728\u670d\u52a1\u5668\u4e0a\u7684\u6587\u4ef6\u7684\u7edd\u5bf9\u8def\u5f84\uff0c\u5e76\u4e14\u53ef\u4ee5\u88ab Broker \u8fdb\u7a0b\u8bbf\u95ee\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"broker.kerberos_keytab_content"),"\uff1a\u6307\u5b9a Kerberos \u4e2d Keytab \u6587\u4ef6\u5185\u5bb9\u7ecf\u8fc7 Base64 \u7f16\u7801\u4e4b\u540e\u7684\u5185\u5bb9\u3002\u8fd9\u4e2a\u8ddf ",(0,p.yg)("inlineCode",{parentName:"li"},"broker.kerberos_keytab")," \u914d\u7f6e\u4e8c\u9009\u4e00\u5373\u53ef\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"broker"),": Broker \u540d\u5b57\u3002Spark \u4f5c\u4e3a ETL \u8d44\u6e90\u4f7f\u7528\u65f6\u5fc5\u586b\u3002\u9700\u8981\u4f7f\u7528 ",(0,p.yg)("inlineCode",{parentName:"li"},"ALTER SYSTEM ADD BROKER")," \u547d\u4ee4\u63d0\u524d\u5b8c\u6210\u914d\u7f6e\u3002",(0,p.yg)("ul",{parentName:"li"},(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"broker.property_key"),": Broker \u8bfb\u53d6 ETL \u751f\u6210\u7684\u4e2d\u95f4\u6587\u4ef6\u65f6\u9700\u8981\u6307\u5b9a\u7684\u8ba4\u8bc1\u4fe1\u606f\u7b49\u3002"))),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"env"),": \u6307\u5b9a Spark \u73af\u5883\u53d8\u91cf,\u652f\u6301\u52a8\u6001\u8bbe\u7f6e,\u6bd4\u5982\u5f53\u8ba4\u8bc1 Hadoop \u8ba4\u4e3a\u65b9\u5f0f\u4e3a Simple \u65f6\uff0c\u8bbe\u7f6e Hadoop \u7528\u6237\u540d\u548c\u5bc6\u7801",(0,p.yg)("ul",{parentName:"li"},(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"env.HADOOP_USER_NAME"),": \u8bbf\u95ee Hadoop \u7528\u6237\u540d"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"env.HADOOP_USER_PASSWORD"),":\u5bc6\u7801")))),(0,p.yg)("p",null,"\u793a\u4f8b\uff1a"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},'-- yarn cluster \u6a21\u5f0f\nCREATE EXTERNAL RESOURCE "spark0"\nPROPERTIES\n(\n "type" = "spark",\n "spark.master" = "yarn",\n "spark.submit.deployMode" = "cluster",\n "spark.jars" = "xxx.jar,yyy.jar",\n "spark.files" = "/tmp/aaa,/tmp/bbb",\n "spark.executor.memory" = "1g",\n "spark.yarn.queue" = "queue0",\n "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999",\n "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000",\n "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",\n "broker" = "broker0",\n "broker.username" = "user0",\n "broker.password" = "password0"\n);\n\n-- spark standalone client \u6a21\u5f0f\nCREATE EXTERNAL RESOURCE "spark1"\nPROPERTIES\n(\n "type" = "spark",\n "spark.master" = "spark://127.0.0.1:7777",\n "spark.submit.deployMode" = "client",\n "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",\n "broker" = "broker1"\n);\n\n-- yarn HA \u6a21\u5f0f\nCREATE EXTERNAL RESOURCE sparkHA\nPROPERTIES\n(\n "type" = "spark",\n "spark.master" = "yarn",\n "spark.submit.deployMode" = "cluster",\n "spark.executor.memory" = "1g",\n "spark.yarn.queue" = "default",\n "spark.hadoop.yarn.resourcemanager.ha.enabled" = "true",\n "spark.hadoop.yarn.resourcemanager.ha.rm-ids" = "rm1,rm2",\n "spark.hadoop.yarn.resourcemanager.address.rm1" = "xxxx:8032",\n "spark.hadoop.yarn.resourcemanager.address.rm2" = "xxxx:8032",\n "spark.hadoop.fs.defaultFS" = "hdfs://nameservices01",\n "spark.hadoop.dfs.nameservices" = "nameservices01",\n "spark.hadoop.dfs.ha.namenodes.nameservices01" = "mynamenode1,mynamenode2",\n "spark.hadoop.dfs.namenode.rpc-address.nameservices01.mynamenode1" = "xxxx:8020",\n "spark.hadoop.dfs.namenode.rpc-address.nameservices01.mynamenode2" = "xxxx:8020",\n "spark.hadoop.dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",\n "working_dir" = "hdfs://nameservices01/doris_prd_data/sinan/spark_load/",\n "broker" = "broker_name",\n "broker.username" = "username",\n "broker.password" = "",\n "broker.dfs.nameservices" = "nameservices01",\n "broker.dfs.ha.namenodes.HDFS4001273" = "mynamenode1, mynamenode2",\n "broker.dfs.namenode.rpc-address.nameservices01.mynamenode1" = "xxxx:8020",\n "broker.dfs.namenode.rpc-address.nameservices01.mynamenode2" = "xxxx:8020",\n "broker.dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"\n);\n\n')),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"Spark Load \u652f\u6301 Kerberos \u8ba4\u8bc1")),(0,p.yg)("p",null,"\u5982\u679c\u662f Spark Load \u8bbf\u95ee\u5e26\u6709 Kerberos \u8ba4\u8bc1\u7684 Hadoop \u96c6\u7fa4\u8d44\u6e90\uff0c\u6211\u4eec\u53ea\u9700\u8981\u5728\u521b\u5efa Spark resource \u7684\u65f6\u5019\u6307\u5b9a\u4ee5\u4e0b\u53c2\u6570\u5373\u53ef\uff1a"),(0,p.yg)("ul",null,(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"broker.hadoop.security.authentication"),"\uff1a\u6307\u5b9a\u8ba4\u8bc1\u65b9\u5f0f\u4e3a Kerberos\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"broker.kerberos_principal"),"\uff1a\u6307\u5b9a Kerberos \u7684 Principal\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"broker.kerberos_keytab"),"\uff1a\u6307\u5b9a Kerberos \u7684 Keytab \u6587\u4ef6\u8def\u5f84\u3002\u8be5\u6587\u4ef6\u5fc5\u987b\u4e3a Broker \u8fdb\u7a0b\u6240\u5728\u670d\u52a1\u5668\u4e0a\u7684\u6587\u4ef6\u7684\u7edd\u5bf9\u8def\u5f84\u3002\u5e76\u4e14\u53ef\u4ee5\u88ab Broker \u8fdb\u7a0b\u8bbf\u95ee\u3002"),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("inlineCode",{parentName:"li"},"broker.kerberos_keytab_content"),"\uff1a\u6307\u5b9a Kerberos \u4e2d Keytab \u6587\u4ef6\u5185\u5bb9\u7ecf\u8fc7 Base64 \u7f16\u7801\u4e4b\u540e\u7684\u5185\u5bb9\u3002\u8fd9\u4e2a\u8ddf ",(0,p.yg)("inlineCode",{parentName:"li"},"kerberos_keytab")," \u914d\u7f6e\u4e8c\u9009\u4e00\u5373\u53ef\u3002")),(0,p.yg)("p",null,"\u793a\u4f8b\uff1a"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},'CREATE EXTERNAL RESOURCE "spark_on_kerberos"\nPROPERTIES\n(\n "type" = "spark",\n "spark.master" = "yarn",\n "spark.submit.deployMode" = "cluster",\n "spark.jars" = "xxx.jar,yyy.jar",\n "spark.files" = "/tmp/aaa,/tmp/bbb",\n "spark.executor.memory" = "1g",\n "spark.yarn.queue" = "queue0",\n "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999",\n "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000",\n "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris",\n "broker" = "broker0",\n "broker.hadoop.security.authentication" = "kerberos",\n "broker.kerberos_principal" = "doris@YOUR.COM",\n "broker.kerberos_keytab" = "/home/doris/my.keytab"\n);\n')),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"\u67e5\u770b\u8d44\u6e90")),(0,p.yg)("p",null,"\u666e\u901a\u8d26\u6237\u53ea\u80fd\u770b\u5230\u81ea\u5df1\u6709 USAGE_PRIV \u4f7f\u7528\u6743\u9650\u7684\u8d44\u6e90\u3002"),(0,p.yg)("p",null,"Root \u548c Admin \u8d26\u6237\u53ef\u4ee5\u770b\u5230\u6240\u6709\u7684\u8d44\u6e90\u3002"),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"\u8d44\u6e90\u6743\u9650")),(0,p.yg)("p",null,"\u8d44\u6e90\u6743\u9650\u901a\u8fc7 GRANT REVOKE \u6765\u7ba1\u7406\uff0c\u76ee\u524d\u4ec5\u652f\u6301 USAGE_PRIV \u4f7f\u7528\u6743\u9650\u3002"),(0,p.yg)("p",null,"\u53ef\u4ee5\u5c06 USAGE_PRIV \u6743\u9650\u8d4b\u4e88\u67d0\u4e2a\u7528\u6237\u6216\u8005\u67d0\u4e2a\u89d2\u8272\uff0c\u89d2\u8272\u7684\u4f7f\u7528\u4e0e\u4e4b\u524d\u4e00\u81f4\u3002"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},'-- \u6388\u4e88spark0\u8d44\u6e90\u7684\u4f7f\u7528\u6743\u9650\u7ed9\u7528\u6237user0\nGRANT USAGE_PRIV ON RESOURCE "spark0" TO "user0"@"%";\n\n-- \u6388\u4e88spark0\u8d44\u6e90\u7684\u4f7f\u7528\u6743\u9650\u7ed9\u89d2\u8272role0\nGRANT USAGE_PRIV ON RESOURCE "spark0" TO ROLE "role0";\n\n-- \u6388\u4e88\u6240\u6709\u8d44\u6e90\u7684\u4f7f\u7528\u6743\u9650\u7ed9\u7528\u6237user0\nGRANT USAGE_PRIV ON RESOURCE * TO "user0"@"%";\n\n-- \u6388\u4e88\u6240\u6709\u8d44\u6e90\u7684\u4f7f\u7528\u6743\u9650\u7ed9\u89d2\u8272role0\nGRANT USAGE_PRIV ON RESOURCE * TO ROLE "role0";\n\n-- \u64a4\u9500\u7528\u6237user0\u7684spark0\u8d44\u6e90\u4f7f\u7528\u6743\u9650\nREVOKE USAGE_PRIV ON RESOURCE "spark0" FROM "user0"@"%";\n')),(0,p.yg)("h3",{id:"\u914d\u7f6e-spark-\u5ba2\u6237\u7aef"},"\u914d\u7f6e Spark \u5ba2\u6237\u7aef"),(0,p.yg)("p",null,"FE \u5e95\u5c42\u901a\u8fc7\u6267\u884c spark-submit \u7684\u547d\u4ee4\u53bb\u63d0\u4ea4 Spark \u4efb\u52a1\uff0c\u56e0\u6b64\u9700\u8981\u4e3a FE \u914d\u7f6e Spark \u5ba2\u6237\u7aef\uff0c\u5efa\u8bae\u4f7f\u7528 2.4.5 \u6216\u4ee5\u4e0a\u7684 Spark2 \u5b98\u65b9\u7248\u672c\uff0c",(0,p.yg)("a",{parentName:"p",href:"https://archive.apache.org/dist/spark/"},"Spark \u4e0b\u8f7d\u5730\u5740"),"\uff0c\u4e0b\u8f7d\u5b8c\u6210\u540e\uff0c\u8bf7\u6309\u6b65\u9aa4\u5b8c\u6210\u4ee5\u4e0b\u914d\u7f6e\u3002"),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"\u914d\u7f6e SPARK_HOME \u73af\u5883\u53d8\u91cf")),(0,p.yg)("p",null,"\u5c06 Spark \u5ba2\u6237\u7aef\u653e\u5728 FE \u540c\u4e00\u53f0\u673a\u5668\u4e0a\u7684\u76ee\u5f55\u4e0b\uff0c\u5e76\u5728 FE \u7684\u914d\u7f6e\u6587\u4ef6\u914d\u7f6e ",(0,p.yg)("inlineCode",{parentName:"p"},"spark_home_default_dir")," \u9879\u6307\u5411\u6b64\u76ee\u5f55\uff0c\u6b64\u914d\u7f6e\u9879\u9ed8\u8ba4\u4e3a FE \u6839\u76ee\u5f55\u4e0b\u7684 ",(0,p.yg)("inlineCode",{parentName:"p"},"lib/spark2x ")," \u8def\u5f84\uff0c\u6b64\u9879\u4e0d\u53ef\u4e3a\u7a7a\u3002"),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"\u914d\u7f6e Spark \u4f9d\u8d56\u5305")),(0,p.yg)("p",null,"\u5c06 Spark \u5ba2\u6237\u7aef\u4e0b\u7684 jars \u6587\u4ef6\u5939\u5185\u6240\u6709 jar \u5305\u5f52\u6863\u6253\u5305\u6210\u4e00\u4e2a Zip \u6587\u4ef6\uff0c\u5e76\u5728 FE \u7684\u914d\u7f6e\u6587\u4ef6\u914d\u7f6e ",(0,p.yg)("inlineCode",{parentName:"p"},"spark_resource_path")," \u9879\u6307\u5411\u6b64 Zip \u6587\u4ef6\uff0c\u82e5\u6b64\u914d\u7f6e\u9879\u4e3a\u7a7a\uff0c\u5219FE\u4f1a\u5c1d\u8bd5\u5bfb\u627e FE \u6839\u76ee\u5f55\u4e0b\u7684 ",(0,p.yg)("inlineCode",{parentName:"p"},"lib/spark2x/jars/spark-2x.zip")," \u6587\u4ef6\uff0c\u82e5\u6ca1\u6709\u627e\u5230\u5219\u4f1a\u62a5\u6587\u4ef6\u4e0d\u5b58\u5728\u7684\u9519\u8bef\u3002"),(0,p.yg)("p",null,"\u5f53\u63d0\u4ea4 Spark Load \u4efb\u52a1\u65f6\uff0c\u4f1a\u5c06\u5f52\u6863\u597d\u7684\u4f9d\u8d56\u6587\u4ef6\u4e0a\u4f20\u81f3\u8fdc\u7aef\u4ed3\u5e93\uff0c\u9ed8\u8ba4\u4ed3\u5e93\u8def\u5f84\u6302\u5728 ",(0,p.yg)("inlineCode",{parentName:"p"},"working_dir/{cluster_id}")," \u76ee\u5f55\u4e0b\uff0c\u5e76\u4ee5",(0,p.yg)("inlineCode",{parentName:"p"},"__spark_repository__{resource_name} "),"\u547d\u540d\uff0c\u8868\u793a\u96c6\u7fa4\u5185\u7684\u4e00\u4e2a Resource \u5bf9\u5e94\u4e00\u4e2a\u8fdc\u7aef\u4ed3\u5e93\uff0c\u8fdc\u7aef\u4ed3\u5e93\u76ee\u5f55\u7ed3\u6784\u53c2\u8003\u5982\u4e0b:"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-text"},"__spark_repository__spark0/\n |-__archive_1.0.0/\n | |-__lib_990325d2c0d1d5e45bf675e54e44fb16_spark-dpp-1.0.0-jar-with-dependencies.jar\n | |-__lib_7670c29daf535efe3c9b923f778f61fc_spark-2x.zip\n |-__archive_1.1.0/\n | |-__lib_64d5696f99c379af2bee28c1c84271d5_spark-dpp-1.1.0-jar-with-dependencies.jar\n | |-__lib_1bbb74bb6b264a270bc7fca3e964160f_spark-2x.zip\n |-__archive_1.2.0/\n | |-...\n")),(0,p.yg)("p",null,"\u9664\u4e86 Spark \u4f9d\u8d56(\u9ed8\u8ba4\u4ee5 ",(0,p.yg)("inlineCode",{parentName:"p"},"spark-2x.zip "),"\u547d\u540d)\uff0cFE \u8fd8\u4f1a\u4e0a\u4f20 DPP \u7684\u4f9d\u8d56\u5305\u81f3\u8fdc\u7aef\u4ed3\u5e93\uff0c\u82e5\u6b64\u6b21 Spark Load \u63d0\u4ea4\u7684\u6240\u6709\u4f9d\u8d56\u6587\u4ef6\u90fd\u5df2\u5b58\u5728\u8fdc\u7aef\u4ed3\u5e93\uff0c\u90a3\u4e48\u5c31\u4e0d\u9700\u8981\u5728\u4e0a\u4f20\u4f9d\u8d56\uff0c\u7701\u4e0b\u539f\u6765\u6bcf\u6b21\u91cd\u590d\u4e0a\u4f20\u5927\u91cf\u6587\u4ef6\u7684\u65f6\u95f4\u3002"),(0,p.yg)("h3",{id:"\u914d\u7f6e-yarn-\u5ba2\u6237\u7aef"},"\u914d\u7f6e Yarn \u5ba2\u6237\u7aef"),(0,p.yg)("p",null,"FE \u5e95\u5c42\u901a\u8fc7\u6267\u884c Yarn \u547d\u4ee4\u53bb\u83b7\u53d6\u6b63\u5728\u8fd0\u884c\u7684 Application \u7684\u72b6\u6001\u4ee5\u53ca\u6740\u6b7b Application\uff0c\u56e0\u6b64\u9700\u8981\u4e3a FE \u914d\u7f6e Yarn \u5ba2\u6237\u7aef\uff0c\u5efa\u8bae\u4f7f\u7528 2.5.2 \u6216\u4ee5\u4e0a\u7684 Hadoop2 \u5b98\u65b9\u7248\u672c\uff0c",(0,p.yg)("a",{parentName:"p",href:"https://archive.apache.org/dist/hadoop/common/"},"Hadoop \u4e0b\u8f7d\u5730\u5740")," \uff0c\u4e0b\u8f7d\u5b8c\u6210\u540e\uff0c\u8bf7\u6309\u6b65\u9aa4\u5b8c\u6210\u4ee5\u4e0b\u914d\u7f6e\u3002"),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"\u914d\u7f6e Yarn \u53ef\u6267\u884c\u6587\u4ef6\u8def\u5f84")),(0,p.yg)("p",null,"\u5c06\u4e0b\u8f7d\u597d\u7684 Yarn \u5ba2\u6237\u7aef\u653e\u5728 FE \u540c\u4e00\u53f0\u673a\u5668\u7684\u76ee\u5f55\u4e0b\uff0c\u5e76\u5728 FE \u914d\u7f6e\u6587\u4ef6\u914d\u7f6e ",(0,p.yg)("inlineCode",{parentName:"p"},"yarn_client_path")," \u9879\u6307\u5411 Yarn \u7684\u4e8c\u8fdb\u5236\u53ef\u6267\u884c\u6587\u4ef6\uff0c\u9ed8\u8ba4\u4e3a FE \u6839\u76ee\u5f55\u4e0b\u7684 ",(0,p.yg)("inlineCode",{parentName:"p"},"lib/yarn-client/hadoop/bin/yarn")," \u8def\u5f84\u3002"),(0,p.yg)("p",null,"(\u53ef\u9009) \u5f53 FE \u901a\u8fc7 Yarn \u5ba2\u6237\u7aef\u53bb\u83b7\u53d6 Application \u7684\u72b6\u6001\u6216\u8005\u6740\u6b7b Application \u65f6\uff0c\u9ed8\u8ba4\u4f1a\u5728 FE \u6839\u76ee\u5f55\u4e0b\u7684 ",(0,p.yg)("inlineCode",{parentName:"p"},"lib/yarn-config")," \u8def\u5f84\u4e0b\u751f\u6210\u6267\u884c Yarn \u547d\u4ee4\u6240\u9700\u7684\u914d\u7f6e\u6587\u4ef6\uff0c\u6b64\u8def\u5f84\u53ef\u901a\u8fc7\u5728 FE \u914d\u7f6e\u6587\u4ef6\u914d\u7f6e ",(0,p.yg)("inlineCode",{parentName:"p"},"yarn_config_dir")," \u9879\u4fee\u6539\uff0c\u76ee\u524d\u751f\u6210\u7684\u914d\u7f6e\u6587\u4ef6\u5305\u62ec ",(0,p.yg)("inlineCode",{parentName:"p"},"core-site.xml")," \u548c",(0,p.yg)("inlineCode",{parentName:"p"},"yarn-site.xml"),"\u3002"),(0,p.yg)("h3",{id:"\u521b\u5efa\u5bfc\u5165"},"\u521b\u5efa\u5bfc\u5165"),(0,p.yg)("p",null,"\u8bed\u6cd5\uff1a"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},"LOAD LABEL load_label\n (data_desc, ...)\n WITH RESOURCE resource_name \n [resource_properties]\n [PROPERTIES (key1=value1, ... )]\n\n* load_label:\n db_name.label_name\n\n* data_desc:\n DATA INFILE ('file_path', ...)\n [NEGATIVE]\n INTO TABLE tbl_name\n [PARTITION (p1, p2)]\n [COLUMNS TERMINATED BY separator ]\n [(col1, ...)]\n [COLUMNS FROM PATH AS (col2, ...)]\n [SET (k1=f1(xx), k2=f2(xx))]\n [WHERE predicate]\n \n DATA FROM TABLE hive_external_tbl\n [NEGATIVE]\n INTO TABLE tbl_name\n [PARTITION (p1, p2)]\n [SET (k1=f1(xx), k2=f2(xx))]\n [WHERE predicate]\n\n* resource_properties:\n (key2=value2, ...)\n")),(0,p.yg)("p",null,"\u793a\u4f8b1\uff1a\u4e0a\u6e38\u6570\u636e\u6e90\u4e3a HDFS \u6587\u4ef6\u7684\u60c5\u51b5"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},'LOAD LABEL db1.label1\n(\n DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1")\n INTO TABLE tbl1\n COLUMNS TERMINATED BY ","\n (tmp_c1,tmp_c2)\n SET\n (\n id=tmp_c2,\n name=tmp_c1\n ),\n DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file2")\n INTO TABLE tbl2\n COLUMNS TERMINATED BY ","\n (col1, col2)\n where col1 > 1\n)\nWITH RESOURCE \'spark0\'\n(\n "spark.executor.memory" = "2g",\n "spark.shuffle.compress" = "true"\n)\nPROPERTIES\n(\n "timeout" = "3600"\n);\n')),(0,p.yg)("p",null,"\u793a\u4f8b2\uff1a\u4e0a\u6e38\u6570\u636e\u6e90\u662f Hive \u8868\u7684\u60c5\u51b5"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},'step 1:\u65b0\u5efa Hive \u5916\u90e8\u8868\nCREATE EXTERNAL TABLE hive_t1\n(\n k1 INT,\n K2 SMALLINT,\n k3 varchar(50),\n uuid varchar(100)\n)\nENGINE=hive\nproperties\n(\n"database" = "tmp",\n"table" = "t1",\n"hive.metastore.uris" = "thrift://0.0.0.0:8080"\n);\n\nstep 2: \u63d0\u4ea4 Load \u547d\u4ee4\uff0c\u8981\u6c42\u5bfc\u5165\u7684 Doris \u8868\u4e2d\u7684\u5217\u5fc5\u987b\u5728 Hive \u5916\u90e8\u8868\u4e2d\u5b58\u5728\u3002\nLOAD LABEL db1.label1\n(\n DATA FROM TABLE hive_t1\n INTO TABLE tbl1\n SET\n (\n uuid=bitmap_dict(uuid)\n )\n)\nWITH RESOURCE \'spark0\'\n(\n "spark.executor.memory" = "2g",\n "spark.shuffle.compress" = "true"\n)\nPROPERTIES\n(\n "timeout" = "3600"\n);\n')),(0,p.yg)("p",null,"\u793a\u4f8b3\uff1a\u4e0a\u6e38\u6570\u636e\u6e90\u662f Hive binary \u7c7b\u578b\u60c5\u51b5"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},'step 1:\u65b0\u5efa Hive \u5916\u90e8\u8868\nCREATE EXTERNAL TABLE hive_t1\n(\n k1 INT,\n K2 SMALLINT,\n k3 varchar(50),\n uuid varchar(100) // Hive \u4e2d\u7684\u7c7b\u578b\u4e3a binary\n)\nENGINE=hive\nproperties\n(\n"database" = "tmp",\n"table" = "t1",\n"hive.metastore.uris" = "thrift://0.0.0.0:8080"\n);\n\nstep 2: \u63d0\u4ea4 Load \u547d\u4ee4\uff0c\u8981\u6c42\u5bfc\u5165\u7684 Doris \u8868\u4e2d\u7684\u5217\u5fc5\u987b\u5728 Hive \u5916\u90e8\u8868\u4e2d\u5b58\u5728\u3002\nLOAD LABEL db1.label1\n(\n DATA FROM TABLE hive_t1\n INTO TABLE tbl1\n SET\n (\n uuid=binary_bitmap(uuid)\n )\n)\nWITH RESOURCE \'spark0\'\n(\n "spark.executor.memory" = "2g",\n "spark.shuffle.compress" = "true"\n)\nPROPERTIES\n(\n "timeout" = "3600"\n);\n')),(0,p.yg)("p",null,"\u793a\u4f8b4\uff1a \u5bfc\u5165 Hive \u5206\u533a\u8868\u7684\u6570\u636e"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},'--Hive \u5efa\u8868\u8bed\u53e5\ncreate table test_partition(\n id int,\n name string,\n age int\n)\npartitioned by (dt string)\nrow format delimited fields terminated by \',\'\nstored as textfile;\n\n--Doris \u5efa\u8868\u8bed\u53e5\nCREATE TABLE IF NOT EXISTS test_partition_04\n(\n dt date,\n id int,\n name string,\n age int\n)\nUNIQUE KEY(`dt`, `id`)\nDISTRIBUTED BY HASH(`id`) BUCKETS 1\nPROPERTIES (\n "replication_allocation" = "tag.location.default: 1"\n);\n--Spark Load \u8bed\u53e5\nCREATE EXTERNAL RESOURCE "spark_resource"\nPROPERTIES\n(\n"type" = "spark",\n"spark.master" = "yarn",\n"spark.submit.deployMode" = "cluster",\n"spark.executor.memory" = "1g",\n"spark.yarn.queue" = "default",\n"spark.hadoop.yarn.resourcemanager.address" = "localhost:50056",\n"spark.hadoop.fs.defaultFS" = "hdfs://localhost:9000",\n"working_dir" = "hdfs://localhost:9000/tmp/doris",\n"broker" = "broker_01"\n);\nLOAD LABEL demo.test_hive_partition_table_18\n(\n DATA INFILE("hdfs://localhost:9000/user/hive/warehouse/demo.db/test/dt=2022-08-01/*")\n INTO TABLE test_partition_04\n COLUMNS TERMINATED BY ","\n FORMAT AS "csv"\n (id,name,age)\n COLUMNS FROM PATH AS (`dt`)\n SET\n (\n dt=dt,\n id=id,\n name=name,\n age=age\n )\n)\nWITH RESOURCE \'spark_resource\'\n(\n "spark.executor.memory" = "1g",\n "spark.shuffle.compress" = "true"\n)\nPROPERTIES\n(\n "timeout" = "3600"\n);\n')),(0,p.yg)("p",null,"\u521b\u5efa\u5bfc\u5165\u7684\u8be6\u7ec6\u8bed\u6cd5\u6267\u884c ",(0,p.yg)("inlineCode",{parentName:"p"},"HELP SPARK LOAD")," \u67e5\u770b\u8bed\u6cd5\u5e2e\u52a9\u3002\u8fd9\u91cc\u4e3b\u8981\u4ecb\u7ecd Spark Load \u7684\u521b\u5efa\u5bfc\u5165\u8bed\u6cd5\u4e2d\u53c2\u6570\u610f\u4e49\u548c\u6ce8\u610f\u4e8b\u9879\u3002"),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"Label")),(0,p.yg)("p",null,"\u5bfc\u5165\u4efb\u52a1\u7684\u6807\u8bc6\u3002\u6bcf\u4e2a\u5bfc\u5165\u4efb\u52a1\uff0c\u90fd\u6709\u4e00\u4e2a\u5728\u5355 Database \u5185\u90e8\u552f\u4e00\u7684 Label\u3002\u5177\u4f53\u89c4\u5219\u4e0e ",(0,p.yg)("a",{parentName:"p",href:"/zh-CN/docs/1.2/data-operate/import/import-way/broker-load-manual"},(0,p.yg)("inlineCode",{parentName:"a"},"Broker Load"))," \u4e00\u81f4\u3002"),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"\u6570\u636e\u63cf\u8ff0\u7c7b\u53c2\u6570")),(0,p.yg)("p",null,"\u76ee\u524d\u652f\u6301\u7684\u6570\u636e\u6e90\u6709 CSV \u548c Hive Table\u3002\u5176\u4ed6\u89c4\u5219\u4e0e ",(0,p.yg)("a",{parentName:"p",href:"/zh-CN/docs/1.2/data-operate/import/import-way/broker-load-manual"},(0,p.yg)("inlineCode",{parentName:"a"},"Broker Load"))," \u4e00\u81f4\u3002"),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"\u5bfc\u5165\u4f5c\u4e1a\u53c2\u6570")),(0,p.yg)("p",null,"\u5bfc\u5165\u4f5c\u4e1a\u53c2\u6570\u4e3b\u8981\u6307\u7684\u662f Spark Load \u521b\u5efa\u5bfc\u5165\u8bed\u53e5\u4e2d\u7684\u5c5e\u4e8e ",(0,p.yg)("inlineCode",{parentName:"p"},"opt_properties")," \u90e8\u5206\u7684\u53c2\u6570\u3002\u5bfc\u5165\u4f5c\u4e1a\u53c2\u6570\u662f\u4f5c\u7528\u4e8e\u6574\u4e2a\u5bfc\u5165\u4f5c\u4e1a\u7684\u3002\u89c4\u5219\u4e0e ",(0,p.yg)("a",{parentName:"p",href:"/zh-CN/docs/1.2/data-operate/import/import-way/broker-load-manual"},(0,p.yg)("inlineCode",{parentName:"a"},"Broker Load"))," \u4e00\u81f4\u3002"),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"Spark\u8d44\u6e90\u53c2\u6570")),(0,p.yg)("p",null,"Spark \u8d44\u6e90\u9700\u8981\u63d0\u524d\u914d\u7f6e\u5230 Doris \u7cfb\u7edf\u4e2d\u5e76\u4e14\u8d4b\u4e88\u7528\u6237 USAGE_PRIV \u6743\u9650\u540e\u624d\u80fd\u4f7f\u7528 Spark Load\u3002"),(0,p.yg)("p",null,"\u5f53\u7528\u6237\u6709\u4e34\u65f6\u6027\u7684\u9700\u6c42\uff0c\u6bd4\u5982\u589e\u52a0\u4efb\u52a1\u4f7f\u7528\u7684\u8d44\u6e90\u800c\u4fee\u6539 Spark Configs\uff0c\u53ef\u4ee5\u5728\u8fd9\u91cc\u8bbe\u7f6e\uff0c\u8bbe\u7f6e\u4ec5\u5bf9\u672c\u6b21\u4efb\u52a1\u751f\u6548\uff0c\u5e76\u4e0d\u5f71\u54cd Doris \u96c6\u7fa4\u4e2d\u5df2\u6709\u7684\u914d\u7f6e\u3002"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},'WITH RESOURCE \'spark0\'\n(\n "spark.driver.memory" = "1g",\n "spark.executor.memory" = "3g"\n)\n')),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"\u6570\u636e\u6e90\u4e3a Hive \u8868\u65f6\u7684\u5bfc\u5165")),(0,p.yg)("p",null,"\u76ee\u524d\u5982\u679c\u671f\u671b\u5728\u5bfc\u5165\u6d41\u7a0b\u4e2d\u5c06 Hive \u8868\u4f5c\u4e3a\u6570\u636e\u6e90\uff0c\u90a3\u4e48\u9700\u8981\u5148\u65b0\u5efa\u4e00\u5f20\u7c7b\u578b\u4e3a Hive \u7684\u5916\u90e8\u8868\uff0c \u7136\u540e\u63d0\u4ea4\u5bfc\u5165\u547d\u4ee4\u65f6\u6307\u5b9a\u5916\u90e8\u8868\u7684\u8868\u540d\u5373\u53ef\u3002"),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"},"\u5bfc\u5165\u6d41\u7a0b\u6784\u5efa\u5168\u5c40\u5b57\u5178")),(0,p.yg)("p",null,"\u9002\u7528\u4e8e Doris \u8868\u805a\u5408\u5217\u7684\u6570\u636e\u7c7b\u578b\u4e3a Bitmap \u7c7b\u578b\u3002 \u5728 Load \u547d\u4ee4\u4e2d\u6307\u5b9a\u9700\u8981\u6784\u5efa\u5168\u5c40\u5b57\u5178\u7684\u5b57\u6bb5\u5373\u53ef\uff0c\u683c\u5f0f\u4e3a\uff1a",(0,p.yg)("inlineCode",{parentName:"p"},"Doris \u5b57\u6bb5\u540d\u79f0=bitmap_dict(Hive \u8868\u5b57\u6bb5\u540d\u79f0)")," \u9700\u8981\u6ce8\u610f\u7684\u662f\u76ee\u524d\u53ea\u6709\u5728\u4e0a\u6e38\u6570\u636e\u6e90\u4e3a Hive \u8868\u65f6\u624d\u652f\u6301\u5168\u5c40\u5b57\u5178\u7684\u6784\u5efa\u3002"),(0,p.yg)("p",null,(0,p.yg)("strong",{parentName:"p"}," Hive binary\uff08bitmap\uff09\u7c7b\u578b\u5217\u7684\u5bfc\u5165")),(0,p.yg)("p",null,"\u9002\u7528\u4e8e Doris \u8868\u805a\u5408\u5217\u7684\u6570\u636e\u7c7b\u578b\u4e3a Bitmap \u7c7b\u578b\uff0c\u4e14\u6570\u636e\u6e90 Hive \u8868\u4e2d\u5bf9\u5e94\u5217\u7684\u6570\u636e\u7c7b\u578b\u4e3a binary\uff08\u901a\u8fc7 FE \u4e2d spark-dpp \u4e2d\u7684 ",(0,p.yg)("inlineCode",{parentName:"p"},"org.apache.doris.load.loadv2.dpp.BitmapValue")," \u7c7b\u5e8f\u5217\u5316\uff09\u7c7b\u578b\u3002 \u65e0\u9700\u6784\u5efa\u5168\u5c40\u5b57\u5178\uff0c\u5728 Load \u547d\u4ee4\u4e2d\u6307\u5b9a\u76f8\u5e94\u5b57\u6bb5\u5373\u53ef\uff0c\u683c\u5f0f\u4e3a\uff1a",(0,p.yg)("inlineCode",{parentName:"p"},"Doris \u5b57\u6bb5\u540d\u79f0= binary_bitmap( Hive \u8868\u5b57\u6bb5\u540d\u79f0)")," \u540c\u6837\uff0c\u76ee\u524d\u53ea\u6709\u5728\u4e0a\u6e38\u6570\u636e\u6e90\u4e3a Hive \u8868\u65f6\u624d\u652f\u6301 binary\uff08 bitmap \uff09\u7c7b\u578b\u7684\u6570\u636e\u5bfc\u5165 Hive bitmap \u4f7f\u7528\u53ef\u53c2\u8003 ",(0,p.yg)("a",{parentName:"p",href:"/zh-CN/docs/1.2/ecosystem/hive-bitmap-udf"},"hive-bitmap-udf")," \u3002"),(0,p.yg)("h3",{id:"\u67e5\u770b\u5bfc\u5165"},"\u67e5\u770b\u5bfc\u5165"),(0,p.yg)("p",null,"Spark Load \u5bfc\u5165\u65b9\u5f0f\u540c Broker Load \u4e00\u6837\u90fd\u662f\u5f02\u6b65\u7684\uff0c\u6240\u4ee5\u7528\u6237\u5fc5\u987b\u5c06\u521b\u5efa\u5bfc\u5165\u7684 Label \u8bb0\u5f55\uff0c\u5e76\u4e14\u5728",(0,p.yg)("strong",{parentName:"p"},"\u67e5\u770b\u5bfc\u5165\u547d\u4ee4\u4e2d\u4f7f\u7528 Label \u6765\u67e5\u770b\u5bfc\u5165\u7ed3\u679c"),"\u3002\u67e5\u770b\u5bfc\u5165\u547d\u4ee4\u5728\u6240\u6709\u5bfc\u5165\u65b9\u5f0f\u4e2d\u662f\u901a\u7528\u7684\uff0c\u5177\u4f53\u8bed\u6cd5\u53ef\u6267\u884c ",(0,p.yg)("inlineCode",{parentName:"p"},"HELP SHOW LOAD")," \u67e5\u770b\u3002"),(0,p.yg)("p",null,"\u793a\u4f8b\uff1a"),(0,p.yg)("pre",null,(0,p.yg)("code",{parentName:"pre",className:"language-sql"},'mysql> show load order by createtime desc limit 1\\G\n*************************** 1. row ***************************\n JobId: 76391\n Label: label1\n State: FINISHED\n Progress: ETL:100%; LOAD:100%\n Type: SPARK\n EtlInfo: unselected.rows=4; dpp.abnorm.ALL=15; dpp.norm.ALL=28133376\n TaskInfo: cluster:cluster0; timeout(s):10800; max_filter_ratio:5.0E-5\n ErrorMsg: N/A\n CreateTime: 2019-07-27 11:46:42\n EtlStartTime: 2019-07-27 11:46:44\n EtlFinishTime: 2019-07-27 11:49:44\n LoadStartTime: 2019-07-27 11:49:44\nLoadFinishTime: 2019-07-27 11:50:16\n URL: http://1.1.1.1:8089/proxy/application_1586619723848_0035/\n JobDetails: {"ScannedRows":28133395,"TaskNumber":1,"FileNumber":1,"FileSize":200000}\n')),(0,p.yg)("p",null,"\u8fd4\u56de\u7ed3\u679c\u96c6\u4e2d\u53c2\u6570\u610f\u4e49\u53ef\u4ee5\u53c2\u8003 ",(0,p.yg)("a",{parentName:"p",href:"/zh-CN/docs/1.2/data-operate/import/import-way/broker-load-manual"},"Broker Load"),"\u3002\u4e0d\u540c\u70b9\u5982\u4e0b\uff1a"),(0,p.yg)("ul",null,(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},"State"),(0,p.yg)("p",{parentName:"li"},"\u5bfc\u5165\u4efb\u52a1\u5f53\u524d\u6240\u5904\u7684\u9636\u6bb5\u3002\u4efb\u52a1\u63d0\u4ea4\u4e4b\u540e\u72b6\u6001\u4e3a PENDING\uff0c\u63d0\u4ea4 Spark ETL \u4e4b\u540e\u72b6\u6001\u53d8\u4e3a ETL\uff0cETL \u5b8c\u6210\u4e4b\u540e FE \u8c03\u5ea6 BE \u6267\u884c push \u64cd\u4f5c\u72b6\u6001\u53d8\u4e3a LOADING\uff0cpush \u5b8c\u6210\u5e76\u4e14\u7248\u672c\u751f\u6548\u540e\u72b6\u6001\u53d8\u4e3a FINISHED\u3002"),(0,p.yg)("p",{parentName:"li"},"\u5bfc\u5165\u4efb\u52a1\u7684\u6700\u7ec8\u9636\u6bb5\u6709\u4e24\u4e2a\uff1aCANCELLED \u548c FINISHED\uff0c\u5f53 Load Job \u5904\u4e8e\u8fd9\u4e24\u4e2a\u9636\u6bb5\u65f6\u5bfc\u5165\u5b8c\u6210\u3002\u5176\u4e2d CANCELLED \u4e3a\u5bfc\u5165\u5931\u8d25\uff0cFINISHED \u4e3a\u5bfc\u5165\u6210\u529f\u3002")),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},"Progress"),(0,p.yg)("p",{parentName:"li"},"\u5bfc\u5165\u4efb\u52a1\u7684\u8fdb\u5ea6\u63cf\u8ff0\u3002\u5206\u4e3a\u4e24\u79cd\u8fdb\u5ea6\uff1aETL \u548c LOAD\uff0c\u5bf9\u5e94\u4e86\u5bfc\u5165\u6d41\u7a0b\u7684\u4e24\u4e2a\u9636\u6bb5 ETL \u548c LOADING\u3002"),(0,p.yg)("p",{parentName:"li"},"LOAD \u7684\u8fdb\u5ea6\u8303\u56f4\u4e3a\uff1a0~100%\u3002"),(0,p.yg)("p",{parentName:"li"},(0,p.yg)("inlineCode",{parentName:"p"},"LOAD \u8fdb\u5ea6 = \u5f53\u524d\u5df2\u5b8c\u6210\u6240\u6709 Replica \u5bfc\u5165\u7684 Tablet \u4e2a\u6570 / \u672c\u6b21\u5bfc\u5165\u4efb\u52a1\u7684\u603b Tablet \u4e2a\u6570 * 100%")),(0,p.yg)("p",{parentName:"li"},(0,p.yg)("strong",{parentName:"p"},"\u5982\u679c\u6240\u6709\u5bfc\u5165\u8868\u5747\u5b8c\u6210\u5bfc\u5165\uff0c\u6b64\u65f6 LOAD \u7684\u8fdb\u5ea6\u4e3a 99%")," \u5bfc\u5165\u8fdb\u5165\u5230\u6700\u540e\u751f\u6548\u9636\u6bb5\uff0c\u6574\u4e2a\u5bfc\u5165\u5b8c\u6210\u540e\uff0cLOAD \u7684\u8fdb\u5ea6\u624d\u4f1a\u6539\u4e3a 100%\u3002"),(0,p.yg)("p",{parentName:"li"},"\u5bfc\u5165\u8fdb\u5ea6\u5e76\u4e0d\u662f\u7ebf\u6027\u7684\u3002\u6240\u4ee5\u5982\u679c\u4e00\u6bb5\u65f6\u95f4\u5185\u8fdb\u5ea6\u6ca1\u6709\u53d8\u5316\uff0c\u5e76\u4e0d\u4ee3\u8868\u5bfc\u5165\u6ca1\u6709\u5728\u6267\u884c\u3002")),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},"Type"),(0,p.yg)("p",{parentName:"li"},"\u5bfc\u5165\u4efb\u52a1\u7684\u7c7b\u578b\u3002Spark load \u4e3a SPARK\u3002")),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},"CreateTime/EtlStartTime/EtlFinishTime/LoadStartTime/LoadFinishTime"),(0,p.yg)("p",{parentName:"li"},"\u8fd9\u51e0\u4e2a\u503c\u5206\u522b\u4ee3\u8868\u5bfc\u5165\u521b\u5efa\u7684\u65f6\u95f4\uff0cETL \u9636\u6bb5\u5f00\u59cb\u7684\u65f6\u95f4\uff0cETL \u9636\u6bb5\u5b8c\u6210\u7684\u65f6\u95f4\uff0cLOADING \u9636\u6bb5\u5f00\u59cb\u7684\u65f6\u95f4\u548c\u6574\u4e2a\u5bfc\u5165\u4efb\u52a1\u5b8c\u6210\u7684\u65f6\u95f4\u3002")),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},"JobDetails"),(0,p.yg)("p",{parentName:"li"},"\u663e\u793a\u4e00\u4e9b\u4f5c\u4e1a\u7684\u8be6\u7ec6\u8fd0\u884c\u72b6\u6001\uff0cETL \u7ed3\u675f\u7684\u65f6\u5019\u66f4\u65b0\u3002\u5305\u62ec\u5bfc\u5165\u6587\u4ef6\u7684\u4e2a\u6570\u3001\u603b\u5927\u5c0f\uff08\u5b57\u8282\uff09\u3001\u5b50\u4efb\u52a1\u4e2a\u6570\u3001\u5df2\u5904\u7406\u7684\u539f\u59cb\u884c\u6570\u7b49\u3002"),(0,p.yg)("p",{parentName:"li"},(0,p.yg)("inlineCode",{parentName:"p"},'{"ScannedRows":139264,"TaskNumber":1,"FileNumber":1,"FileSize":940754064}'))),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},"URL"),(0,p.yg)("p",{parentName:"li"},"\u53ef\u590d\u5236\u8f93\u5165\u5230\u6d4f\u89c8\u5668\uff0c\u8df3\u8f6c\u81f3\u76f8\u5e94 Application \u7684 Web \u754c\u9762"))),(0,p.yg)("h3",{id:"\u67e5\u770b-spark-launcher-\u63d0\u4ea4\u65e5\u5fd7"},"\u67e5\u770b Spark Launcher \u63d0\u4ea4\u65e5\u5fd7"),(0,p.yg)("p",null,"\u6709\u65f6\u7528\u6237\u9700\u8981\u67e5\u770b Spark \u4efb\u52a1\u63d0\u4ea4\u8fc7\u7a0b\u4e2d\u4ea7\u751f\u7684\u8be6\u7ec6\u65e5\u5fd7\uff0c\u65e5\u5fd7\u9ed8\u8ba4\u4fdd\u5b58\u5728FE\u6839\u76ee\u5f55\u4e0b ",(0,p.yg)("inlineCode",{parentName:"p"},"log/spark_launcher_log")," \u8def\u5f84\u4e0b\uff0c\u5e76\u4ee5 ",(0,p.yg)("inlineCode",{parentName:"p"},"spark_launcher_{load_job_id}_{label}.log "),"\u547d\u540d\uff0c\u65e5\u5fd7\u4f1a\u5728\u6b64\u76ee\u5f55\u4e0b\u4fdd\u5b58\u4e00\u6bb5\u65f6\u95f4\uff0c\u5f53FE\u5143\u6570\u636e\u4e2d\u7684\u5bfc\u5165\u4fe1\u606f\u88ab\u6e05\u7406\u65f6\uff0c\u76f8\u5e94\u7684\u65e5\u5fd7\u4e5f\u4f1a\u88ab\u6e05\u7406\uff0c\u9ed8\u8ba4\u4fdd\u5b58\u65f6\u95f4\u4e3a3\u5929\u3002"),(0,p.yg)("h3",{id:"\u53d6\u6d88\u5bfc\u5165"},"\u53d6\u6d88\u5bfc\u5165"),(0,p.yg)("p",null,"\u5f53 Spark Load \u4f5c\u4e1a\u72b6\u6001\u4e0d\u4e3a CANCELLED \u6216 FINISHED \u65f6\uff0c\u53ef\u4ee5\u88ab\u7528\u6237\u624b\u52a8\u53d6\u6d88\u3002\u53d6\u6d88\u65f6\u9700\u8981\u6307\u5b9a\u5f85\u53d6\u6d88\u5bfc\u5165\u4efb\u52a1\u7684 Label \u3002\u53d6\u6d88\u5bfc\u5165\u547d\u4ee4\u8bed\u6cd5\u53ef\u6267\u884c ",(0,p.yg)("inlineCode",{parentName:"p"},"HELP CANCEL LOAD")," \u67e5\u770b\u3002"),(0,p.yg)("h2",{id:"\u76f8\u5173\u7cfb\u7edf\u914d\u7f6e"},"\u76f8\u5173\u7cfb\u7edf\u914d\u7f6e"),(0,p.yg)("h3",{id:"fe-\u914d\u7f6e"},"FE \u914d\u7f6e"),(0,p.yg)("p",null,"\u4e0b\u9762\u914d\u7f6e\u5c5e\u4e8e Spark Load \u7684\u7cfb\u7edf\u7ea7\u522b\u914d\u7f6e\uff0c\u4e5f\u5c31\u662f\u4f5c\u7528\u4e8e\u6240\u6709 Spark Load \u5bfc\u5165\u4efb\u52a1\u7684\u914d\u7f6e\u3002\u4e3b\u8981\u901a\u8fc7\u4fee\u6539 ",(0,p.yg)("inlineCode",{parentName:"p"},"fe.conf"),"\u6765\u8c03\u6574\u914d\u7f6e\u503c\u3002"),(0,p.yg)("ul",null,(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},(0,p.yg)("inlineCode",{parentName:"p"},"enable_spark_load")),(0,p.yg)("p",{parentName:"li"},"\u5f00\u542f Spark Load \u548c\u521b\u5efa Resource \u529f\u80fd\u3002\u9ed8\u8ba4\u4e3a False\uff0c\u5173\u95ed\u6b64\u529f\u80fd\u3002")),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},(0,p.yg)("inlineCode",{parentName:"p"},"spark_load_default_timeout_second")),(0,p.yg)("p",{parentName:"li"},"\u4efb\u52a1\u9ed8\u8ba4\u8d85\u65f6\u65f6\u95f4\u4e3a 259200 \u79d2\uff083 \u5929\uff09\u3002")),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},(0,p.yg)("inlineCode",{parentName:"p"},"spark_home_default_dir")),(0,p.yg)("p",{parentName:"li"},"Spark \u5ba2\u6237\u7aef\u8def\u5f84 (",(0,p.yg)("inlineCode",{parentName:"p"},"fe/lib/spark2x"),") \u3002")),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},(0,p.yg)("inlineCode",{parentName:"p"},"spark_resource_path")),(0,p.yg)("p",{parentName:"li"},"\u6253\u5305\u597d\u7684 Spark \u4f9d\u8d56\u6587\u4ef6\u8def\u5f84\uff08\u9ed8\u8ba4\u4e3a\u7a7a\uff09\u3002")),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},(0,p.yg)("inlineCode",{parentName:"p"},"spark_launcher_log_dir")),(0,p.yg)("p",{parentName:"li"},"Spark \u5ba2\u6237\u7aef\u7684\u63d0\u4ea4\u65e5\u5fd7\u5b58\u653e\u7684\u76ee\u5f55\uff08",(0,p.yg)("inlineCode",{parentName:"p"},"fe/log/spark_launcher_log"),"\uff09\u3002")),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},(0,p.yg)("inlineCode",{parentName:"p"},"yarn_client_path")),(0,p.yg)("p",{parentName:"li"},"Yarn \u4e8c\u8fdb\u5236\u53ef\u6267\u884c\u6587\u4ef6\u8def\u5f84 (",(0,p.yg)("inlineCode",{parentName:"p"},"fe/lib/yarn-client/hadoop/bin/yarn"),") \u3002")),(0,p.yg)("li",{parentName:"ul"},(0,p.yg)("p",{parentName:"li"},(0,p.yg)("inlineCode",{parentName:"p"},"yarn_config_dir")),(0,p.yg)("p",{parentName:"li"},"Yarn \u914d\u7f6e\u6587\u4ef6\u751f\u6210\u8def\u5f84 (",(0,p.yg)("inlineCode",{parentName:"p"},"fe/lib/yarn-config"),") \u3002"))),(0,p.yg)("h2",{id:"\u6700\u4f73\u5b9e\u8df5"},"\u6700\u4f73\u5b9e\u8df5"),(0,p.yg)("h3",{id:"\u5e94\u7528\u573a\u666f"},"\u5e94\u7528\u573a\u666f"),(0,p.yg)("p",null,"\u4f7f\u7528 Spark Load \u6700\u9002\u5408\u7684\u573a\u666f\u5c31\u662f\u539f\u59cb\u6570\u636e\u5728\u6587\u4ef6\u7cfb\u7edf\uff08HDFS\uff09\u4e2d\uff0c\u6570\u636e\u91cf\u5728 \u51e0\u5341 GB \u5230 TB \u7ea7\u522b\u3002\u5c0f\u6570\u636e\u91cf\u8fd8\u662f\u5efa\u8bae\u4f7f\u7528 ",(0,p.yg)("a",{parentName:"p",href:"/zh-CN/docs/1.2/data-operate/import/import-way/stream-load-manual"},"Stream Load")," \u6216\u8005 ",(0,p.yg)("a",{parentName:"p",href:"/zh-CN/docs/1.2/data-operate/import/import-way/broker-load-manual"},"Broker Load"),"\u3002"),(0,p.yg)("h2",{id:"\u5e38\u89c1\u95ee\u9898"},"\u5e38\u89c1\u95ee\u9898"),(0,p.yg)("ul",null,(0,p.yg)("li",{parentName:"ul"},"\u73b0\u5728 Spark Load \u8fd8\u4e0d\u652f\u6301 Doris \u8868\u5b57\u6bb5\u662f String \u7c7b\u578b\u7684\u5bfc\u5165\uff0c\u5982\u679c\u4f60\u7684\u8868\u5b57\u6bb5\u6709 String \u7c7b\u578b\u7684\u8bf7\u6539\u6210 Varchar \u7c7b\u578b\uff0c\u4e0d\u7136\u4f1a\u5bfc\u5165\u5931\u8d25\uff0c\u63d0\u793a ",(0,p.yg)("inlineCode",{parentName:"li"},"type:ETL_QUALITY_UNSATISFIED; msg:quality not good enough to cancel")),(0,p.yg)("li",{parentName:"ul"},"\u4f7f\u7528 Spark Load \u65f6\u6ca1\u6709\u5728 Spark \u5ba2\u6237\u7aef\u7684 ",(0,p.yg)("inlineCode",{parentName:"li"},"spark-env.sh")," \u914d\u7f6e ",(0,p.yg)("inlineCode",{parentName:"li"},"HADOOP_CONF_DIR")," \u73af\u5883\u53d8\u91cf\u3002")),(0,p.yg)("p",null,"\u5982\u679c ",(0,p.yg)("inlineCode",{parentName:"p"},"HADOOP_CONF_DIR")," \u73af\u5883\u53d8\u91cf\u6ca1\u6709\u8bbe\u7f6e\uff0c\u4f1a\u62a5 ",(0,p.yg)("inlineCode",{parentName:"p"},"When running with master 'yarn' either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.")," \u9519\u8bef\u3002"),(0,p.yg)("ul",null,(0,p.yg)("li",{parentName:"ul"},"\u4f7f\u7528 Spark Load \u65f6",(0,p.yg)("inlineCode",{parentName:"li"},"spark_home_default_dir"),"\u914d\u7f6e\u9879\u6ca1\u6709\u6307\u5b9a Spark \u5ba2\u6237\u7aef\u6839\u76ee\u5f55\u3002")),(0,p.yg)("p",null,"\u63d0\u4ea4 Spark Job \u65f6\u7528\u5230 spark-submit \u547d\u4ee4\uff0c\u5982\u679c ",(0,p.yg)("inlineCode",{parentName:"p"},"spark_home_default_dir")," \u8bbe\u7f6e\u9519\u8bef\uff0c\u4f1a\u62a5 ",(0,p.yg)("inlineCode",{parentName:"p"},'Cannot run program "xxx/bin/spark-submit": error=2, No such file or directory')," \u9519\u8bef\u3002"),(0,p.yg)("ul",null,(0,p.yg)("li",{parentName:"ul"},"\u4f7f\u7528 Spark Load \u65f6 ",(0,p.yg)("inlineCode",{parentName:"li"},"spark_resource_path")," \u914d\u7f6e\u9879\u6ca1\u6709\u6307\u5411\u6253\u5305\u597d\u7684 zip \u6587\u4ef6\u3002")),(0,p.yg)("p",null,"\u5982\u679c ",(0,p.yg)("inlineCode",{parentName:"p"},"spark_resource_path "),"\u6ca1\u6709\u8bbe\u7f6e\u6b63\u786e\uff0c\u4f1a\u62a5 ",(0,p.yg)("inlineCode",{parentName:"p"},"File xxx/jars/spark-2x.zip does not exist")," \u9519\u8bef\u3002"),(0,p.yg)("ul",null,(0,p.yg)("li",{parentName:"ul"},"\u4f7f\u7528 Spark Load \u65f6 ",(0,p.yg)("inlineCode",{parentName:"li"},"yarn_client_path")," \u914d\u7f6e\u9879\u6ca1\u6709\u6307\u5b9a Yarn \u7684\u53ef\u6267\u884c\u6587\u4ef6\u3002")),(0,p.yg)("p",null,"\u5982\u679c ",(0,p.yg)("inlineCode",{parentName:"p"},"yarn_client_path "),"\u6ca1\u6709\u8bbe\u7f6e\u6b63\u786e\uff0c\u4f1a\u62a5 ",(0,p.yg)("inlineCode",{parentName:"p"},"yarn client does not exist in path: xxx/yarn-client/hadoop/bin/yarn")," \u9519\u8bef"),(0,p.yg)("ul",null,(0,p.yg)("li",{parentName:"ul"},"\u4f7f\u7528 Spark Load \u65f6\u6ca1\u6709\u5728 Yarn \u5ba2\u6237\u7aef\u7684 ",(0,p.yg)("inlineCode",{parentName:"li"},"hadoop-config.sh")," \u914d\u7f6e ",(0,p.yg)("inlineCode",{parentName:"li"},"JAVA_HOME")," \u73af\u5883\u53d8\u91cf\u3002")),(0,p.yg)("p",null,"\u5982\u679c ",(0,p.yg)("inlineCode",{parentName:"p"},"JAVA_HOME")," \u73af\u5883\u53d8\u91cf\u6ca1\u6709\u8bbe\u7f6e\uff0c\u4f1a\u62a5 ",(0,p.yg)("inlineCode",{parentName:"p"},"yarn application kill failed. app id: xxx, load job id: xxx, msg: which: no xxx/lib/yarn-client/hadoop/bin/yarn in ((null)) Error: JAVA_HOME is not set and could not be found")," \u9519\u8bef"),(0,p.yg)("h2",{id:"\u66f4\u591a\u5e2e\u52a9"},"\u66f4\u591a\u5e2e\u52a9"),(0,p.yg)("p",null,"\u5173\u4e8e",(0,p.yg)("strong",{parentName:"p"},"Spark Load")," \u4f7f\u7528\u7684\u66f4\u591a\u8be6\u7ec6\u8bed\u6cd5\uff0c\u53ef\u4ee5\u5728 MySQL \u5ba2\u6237\u7aef\u547d\u4ee4\u884c\u4e0b\u8f93\u5165 ",(0,p.yg)("inlineCode",{parentName:"p"},"HELP SPARK LOAD")," \u83b7\u53d6\u66f4\u591a\u5e2e\u52a9\u4fe1\u606f\u3002"))}y.isMDXComponent=!0}}]);