blob: 0e973a01146e2df34f3776b3ae750998144d55a4 [file] [log] [blame]
// NOTE(review): the line above ("blob: ... [file] [log] [blame]") is not JavaScript; it looks
// like a header left behind by a repository-browser export. Confirm and strip before running.
//
// Generated, minified webpack chunk (chunk id 10172). It appends two modules to the
// `self.webpackChunkdoris_website` array, creating the array first when it does not exist.
//
// Module 15680 - MDX element runtime. Exports `xA` (the provider `c`) and `yg` (the element
// factory `m`). Depends on module 296540, which is used for createContext / useContext /
// createElement / forwardRef / Fragment (presumably React - confirm in the main bundle).
//   r(e,n,t)  defines (or assigns) property n = t on e and returns e.
//   i(e,n)    returns e's own keys plus its symbols (only enumerable symbols when n is truthy).
//   o(e,...)  copies the properties of each following argument onto e and returns e.
//   l(e,n)    returns a shallow copy of e without the keys listed in n ({} when e is null).
//   s         context created with an empty object as its default value.
//   p(e)      reads the context value; when e is a function it returns e(context), when e is
//             another truthy value it returns the context merged with e (e wins), and when
//             e is falsy it returns the context value unchanged.
//   c         provider component: renders s.Provider with p(props.components) as its value.
//   d         fallback components: inlineCode -> "code", wrapper -> a Fragment around children.
//   g         forwardRef component (displayName "MDXCreateElement"). It picks the component to
//             render in this order: components[parentName + "." + mdxType], components[mdxType],
//             d[mdxType], originalType. The `components` prop is forwarded only when supplied.
//   m(e,n)    when e is a string or n.mdxType is set, calls createElement with g as the type,
//             a copy of n's own properties plus `originalType` and `mdxType`, and the original
//             children; otherwise it forwards all arguments to createElement unchanged.
//             NOTE(review): `hasOwnProperty` is referenced as a bare identifier there, so it
//             relies on the global object exposing it - confirm for non-browser hosts.
//
// Module 250486 - compiled MDX page titled "Remote User Defined Function Service" (source
// "@site/versioned_docs/version-2.0/query/udf/remote-user-defined-function.md").
// It calls t.r on its exports object and exports: assets (s, an empty object), contentTitle
// (o, undefined), default (d, the page component), frontMatter (i), metadata (l) and toc
// (p, a list of headings with value / id / level).
// Depends on module 58168 (its `A` export is called to merge props - presumably an
// object-extend helper, confirm) and on module 15680 above.
//   d(e)      separates `components` from the remaining props, then renders the element type
//             "wrapper" with the merged props, `components` and mdxType "MDXLayout", followed
//             by the page content built from nested r.yg(...) calls. d.isMDXComponent is true.
//
// NOTE(review): in this listing several string literals (and one `for ... in` header) continue
// across a physical line break. A raw line break is not allowed inside a double-quoted
// JavaScript string, so these breaks look like wrapping artifacts of the export rather than
// part of the built file - confirm against the deployed asset.
"use strict";(self.webpackChunkdoris_website=self.webpackChunkdoris_website||[]).push([[10172],{15680:(e,n,t)=>{t.d(n,{xA:()=>c,yg:()=>m});var a=t(296540);function r(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function i(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var n=1;n<arguments.length;n++){var t=null!=arguments[n]?arguments[n]:{};n%2?i(Object(t),!0).forEach((function(n){r(e,n,t[n])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(t)):i(Object(t)).forEach((function(n){Object.defineProperty(e,n,Object.getOwnPropertyDescriptor(t,n))}))}return e}function l(e,n){if(null==e)return{};var t,a,r=function(e,n){if(null==e)return{};var t,a,r={},i=Object.keys(e);for(a=0;a<i.length;a++)t=i[a],n.indexOf(t)>=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a<i.length;a++)t=i[a],n.indexOf(t)>=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var s=a.createContext({}),p=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},c=function(e){var n=p(e.components);return a.createElement(s.Provider,{value:n},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},g=a.forwardRef((function(e,n){var t=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),u=p(t),g=r,m=u["".concat(s,".").concat(g)]||u[g]||d[g]||i;return t?a.createElement(m,o(o({ref:n},c),{},{components:t})):a.createElement(m,o({ref:n},c))}));function m(e,n){var t=arguments,r=n&&n.mdxType;if("string"==typeof e||r){var i=t.length,o=new Array(i);o[0]=g;var l={};for(var s in 
n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[u]="string"==typeof e?e:r,o[1]=l;for(var p=2;p<i;p++)o[p]=t[p];return a.createElement.apply(null,o)}return a.createElement.apply(null,t)}g.displayName="MDXCreateElement"},250486:(e,n,t)=>{t.r(n),t.d(n,{assets:()=>s,contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=t(58168),r=(t(296540),t(15680));const i={title:"Remote User Defined Function Service",language:"en"},o=void 0,l={unversionedId:"query/udf/remote-user-defined-function",id:"version-2.0/query/udf/remote-user-defined-function",title:"Remote User Defined Function Service",description:"\x3c!--",source:"@site/versioned_docs/version-2.0/query/udf/remote-user-defined-function.md",sourceDirName:"query/udf",slug:"/query/udf/remote-user-defined-function",permalink:"/docs/2.0/query/udf/remote-user-defined-function",draft:!1,tags:[],version:"2.0",frontMatter:{title:"Remote User Defined Function Service",language:"en"},sidebar:"docs",previous:{title:"Java UDF",permalink:"/docs/2.0/query/udf/java-user-defined-function"},next:{title:"Lakehouse Overview",permalink:"/docs/2.0/lakehouse/lakehouse-overview"}},s={},p=[{value:"Remote UDF",id:"remote-udf",level:2},{value:"Writing UDF Functions",id:"writing-udf-functions",level:2},{value:"Copying the Proto Files",id:"copying-the-proto-files",level:3},{value:"Generating Interfaces",id:"generating-interfaces",level:3},{value:"Implementing Interfaces",id:"implementing-interfaces",level:3},{value:"Creating UDF",id:"creating-udf",level:2},{value:"Using UDF",id:"using-udf",level:2},{value:"Deleting UDF",id:"deleting-udf",level:2},{value:"Example",id:"example",level:2}],c={toc:p},u="wrapper";function d(e){let{components:n,...t}=e;return(0,r.yg)(u,(0,a.A)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,r.yg)("h2",{id:"remote-udf"},"Remote UDF"),(0,r.yg)("p",null,"Remote UDF Service supports accessing user-provided UDF Services via RPC to execute user-defined functions. 
Compared to native UDF implementation, Remote UDF Service has the following advantages and limitations:"),(0,r.yg)("p",null,(0,r.yg)("strong",{parentName:"p"},"1. Advantages")),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"Cross-language: UDF Services can be written in various languages supported by Protobuf.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"Security: UDF failures or crashes only affect the UDF Service itself and do not cause Doris process crashes.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"Flexibility: UDF Services can invoke any other services or library classes to meet diverse business requirements."))),(0,r.yg)("p",null,(0,r.yg)("strong",{parentName:"p"},"2. Usage Limitations")),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"Performance: Compared to native UDFs, UDF Service introduces additional network overhead, resulting in lower performance. Additionally, the UDF Service implementation itself can impact function execution efficiency, and users need to handle issues like high concurrency and thread safety.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"Single-row mode and batch processing mode: In Doris' original row-based query execution framework, UDF RPC calls are made for each row of data, resulting in poor performance. However, in the new vectorized execution framework, UDF RPC calls are made for each batch of data (default: 2048 rows), leading to significant performance improvements. In actual testing, the performance of Remote UDF based on vectorization and batch processing is comparable to that of native UDF based on row storage."))),(0,r.yg)("h2",{id:"writing-udf-functions"},"Writing UDF Functions"),(0,r.yg)("p",null,"This section provides instructions on how to develop a Remote RPC service. 
A Java version example is provided in ",(0,r.yg)("inlineCode",{parentName:"p"},"samples/doris-demo/udf-demo/")," for reference."),(0,r.yg)("h3",{id:"copying-the-proto-files"},"Copying the Proto Files"),(0,r.yg)("p",null,"Copy ",(0,r.yg)("inlineCode",{parentName:"p"},"gensrc/proto/function_service.proto")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"gensrc/proto/types.proto")," to the RPC service."),(0,r.yg)("p",null,(0,r.yg)("strong",{parentName:"p"},"function_service.proto")),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"PFunctionCallRequest"),(0,r.yg)("ul",{parentName:"li"},(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"function_name: Function name, corresponding to the symbol specified during function creation.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"args: Arguments passed to the method.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"context: Query context information.")))),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"PFunctionCallResponse"),(0,r.yg)("ul",{parentName:"li"},(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"result: Result.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"status: Status, where 0 represents normal.")))),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"PCheckFunctionRequest"),(0,r.yg)("ul",{parentName:"li"},(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"function: Function-related information.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"match_type: Matching type.")))),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"PCheckFunctionResponse"),(0,r.yg)("ul",{parentName:"li"},(0,r.yg)("li",{parentName:"ul"},"status: Status, where 0 represents normal.")))),(0,r.yg)("h3",{id:"generating-interfaces"},"Generating Interfaces"),(0,r.yg)("p",null,"Generate code using protoc. 
Refer to ",(0,r.yg)("inlineCode",{parentName:"p"},"protoc -h")," for specific parameters."),(0,r.yg)("h3",{id:"implementing-interfaces"},"Implementing Interfaces"),(0,r.yg)("p",null,"The following three methods need to be implemented:"),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"fnCall: Used to write the calculation logic.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"checkFn: Used for UDF creation validation, checking if the function name, parameters, return values, etc., are valid.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},"handShake: Used for interface probing."))),(0,r.yg)("hr",null),(0,r.yg)("h2",{id:"creating-udf"},"Creating UDF"),(0,r.yg)("p",null,"Currently, UDTF is not supported."),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-sql"},'CREATE FUNCTION \nname ([,...])\n[RETURNS] rettype\nPROPERTIES (["key"="value"][,...]) \n')),(0,r.yg)("p",null,"Note:"),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},(0,r.yg)("p",{parentName:"li"},"The ",(0,r.yg)("inlineCode",{parentName:"p"},"symbol")," in the PROPERTIES represents the method name passed in the RPC call, and this parameter must be set.")),(0,r.yg)("li",{parentName:"ol"},(0,r.yg)("p",{parentName:"li"},"The ",(0,r.yg)("inlineCode",{parentName:"p"},"object_file")," in the PROPERTIES represents the RPC service address. Currently, it supports a single address and cluster addresses in the brpc-compatible format. For cluster connection methods, refer to the ",(0,r.yg)("a",{parentName:"p",href:"https://github.com/apache/incubator-brpc/blob/master/docs/cn/client.md#%E8%BF%9E%E6%8E%A5%E6%9C%8D%E5%8A%A1%E9%9B%86%E7%BE%A4"},"Format Specification")," (Chinese).")),(0,r.yg)("li",{parentName:"ol"},(0,r.yg)("p",{parentName:"li"},"The ",(0,r.yg)("inlineCode",{parentName:"p"},"type")," in the PROPERTIES represents the UDF invocation type, which is set to Native by default. 
Use RPC to pass when using RPC UDF.")),(0,r.yg)("li",{parentName:"ol"},(0,r.yg)("p",{parentName:"li"},(0,r.yg)("inlineCode",{parentName:"p"},"name"),": A function belongs to a specific database. The name is in the form of ",(0,r.yg)("inlineCode",{parentName:"p"},"dbName"),".",(0,r.yg)("inlineCode",{parentName:"p"},"funcName"),". When ",(0,r.yg)("inlineCode",{parentName:"p"},"dbName")," is not explicitly specified, the current session's database is used as ",(0,r.yg)("inlineCode",{parentName:"p"},"dbName"),"."))),(0,r.yg)("p",null,"Example:"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-sql"},'CREATE FUNCTION rpc_add_two(INT,INT) RETURNS INT PROPERTIES (\n "SYMBOL"="add_int_two",\n "OBJECT_FILE"="127.0.0.1:9114",\n "TYPE"="RPC"\n);\nCREATE FUNCTION rpc_add_one(INT) RETURNS INT PROPERTIES (\n "SYMBOL"="add_int_one",\n "OBJECT_FILE"="127.0.0.1:9114",\n "TYPE"="RPC"\n);\nCREATE FUNCTION rpc_add_string(varchar(30)) RETURNS varchar(30) PROPERTIES (\n "SYMBOL"="add_string",\n "OBJECT_FILE"="127.0.0.1:9114",\n "TYPE"="RPC"\n);\n')),(0,r.yg)("h2",{id:"using-udf"},"Using UDF"),(0,r.yg)("p",null,"Users must have the ",(0,r.yg)("inlineCode",{parentName:"p"},"SELECT")," privilege on the corresponding database to use UDF."),(0,r.yg)("p",null,"The usage of UDF is similar to regular functions, with the only difference being that the scope of built-in functions is global, while the scope of UDF is within the database. When the session is connected to a database, simply use the UDF name to search for the corresponding UDF within the current database. 
Otherwise, the user needs to explicitly specify the database name of the UDF, such as ",(0,r.yg)("inlineCode",{parentName:"p"},"dbName"),".",(0,r.yg)("inlineCode",{parentName:"p"},"funcName"),"."),(0,r.yg)("h2",{id:"deleting-udf"},"Deleting UDF"),(0,r.yg)("p",null,"When you no longer need a UDF function, you can delete it using the ",(0,r.yg)("inlineCode",{parentName:"p"},"DROP FUNCTION")," command."),(0,r.yg)("h2",{id:"example"},"Example"),(0,r.yg)("p",null,"The ",(0,r.yg)("inlineCode",{parentName:"p"},"samples/doris-demo/")," directory provides examples of RPC server implementations in CPP, Java, and Python languages. Please refer to the ",(0,r.yg)("inlineCode",{parentName:"p"},"README.md")," file in each directory for specific usage instructions.\nFor example, ",(0,r.yg)("inlineCode",{parentName:"p"},"rpc_add_string"),":"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-sql"},"mysql >select rpc_add_string('doris');\n+-------------------------+\n| rpc_add_string('doris') |\n+-------------------------+\n| doris_rpc_test |\n+-------------------------+\n")),(0,r.yg)("p",null,"The log will display:"),(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-json"},'INFO: fnCall request=function_name: "add_string"\nargs {\n type {\n id: STRING\n }\n has_null: false\n string_value: "doris"\n}\nINFO: fnCall res=result {\n type {\n id: STRING\n }\n has_null: false\n string_value: "doris_rpc_test"\n}\nstatus {\n status_code: 0\n}\n')))}d.isMDXComponent=!0}}]);