"use strict";(self.webpackChunkdoris_website=self.webpackChunkdoris_website||[]).push([[42241],{15680:(e,t,a)=>{a.d(t,{xA:()=>c,yg:()=>g});var o=a(296540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);t&&(o=o.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,o)}return a}function i(e){for(var t=1;t<arguments.length;t++){var a=null!=arguments[t]?arguments[t]:{};t%2?r(Object(a),!0).forEach((function(t){n(e,t,a[t])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(a)):r(Object(a)).forEach((function(t){Object.defineProperty(e,t,Object.getOwnPropertyDescriptor(a,t))}))}return e}function s(e,t){if(null==e)return{};var a,o,n=function(e,t){if(null==e)return{};var a,o,n={},r=Object.keys(e);for(o=0;o<r.length;o++)a=r[o],t.indexOf(a)>=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(o=0;o<r.length;o++)a=r[o],t.indexOf(a)>=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var p=o.createContext({}),l=function(e){var t=o.useContext(p),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},c=function(e){var t=l(e.components);return o.createElement(p.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return o.createElement(o.Fragment,{},t)}},m=o.forwardRef((function(e,t){var a=e.components,n=e.mdxType,r=e.originalType,p=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),u=l(a),m=n,g=u["".concat(p,".").concat(m)]||u[m]||d[m]||r;return a?o.createElement(g,i(i({ref:t},c),{},{components:a})):o.createElement(g,i({ref:t},c))}));function g(e,t){var a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var r=a.length,i=new Array(r);i[0]=m;var s={};for(var p in t)hasOwnProperty.call(t,p)&&(s[p]=t[p]);s.originalType=e,s[u]="string"==typeof e?e:n,i[1]=s;for(var l=2;l<r;l++)i[l]=a[l];return o.createElement.apply(null,i)}return o.createElement.apply(null,a)}m.displayName="MDXCreateElement"},613322:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var o=a(58168),n=(a(296540),a(15680));const r={title:"AutoMQ Load",language:"en"},i=void 0,s={unversionedId:"ecosystem/automq-load",id:"version-1.2/ecosystem/automq-load",title:"AutoMQ Load",description:"AutoMQ is a cloud-native fork of Kafka by separating storage to object storage like S3. It remains 100% compatible with Apache Kafka\xae while offering users up to a 10x cost-effective and 100x elasticity . 
Through its innovative shared storage architecture, it delivers capabilities such as partition reassignment in seconds, continuous self-balancing, and rapid automatic scaling while maintaining high throughput and low latency.",source:"@site/versioned_docs/version-1.2/ecosystem/automq-load.md",sourceDirName:"ecosystem",slug:"/ecosystem/automq-load",permalink:"/docs/1.2/ecosystem/automq-load",draft:!1,tags:[],version:"1.2",frontMatter:{title:"AutoMQ Load",language:"en"},sidebar:"docs",previous:{title:"CloudCanal Data Import",permalink:"/docs/1.2/ecosystem/cloudcanal"},next:{title:"Hive Bitmap UDF",permalink:"/docs/1.2/ecosystem/hive-bitmap-udf"}},p={},l=[{value:"Environment Preparation",id:"environment-preparation",level:2},{value:"Prepare Apache Doris and Test Data",id:"prepare-apache-doris-and-test-data",level:3},{value:"Prepare Kafka Command Line Tools",id:"prepare-kafka-command-line-tools",level:3},{value:"Prepare AutoMQ and test data",id:"prepare-automq-and-test-data",level:3},{value:"Create a Routine Load import job",id:"create-a-routine-load-import-job",level:2},{value:"Verify data import",id:"verify-data-import",level:2}],c={toc:l},u="wrapper";function d(e){let{components:t,...r}=e;return(0,n.yg)(u,(0,o.A)({},c,r,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("p",null,(0,n.yg)("a",{parentName:"p",href:"https://github.com/AutoMQ/automq"},"AutoMQ")," is a cloud-native fork of Kafka that offloads storage to object storage such as S3. It remains 100% compatible with Apache Kafka\xae while offering up to 10x lower cost and 100x greater elasticity. Through its innovative shared storage architecture, it delivers capabilities such as partition reassignment in seconds, continuous self-balancing, and rapid automatic scaling while maintaining high throughput and low latency.\n",(0,n.yg)("img",{alt:"AutoMQ Storage Architecture",src:a(608279).A,width:"828",height:"948"})),(0,n.yg)("p",null,"This article explains how to use Apache Doris Routine Load to import data from AutoMQ into Doris. For more details on Routine Load, please refer to the ",(0,n.yg)("a",{parentName:"p",href:"https://doris.apache.org/docs/data-operate/import/routine-load-manual/"},"Routine Load")," document."),(0,n.yg)("h2",{id:"environment-preparation"},"Environment Preparation"),(0,n.yg)("h3",{id:"prepare-apache-doris-and-test-data"},"Prepare Apache Doris and Test Data"),(0,n.yg)("p",null,"Ensure that a working Apache Doris cluster is already set up. For demonstration purposes, we have deployed a test Apache Doris environment on Linux following the ",(0,n.yg)("a",{parentName:"p",href:"https://doris.apache.org/docs/install/cluster-deployment/run-docker-cluster"},"Deploying with Docker")," document.\nCreate a database and a test table:"),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},"CREATE DATABASE automq_db;\nCREATE TABLE automq_db.users (\n id BIGINT NOT NULL,\n name STRING NOT NULL,\n timestamp STRING NULL,\n status STRING NULL\n) DISTRIBUTED BY HASH (id) PROPERTIES ('replication_num' = '1');\n")),
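(0,n.yg)("p",null,"Before moving on, you can confirm that the table was created as expected; the Routine Load job defined later maps the incoming JSON fields onto these columns. Below is a quick sanity check from the same MySQL client session, using standard Doris schema-inspection statements:"),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},"-- Inspect the column names and types of the target table\nDESC automq_db.users;\n\n-- Show the full DDL, including the distribution and replication properties\nSHOW CREATE TABLE automq_db.users;\n")),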
(0,n.yg)("h3",{id:"prepare-kafka-command-line-tools"},"Prepare Kafka Command Line Tools"),(0,n.yg)("p",null,"Download the latest TGZ package from ",(0,n.yg)("a",{parentName:"p",href:"https://github.com/AutoMQ/automq"},"AutoMQ Releases")," and extract it. Assuming the extraction directory is $AUTOMQ_HOME, this article will use the scripts under $AUTOMQ_HOME/bin to create topics and generate test data."),(0,n.yg)("h3",{id:"prepare-automq-and-test-data"},"Prepare AutoMQ and test data"),(0,n.yg)("p",null,"Refer to the AutoMQ ",(0,n.yg)("a",{parentName:"p",href:"https://docs.automq.com/docs/automq-opensource/EvqhwAkpriAomHklOUzcUtybn7g"},"official deployment documentation")," to deploy a functional cluster, ensuring network connectivity between AutoMQ and Apache Doris.\nQuickly create a topic named example_topic in AutoMQ and write test JSON data to it by following these steps."),(0,n.yg)("p",null,(0,n.yg)("strong",{parentName:"p"},"Create Topic")),(0,n.yg)("p",null,"Use the Apache Kafka\xae command line tool in AutoMQ to create the topic, ensuring that you have access to a Kafka environment and that the Kafka service is running. Here is an example command to create a topic:"),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},"$AUTOMQ_HOME/bin/kafka-topics.sh --create --topic example_topic --bootstrap-server 127.0.0.1:9092 --partitions 1 --replication-factor 1\n")),(0,n.yg)("blockquote",null,(0,n.yg)("p",{parentName:"blockquote"},"Tips: When executing the command, replace the ",(0,n.yg)("inlineCode",{parentName:"p"},"--bootstrap-server")," value with the actual AutoMQ Bootstrap Server address.")),(0,n.yg)("p",null,"After creating the topic, you can use the following command to verify that the topic has been successfully created."),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},"$AUTOMQ_HOME/bin/kafka-topics.sh --describe --topic example_topic --bootstrap-server 127.0.0.1:9092\n")),(0,n.yg)("p",null,(0,n.yg)("strong",{parentName:"p"},"Generate test data")),(0,n.yg)("p",null,"Create a JSON-formatted test data entry, corresponding to the table mentioned earlier."),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},'{\n "id": 1,\n "name": "testuser",\n "timestamp": "2023-11-10T12:00:00",\n "status": "active"\n}\n')),(0,n.yg)("p",null,(0,n.yg)("strong",{parentName:"p"},"Write test data")),(0,n.yg)("p",null,"Use Kafka's command-line tools, or write a producer program, to send the test data to the topic named ",(0,n.yg)("inlineCode",{parentName:"p"},"example_topic"),". Below is an example using the command-line tool:"),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},'echo \'{"id": 1, "name": "testuser", "timestamp": "2023-11-10T12:00:00", "status": "active"}\' | $AUTOMQ_HOME/bin/kafka-console-producer.sh --broker-list 127.0.0.1:9092 --topic example_topic\n')),(0,n.yg)("p",null,"To view the data just written to the topic, use the following command:"),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},"$AUTOMQ_HOME/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:9092 --topic example_topic --from-beginning\n")),(0,n.yg)("blockquote",null,(0,n.yg)("p",{parentName:"blockquote"},"Tips: When executing the command, replace the ",(0,n.yg)("inlineCode",{parentName:"p"},"--bootstrap-server")," value with the actual AutoMQ Bootstrap Server address.")),(0,n.yg)("h2",{id:"create-a-routine-load-import-job"},"Create a Routine Load import job"),(0,n.yg)("p",null,"In the Apache Doris command line, create a Routine Load job that accepts JSON data to continuously import data from an AutoMQ Kafka topic. For detailed parameter information of Routine Load, please refer to the ",(0,n.yg)("a",{parentName:"p",href:"https://doris.apache.org/docs/data-operate/import/routine-load-manual/"},"Doris Routine Load")," document."),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},'CREATE ROUTINE LOAD automq_db.automq_example_load ON users\nCOLUMNS(id, name, timestamp, status)\nPROPERTIES\n(\n "format" = "json",\n "jsonpaths" = "[\\"$.id\\",\\"$.name\\",\\"$.timestamp\\",\\"$.status\\"]"\n)\nFROM KAFKA\n(\n "kafka_broker_list" = "127.0.0.1:9092",\n "kafka_topic" = "example_topic",\n "property.kafka_default_offsets" = "OFFSET_BEGINNING"\n);\n')),(0,n.yg)("blockquote",null,(0,n.yg)("p",{parentName:"blockquote"},"Tips: When executing the command, you need to replace ",(0,n.yg)("inlineCode",{parentName:"p"},"kafka_broker_list")," with the actual AutoMQ Bootstrap Server address.")),
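(0,n.yg)("p",null,"The job created above runs continuously until it is explicitly paused or stopped. As a quick reference, Doris provides management statements for Routine Load jobs; for example, using the job name created above:"),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},"-- Temporarily pause the job; consumption stops but the job can be resumed\nPAUSE ROUTINE LOAD FOR automq_db.automq_example_load;\n\n-- Resume a paused job from the progress it last committed\nRESUME ROUTINE LOAD FOR automq_db.automq_example_load;\n\n-- Permanently stop the job; a stopped job cannot be resumed\nSTOP ROUTINE LOAD FOR automq_db.automq_example_load;\n")),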
(0,n.yg)("h2",{id:"verify-data-import"},"Verify data import"),(0,n.yg)("p",null,"First, check the status of the Routine Load import job to ensure that the task is running."),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},"SHOW ROUTINE LOAD\\G\n")),(0,n.yg)("p",null,"Then query the target table in the Apache Doris database, and you will see that the data has been successfully imported."),(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre"},"SELECT * FROM automq_db.users;\n+------+--------------+---------------------+--------+\n| id | name | timestamp | status |\n+------+--------------+---------------------+--------+\n| 1 | testuser | 2023-11-10T12:00:00 | active |\n| 2 | testuser | 2023-11-10T12:00:00 | active |\n+------+--------------+---------------------+--------+\n2 rows in set (0.01 sec)\n")))}d.isMDXComponent=!0},608279:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/automq_storage_architecture-ae9745cd9eb2b4c42ef8e10d44e9fe7a.png"}}]);