| import{_ as c,r as l,o as i,c as r,b as n,d as s,a,w as t,e as p}from"./app-Bp5kEZWW.js";const d={},u=n("h1",{id:"iotdb-流处理框架",tabindex:"-1"},[n("a",{class:"header-anchor",href:"#iotdb-流处理框架"},[n("span",null,"IoTDB 流处理框架")])],-1),k=n("p",null,"IoTDB 流处理框架允许用户实现自定义的流处理逻辑,可以实现对存储引擎变更的监听和捕获、实现对变更数据的变形、实现对变形后数据的向外推送等逻辑。",-1),v=p(`<ul><li>抽取(Extract)</li><li>处理(Process)</li><li>发送(Connect)</li></ul><p>流处理框架允许用户使用 Java 语言自定义编写三个子任务的处理逻辑,通过类似 UDF 的方式处理数据。<br> 在一个 Pipe 中,上述的三个子任务分别由三种插件执行实现,数据会依次经过这三个插件进行处理:<br> Pipe Extractor 用于抽取数据,Pipe Processor 用于处理数据,Pipe Connector 用于发送数据,最终数据将被发至外部系统。</p><p><strong>Pipe 任务的模型如下:</strong></p><figure><img src="https://alioss.timecho.com/docs/img/同步引擎.jpeg" alt="任务模型图" tabindex="0" loading="lazy"><figcaption>任务模型图</figcaption></figure><p>描述一个数据流处理任务,本质就是描述 Pipe Extractor、Pipe Processor 和 Pipe Connector 插件的属性。<br> 用户可以通过 SQL 语句声明式地配置三个子任务的具体属性,通过组合不同的属性,实现灵活的数据 ETL 能力。</p><p>利用流处理框架,可以搭建完整的数据链路来满足端<em>边云同步、异地灾备、读写负载分库</em>等需求。</p><h2 id="自定义流处理插件开发" tabindex="-1"><a class="header-anchor" href="#自定义流处理插件开发"><span>自定义流处理插件开发</span></a></h2><h3 id="编程开发依赖" tabindex="-1"><a class="header-anchor" href="#编程开发依赖"><span>编程开发依赖</span></a></h3><p>推荐采用 maven 构建项目,在<code>pom.xml</code>中添加以下依赖。请注意选择和 IoTDB 服务器版本相同的依赖版本。</p><div class="language-xml line-numbers-mode" data-ext="xml" data-title="xml"><pre class="language-xml"><code><span class="token tag"><span class="token tag"><span class="token punctuation"><</span>dependency</span><span class="token punctuation">></span></span> |
| <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>groupId</span><span class="token punctuation">></span></span>org.apache.iotdb<span class="token tag"><span class="token tag"><span class="token punctuation"></</span>groupId</span><span class="token punctuation">></span></span> |
| <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>artifactId</span><span class="token punctuation">></span></span>pipe-api<span class="token tag"><span class="token tag"><span class="token punctuation"></</span>artifactId</span><span class="token punctuation">></span></span> |
| <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>version</span><span class="token punctuation">></span></span>1.2.1<span class="token tag"><span class="token tag"><span class="token punctuation"></</span>version</span><span class="token punctuation">></span></span> |
| <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>scope</span><span class="token punctuation">></span></span>provided<span class="token tag"><span class="token tag"><span class="token punctuation"></</span>scope</span><span class="token punctuation">></span></span> |
| <span class="token tag"><span class="token tag"><span class="token punctuation"></</span>dependency</span><span class="token punctuation">></span></span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h3 id="事件驱动编程模型" tabindex="-1"><a class="header-anchor" href="#事件驱动编程模型"><span>事件驱动编程模型</span></a></h3><p>流处理插件的用户编程接口设计,参考了事件驱动编程模型的通用设计理念。事件(Event)是用户编程接口中的数据抽象,而编程接口与具体的执行方式解耦,只需要专注于描述事件(数据)到达系统后,系统期望的处理方式即可。</p><p>在流处理插件的用户编程接口中,事件是数据库数据写入操作的抽象。事件由单机流处理引擎捕获,按照流处理三个阶段的流程,依次传递至 PipeExtractor 插件,PipeProcessor 插件和 PipeConnector 插件,并依次在三个插件中触发用户逻辑的执行。</p><p>为了兼顾端侧低负载场景下的流处理低延迟和端侧高负载场景下的流处理高吞吐,流处理引擎会动态地在操作日志和数据文件中选择处理对象,因此,流处理的用户编程接口要求用户提供下列两类事件的处理逻辑:操作日志写入事件 TabletInsertionEvent 和数据文件写入事件 TsFileInsertionEvent。</p><h4 id="操作日志写入事件-tabletinsertionevent" tabindex="-1"><a class="header-anchor" href="#操作日志写入事件-tabletinsertionevent"><span><strong>操作日志写入事件(TabletInsertionEvent)</strong></span></a></h4><p>操作日志写入事件(TabletInsertionEvent)是对用户写入请求的高层数据抽象,它通过提供统一的操作接口,为用户提供了操纵写入请求底层数据的能力。</p><p>对于不同的数据库部署方式,操作日志写入事件对应的底层存储结构是不一样的。对于单机部署的场景,操作日志写入事件是对写前日志(WAL)条目的封装;对于分布式部署的场景,操作日志写入事件是对单个节点共识协议操作日志条目的封装。</p><p>对于数据库不同写入请求接口生成的写入操作,操作日志写入事件对应的请求结构体的数据结构也是不一样的。IoTDB 提供了 InsertRecord、InsertRecords、InsertTablet、InsertTablets 等众多的写入接口,每一种写入请求都使用了完全不同的序列化方式,生成的二进制条目也不尽相同。</p><p>操作日志写入事件的存在,为用户提供了一种统一的数据操作视图,它屏蔽了底层数据结构的实现差异,极大地降低了用户的编程门槛,提升了功能的易用性。</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token doc-comment comment">/** TabletInsertionEvent is used to define the event of data insertion. */</span> |
| <span class="token keyword">public</span> <span class="token keyword">interface</span> <span class="token class-name">TabletInsertionEvent</span> <span class="token keyword">extends</span> <span class="token class-name">Event</span> <span class="token punctuation">{</span> |
| |
| <span class="token doc-comment comment">/** |
| * The consumer processes the data row by row and collects the results by RowCollector. |
| * |
|    * <span class="token keyword">@return</span> <span class="token punctuation">{</span><span class="token keyword">@code</span> <span class="token code-section"><span class="token code language-java"><span class="token class-name">Iterable</span><span class="token generics"><span class="token punctuation"><</span><span class="token class-name">TabletInsertionEvent</span><span class="token punctuation">></span></span></span></span><span class="token punctuation">}</span> a list of new TabletInsertionEvents containing the |
| * results collected by the RowCollector |
| */</span> |
| <span class="token class-name">Iterable</span><span class="token generics"><span class="token punctuation"><</span><span class="token class-name">TabletInsertionEvent</span><span class="token punctuation">></span></span> <span class="token function">processRowByRow</span><span class="token punctuation">(</span><span class="token class-name">BiConsumer</span><span class="token generics"><span class="token punctuation"><</span><span class="token class-name">Row</span><span class="token punctuation">,</span> <span class="token class-name">RowCollector</span><span class="token punctuation">></span></span> consumer<span class="token punctuation">)</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * The consumer processes the Tablet directly and collects the results by RowCollector. |
| * |
|    * <span class="token keyword">@return</span> <span class="token punctuation">{</span><span class="token keyword">@code</span> <span class="token code-section"><span class="token code language-java"><span class="token class-name">Iterable</span><span class="token generics"><span class="token punctuation"><</span><span class="token class-name">TabletInsertionEvent</span><span class="token punctuation">></span></span></span></span><span class="token punctuation">}</span> a list of new TabletInsertionEvents containing the |
| * results collected by the RowCollector |
| */</span> |
| <span class="token class-name">Iterable</span><span class="token generics"><span class="token punctuation"><</span><span class="token class-name">TabletInsertionEvent</span><span class="token punctuation">></span></span> <span class="token function">processTablet</span><span class="token punctuation">(</span><span class="token class-name">BiConsumer</span><span class="token generics"><span class="token punctuation"><</span><span class="token class-name">Tablet</span><span class="token punctuation">,</span> <span class="token class-name">RowCollector</span><span class="token punctuation">></span></span> consumer<span class="token punctuation">)</span><span class="token punctuation">;</span> |
| <span class="token punctuation">}</span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h4 id="数据文件写入事件-tsfileinsertionevent" tabindex="-1"><a class="header-anchor" href="#数据文件写入事件-tsfileinsertionevent"><span><strong>数据文件写入事件(TsFileInsertionEvent)</strong></span></a></h4><p>数据文件写入事件(TsFileInsertionEvent) 是对数据库文件落盘操作的高层抽象,它是若干操作日志写入事件(TabletInsertionEvent)的数据集合。</p><p>IoTDB 的存储引擎是 LSM 结构的。数据写入时会先将写入操作落盘到日志结构的文件里,同时将写入数据保存在内存里。当内存达到控制上限,则会触发刷盘行为,即将内存中的数据转换为数据库文件,同时删除之前预写的操作日志。当内存中的数据转换为数据库文件中的数据时,会经过编码压缩和通用压缩两次压缩处理,因此数据库文件的数据相比内存中的原始数据占用的空间更少。</p><p>在极端的网络情况下,直接传输数据文件相比传输数据写入的操作要更加经济,它会占用更低的网络带宽,能实现更快的传输速度。当然,天下没有免费的午餐,对文件中的数据进行计算处理,相比直接对内存中的数据进行计算处理时,需要额外付出文件 I/O 的代价。但是,正是磁盘数据文件和内存写入操作两种结构各有优劣的存在,给了系统做动态权衡调整的机会,也正是基于这样的观察,插件的事件模型中才引入了数据文件写入事件。</p><p>综上,数据文件写入事件出现在流处理插件的事件流中,存在下面两种情况:</p><p>(1)历史数据抽取:一个流处理任务开始前,所有已经落盘的写入数据都会以 TsFile 的形式存在。一个流处理任务开始后,采集历史数据时,历史数据将以 TsFileInsertionEvent 作为抽象;</p><p>(2)实时数据抽取:一个流处理任务进行时,当数据流中实时处理操作日志写入事件的速度慢于写入请求速度一定进度之后,未来得及处理的操作日志写入事件会被持久化至磁盘,以 TsFile 的形式存在,这一些数据被流处理引擎抽取到后,会以 TsFileInsertionEvent 作为抽象。</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token doc-comment comment">/** |
| * TsFileInsertionEvent is used to define the event of writing TsFile. Event data stores in disks, |
| * which is compressed and encoded, and requires IO cost for computational processing. |
| */</span> |
| <span class="token keyword">public</span> <span class="token keyword">interface</span> <span class="token class-name">TsFileInsertionEvent</span> <span class="token keyword">extends</span> <span class="token class-name">Event</span> <span class="token punctuation">{</span> |
| |
| <span class="token doc-comment comment">/** |
| * The method is used to convert the TsFileInsertionEvent into several TabletInsertionEvents. |
| * |
| * <span class="token keyword">@return</span> <span class="token punctuation">{</span><span class="token keyword">@code</span> <span class="token code-section"><span class="token code language-java"><span class="token class-name">Iterable</span><span class="token generics"><span class="token punctuation"><</span><span class="token class-name">TabletInsertionEvent</span><span class="token punctuation">></span></span></span></span><span class="token punctuation">}</span> the list of TabletInsertionEvent |
| */</span> |
| <span class="token class-name">Iterable</span><span class="token generics"><span class="token punctuation"><</span><span class="token class-name">TabletInsertionEvent</span><span class="token punctuation">></span></span> <span class="token function">toTabletInsertionEvents</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span> |
| <span class="token punctuation">}</span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h3 id="自定义流处理插件编程接口定义" tabindex="-1"><a class="header-anchor" href="#自定义流处理插件编程接口定义"><span>自定义流处理插件编程接口定义</span></a></h3><p>基于自定义流处理插件编程接口,用户可以轻松编写数据抽取插件、数据处理插件和数据发送插件,从而使得流处理功能灵活适配各种工业场景。</p><h4 id="数据抽取插件接口" tabindex="-1"><a class="header-anchor" href="#数据抽取插件接口"><span>数据抽取插件接口</span></a></h4><p>数据抽取是流处理数据从数据抽取到数据发送三阶段的第一阶段。数据抽取插件(PipeExtractor)是流处理引擎和存储引擎的桥梁,它通过监听存储引擎的行为,<br> 捕获各种数据写入事件。</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token doc-comment comment">/** |
| * PipeExtractor |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>PipeExtractor is responsible for capturing events from sources. |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>Various data sources can be supported by implementing different PipeExtractor classes. |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>The lifecycle of a PipeExtractor is as follows: |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>ul</span><span class="token punctuation">></span></span> |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>When a collaboration task is created, the KV pairs of \`WITH EXTRACTOR\` clause in SQL are |
| * parsed and the validation method <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeExtractor</span><span class="token punctuation">#</span><span class="token function">validate</span><span class="token punctuation">(</span><span class="token class-name">PipeParameterValidator</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> |
| * will be called to validate the parameters. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>Before the collaboration task starts, the method <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeExtractor</span><span class="token punctuation">#</span><span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span><span class="token punctuation">,</span> <span class="token class-name">PipeExtractorRuntimeConfiguration</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> will be called |
|  *       to configure the runtime behavior of the PipeExtractor. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>Then the method <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeExtractor</span><span class="token punctuation">#</span><span class="token function">start</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> will be called to start the PipeExtractor. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>While the collaboration task is in progress, the method <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeExtractor</span><span class="token punctuation">#</span><span class="token function">supply</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> will |
| * be called to capture events from sources and then the events will be passed to the |
| * PipeProcessor. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>The method <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeExtractor</span><span class="token punctuation">#</span><span class="token function">close</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> will be called when the collaboration task is |
| * cancelled (the \`DROP PIPE\` command is executed). |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"></</span>ul</span><span class="token punctuation">></span></span> |
| */</span> |
| <span class="token keyword">public</span> <span class="token keyword">interface</span> <span class="token class-name">PipeExtractor</span> <span class="token keyword">extends</span> <span class="token class-name">PipePlugin</span> <span class="token punctuation">{</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is mainly used to validate <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeParameters</span></span><span class="token punctuation">}</span> and it is executed before <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeExtractor</span><span class="token punctuation">#</span><span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span><span class="token punctuation">,</span> <span class="token class-name">PipeExtractorRuntimeConfiguration</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> is called. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">validator</span> the validator used to validate <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeParameters</span></span><span class="token punctuation">}</span> |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> if any parameter is not valid |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">validate</span><span class="token punctuation">(</span><span class="token class-name">PipeParameterValidator</span> validator<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is mainly used to customize PipeExtractor. In this method, the user can do the |
| * following things: |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>ul</span><span class="token punctuation">></span></span> |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>Use PipeParameters to parse key-value pair attributes entered by the user. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>Set the running configurations in PipeExtractorRuntimeConfiguration. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"></</span>ul</span><span class="token punctuation">></span></span> |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>This method is called after the method <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeExtractor</span><span class="token punctuation">#</span><span class="token function">validate</span><span class="token punctuation">(</span><span class="token class-name">PipeParameterValidator</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> is called. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">parameters</span> used to parse the input parameters entered by the user |
| * <span class="token keyword">@param</span> <span class="token parameter">configuration</span> used to set the required properties of the running PipeExtractor |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span> parameters<span class="token punctuation">,</span> <span class="token class-name">PipeExtractorRuntimeConfiguration</span> configuration<span class="token punctuation">)</span> |
| <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * Start the extractor. After this method is called, events should be ready to be supplied by |
| * <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeExtractor</span><span class="token punctuation">#</span><span class="token function">supply</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span>. This method is called after <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeExtractor</span><span class="token punctuation">#</span><span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span><span class="token punctuation">,</span> <span class="token class-name">PipeExtractorRuntimeConfiguration</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> is called. |
| * |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">start</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
|    * Supply a single event from the extractor and the caller will send the event to the processor. |
| * This method is called after <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeExtractor</span><span class="token punctuation">#</span><span class="token function">start</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> is called. |
| * |
|    * <span class="token keyword">@return</span> the event to be supplied. The event may be null if the extractor has no more events at |
| * the moment, but the extractor is still running for more events. |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token class-name">Event</span> <span class="token function">supply</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| <span class="token punctuation">}</span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div 
class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h4 id="数据处理插件接口" tabindex="-1"><a class="header-anchor" href="#数据处理插件接口"><span>数据处理插件接口</span></a></h4><p>数据处理是流处理数据从数据抽取到数据发送三阶段的第二阶段。数据处理插件(PipeProcessor)主要用于过滤和转换由数据抽取插件(PipeExtractor)捕获的<br> 各种事件。</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token doc-comment comment">/** |
| * PipeProcessor |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>PipeProcessor is used to filter and transform the Event formed by the PipeExtractor. |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>The lifecycle of a PipeProcessor is as follows: |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>ul</span><span class="token punctuation">></span></span> |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>When a collaboration task is created, the KV pairs of \`WITH PROCESSOR\` clause in SQL are |
| * parsed and the validation method <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeProcessor</span><span class="token punctuation">#</span><span class="token function">validate</span><span class="token punctuation">(</span><span class="token class-name">PipeParameterValidator</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> |
| * will be called to validate the parameters. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>Before the collaboration task starts, the method <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeProcessor</span><span class="token punctuation">#</span><span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span><span class="token punctuation">,</span> <span class="token class-name">PipeProcessorRuntimeConfiguration</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> will be called |
|  *       to configure the runtime behavior of the PipeProcessor. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>While the collaboration task is in progress: |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>ul</span><span class="token punctuation">></span></span> |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>PipeExtractor captures the events and wraps them into three types of Event instances. |
|  *         <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>PipeProcessor processes the events and then passes them to the PipeConnector. The |
| * following 3 methods will be called: <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeProcessor</span><span class="token punctuation">#</span><span class="token function">process</span><span class="token punctuation">(</span><span class="token class-name">TabletInsertionEvent</span><span class="token punctuation">,</span> <span class="token class-name">EventCollector</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span>, <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeProcessor</span><span class="token punctuation">#</span><span class="token function">process</span><span class="token punctuation">(</span><span class="token class-name">TsFileInsertionEvent</span><span class="token punctuation">,</span> <span class="token class-name">EventCollector</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> and <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeProcessor</span><span class="token punctuation">#</span><span class="token function">process</span><span class="token punctuation">(</span><span class="token class-name">Event</span><span class="token punctuation">,</span> <span class="token class-name">EventCollector</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span>. |
|  *         <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>PipeConnector serializes the events into binaries and sends them to sinks. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"></</span>ul</span><span class="token punctuation">></span></span> |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>When the collaboration task is cancelled (the \`DROP PIPE\` command is executed), the <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeProcessor</span><span class="token punctuation">#</span><span class="token function">close</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span> <span class="token punctuation">}</span> method will be called. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"></</span>ul</span><span class="token punctuation">></span></span> |
| */</span> |
| <span class="token keyword">public</span> <span class="token keyword">interface</span> <span class="token class-name">PipeProcessor</span> <span class="token keyword">extends</span> <span class="token class-name">PipePlugin</span> <span class="token punctuation">{</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is mainly used to validate <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeParameters</span></span><span class="token punctuation">}</span> and it is executed before <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeProcessor</span><span class="token punctuation">#</span><span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span><span class="token punctuation">,</span> <span class="token class-name">PipeProcessorRuntimeConfiguration</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> is called. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">validator</span> the validator used to validate <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeParameters</span></span><span class="token punctuation">}</span> |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> if any parameter is not valid |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">validate</span><span class="token punctuation">(</span><span class="token class-name">PipeParameterValidator</span> validator<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is mainly used to customize PipeProcessor. In this method, the user can do the |
| * following things: |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>ul</span><span class="token punctuation">></span></span> |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>Use PipeParameters to parse key-value pair attributes entered by the user. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>Set the running configurations in PipeProcessorRuntimeConfiguration. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"></</span>ul</span><span class="token punctuation">></span></span> |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>This method is called after the method <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeProcessor</span><span class="token punctuation">#</span><span class="token function">validate</span><span class="token punctuation">(</span><span class="token class-name">PipeParameterValidator</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> is called and before the beginning of the |
| * events processing. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">parameters</span> used to parse the input parameters entered by the user |
| * <span class="token keyword">@param</span> <span class="token parameter">configuration</span> used to set the required properties of the running PipeProcessor |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span> parameters<span class="token punctuation">,</span> <span class="token class-name">PipeProcessorRuntimeConfiguration</span> configuration<span class="token punctuation">)</span> |
| <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is called to process the TabletInsertionEvent. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">tabletInsertionEvent</span> TabletInsertionEvent to be processed |
| * <span class="token keyword">@param</span> <span class="token parameter">eventCollector</span> used to collect result events after processing |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">process</span><span class="token punctuation">(</span><span class="token class-name">TabletInsertionEvent</span> tabletInsertionEvent<span class="token punctuation">,</span> <span class="token class-name">EventCollector</span> eventCollector<span class="token punctuation">)</span> |
| <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is called to process the TsFileInsertionEvent. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">tsFileInsertionEvent</span> TsFileInsertionEvent to be processed |
| * <span class="token keyword">@param</span> <span class="token parameter">eventCollector</span> used to collect result events after processing |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token keyword">default</span> <span class="token keyword">void</span> <span class="token function">process</span><span class="token punctuation">(</span><span class="token class-name">TsFileInsertionEvent</span> tsFileInsertionEvent<span class="token punctuation">,</span> <span class="token class-name">EventCollector</span> eventCollector<span class="token punctuation">)</span> |
| <span class="token keyword">throws</span> <span class="token class-name">Exception</span> <span class="token punctuation">{</span> |
| <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">final</span> <span class="token class-name">TabletInsertionEvent</span> tabletInsertionEvent <span class="token operator">:</span> |
| tsFileInsertionEvent<span class="token punctuation">.</span><span class="token function">toTabletInsertionEvents</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span> <span class="token punctuation">{</span> |
| <span class="token function">process</span><span class="token punctuation">(</span>tabletInsertionEvent<span class="token punctuation">,</span> eventCollector<span class="token punctuation">)</span><span class="token punctuation">;</span> |
| <span class="token punctuation">}</span> |
| <span class="token punctuation">}</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is called to process the Event. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">event</span> Event to be processed |
| * <span class="token keyword">@param</span> <span class="token parameter">eventCollector</span> used to collect result events after processing |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">process</span><span class="token punctuation">(</span><span class="token class-name">Event</span> event<span class="token punctuation">,</span> <span class="token class-name">EventCollector</span> eventCollector<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| <span class="token punctuation">}</span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div 
class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h4 id="数据发送插件接口" tabindex="-1"><a class="header-anchor" href="#数据发送插件接口"><span>数据发送插件接口</span></a></h4><p>数据发送是流处理数据从数据抽取到数据发送三阶段的第三阶段。数据发送插件(PipeConnector)主要用于发送经由数据处理插件(PipeProcessor)处理过后的<br> 各种事件,它作为流处理框架的网络实现层,接口上应允许接入多种实时通信协议和多种连接器。</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token doc-comment comment">/** |
| * PipeConnector |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>PipeConnector is responsible for sending events to sinks. |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>Various network protocols can be supported by implementing different PipeConnector classes. |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>The lifecycle of a PipeConnector is as follows: |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>ul</span><span class="token punctuation">></span></span> |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>When a collaboration task is created, the KV pairs of \`WITH CONNECTOR\` clause in SQL are |
| * parsed and the validation method <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">validate</span><span class="token punctuation">(</span><span class="token class-name">PipeParameterValidator</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> |
| * will be called to validate the parameters. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>Before the collaboration task starts, the method <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span><span class="token punctuation">,</span> <span class="token class-name">PipeConnectorRuntimeConfiguration</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> will be called |
|  *       to configure the runtime behavior of the PipeConnector and the method <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">handshake</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> will be called to create a connection with sink. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>While the collaboration task is in progress: |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>ul</span><span class="token punctuation">></span></span> |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>PipeExtractor captures the events and wraps them into three types of Event instances. |
|  *         <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>PipeProcessor processes the events and then passes them to the PipeConnector. |
|  *         <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>PipeConnector serializes the events into binaries and sends them to sinks. The |
| * following 3 methods will be called: <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">transfer</span><span class="token punctuation">(</span><span class="token class-name">TabletInsertionEvent</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span>, <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">transfer</span><span class="token punctuation">(</span><span class="token class-name">TsFileInsertionEvent</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> and <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">transfer</span><span class="token punctuation">(</span><span class="token class-name">Event</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span>. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"></</span>ul</span><span class="token punctuation">></span></span> |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>When the collaboration task is cancelled (the \`DROP PIPE\` command is executed), the <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">close</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span> <span class="token punctuation">}</span> method will be called. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"></</span>ul</span><span class="token punctuation">></span></span> |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>In addition, the method <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">heartbeat</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> will be called periodically to check |
| * whether the connection with sink is still alive. The method <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">handshake</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> |
| * will be called to create a new connection with the sink when the method <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">heartbeat</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> throws exceptions. |
| */</span> |
| <span class="token keyword">public</span> <span class="token keyword">interface</span> <span class="token class-name">PipeConnector</span> <span class="token keyword">extends</span> <span class="token class-name">PipePlugin</span> <span class="token punctuation">{</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is mainly used to validate <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeParameters</span></span><span class="token punctuation">}</span> and it is executed before <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span><span class="token punctuation">,</span> <span class="token class-name">PipeConnectorRuntimeConfiguration</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> is called. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">validator</span> the validator used to validate <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeParameters</span></span><span class="token punctuation">}</span> |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> if any parameter is not valid |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">validate</span><span class="token punctuation">(</span><span class="token class-name">PipeParameterValidator</span> validator<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is mainly used to customize PipeConnector. In this method, the user can do the |
| * following things: |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>ul</span><span class="token punctuation">></span></span> |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>Use PipeParameters to parse key-value pair attributes entered by the user. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>li</span><span class="token punctuation">></span></span>Set the running configurations in PipeConnectorRuntimeConfiguration. |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"></</span>ul</span><span class="token punctuation">></span></span> |
| * |
| * <span class="token tag"><span class="token tag"><span class="token punctuation"><</span>p</span><span class="token punctuation">></span></span>This method is called after the method <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">validate</span><span class="token punctuation">(</span><span class="token class-name">PipeParameterValidator</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> is called and before the method <span class="token punctuation">{</span><span class="token keyword">@link</span> |
| * <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">handshake</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> is called. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">parameters</span> used to parse the input parameters entered by the user |
| * <span class="token keyword">@param</span> <span class="token parameter">configuration</span> used to set the required properties of the running PipeConnector |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span> parameters<span class="token punctuation">,</span> <span class="token class-name">PipeConnectorRuntimeConfiguration</span> configuration<span class="token punctuation">)</span> |
| <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is used to create a connection with sink. This method will be called after the |
| * method <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">customize</span><span class="token punctuation">(</span><span class="token class-name">PipeParameters</span><span class="token punctuation">,</span> <span class="token class-name">PipeConnectorRuntimeConfiguration</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> is |
| * called or will be called when the method <span class="token punctuation">{</span><span class="token keyword">@link</span> <span class="token reference"><span class="token class-name">PipeConnector</span><span class="token punctuation">#</span><span class="token function">heartbeat</span><span class="token punctuation">(</span><span class="token punctuation">)</span></span><span class="token punctuation">}</span> throws exceptions. |
| * |
|    * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> if the connection fails to be created |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">handshake</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method will be called periodically to check whether the connection with sink is still |
| * alive. |
| * |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> if the connection dies |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">heartbeat</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is used to transfer the TabletInsertionEvent. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">tabletInsertionEvent</span> TabletInsertionEvent to be transferred |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">PipeConnectionException</span></span> if the connection is broken |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">transfer</span><span class="token punctuation">(</span><span class="token class-name">TabletInsertionEvent</span> tabletInsertionEvent<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is used to transfer the TsFileInsertionEvent. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">tsFileInsertionEvent</span> TsFileInsertionEvent to be transferred |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">PipeConnectionException</span></span> if the connection is broken |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token keyword">default</span> <span class="token keyword">void</span> <span class="token function">transfer</span><span class="token punctuation">(</span><span class="token class-name">TsFileInsertionEvent</span> tsFileInsertionEvent<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span> <span class="token punctuation">{</span> |
| <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">final</span> <span class="token class-name">TabletInsertionEvent</span> tabletInsertionEvent <span class="token operator">:</span> |
| tsFileInsertionEvent<span class="token punctuation">.</span><span class="token function">toTabletInsertionEvents</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span> <span class="token punctuation">{</span> |
| <span class="token function">transfer</span><span class="token punctuation">(</span>tabletInsertionEvent<span class="token punctuation">)</span><span class="token punctuation">;</span> |
| <span class="token punctuation">}</span> |
| <span class="token punctuation">}</span> |
| |
| <span class="token doc-comment comment">/** |
| * This method is used to transfer the Event. |
| * |
| * <span class="token keyword">@param</span> <span class="token parameter">event</span> Event to be transferred |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">PipeConnectionException</span></span> if the connection is broken |
| * <span class="token keyword">@throws</span> <span class="token reference"><span class="token class-name">Exception</span></span> the user can throw errors if necessary |
| */</span> |
| <span class="token keyword">void</span> <span class="token function">transfer</span><span class="token punctuation">(</span><span class="token class-name">Event</span> event<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span><span class="token punctuation">;</span> |
| <span class="token punctuation">}</span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div 
class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h2 id="自定义流处理插件管理" tabindex="-1"><a class="header-anchor" href="#自定义流处理插件管理"><span>自定义流处理插件管理</span></a></h2><p>为了保证用户自定义插件在实际生产中的灵活性和易用性,系统还需要提供对插件进行动态统一管理的能力。<br> 本章节介绍的流处理插件管理语句提供了对插件进行动态统一管理的入口。</p><h3 id="加载插件语句" tabindex="-1"><a class="header-anchor" href="#加载插件语句"><span>加载插件语句</span></a></h3><p>在 IoTDB 
中,若要在系统中动态载入一个用户自定义插件,则首先需要基于 PipeExtractor、 PipeProcessor 或者 PipeConnector 实现一个具体的插件类,<br> 然后需要将插件类编译打包成 jar 可执行文件,最后使用加载插件的管理语句将插件载入 IoTDB。</p><p>加载插件的管理语句的语法如图所示。</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">CREATE</span> PIPEPLUGIN <span class="token operator"><</span>别名<span class="token operator">></span> |
| <span class="token keyword">AS</span> <span class="token operator"><</span>全类名<span class="token operator">></span> |
| <span class="token keyword">USING</span> <span class="token operator"><</span>JAR 包的 URI<span class="token operator">></span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div>`,45),m=n("br",null,null,-1),b={href:"https://example.com:8080/iotdb/pipe-plugin.jar",target:"_blank",rel:"noopener noreferrer"},h=n("br",null,null,-1),g=p(`<div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">CREATE</span> PIPEPLUGIN example |
| <span class="token keyword">AS</span> <span class="token string">'edu.tsinghua.iotdb.pipe.ExampleProcessor'</span> |
| <span class="token keyword">USING</span> URI <span class="token string">'<https://example.com:8080/iotdb/pipe-plugin.jar>'</span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h3 id="删除插件语句" tabindex="-1"><a class="header-anchor" href="#删除插件语句"><span>删除插件语句</span></a></h3><p>当用户不再想使用一个插件,需要将插件从系统中卸载时,可以使用如图所示的删除插件语句。</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">DROP</span> PIPEPLUGIN <span class="token operator"><</span>别名<span class="token operator">></span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><h3 id="查看插件语句" tabindex="-1"><a class="header-anchor" href="#查看插件语句"><span>查看插件语句</span></a></h3><p>用户也可以按需查看系统中的插件。查看插件的语句如图所示。</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">SHOW</span> PIPEPLUGINS |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><h2 id="系统预置的流处理插件" tabindex="-1"><a class="header-anchor" href="#系统预置的流处理插件"><span>系统预置的流处理插件</span></a></h2><h3 id="预置-extractor-插件" tabindex="-1"><a class="header-anchor" href="#预置-extractor-插件"><span>预置 extractor 插件</span></a></h3><h4 id="iotdb-extractor" tabindex="-1"><a class="header-anchor" href="#iotdb-extractor"><span>iotdb-extractor</span></a></h4><p>作用:抽取 IoTDB 内部的历史或实时数据进入 pipe。</p><table><thead><tr><th>key</th><th>value</th><th>value 取值范围</th><th>required or optional with default</th></tr></thead><tbody><tr><td>extractor</td><td>iotdb-extractor</td><td>String: iotdb-extractor</td><td>required</td></tr><tr><td>extractor.pattern</td><td>用于筛选时间序列的路径前缀</td><td>String: 任意的时间序列前缀</td><td>optional: root</td></tr><tr><td>extractor.history.enable</td><td>是否抽取历史数据</td><td>Boolean: true, false</td><td>optional: true</td></tr><tr><td>extractor.history.start-time</td><td>抽取的历史数据的开始 event time,包含 start-time</td><td>Long: [Long.MIN_VALUE, Long.MAX_VALUE]</td><td>optional: Long.MIN_VALUE</td></tr><tr><td>extractor.history.end-time</td><td>抽取的历史数据的结束 event time,包含 end-time</td><td>Long: [Long.MIN_VALUE, Long.MAX_VALUE]</td><td>optional: Long.MAX_VALUE</td></tr><tr><td>extractor.realtime.enable</td><td>是否抽取实时数据</td><td>Boolean: true, false</td><td>optional: true</td></tr></tbody></table>`,12),f=n("p",null,[s("🚫 "),n("strong",null,"extractor.pattern 参数说明")],-1),P={href:"https://iotdb.apache.org/zh/Download/#_1-0-%E7%89%88%E6%9C%AC%E4%B8%8D%E5%85%BC%E5%AE%B9%E7%9A%84%E8%AF%AD%E6%B3%95%E8%AF%A6%E7%BB%86%E8%AF%B4%E6%98%8E",target:"_blank",rel:"noopener noreferrer"},E=p("<li><p>在底层实现中,当检测到 pattern 为 root(默认值)时,抽取效率较高,其他任意格式都将降低性能</p></li><li><p>路径前缀不需要能够构成完整的路径。例如,当创建一个包含参数为 'extractor.pattern'='root.aligned.1' 的 pipe 
时:</p><ul><li>root.aligned.1TS</li><li>root.aligned.1TS.`1`</li><li>root.aligned.100T</li></ul><p>的数据会被抽取;</p><ul><li>root.aligned.`1`</li><li>root.aligned.`123`</li></ul><p>的数据不会被抽取。</p></li>",2),w=p(`<blockquote><p>❗️<strong>extractor.history 的 start-time,end-time 参数说明</strong></p><ul><li>start-time,end-time 应为 ISO 格式,例如 2011-12-03T10:15:30 或 2011-12-03T10:15:30+01:00</li></ul></blockquote><blockquote><p>✅ <strong>一条数据从生产到落库 IoTDB,包含两个关键的时间概念</strong></p><ul><li><strong>event time:</strong> 数据实际生产时的时间(或者数据生产系统给数据赋予的生成时间,是数据点中的时间项),也称为事件时间。</li><li><strong>arrival time:</strong> 数据到达 IoTDB 系统内的时间。</li></ul><p>我们常说的乱序数据,指的是数据到达时,其 <strong>event time</strong> 远落后于当前系统时间(或者已经落库的最大 <strong>event time</strong>)的数据。另一方面,不论是乱序数据还是顺序数据,只要它们是新到达系统的,那它们的 <strong>arrival time</strong> 都是会随着数据到达 IoTDB 的顺序递增的。</p></blockquote><blockquote><p>💎 <strong>iotdb-extractor 的工作可以拆分成两个阶段</strong></p><ol><li>历史数据抽取:所有 <strong>arrival time</strong> < 创建 pipe 时<strong>当前系统时间</strong>的数据称为历史数据</li><li>实时数据抽取:所有 <strong>arrival time</strong> >= 创建 pipe 时<strong>当前系统时间</strong>的数据称为实时数据</li></ol><p>历史数据传输阶段和实时数据传输阶段,<strong>两阶段串行执行,只有当历史数据传输阶段完成后,才执行实时数据传输阶段。</strong></p><p>用户可以指定 iotdb-extractor 进行:</p><ul><li>历史数据抽取(<code>'extractor.history.enable' = 'true'</code>, <code>'extractor.realtime.enable' = 'false'</code> )</li><li>实时数据抽取(<code>'extractor.history.enable' = 'false'</code>, <code>'extractor.realtime.enable' = 'true'</code> )</li><li>全量数据抽取(<code>'extractor.history.enable' = 'true'</code>, <code>'extractor.realtime.enable' = 'true'</code> )</li><li>禁止同时设置 <code>extractor.history.enable</code> 和 <code>extractor.realtime.enable</code> 为 <code>false</code></li></ul></blockquote><h3 id="预置-processor-插件" tabindex="-1"><a class="header-anchor" href="#预置-processor-插件"><span>预置 processor 插件</span></a></h3><h4 id="do-nothing-processor" tabindex="-1"><a class="header-anchor" href="#do-nothing-processor"><span>do-nothing-processor</span></a></h4><p>作用:不对 extractor 
传入的事件做任何的处理。</p><table><thead><tr><th>key</th><th>value</th><th>value 取值范围</th><th>required or optional with default</th></tr></thead><tbody><tr><td>processor</td><td>do-nothing-processor</td><td>String: do-nothing-processor</td><td>required</td></tr></tbody></table><h3 id="预置-connector-插件" tabindex="-1"><a class="header-anchor" href="#预置-connector-插件"><span>预置 connector 插件</span></a></h3><h4 id="do-nothing-connector" tabindex="-1"><a class="header-anchor" href="#do-nothing-connector"><span>do-nothing-connector</span></a></h4><p>作用:不对 processor 传入的事件做任何的处理。</p><table><thead><tr><th>key</th><th>value</th><th>value 取值范围</th><th>required or optional with default</th></tr></thead><tbody><tr><td>connector</td><td>do-nothing-connector</td><td>String: do-nothing-connector</td><td>required</td></tr></tbody></table><h2 id="流处理任务管理" tabindex="-1"><a class="header-anchor" href="#流处理任务管理"><span>流处理任务管理</span></a></h2><h3 id="创建流处理任务" tabindex="-1"><a class="header-anchor" href="#创建流处理任务"><span>创建流处理任务</span></a></h3><p>使用 <code>CREATE PIPE</code> 语句来创建流处理任务。以数据同步流处理任务的创建为例,示例 SQL 语句如下:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">CREATE</span> PIPE <span class="token operator"><</span>PipeId<span class="token operator">></span> <span class="token comment">-- PipeId 是能够唯一标定流处理任务的名字</span> |
| <span class="token keyword">WITH</span> EXTRACTOR <span class="token punctuation">(</span> |
| <span class="token comment">-- 默认的 IoTDB 数据抽取插件</span> |
| <span class="token string">'extractor'</span> <span class="token operator">=</span> <span class="token string">'iotdb-extractor'</span><span class="token punctuation">,</span> |
| <span class="token comment">-- 路径前缀,只有能够匹配该路径前缀的数据才会被抽取,用作后续的处理和发送</span> |
| <span class="token string">'extractor.pattern'</span> <span class="token operator">=</span> <span class="token string">'root.timecho'</span><span class="token punctuation">,</span> |
| <span class="token comment">-- 是否抽取历史数据</span> |
| <span class="token string">'extractor.history.enable'</span> <span class="token operator">=</span> <span class="token string">'true'</span><span class="token punctuation">,</span> |
| <span class="token comment">-- 描述被抽取的历史数据的时间范围,表示最早时间</span> |
| <span class="token string">'extractor.history.start-time'</span> <span class="token operator">=</span> <span class="token string">'2011-12-03T10:15:30+01:00'</span><span class="token punctuation">,</span> |
| <span class="token comment">-- 描述被抽取的历史数据的时间范围,表示最晚时间</span> |
| <span class="token string">'extractor.history.end-time'</span> <span class="token operator">=</span> <span class="token string">'2022-12-03T10:15:30+01:00'</span><span class="token punctuation">,</span> |
| <span class="token comment">-- 是否抽取实时数据</span> |
| <span class="token string">'extractor.realtime.enable'</span> <span class="token operator">=</span> <span class="token string">'true'</span><span class="token punctuation">,</span> |
| <span class="token punctuation">)</span> |
| <span class="token keyword">WITH</span> PROCESSOR <span class="token punctuation">(</span> |
| <span class="token comment">-- 默认的数据处理插件,即不做任何处理</span> |
| <span class="token string">'processor'</span> <span class="token operator">=</span> <span class="token string">'do-nothing-processor'</span><span class="token punctuation">,</span> |
| <span class="token punctuation">)</span> |
| <span class="token keyword">WITH</span> CONNECTOR <span class="token punctuation">(</span> |
| <span class="token comment">-- IoTDB 数据发送插件,目标端为 IoTDB</span> |
| <span class="token string">'connector'</span> <span class="token operator">=</span> <span class="token string">'iotdb-thrift-connector'</span><span class="token punctuation">,</span> |
| <span class="token comment">-- 目标端 IoTDB 其中一个 DataNode 节点的数据服务 ip</span> |
| <span class="token string">'connector.ip'</span> <span class="token operator">=</span> <span class="token string">'127.0.0.1'</span><span class="token punctuation">,</span> |
| <span class="token comment">-- 目标端 IoTDB 其中一个 DataNode 节点的数据服务 port</span> |
| <span class="token string">'connector.port'</span> <span class="token operator">=</span> <span class="token string">'6667'</span><span class="token punctuation">,</span> |
| <span class="token punctuation">)</span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><p><strong>创建流处理任务时需要配置 PipeId 以及三个插件部分的参数:</strong></p>`,16),y=n("thead",null,[n("tr",null,[n("th",null,"配置项"),n("th",null,"说明"),n("th",null,"是否必填"),n("th",null,"默认实现"),n("th",null,"默认实现说明"),n("th",null,"是否允许自定义实现")])],-1),x=n("td",null,"PipeId",-1),T=n("td",null,"全局唯一标定一个流处理任务的名称",-1),I=n("td",null,"-",-1),C=n("td",null,"-",-1),_=n("td",null,"-",-1),R=n("tr",null,[n("td",null,"extractor"),n("td",null,"Pipe Extractor 插件,负责在数据库底层抽取流处理数据"),n("td",null,"选填"),n("td",null,"iotdb-extractor"),n("td",null,"将数据库的全量历史数据和后续到达的实时数据接入流处理任务"),n("td",null,"否")],-1),q=n("td",null,"processor",-1),S=n("td",null,"Pipe Processor 插件,负责处理数据",-1),O=n("td",null,"选填",-1),D=n("td",null,"do-nothing-processor",-1),N=n("td",null,"对传入的数据不做任何处理",-1),B=n("td",null,"connector",-1),A=n("td",null,"Pipe Connector 插件,负责发送数据",-1),U=n("td",null,"-",-1),j=n("td",null,"-",-1),F=p(`<p>示例中,使用了 iotdb-extractor、do-nothing-processor 和 iotdb-thrift-connector 插件构建数据流处理任务。IoTDB 还内置了其他的流处理插件,<strong>请查看“系统预置流处理插件”一节</strong>。</p><p><strong>一个最简的 CREATE PIPE 语句示例如下:</strong></p><div class="language-sql line-numbers-mode" 
data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">CREATE</span> PIPE <span class="token operator"><</span>PipeId<span class="token operator">></span> <span class="token comment">-- PipeId 是能够唯一标定流处理任务的名字</span> |
| <span class="token keyword">WITH</span> CONNECTOR <span class="token punctuation">(</span> |
| <span class="token comment">-- IoTDB 数据发送插件,目标端为 IoTDB</span> |
| <span class="token string">'connector'</span> <span class="token operator">=</span> <span class="token string">'iotdb-thrift-connector'</span><span class="token punctuation">,</span> |
| <span class="token comment">-- 目标端 IoTDB 其中一个 DataNode 节点的数据服务 ip</span> |
| <span class="token string">'connector.ip'</span> <span class="token operator">=</span> <span class="token string">'127.0.0.1'</span><span class="token punctuation">,</span> |
| <span class="token comment">-- 目标端 IoTDB 其中一个 DataNode 节点的数据服务 port</span> |
| <span class="token string">'connector.port'</span> <span class="token operator">=</span> <span class="token string">'6667'</span><span class="token punctuation">,</span> |
| <span class="token punctuation">)</span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><p>其表达的语义是:将本数据库实例中的全量历史数据和后续到达的实时数据,同步到目标为 127.0.0.1:6667 的 IoTDB 实例上。</p><p><strong>注意:</strong></p><ul><li><p>EXTRACTOR 和 PROCESSOR 为选填配置,若不填写配置参数,系统则会采用相应的默认实现</p></li><li><p>CONNECTOR 为必填配置,需要在 CREATE PIPE 语句中声明式配置</p></li><li><p>CONNECTOR 具备自复用能力。对于不同的流处理任务,如果它们的 CONNECTOR 具备完全相同的 KV 属性(所有属性的 key 对应的 value 都相同),<strong>那么系统最终只会创建一个 CONNECTOR 实例</strong>,以实现对连接资源的复用。</p><ul><li>例如,有下面 pipe1, pipe2 两个流处理任务的声明:</li></ul><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">CREATE</span> PIPE pipe1 |
| <span class="token keyword">WITH</span> CONNECTOR <span class="token punctuation">(</span> |
| <span class="token string">'connector'</span> <span class="token operator">=</span> <span class="token string">'iotdb-thrift-connector'</span><span class="token punctuation">,</span> |
| <span class="token string">'connector.thrift.host'</span> <span class="token operator">=</span> <span class="token string">'localhost'</span><span class="token punctuation">,</span> |
| <span class="token string">'connector.thrift.port'</span> <span class="token operator">=</span> <span class="token string">'9999'</span><span class="token punctuation">,</span> |
| <span class="token punctuation">)</span> |
| |
| <span class="token keyword">CREATE</span> PIPE pipe2 |
| <span class="token keyword">WITH</span> CONNECTOR <span class="token punctuation">(</span> |
| <span class="token string">'connector'</span> <span class="token operator">=</span> <span class="token string">'iotdb-thrift-connector'</span><span class="token punctuation">,</span> |
| <span class="token string">'connector.thrift.port'</span> <span class="token operator">=</span> <span class="token string">'9999'</span><span class="token punctuation">,</span> |
| <span class="token string">'connector.thrift.host'</span> <span class="token operator">=</span> <span class="token string">'localhost'</span><span class="token punctuation">,</span> |
| <span class="token punctuation">)</span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><ul><li>因为它们对 CONNECTOR 的声明完全相同(<strong>即使某些属性声明时的顺序不同</strong>),所以框架会自动对它们声明的 CONNECTOR 进行复用,最终 pipe1, pipe2 的CONNECTOR 将会是同一个实例。</li></ul></li><li><p>请不要构建出包含数据循环同步的应用场景(会导致无限循环):</p><ul><li>IoTDB A -> IoTDB B -> IoTDB A</li><li>IoTDB A -> IoTDB A</li></ul></li></ul><h3 id="启动流处理任务" tabindex="-1"><a class="header-anchor" href="#启动流处理任务"><span>启动流处理任务</span></a></h3><p>CREATE PIPE 语句成功执行后,流处理任务相关实例会被创建,但整个流处理任务的运行状态会被置为 STOPPED,即流处理任务不会立刻处理数据。</p><p>可以使用 START PIPE 语句使流处理任务开始处理数据:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">START</span> PIPE <span class="token operator"><</span>PipeId<span class="token operator">></span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><h3 id="停止流处理任务" tabindex="-1"><a class="header-anchor" href="#停止流处理任务"><span>停止流处理任务</span></a></h3><p>使用 STOP PIPE 语句使流处理任务停止处理数据:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code>STOP PIPE <span class="token operator"><</span>PipeId<span class="token operator">></span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><h3 id="删除流处理任务" tabindex="-1"><a class="header-anchor" href="#删除流处理任务"><span>删除流处理任务</span></a></h3><p>使用 DROP PIPE 语句使流处理任务停止处理数据(当流处理任务状态为 RUNNING 时),然后删除整个流处理任务:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">DROP</span> PIPE <span class="token operator"><</span>PipeId<span class="token operator">></span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><p>用户在删除流处理任务前,不需要执行 STOP 操作。</p><h3 id="展示流处理任务" tabindex="-1"><a class="header-anchor" href="#展示流处理任务"><span>展示流处理任务</span></a></h3><p>使用 SHOW PIPES 语句查看所有流处理任务:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">SHOW</span> PIPES |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><p>查询结果如下:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token operator">+</span><span class="token comment">-----------+-----------------------+-------+-------------+-------------+-------------+----------------+</span> |
| <span class="token operator">|</span> ID<span class="token operator">|</span> CreationTime <span class="token operator">|</span> State<span class="token operator">|</span>PipeExtractor<span class="token operator">|</span>PipeProcessor<span class="token operator">|</span>PipeConnector<span class="token operator">|</span>ExceptionMessage<span class="token operator">|</span> |
| <span class="token operator">+</span><span class="token comment">-----------+-----------------------+-------+-------------+-------------+-------------+----------------+</span> |
| <span class="token operator">|</span>iotdb<span class="token operator">-</span>kafka<span class="token operator">|</span><span class="token number">2022</span><span class="token operator">-</span><span class="token number">03</span><span class="token operator">-</span><span class="token number">30</span>T20:<span class="token number">58</span>:<span class="token number">30.689</span><span class="token operator">|</span>RUNNING<span class="token operator">|</span> <span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token operator">|</span> <span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token operator">|</span> <span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token operator">|</span> None<span class="token operator">|</span> |
| <span class="token operator">+</span><span class="token comment">-----------+-----------------------+-------+-------------+-------------+-------------+----------------+</span> |
| <span class="token operator">|</span>iotdb<span class="token operator">-</span>iotdb<span class="token operator">|</span><span class="token number">2022</span><span class="token operator">-</span><span class="token number">03</span><span class="token operator">-</span><span class="token number">31</span>T12:<span class="token number">55</span>:<span class="token number">28.129</span><span class="token operator">|</span>STOPPED<span class="token operator">|</span> <span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token operator">|</span> <span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token operator">|</span> <span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token operator">|</span> TException: <span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token operator">|</span> |
| <span class="token operator">+</span><span class="token comment">-----------+-----------------------+-------+-------------+-------------+-------------+----------------+</span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><p>可以使用 <code><PipeId></code> 指定想看的某个流处理任务状态:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">SHOW</span> PIPE <span class="token operator"><</span>PipeId<span class="token operator">></span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><p>您也可以通过 where 子句,判断某个 <PipeId> 使用的 Pipe Connector 被复用的情况。</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">SHOW</span> PIPES |
| <span class="token keyword">WHERE</span> CONNECTOR USED <span class="token keyword">BY</span> <span class="token operator"><</span>PipeId<span class="token operator">></span> |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div></div></div><h3 id="流处理任务运行状态迁移" tabindex="-1"><a class="header-anchor" href="#流处理任务运行状态迁移"><span>流处理任务运行状态迁移</span></a></h3><p>一个流处理 pipe 在其被管理的生命周期中会经过多种状态:</p><ul><li><strong>STOPPED:</strong> pipe 处于停止运行状态。当管道处于该状态时,有如下几种可能: <ul><li>当一个 pipe 被成功创建之后,其初始状态为暂停状态</li><li>用户手动将一个处于正常运行状态的 pipe 暂停,其状态会被动从 RUNNING 变为 STOPPED</li><li>当一个 pipe 运行过程中出现无法恢复的错误时,其状态会自动从 RUNNING 变为 STOPPED</li></ul></li><li><strong>RUNNING:</strong> pipe 正在正常工作</li><li><strong>DROPPED:</strong> pipe 任务被永久删除</li></ul><p>下图表明了所有状态以及状态的迁移:</p><figure><img src="https://alioss.timecho.com/docs/img/状态迁移图.png" alt="状态迁移图" tabindex="0" loading="lazy"><figcaption>状态迁移图</figcaption></figure><h2 id="权限管理" tabindex="-1"><a class="header-anchor" href="#权限管理"><span>权限管理</span></a></h2><h3 id="流处理任务" tabindex="-1"><a class="header-anchor" href="#流处理任务"><span>流处理任务</span></a></h3><table><thead><tr><th>权限名称</th><th>描述</th></tr></thead><tbody><tr><td>CREATE_PIPE</td><td>注册流处理任务。路径无关。</td></tr><tr><td>START_PIPE</td><td>开启流处理任务。路径无关。</td></tr><tr><td>STOP_PIPE</td><td>停止流处理任务。路径无关。</td></tr><tr><td>DROP_PIPE</td><td>卸载流处理任务。路径无关。</td></tr><tr><td>SHOW_PIPES</td><td>查询流处理任务。路径无关。</td></tr></tbody></table><h3 id="流处理任务插件" tabindex="-1"><a class="header-anchor" href="#流处理任务插件"><span>流处理任务插件</span></a></h3><table><thead><tr><th>权限名称</th><th>描述</th></tr></thead><tbody><tr><td>CREATE_PIPEPLUGIN</td><td>注册流处理任务插件。路径无关。</td></tr><tr><td>DROP_PIPEPLUGIN</td><td>卸载流处理任务插件。路径无关。</td></tr><tr><td>SHOW_PIPEPLUGINS</td><td>查询流处理任务插件。路径无关。</td></tr></tbody></table><h2 id="配置参数" tabindex="-1"><a class="header-anchor" href="#配置参数"><span>配置参数</span></a></h2><p>在 iotdb-common.properties 中:</p><div class="language-Properties line-numbers-mode" data-ext="Properties" data-title="Properties"><pre class="language-Properties"><code>#################### |
| ### Pipe Configuration |
| #################### |
| |
| # Uncomment the following field to configure the pipe lib directory. |
| # For Windows platform |
| # If its prefix is a drive specifier followed by "\\\\", or if its prefix is "\\\\\\\\", then the path is |
| # absolute. Otherwise, it is relative. |
| # pipe_lib_dir=ext\\\\pipe |
| # For Linux platform |
| # If its prefix is "/", then the path is absolute. Otherwise, it is relative. |
| # pipe_lib_dir=ext/pipe |
| |
| # The maximum number of threads that can be used to execute the pipe subtasks in PipeSubtaskExecutor. |
| # The actual value will be min(pipe_subtask_executor_max_thread_num, max(1, CPU core number / 2)). |
| # pipe_subtask_executor_max_thread_num=5 |
| |
| # The connection timeout (in milliseconds) for the thrift client. |
| # pipe_connector_timeout_ms=900000 |
| </code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div>`,39);/** Render function for this documentation page. It looks up the "font" and "ExternalLinkIcon" components through the imported helper l, calls i(), and returns a single root "div" built with r whose children are the hoisted static fragments declared earlier in this module (u, k, v, g, w, F, ...) interleaved with the few nodes assembled here: the red-highlighted Pipe definition sentence, the paragraph linking to the example plugin jar, the extractor.pattern note with its external link, and the red "required"/"yes" cells of the CREATE PIPE parameter table. The parameters V and W are never read in the body. NOTE(review): l/i/r/n/s/a/t are minified imports from ./app-Bp5kEZWW.js; they look like Vue's resolveComponent/openBlock/createElementBlock/createElementVNode/createTextVNode/createVNode/withCtx - confirm against that bundle. */function L(V,W){const e=l("font"),o=l("ExternalLinkIcon");return i(),r("div",null,[u,k,n("p",null,[s("我们将"),a(e,{color:"RED"},{default:t(()=>[s("一个数据流处理任务称为 Pipe")]),_:1}),s("。一个流处理任务(Pipe)包含三个子任务:")]),v,n("p",null,[s("例如,用户实现了一个全类名为 edu.tsinghua.iotdb.pipe.ExampleProcessor 的数据处理插件,"),m,s(" 打包后的 jar 资源包存放到了 "),n("a",b,[s("https://example.com:8080/iotdb/pipe-plugin.jar"),a(o)]),s(" 上,用户希望在流处理引擎中使用这个插件,"),h,s(" 将插件标记为 example。那么,这个数据处理插件的创建语句如图所示。")]),g,n("blockquote",null,[f,n("ul",null,[n("li",null,[n("p",null,[s("Pattern 需用反引号修饰不合法字符或者是不合法路径节点,例如如果希望筛选 root.`a@b` 或者 root.`123`,应设置 pattern 为 root.`a@b` 或者 root.`123`(具体参考 "),n("a",P,[s("单双引号和反引号的使用时机"),a(o)]),s(")")])]),E])]),w,n("table",null,[y,n("tbody",null,[n("tr",null,[x,T,n("td",null,[a(e,{color:"red"},{default:t(()=>[s("必填")]),_:1})]),I,C,_]),R,n("tr",null,[q,S,O,D,N,n("td",null,[a(e,{color:"red"},{default:t(()=>[s("是")]),_:1})])]),n("tr",null,[B,A,n("td",null,[a(e,{color:"red"},{default:t(()=>[s("必填")]),_:1})]),U,j,n("td",null,[a(e,{color:"red"},{default:t(()=>[s("是")]),_:1})])])])]),F])}/* Module tail: G is produced by passing the empty options object d, together with the render function L and the "__file" tag, to the imported helper c; H is the page metadata (path, title, lang, frontmatter, headers, git, readingTime, ...) parsed from the inline JSON string below. They are exported under the names comp and data. */const G=c(d,[["render",L],["__file","Streaming.html.vue"]]),H=JSON.parse('{"path":"/zh/UserGuide/V1.2.x/User-Manual/Streaming.html","title":"IoTDB 流处理框架","lang":"zh-CN","frontmatter":{"description":"IoTDB 流处理框架 IoTDB 
流处理框架允许用户实现自定义的流处理逻辑,可以实现对存储引擎变更的监听和捕获、实现对变更数据的变形、实现对变形后数据的向外推送等逻辑。 我们将。一个流处理任务(Pipe)包含三个子任务: 抽取(Extract) 处理(Process) 发送(Connect) 流处理框架允许用户使用 Java 语言自定义编写三个子任务...","head":[["link",{"rel":"alternate","hreflang":"en-us","href":"https://iotdb.apache.org/UserGuide/V1.2.x/User-Manual/Streaming.html"}],["meta",{"property":"og:url","content":"https://iotdb.apache.org/zh/UserGuide/V1.2.x/User-Manual/Streaming.html"}],["meta",{"property":"og:site_name","content":"IoTDB Website"}],["meta",{"property":"og:title","content":"IoTDB 流处理框架"}],["meta",{"property":"og:description","content":"IoTDB 流处理框架 IoTDB 流处理框架允许用户实现自定义的流处理逻辑,可以实现对存储引擎变更的监听和捕获、实现对变更数据的变形、实现对变形后数据的向外推送等逻辑。 我们将。一个流处理任务(Pipe)包含三个子任务: 抽取(Extract) 处理(Process) 发送(Connect) 流处理框架允许用户使用 Java 语言自定义编写三个子任务..."}],["meta",{"property":"og:type","content":"article"}],["meta",{"property":"og:image","content":"https://alioss.timecho.com/docs/img/%E5%90%8C%E6%AD%A5%E5%BC%95%E6%93%8E.jpeg"}],["meta",{"property":"og:locale","content":"zh-CN"}],["meta",{"property":"og:locale:alternate","content":"en-US"}],["meta",{"property":"og:updated_time","content":"2024-04-08T07:45:44.000Z"}],["meta",{"property":"article:modified_time","content":"2024-04-08T07:45:44.000Z"}],["script",{"type":"application/ld+json"},"{\\"@context\\":\\"https://schema.org\\",\\"@type\\":\\"Article\\",\\"headline\\":\\"IoTDB 
流处理框架\\",\\"image\\":[\\"https://alioss.timecho.com/docs/img/%E5%90%8C%E6%AD%A5%E5%BC%95%E6%93%8E.jpeg\\",\\"https://alioss.timecho.com/docs/img/%E7%8A%B6%E6%80%81%E8%BF%81%E7%A7%BB%E5%9B%BE.png\\"],\\"dateModified\\":\\"2024-04-08T07:45:44.000Z\\",\\"author\\":[]}"]]},"headers":[{"level":2,"title":"自定义流处理插件开发","slug":"自定义流处理插件开发","link":"#自定义流处理插件开发","children":[{"level":3,"title":"编程开发依赖","slug":"编程开发依赖","link":"#编程开发依赖","children":[]},{"level":3,"title":"事件驱动编程模型","slug":"事件驱动编程模型","link":"#事件驱动编程模型","children":[]},{"level":3,"title":"自定义流处理插件编程接口定义","slug":"自定义流处理插件编程接口定义","link":"#自定义流处理插件编程接口定义","children":[]}]},{"level":2,"title":"自定义流处理插件管理","slug":"自定义流处理插件管理","link":"#自定义流处理插件管理","children":[{"level":3,"title":"加载插件语句","slug":"加载插件语句","link":"#加载插件语句","children":[]},{"level":3,"title":"删除插件语句","slug":"删除插件语句","link":"#删除插件语句","children":[]},{"level":3,"title":"查看插件语句","slug":"查看插件语句","link":"#查看插件语句","children":[]}]},{"level":2,"title":"系统预置的流处理插件","slug":"系统预置的流处理插件","link":"#系统预置的流处理插件","children":[{"level":3,"title":"预置 extractor 插件","slug":"预置-extractor-插件","link":"#预置-extractor-插件","children":[]},{"level":3,"title":"预置 processor 插件","slug":"预置-processor-插件","link":"#预置-processor-插件","children":[]},{"level":3,"title":"预置 connector 
插件","slug":"预置-connector-插件","link":"#预置-connector-插件","children":[]}]},{"level":2,"title":"流处理任务管理","slug":"流处理任务管理","link":"#流处理任务管理","children":[{"level":3,"title":"创建流处理任务","slug":"创建流处理任务","link":"#创建流处理任务","children":[]},{"level":3,"title":"启动流处理任务","slug":"启动流处理任务","link":"#启动流处理任务","children":[]},{"level":3,"title":"停止流处理任务","slug":"停止流处理任务","link":"#停止流处理任务","children":[]},{"level":3,"title":"删除流处理任务","slug":"删除流处理任务","link":"#删除流处理任务","children":[]},{"level":3,"title":"展示流处理任务","slug":"展示流处理任务","link":"#展示流处理任务","children":[]},{"level":3,"title":"流处理任务运行状态迁移","slug":"流处理任务运行状态迁移","link":"#流处理任务运行状态迁移","children":[]}]},{"level":2,"title":"权限管理","slug":"权限管理","link":"#权限管理","children":[{"level":3,"title":"流处理任务","slug":"流处理任务","link":"#流处理任务","children":[]},{"level":3,"title":"流处理任务插件","slug":"流处理任务插件","link":"#流处理任务插件","children":[]}]},{"level":2,"title":"配置参数","slug":"配置参数","link":"#配置参数","children":[]}],"git":{"createdTime":1689242051000,"updatedTime":1712562344000,"contributors":[{"name":"Caideyipi","email":"87789683+Caideyipi@users.noreply.github.com","commits":2},{"name":"Steve Yurong Su","email":"rong@apache.org","commits":1},{"name":"wanghui42","email":"105700158+wanghui42@users.noreply.github.com","commits":1}]},"readingTime":{"minutes":21.94,"words":6581},"filePathRelative":"zh/UserGuide/V1.2.x/User-Manual/Streaming.md","localizedDate":"2023年7月13日","autoDesc":true}');export{G as comp,H as data}; |