blob: 927ba6c6fdcc0d04ca6e9d347b7804c33a4b6081 [file] [log] [blame]
import{_ as c,r as o,o as l,c as i,b as s,d as n,a,w as u,e}from"./app-Bp5kEZWW.js";const d={},r=e('<h1 id="数据处理" tabindex="-1"><a class="header-anchor" href="#数据处理"><span>数据处理</span></a></h1><h2 id="用户定义函数-udf" tabindex="-1"><a class="header-anchor" href="#用户定义函数-udf"><span>用户定义函数 (UDF)</span></a></h2><p>UDF(User Defined Function)即用户自定义函数。IoTDB 提供多种内建函数来满足您的计算需求,同时您还可以通过创建自定义函数来满足更多的计算需求。</p><p>根据此文档,您将会很快学会 UDF 的编写、注册、使用等操作。</p><h3 id="udf-类型" tabindex="-1"><a class="header-anchor" href="#udf-类型"><span>UDF 类型</span></a></h3><p>IoTDB 支持两种类型的 UDF 函数,如下表所示。</p><table><thead><tr><th>UDF 分类</th><th>描述</th></tr></thead><tbody><tr><td>UDTF(User Defined Timeseries Generating Function)</td><td>自定义时间序列生成函数。该类函数允许接收多条时间序列,最终会输出一条时间序列,生成的时间序列可以有任意多数量的数据点。</td></tr><tr><td>UDAF(User Defined Aggregation Function)</td><td>正在开发,敬请期待。</td></tr></tbody></table><h3 id="udf-依赖" tabindex="-1"><a class="header-anchor" href="#udf-依赖"><span>UDF 依赖</span></a></h3>',8),k={href:"http://search.maven.org/",target:"_blank",rel:"noopener noreferrer"},m={href:"http://search.maven.org/",target:"_blank",rel:"noopener noreferrer"},v=e(`<div class="language-xml line-numbers-mode" data-ext="xml" data-title="xml"><pre class="language-xml"><code><span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>dependency</span><span class="token punctuation">&gt;</span></span>
<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>groupId</span><span class="token punctuation">&gt;</span></span>org.apache.iotdb<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>groupId</span><span class="token punctuation">&gt;</span></span>
<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>artifactId</span><span class="token punctuation">&gt;</span></span>iotdb-server<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>artifactId</span><span class="token punctuation">&gt;</span></span>
<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>version</span><span class="token punctuation">&gt;</span></span>0.13.0-SNAPSHOT<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>version</span><span class="token punctuation">&gt;</span></span>
<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>scope</span><span class="token punctuation">&gt;</span></span>provided<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>scope</span><span class="token punctuation">&gt;</span></span>
<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>dependency</span><span class="token punctuation">&gt;</span></span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h3 id="udtf-user-defined-timeseries-generating-function" tabindex="-1"><a class="header-anchor" href="#udtf-user-defined-timeseries-generating-function"><span>UDTF(User Defined Timeseries Generating Function)</span></a></h3><p>编写一个 UDTF 需要继承<code>org.apache.iotdb.db.query.udf.api.UDTF</code>类,并至少实现<code>beforeStart</code>方法和一种<code>transform</code>方法。</p><p>下表是所有可供用户实现的接口说明。</p><table><thead><tr><th style="text-align:left;">接口定义</th><th style="text-align:left;">描述</th><th>是否必须</th></tr></thead><tbody><tr><td style="text-align:left;"><code>void validate(UDFParameterValidator validator) throws Exception</code></td><td style="text-align:left;">在初始化方法<code>beforeStart</code>调用前执行,用于检测<code>UDFParameters</code>中用户输入的参数是否合法。</td><td>否</td></tr><tr><td style="text-align:left;"><code>void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) throws Exception</code></td><td style="text-align:left;">初始化方法,在 UDTF 处理输入数据前,调用用户自定义的初始化行为。用户每执行一次 UDTF 查询,框架就会构造一个新的 UDF 类实例,该方法在每个 UDF 类实例被初始化时调用一次。在每一个 UDF 类实例的生命周期内,该方法只会被调用一次。</td><td>是</td></tr><tr><td style="text-align:left;"><code>void transform(Row row, PointCollector collector) throws Exception</code></td><td style="text-align:left;">这个方法由框架调用。当您在<code>beforeStart</code>中选择以<code>RowByRowAccessStrategy</code>的策略消费原始数据时,这个数据处理方法就会被调用。输入参数以<code>Row</code>的形式传入,输出结果通过<code>PointCollector</code>输出。您需要在该方法内自行调用<code>collector</code>提供的数据收集方法,以决定最终的输出数据。</td><td>与下面的方法二选一</td></tr><tr><td style="text-align:left;"><code>void transform(RowWindow rowWindow, PointCollector collector) throws Exception</code></td><td 
style="text-align:left;">这个方法由框架调用。当您在<code>beforeStart</code>中选择以<code>SlidingSizeWindowAccessStrategy</code>或者<code>SlidingTimeWindowAccessStrategy</code>的策略消费原始数据时,这个数据处理方法就会被调用。输入参数以<code>RowWindow</code>的形式传入,输出结果通过<code>PointCollector</code>输出。您需要在该方法内自行调用<code>collector</code>提供的数据收集方法,以决定最终的输出数据。</td><td>与上面的方法二选一</td></tr><tr><td style="text-align:left;"><code>void terminate(PointCollector collector) throws Exception</code></td><td style="text-align:left;">这个方法由框架调用。该方法会在所有的<code>transform</code>调用执行完成后,在<code>beforeDestroy</code>方法执行前被调用。在一个 UDF 查询过程中,该方法会且只会调用一次。您需要在该方法内自行调用<code>collector</code>提供的数据收集方法,以决定最终的输出数据。</td><td>否</td></tr><tr><td style="text-align:left;"><code>void beforeDestroy() </code></td><td style="text-align:left;">UDTF 的结束方法。此方法由框架调用,并且只会被调用一次,即在处理完最后一条记录之后被调用。</td><td>否</td></tr></tbody></table><p>在一个完整的 UDTF 实例生命周期中,各个方法的调用顺序如下:</p><ol><li><code>void validate(UDFParameterValidator validator) throws Exception</code></li><li><code>void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) throws Exception</code></li><li><code>void transform(Row row, PointCollector collector) throws Exception</code>或者<code>void transform(RowWindow rowWindow, PointCollector collector) throws Exception</code></li><li><code>void terminate(PointCollector collector) throws Exception</code></li><li><code>void beforeDestroy() </code></li></ol><p>注意,框架每执行一次 UDTF 查询,都会构造一个全新的 UDF 类实例,查询结束时,对应的 UDF 类实例即被销毁,因此不同 UDTF 查询(即使是在同一个 SQL 语句中)UDF 类实例内部的数据都是隔离的。您可以放心地在 UDTF 中维护一些状态数据,无需考虑并发对 UDF 类实例内部状态数据的影响。</p><p>下面将详细介绍各个接口的使用方法。</p><ul><li>void validate(UDFParameterValidator validator) throws Exception</li></ul><p><code>validate</code>方法能够对用户输入的参数进行验证。</p><p>您可以在该方法中限制输入序列的数量和类型,检查用户输入的属性或者进行自定义逻辑的验证。</p><p><code>UDFParameterValidator</code>的使用方法请见 Javadoc。</p><ul><li>void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) throws Exception</li></ul><p><code>beforeStart</code>方法有三个作用:</p><ol><li>帮助用户解析 SQL 语句中的 UDF 
参数</li><li>配置 UDF 运行时必要的信息,即指定 UDF 访问原始数据时采取的策略和输出结果序列的类型</li><li>创建资源,比如建立外部链接,打开文件等。</li></ol><h4 id="udfparameters" tabindex="-1"><a class="header-anchor" href="#udfparameters"><span>UDFParameters</span></a></h4><p><code>UDFParameters</code>的作用是解析 SQL 语句中的 UDF 参数(SQL 中 UDF 函数名称后括号中的部分)。参数包括路径(及其序列类型)参数和字符串 key-value 对形式输入的属性参数。</p><p>例子:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">SELECT</span> UDF<span class="token punctuation">(</span>s1<span class="token punctuation">,</span> s2<span class="token punctuation">,</span> <span class="token string">&#39;key1&#39;</span><span class="token operator">=</span><span class="token string">&#39;iotdb&#39;</span><span class="token punctuation">,</span> <span class="token string">&#39;key2&#39;</span><span class="token operator">=</span><span class="token string">&#39;123.45&#39;</span><span class="token punctuation">)</span> <span class="token keyword">FROM</span> root<span class="token punctuation">.</span>sg<span class="token punctuation">.</span>d<span class="token punctuation">;</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><p>用法:</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token keyword">void</span> <span class="token function">beforeStart</span><span class="token punctuation">(</span><span class="token class-name">UDFParameters</span> parameters<span class="token punctuation">,</span> <span class="token class-name">UDTFConfigurations</span> configurations<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span> <span class="token punctuation">{</span>
<span class="token comment">// parameters</span>
<span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token class-name">PartialPath</span> path <span class="token operator">:</span> parameters<span class="token punctuation">.</span><span class="token function">getPaths</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
<span class="token class-name">TSDataType</span> dataType <span class="token operator">=</span> parameters<span class="token punctuation">.</span><span class="token function">getDataType</span><span class="token punctuation">(</span>path<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token comment">// do something</span>
<span class="token punctuation">}</span>
<span class="token class-name">String</span> stringValue <span class="token operator">=</span> parameters<span class="token punctuation">.</span><span class="token function">getString</span><span class="token punctuation">(</span><span class="token string">&quot;key1&quot;</span><span class="token punctuation">)</span><span class="token punctuation">;</span> <span class="token comment">// iotdb</span>
<span class="token class-name">Float</span> floatValue <span class="token operator">=</span> parameters<span class="token punctuation">.</span><span class="token function">getFloat</span><span class="token punctuation">(</span><span class="token string">&quot;key2&quot;</span><span class="token punctuation">)</span><span class="token punctuation">;</span> <span class="token comment">// 123.45</span>
<span class="token class-name">Double</span> doubleValue <span class="token operator">=</span> parameters<span class="token punctuation">.</span><span class="token function">getDouble</span><span class="token punctuation">(</span><span class="token string">&quot;key3&quot;</span><span class="token punctuation">)</span><span class="token punctuation">;</span> <span class="token comment">// null</span>
<span class="token keyword">int</span> intValue <span class="token operator">=</span> parameters<span class="token punctuation">.</span><span class="token function">getIntOrDefault</span><span class="token punctuation">(</span><span class="token string">&quot;key4&quot;</span><span class="token punctuation">,</span> <span class="token number">678</span><span class="token punctuation">)</span><span class="token punctuation">;</span> <span class="token comment">// 678</span>
<span class="token comment">// do something</span>
<span class="token comment">// configurations</span>
<span class="token comment">// ...</span>
<span class="token punctuation">}</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h4 id="udtfconfigurations" tabindex="-1"><a class="header-anchor" href="#udtfconfigurations"><span>UDTFConfigurations</span></a></h4><p>您必须使用 <code>UDTFConfigurations</code> 指定 UDF 访问原始数据时采取的策略和输出结果序列的类型。</p><p>用法:</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token keyword">void</span> <span class="token function">beforeStart</span><span class="token punctuation">(</span><span class="token class-name">UDFParameters</span> parameters<span class="token punctuation">,</span> <span class="token class-name">UDTFConfigurations</span> configurations<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span> <span class="token punctuation">{</span>
<span class="token comment">// parameters</span>
<span class="token comment">// ...</span>
<span class="token comment">// configurations</span>
configurations
<span class="token punctuation">.</span><span class="token function">setAccessStrategy</span><span class="token punctuation">(</span><span class="token keyword">new</span> <span class="token class-name">RowByRowAccessStrategy</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token punctuation">.</span><span class="token function">setOutputDataType</span><span class="token punctuation">(</span><span class="token class-name">TSDataType</span><span class="token punctuation">.</span><span class="token constant">INT32</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><p>其中<code>setAccessStrategy</code>方法用于设定 UDF 访问原始数据时采取的策略,<code>setOutputDataType</code>用于设定输出结果序列的类型。</p><ul><li>setAccessStrategy</li></ul><p>注意,您在此处设定的原始数据访问策略决定了框架会调用哪一种<code>transform</code>方法 ,请实现与原始数据访问策略对应的<code>transform</code>方法。当然,您也可以根据<code>UDFParameters</code>解析出来的属性参数,动态决定设定哪一种策略,因此,实现两种<code>transform</code>方法也是被允许的。</p><p>下面是您可以设定的访问原始数据的策略:</p><table><thead><tr><th style="text-align:left;">接口定义</th><th style="text-align:left;">描述</th><th>调用的<code>transform</code>方法</th></tr></thead><tbody><tr><td style="text-align:left;"><code>RowByRowAccessStrategy</code></td><td style="text-align:left;">逐行地处理原始数据。框架会为每一行原始数据输入调用一次<code>transform</code>方法。当 UDF 只有一个输入序列时,一行输入就是该输入序列中的一个数据点。当 UDF 有多个输入序列时,一行输入序列对应的是这些输入序列按时间对齐后的结果(一行数据中,可能存在某一列为<code>null</code>值,但不会全部都是<code>null</code>)。</td><td><code>void transform(Row row, PointCollector collector) throws Exception</code></td></tr><tr><td style="text-align:left;"><code>SlidingTimeWindowAccessStrategy</code></td><td style="text-align:left;">以滑动时间窗口的方式处理原始数据。框架会为每一个原始数据输入窗口调用一次<code>transform</code>方法。一个窗口可能存在多行数据,每一行数据对应的是输入序列按时间对齐后的结果(一行数据中,可能存在某一列为<code>null</code>值,但不会全部都是<code>null</code>)。</td><td><code>void transform(RowWindow rowWindow, PointCollector collector) throws Exception</code></td></tr><tr><td style="text-align:left;"><code>SlidingSizeWindowAccessStrategy</code></td><td style="text-align:left;">以固定行数的方式处理原始数据,即每个数据处理窗口都会包含固定行数的数据(最后一个窗口除外)。框架会为每一个原始数据输入窗口调用一次<code>transform</code>方法。一个窗口可能存在多行数据,每一行数据对应的是输入序列按时间对齐后的结果(一行数据中,可能存在某一列为<code>null</code>值,但不会全部都是<code>null</code>)。</td><td><code>void transform(RowWindow rowWindow, PointCollector collector) 
throws Exception</code></td></tr></tbody></table><p><code>RowByRowAccessStrategy</code>的构造不需要任何参数。</p><p><code>SlidingTimeWindowAccessStrategy</code>有多种构造方法,您可以向构造方法提供 3 类参数:</p><ol><li>时间轴显示时间窗开始和结束时间</li><li>划分时间轴的时间间隔参数(必须为正数)</li><li>滑动步长(不要求大于等于时间间隔,但是必须为正数)</li></ol><p>时间轴显示时间窗开始和结束时间不是必须要提供的。当您不提供这类参数时,时间轴显示时间窗开始时间会被定义为整个查询结果集中最小的时间戳,时间轴显示时间窗结束时间会被定义为整个查询结果集中最大的时间戳。</p><p>滑动步长参数也不是必须的。当您不提供滑动步长参数时,滑动步长会被设定为划分时间轴的时间间隔。</p><p>3 类参数的关系可见下图。策略的构造方法详见 Javadoc。</p><img style="width:100%;max-width:800px;max-height:600px;margin-left:auto;margin-right:auto;display:block;" src="https://alioss.timecho.com/docs/img/github/99787878-47b51480-2b5b-11eb-8ed3-84088c5c30f7.png"><p>注意,最后的一些时间窗口的实际时间间隔可能小于规定的时间间隔参数。另外,可能存在某些时间窗口内数据行数量为 0 的情况,这种情况框架也会为该窗口调用一次<code>transform</code>方法。</p><p><code>SlidingSizeWindowAccessStrategy</code>有多种构造方法,您可以向构造方法提供 2 个参数:</p><ol><li>窗口大小,即一个数据处理窗口包含的数据行数。注意,最后一些窗口的数据行数可能少于规定的数据行数。</li><li>滑动步长,即下一窗口第一个数据行与当前窗口第一个数据行间的数据行数(不要求大于等于窗口大小,但是必须为正数)</li></ol><p>滑动步长参数不是必须的。当您不提供滑动步长参数时,滑动步长会被设定为窗口大小。</p><p>策略的构造方法详见 Javadoc。</p><ul><li>setOutputDataType</li></ul><p>注意,您在此处设定的输出结果序列的类型,决定了<code>transform</code>方法中<code>PointCollector</code>实际能够接收的数据类型。<code>setOutputDataType</code>中设定的输出类型和<code>PointCollector</code>实际能够接收的数据输出类型关系如下:</p><table><thead><tr><th style="text-align:left;"><code>setOutputDataType</code>中设定的输出类型</th><th style="text-align:left;"><code>PointCollector</code>实际能够接收的输出类型</th></tr></thead><tbody><tr><td style="text-align:left;"><code>INT32</code></td><td style="text-align:left;"><code>int</code></td></tr><tr><td style="text-align:left;"><code>INT64</code></td><td style="text-align:left;"><code>long</code></td></tr><tr><td style="text-align:left;"><code>FLOAT</code></td><td style="text-align:left;"><code>float</code></td></tr><tr><td style="text-align:left;"><code>DOUBLE</code></td><td style="text-align:left;"><code>double</code></td></tr><tr><td style="text-align:left;"><code>BOOLEAN</code></td><td 
style="text-align:left;"><code>boolean</code></td></tr><tr><td style="text-align:left;"><code>TEXT</code></td><td style="text-align:left;"><code>java.lang.String</code> 和 <code>org.apache.iotdb.tsfile.utils.Binary</code></td></tr></tbody></table><p>UDTF 输出序列的类型是运行时决定的。您可以根据输入序列类型动态决定输出序列类型。</p><p>下面是一个简单的例子:</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token keyword">void</span> <span class="token function">beforeStart</span><span class="token punctuation">(</span><span class="token class-name">UDFParameters</span> parameters<span class="token punctuation">,</span> <span class="token class-name">UDTFConfigurations</span> configurations<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span> <span class="token punctuation">{</span>
<span class="token comment">// do something</span>
<span class="token comment">// ...</span>
configurations
<span class="token punctuation">.</span><span class="token function">setAccessStrategy</span><span class="token punctuation">(</span><span class="token keyword">new</span> <span class="token class-name">RowByRowAccessStrategy</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token punctuation">.</span><span class="token function">setOutputDataType</span><span class="token punctuation">(</span>parameters<span class="token punctuation">.</span><span class="token function">getDataType</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><ul><li>void transform(Row row, PointCollector collector) throws Exception</li></ul><p>当您在<code>beforeStart</code>方法中指定 UDF 读取原始数据的策略为 <code>RowByRowAccessStrategy</code>,您就需要实现该方法,在该方法中增加对原始数据处理的逻辑。</p><p>该方法每次处理原始数据的一行。原始数据由<code>Row</code>读入,由<code>PointCollector</code>输出。您可以选择在一次<code>transform</code>方法调用中输出任意数量的数据点。需要注意的是,输出数据点的类型必须与您在<code>beforeStart</code>方法中设置的一致,而输出数据点的时间戳必须是严格单调递增的。</p><p>下面是一个实现了<code>void transform(Row row, PointCollector collector) throws Exception</code>方法的完整 UDF 示例。它是一个加法器,接收两列时间序列输入,当这两个数据点都不为<code>null</code>时,输出这两个数据点的代数和。</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span></span><span class="token class-name">UDTF</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>access<span class="token punctuation">.</span></span><span class="token class-name">Row</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>collector<span class="token punctuation">.</span></span><span class="token class-name">PointCollector</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>customizer<span class="token punctuation">.</span>config<span class="token punctuation">.</span></span><span class="token class-name">UDTFConfigurations</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>customizer<span class="token punctuation">.</span>parameter<span class="token punctuation">.</span></span><span class="token class-name">UDFParameters</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>customizer<span class="token punctuation">.</span>strategy<span class="token punctuation">.</span></span><span class="token class-name">RowByRowAccessStrategy</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>tsfile<span class="token punctuation">.</span>file<span class="token punctuation">.</span>metadata<span class="token punctuation">.</span>enums<span class="token punctuation">.</span></span><span class="token class-name">TSDataType</span></span><span class="token punctuation">;</span>
<span class="token keyword">public</span> <span class="token keyword">class</span> <span class="token class-name">Adder</span> <span class="token keyword">implements</span> <span class="token class-name">UDTF</span> <span class="token punctuation">{</span>
<span class="token annotation punctuation">@Override</span>
<span class="token keyword">public</span> <span class="token keyword">void</span> <span class="token function">beforeStart</span><span class="token punctuation">(</span><span class="token class-name">UDFParameters</span> parameters<span class="token punctuation">,</span> <span class="token class-name">UDTFConfigurations</span> configurations<span class="token punctuation">)</span> <span class="token punctuation">{</span>
configurations
<span class="token punctuation">.</span><span class="token function">setOutputDataType</span><span class="token punctuation">(</span><span class="token class-name">TSDataType</span><span class="token punctuation">.</span><span class="token constant">INT64</span><span class="token punctuation">)</span>
<span class="token punctuation">.</span><span class="token function">setAccessStrategy</span><span class="token punctuation">(</span><span class="token keyword">new</span> <span class="token class-name">RowByRowAccessStrategy</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
<span class="token annotation punctuation">@Override</span>
<span class="token keyword">public</span> <span class="token keyword">void</span> <span class="token function">transform</span><span class="token punctuation">(</span><span class="token class-name">Row</span> row<span class="token punctuation">,</span> <span class="token class-name">PointCollector</span> collector<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span> <span class="token punctuation">{</span>
<span class="token keyword">if</span> <span class="token punctuation">(</span>row<span class="token punctuation">.</span><span class="token function">isNull</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">)</span> <span class="token operator">||</span> row<span class="token punctuation">.</span><span class="token function">isNull</span><span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
<span class="token keyword">return</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
collector<span class="token punctuation">.</span><span class="token function">putLong</span><span class="token punctuation">(</span>row<span class="token punctuation">.</span><span class="token function">getTime</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> row<span class="token punctuation">.</span><span class="token function">getLong</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">)</span> <span class="token operator">+</span> row<span class="token punctuation">.</span><span class="token function">getLong</span><span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
<span class="token punctuation">}</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><ul><li>void transform(RowWindow rowWindow, PointCollector collector) throws Exception</li></ul><p>当您在<code>beforeStart</code>方法中指定 UDF 读取原始数据的策略为 <code>SlidingTimeWindowAccessStrategy</code>或者<code>SlidingSizeWindowAccessStrategy</code>时,您就需要实现该方法,在该方法中增加对原始数据处理的逻辑。</p><p>该方法每次处理固定行数或者固定时间间隔内的一批数据,我们称包含这一批数据的容器为窗口。原始数据由<code>RowWindow</code>读入,由<code>PointCollector</code>输出。<code>RowWindow</code>能够帮助您访问某一批次的<code>Row</code>,它提供了对这一批次的<code>Row</code>进行随机访问和迭代访问的接口。您可以选择在一次<code>transform</code>方法调用中输出任意数量的数据点,需要注意的是,输出数据点的类型必须与您在<code>beforeStart</code>方法中设置的一致,而输出数据点的时间戳必须是严格单调递增的。</p><p>下面是一个实现了<code>void transform(RowWindow rowWindow, PointCollector collector) throws Exception</code>方法的完整 UDF 示例。它是一个计数器,接收任意列数的时间序列输入,作用是统计并输出指定时间范围内每一个时间窗口中的数据行数。</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token keyword">import</span> <span class="token import"><span class="token namespace">java<span class="token punctuation">.</span>io<span class="token punctuation">.</span></span><span class="token class-name">IOException</span></span><span class="token 
punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span></span><span class="token class-name">UDTF</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>access<span class="token punctuation">.</span></span><span class="token class-name">RowWindow</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>collector<span class="token punctuation">.</span></span><span class="token class-name">PointCollector</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>customizer<span class="token punctuation">.</span>config<span class="token punctuation">.</span></span><span class="token class-name">UDTFConfigurations</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>customizer<span class="token punctuation">.</span>parameter<span class="token punctuation">.</span></span><span class="token class-name">UDFParameters</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>customizer<span class="token punctuation">.</span>strategy<span class="token punctuation">.</span></span><span class="token class-name">SlidingTimeWindowAccessStrategy</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>tsfile<span class="token punctuation">.</span>file<span class="token punctuation">.</span>metadata<span class="token punctuation">.</span>enums<span class="token punctuation">.</span></span><span class="token class-name">TSDataType</span></span><span class="token punctuation">;</span>
<span class="token keyword">public</span> <span class="token keyword">class</span> <span class="token class-name">Counter</span> <span class="token keyword">implements</span> <span class="token class-name">UDTF</span> <span class="token punctuation">{</span>
<span class="token annotation punctuation">@Override</span>
<span class="token keyword">public</span> <span class="token keyword">void</span> <span class="token function">beforeStart</span><span class="token punctuation">(</span><span class="token class-name">UDFParameters</span> parameters<span class="token punctuation">,</span> <span class="token class-name">UDTFConfigurations</span> configurations<span class="token punctuation">)</span> <span class="token punctuation">{</span>
configurations
<span class="token punctuation">.</span><span class="token function">setOutputDataType</span><span class="token punctuation">(</span><span class="token class-name">TSDataType</span><span class="token punctuation">.</span><span class="token constant">INT32</span><span class="token punctuation">)</span>
<span class="token punctuation">.</span><span class="token function">setAccessStrategy</span><span class="token punctuation">(</span><span class="token keyword">new</span> <span class="token class-name">SlidingTimeWindowAccessStrategy</span><span class="token punctuation">(</span>
parameters<span class="token punctuation">.</span><span class="token function">getLong</span><span class="token punctuation">(</span><span class="token string">&quot;time_interval&quot;</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
parameters<span class="token punctuation">.</span><span class="token function">getLong</span><span class="token punctuation">(</span><span class="token string">&quot;sliding_step&quot;</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
parameters<span class="token punctuation">.</span><span class="token function">getLong</span><span class="token punctuation">(</span><span class="token string">&quot;display_window_begin&quot;</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
parameters<span class="token punctuation">.</span><span class="token function">getLong</span><span class="token punctuation">(</span><span class="token string">&quot;display_window_end&quot;</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
<span class="token annotation punctuation">@Override</span>
<span class="token keyword">public</span> <span class="token keyword">void</span> <span class="token function">transform</span><span class="token punctuation">(</span><span class="token class-name">RowWindow</span> rowWindow<span class="token punctuation">,</span> <span class="token class-name">PointCollector</span> collector<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">Exception</span> <span class="token punctuation">{</span>
<span class="token keyword">if</span> <span class="token punctuation">(</span>rowWindow<span class="token punctuation">.</span><span class="token function">windowSize</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token operator">!=</span> <span class="token number">0</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
collector<span class="token punctuation">.</span><span class="token function">putInt</span><span class="token punctuation">(</span>rowWindow<span class="token punctuation">.</span><span class="token function">getRow</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">)</span><span class="token punctuation">.</span><span class="token function">getTime</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> rowWindow<span class="token punctuation">.</span><span class="token function">windowSize</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
<span class="token punctuation">}</span>
<span class="token punctuation">}</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><ul><li>void terminate(PointCollector collector) throws Exception</li></ul><p>在一些场景下,UDF 需要遍历完所有的原始数据后才能得到最后的输出结果。<code>terminate</code>接口为这类 UDF 提供了支持。</p><p>该方法会在所有的<code>transform</code>调用执行完成后,在<code>beforeDestroy</code>方法执行前被调用。您可以选择使用<code>transform</code>方法进行单纯的数据处理,最后使用<code>terminate</code>将处理结果输出。</p><p>结果需要由<code>PointCollector</code>输出。您可以选择在一次<code>terminate</code>方法调用中输出任意数量的数据点。需要注意的是,输出数据点的类型必须与您在<code>beforeStart</code>方法中设置的一致,而输出数据点的时间戳必须是严格单调递增的。</p><p>下面是一个实现了<code>void terminate(PointCollector collector) throws Exception</code>方法的完整 UDF 示例。它接收一个<code>INT32</code>类型的时间序列输入,作用是输出该序列的最大值点。</p><div class="language-java line-numbers-mode" data-ext="java" data-title="java"><pre class="language-java"><code><span class="token keyword">import</span> <span class="token import"><span class="token namespace">java<span class="token punctuation">.</span>io<span class="token punctuation">.</span></span><span class="token class-name">IOException</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span></span><span class="token class-name">UDTF</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>access<span class="token punctuation">.</span></span><span class="token class-name">Row</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>collector<span class="token punctuation">.</span></span><span class="token class-name">PointCollector</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>customizer<span class="token punctuation">.</span>config<span class="token punctuation">.</span></span><span class="token class-name">UDTFConfigurations</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>customizer<span class="token punctuation">.</span>parameter<span class="token punctuation">.</span></span><span class="token class-name">UDFParameters</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>db<span class="token punctuation">.</span>query<span class="token punctuation">.</span>udf<span class="token punctuation">.</span>api<span class="token punctuation">.</span>customizer<span class="token punctuation">.</span>strategy<span class="token punctuation">.</span></span><span class="token class-name">RowByRowAccessStrategy</span></span><span class="token punctuation">;</span>
<span class="token keyword">import</span> <span class="token import"><span class="token namespace">org<span class="token punctuation">.</span>apache<span class="token punctuation">.</span>iotdb<span class="token punctuation">.</span>tsfile<span class="token punctuation">.</span>file<span class="token punctuation">.</span>metadata<span class="token punctuation">.</span>enums<span class="token punctuation">.</span></span><span class="token class-name">TSDataType</span></span><span class="token punctuation">;</span>
<span class="token keyword">public</span> <span class="token keyword">class</span> <span class="token class-name">Max</span> <span class="token keyword">implements</span> <span class="token class-name">UDTF</span> <span class="token punctuation">{</span>
<span class="token keyword">private</span> <span class="token class-name">Long</span> time<span class="token punctuation">;</span>
<span class="token keyword">private</span> <span class="token keyword">int</span> value<span class="token punctuation">;</span>
<span class="token annotation punctuation">@Override</span>
<span class="token keyword">public</span> <span class="token keyword">void</span> <span class="token function">beforeStart</span><span class="token punctuation">(</span><span class="token class-name">UDFParameters</span> parameters<span class="token punctuation">,</span> <span class="token class-name">UDTFConfigurations</span> configurations<span class="token punctuation">)</span> <span class="token punctuation">{</span>
configurations
<span class="token punctuation">.</span><span class="token function">setOutputDataType</span><span class="token punctuation">(</span><span class="token class-name">TSDataType</span><span class="token punctuation">.</span><span class="token constant">INT32</span><span class="token punctuation">)</span>
<span class="token punctuation">.</span><span class="token function">setAccessStrategy</span><span class="token punctuation">(</span><span class="token keyword">new</span> <span class="token class-name">RowByRowAccessStrategy</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
<span class="token annotation punctuation">@Override</span>
<span class="token keyword">public</span> <span class="token keyword">void</span> <span class="token function">transform</span><span class="token punctuation">(</span><span class="token class-name">Row</span> row<span class="token punctuation">,</span> <span class="token class-name">PointCollector</span> collector<span class="token punctuation">)</span> <span class="token punctuation">{</span>
<span class="token keyword">int</span> candidateValue <span class="token operator">=</span> row<span class="token punctuation">.</span><span class="token function">getInt</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token keyword">if</span> <span class="token punctuation">(</span>time <span class="token operator">==</span> <span class="token keyword">null</span> <span class="token operator">||</span> value <span class="token operator">&lt;</span> candidateValue<span class="token punctuation">)</span> <span class="token punctuation">{</span>
time <span class="token operator">=</span> row<span class="token punctuation">.</span><span class="token function">getTime</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
value <span class="token operator">=</span> candidateValue<span class="token punctuation">;</span>
<span class="token punctuation">}</span>
<span class="token punctuation">}</span>
<span class="token annotation punctuation">@Override</span>
<span class="token keyword">public</span> <span class="token keyword">void</span> <span class="token function">terminate</span><span class="token punctuation">(</span><span class="token class-name">PointCollector</span> collector<span class="token punctuation">)</span> <span class="token keyword">throws</span> <span class="token class-name">IOException</span> <span class="token punctuation">{</span>
<span class="token keyword">if</span> <span class="token punctuation">(</span>time <span class="token operator">!=</span> <span class="token keyword">null</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
collector<span class="token punctuation">.</span><span class="token function">putInt</span><span class="token punctuation">(</span>time<span class="token punctuation">,</span> value<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
<span class="token punctuation">}</span>
<span class="token punctuation">}</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><ul><li>void beforeDestroy()</li></ul><p>UDTF 的结束方法,您可以在此方法中进行一些资源释放等的操作。</p><p>此方法由框架调用。对于一个 UDF 类实例而言,生命周期中会且只会被调用一次,即在处理完最后一条记录之后被调用。</p><h3 id="完整-maven-项目示例" tabindex="-1"><a class="header-anchor" href="#完整-maven-项目示例"><span>完整 Maven 项目示例</span></a></h3>`,69),g={href:"http://search.maven.org/",target:"_blank",rel:"noopener noreferrer"},b={href:"https://github.com/apache/iotdb/tree/master/example/udf",target:"_blank",rel:"noopener noreferrer"},f=e(`<h4 id="udf-注册" tabindex="-1"><a class="header-anchor" href="#udf-注册"><span>UDF 注册</span></a></h4><p>注册一个 UDF 可以按如下流程进行:</p><ol><li><p>实现一个完整的 UDF 类,假定这个类的全类名为<code>org.apache.iotdb.udf.UDTFExample</code></p></li><li><p>将项目打成 JAR 包,如果您使用 Maven 管理项目,可以参考上述 Maven 项目示例的写法</p></li><li><p>将 JAR 包放置到目录 
<code>iotdb-server-0.13.0-SNAPSHOT-all-bin/ext/udf</code> (也可以是<code>iotdb-server-0.13.0-SNAPSHOT-all-bin/ext/udf</code>的子目录)下。<br><strong>注意,在部署集群的时候,需要保证每一个节点的 UDF JAR 包路径下都存在相应的 JAR 包。</strong></p><blockquote><p>您可以通过修改配置文件中的<code>udf_root_dir</code>来指定 UDF 加载 Jar 的根路径。</p></blockquote></li><li><p>使用 SQL 语句注册该 UDF,假定赋予该 UDF 的名字为<code>example</code></p></li></ol><p>注册 UDF 的 SQL 语法如下:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">CREATE</span> <span class="token keyword">FUNCTION</span> <span class="token operator">&lt;</span>UDF<span class="token operator">-</span>NAME<span class="token operator">&gt;</span> <span class="token keyword">AS</span> <span class="token operator">&lt;</span>UDF<span class="token operator">-</span>CLASS<span class="token operator">-</span><span class="token keyword">FULL</span><span class="token operator">-</span>PATHNAME<span class="token operator">&gt;</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><p>例子中注册 UDF 的 SQL 语句如下:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">CREATE</span> <span class="token keyword">FUNCTION</span> example <span class="token keyword">AS</span> <span class="token string">&#39;org.apache.iotdb.udf.UDTFExample&#39;</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><p>由于 IoTDB 的 UDF 是通过反射技术动态装载的,因此您在装载过程中无需启停服务器。</p><p>注意:UDF 函数名称是大小写不敏感的。</p><p>注意:请不要给 UDF 函数注册一个内置函数的名字。使用内置函数的名字给 UDF 注册会失败。</p><p>注意:不同的 JAR 包中最好不要有全类名相同但实现功能逻辑不一样的类。例如 UDF(UDAF/UDTF):<code>udf1</code>、<code>udf2</code>分别对应资源<code>udf1.jar</code>、<code>udf2.jar</code>。如果两个 JAR 包里都包含一个<code>org.apache.iotdb.udf.UDTFExample</code>类,当同一个 SQL 中同时使用到这两个 UDF 时,系统会随机加载其中一个类,导致 UDF 执行行为不一致。</p><h4 id="udf-卸载" tabindex="-1"><a class="header-anchor" href="#udf-卸载"><span>UDF 卸载</span></a></h4><p>卸载 UDF 的 SQL 语法如下:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">DROP</span> <span class="token keyword">FUNCTION</span> <span class="token operator">&lt;</span>UDF<span class="token operator">-</span>NAME<span class="token operator">&gt;</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><p>可以通过如下 SQL 语句卸载上面例子中的 UDF:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">DROP</span> <span class="token keyword">FUNCTION</span> example
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><h4 id="udf-查询" tabindex="-1"><a class="header-anchor" href="#udf-查询"><span>UDF 查询</span></a></h4><p>UDF 的使用方法与普通内建函数的类似。</p><h5 id="支持的基础-sql-语法" tabindex="-1"><a class="header-anchor" href="#支持的基础-sql-语法"><span>支持的基础 SQL 语法</span></a></h5><ul><li><code>SLIMIT</code> / <code>SOFFSET</code></li><li><code>LIMIT</code> / <code>OFFSET</code></li><li><code>NON ALIGN</code></li><li>支持值过滤</li><li>支持时间过滤</li></ul><h5 id="对齐时间序列查询" tabindex="-1"><a class="header-anchor" href="#对齐时间序列查询"><span>对齐时间序列查询</span></a></h5><p>UDF 查询目前不支持对对齐时间序列(Aligned Timeseries)进行查询,当您在<code>SELECT</code>子句中选择的序列中包含对齐时间序列时,会提示错误。</p><h5 id="带-查询" tabindex="-1"><a class="header-anchor" href="#带-查询"><span>带 * 查询</span></a></h5><p>假定现在有时间序列 <code>root.sg.d1.s1</code>和 <code>root.sg.d1.s2</code>。</p><ul><li><strong>执行<code>SELECT example(*) from root.sg.d1</code></strong></li></ul><p>那么结果集中将包括<code>example(root.sg.d1.s1)</code>和<code>example(root.sg.d1.s2)</code>的结果。</p><ul><li><strong>执行<code>SELECT example(s1, *) from root.sg.d1</code></strong></li></ul><p>那么结果集中将包括<code>example(root.sg.d1.s1, root.sg.d1.s1)</code>和<code>example(root.sg.d1.s1, root.sg.d1.s2)</code>的结果。</p><ul><li><strong>执行<code>SELECT example(*, *) from root.sg.d1</code></strong></li></ul><p>那么结果集中将包括<code>example(root.sg.d1.s1, root.sg.d1.s1)</code>,<code>example(root.sg.d1.s2, root.sg.d1.s1)</code>,<code>example(root.sg.d1.s1, root.sg.d1.s2)</code> 和 <code>example(root.sg.d1.s2, root.sg.d1.s2)</code>的结果。</p><h5 id="带自定义输入参数的查询" tabindex="-1"><a class="header-anchor" href="#带自定义输入参数的查询"><span>带自定义输入参数的查询</span></a></h5><p>您可以在进行 UDF 查询的时候,向 UDF 传入任意数量的键值对参数。键值对中的键和值都需要被单引号或者双引号引起来。注意,键值对参数只能在所有时间序列后传入。下面是一组例子:</p><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">SELECT</span> example<span class="token punctuation">(</span>s1<span class="token 
punctuation">,</span> <span class="token string">&#39;key1&#39;</span><span class="token operator">=</span><span class="token string">&#39;value1&#39;</span><span class="token punctuation">,</span> <span class="token string">&#39;key2&#39;</span><span class="token operator">=</span><span class="token string">&#39;value2&#39;</span><span class="token punctuation">)</span><span class="token punctuation">,</span> example<span class="token punctuation">(</span><span class="token operator">*</span><span class="token punctuation">,</span> <span class="token string">&#39;key3&#39;</span><span class="token operator">=</span><span class="token string">&#39;value3&#39;</span><span class="token punctuation">)</span> <span class="token keyword">FROM</span> root<span class="token punctuation">.</span>sg<span class="token punctuation">.</span>d1<span class="token punctuation">;</span>
<span class="token keyword">SELECT</span> example<span class="token punctuation">(</span>s1<span class="token punctuation">,</span> s2<span class="token punctuation">,</span> <span class="token string">&#39;key1&#39;</span><span class="token operator">=</span><span class="token string">&#39;value1&#39;</span><span class="token punctuation">,</span> <span class="token string">&#39;key2&#39;</span><span class="token operator">=</span><span class="token string">&#39;value2&#39;</span><span class="token punctuation">)</span> <span class="token keyword">FROM</span> root<span class="token punctuation">.</span>sg<span class="token punctuation">.</span>d1<span class="token punctuation">;</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div></div></div><h5 id="与其他查询的嵌套查询" tabindex="-1"><a class="header-anchor" href="#与其他查询的嵌套查询"><span>与其他查询的嵌套查询</span></a></h5><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">SELECT</span> s1<span class="token punctuation">,</span> s2<span class="token punctuation">,</span> example<span class="token punctuation">(</span>s1<span class="token punctuation">,</span> s2<span class="token punctuation">)</span> <span class="token keyword">FROM</span> root<span class="token punctuation">.</span>sg<span class="token punctuation">.</span>d1<span class="token punctuation">;</span>
<span class="token keyword">SELECT</span> <span class="token operator">*</span><span class="token punctuation">,</span> example<span class="token punctuation">(</span><span class="token operator">*</span><span class="token punctuation">)</span> <span class="token keyword">FROM</span> root<span class="token punctuation">.</span>sg<span class="token punctuation">.</span>d1 <span class="token keyword">DISABLE</span> ALIGN<span class="token punctuation">;</span>
<span class="token keyword">SELECT</span> s1 <span class="token operator">*</span> example<span class="token punctuation">(</span><span class="token operator">*</span> <span class="token operator">/</span> s1 <span class="token operator">+</span> s2<span class="token punctuation">)</span> <span class="token keyword">FROM</span> root<span class="token punctuation">.</span>sg<span class="token punctuation">.</span>d1<span class="token punctuation">;</span>
<span class="token keyword">SELECT</span> s1<span class="token punctuation">,</span> s2<span class="token punctuation">,</span> s1 <span class="token operator">+</span> example<span class="token punctuation">(</span>s1<span class="token punctuation">,</span> s2<span class="token punctuation">)</span><span class="token punctuation">,</span> s1 <span class="token operator">-</span> example<span class="token punctuation">(</span>s1 <span class="token operator">+</span> example<span class="token punctuation">(</span>s1<span class="token punctuation">,</span> s2<span class="token punctuation">)</span> <span class="token operator">/</span> s2<span class="token punctuation">)</span> <span class="token keyword">FROM</span> root<span class="token punctuation">.</span>sg<span class="token punctuation">.</span>d1<span class="token punctuation">;</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h4 id="查看所有注册的-udf" tabindex="-1"><a class="header-anchor" href="#查看所有注册的-udf"><span>查看所有注册的 UDF</span></a></h4><div class="language-sql line-numbers-mode" data-ext="sql" data-title="sql"><pre class="language-sql"><code><span class="token keyword">SHOW</span> FUNCTIONS
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div></div></div><h4 id="用户权限管理" tabindex="-1"><a class="header-anchor" href="#用户权限管理"><span>用户权限管理</span></a></h4><p>用户在使用 UDF 时会涉及到 3 种权限:</p><ul><li><code>CREATE_FUNCTION</code>:具备该权限的用户才被允许执行 UDF 注册操作</li><li><code>DROP_FUNCTION</code>:具备该权限的用户才被允许执行 UDF 卸载操作</li><li><code>READ_TIMESERIES</code>:具备该权限的用户才被允许使用 UDF 进行查询</li></ul>`,40),h=e('<h4 id="配置项" tabindex="-1"><a class="header-anchor" href="#配置项"><span>配置项</span></a></h4><p>在 SQL 语句中使用自定义函数时,可能提示内存不足。这种情况下,您可以通过更改配置文件<code>iotdb-engine.properties</code>中的<code>udf_initial_byte_array_length_for_memory_control</code>,<code>udf_memory_budget_in_mb</code>和<code>udf_reader_transformer_collector_memory_proportion</code>并重启服务来解决此问题。</p><h3 id="贡献-udf" tabindex="-1"><a class="header-anchor" href="#贡献-udf"><span>贡献 UDF</span></a></h3><p>该部分主要讲述了外部用户如何将自己编写的 UDF 贡献给 IoTDB 社区。</p><h4 id="前提条件" tabindex="-1"><a class="header-anchor" href="#前提条件"><span>前提条件</span></a></h4><ol><li><p>UDF 具有通用性。</p><p>通用性主要指的是:UDF 在某些业务场景下,可以被广泛使用。换言之,就是 UDF 具有复用价值,可被社区内其他用户直接使用。</p><p>如果您不确定自己写的 UDF 是否具有通用性,可以发邮件到 <code>dev@iotdb.apache.org</code> 或直接创建 ISSUE 发起讨论。</p></li><li><p>UDF 已经完成测试,且能够正常运行在用户的生产环境中。</p></li></ol><h4 id="贡献清单" tabindex="-1"><a class="header-anchor" href="#贡献清单"><span>贡献清单</span></a></h4><ol><li>UDF 的源代码</li><li>UDF 的测试用例</li><li>UDF 的使用说明</li></ol><h5 id="源代码" tabindex="-1"><a class="header-anchor" href="#源代码"><span>源代码</span></a></h5><ol><li>在<code>src/main/java/org/apache/iotdb/db/query/udf/builtin</code>或者它的子文件夹中创建 UDF 主类和相关的辅助类。</li><li>在<code>src/main/java/org/apache/iotdb/db/query/udf/builtin/BuiltinFunction.java</code>中注册您编写的 UDF。</li></ol><h5 id="测试用例" tabindex="-1"><a class="header-anchor" href="#测试用例"><span>测试用例</span></a></h5><p>您至少需要为您贡献的 UDF 编写集成测试。</p><p>您可以在<code>server/src/test/java/org/apache/iotdb/db/integration</code>中为您贡献的 UDF 新增一个测试类进行测试。</p><h5 id="使用说明" tabindex="-1"><a class="header-anchor" 
href="#使用说明"><span>使用说明</span></a></h5><p>使用说明需要包含:UDF 的名称、UDF 的作用、执行函数必须的属性参数、函数的适用的场景以及使用示例等。</p><p>使用说明需包含中英文两个版本。应分别在 <code>docs/zh/UserGuide/Operation Manual/DML Data Manipulation Language.md</code> 和 <code>docs/UserGuide/Operation Manual/DML Data Manipulation Language.md</code> 中新增使用说明。</p><h4 id="提交-pr" tabindex="-1"><a class="header-anchor" href="#提交-pr"><span>提交 PR</span></a></h4>',17),y={href:"https://github.com/apache/iotdb",target:"_blank",rel:"noopener noreferrer"},w={href:"https://iotdb.apache.org/Development/HowToCommit.html",target:"_blank",rel:"noopener noreferrer"},D=s("p",null,"当 PR 评审通过并被合并后,您的 UDF 就已经贡献给 IoTDB 社区了!",-1),F=s("h3",{id:"已知的-udf-库实现",tabindex:"-1"},[s("a",{class:"header-anchor",href:"#已知的-udf-库实现"},[s("span",null,"已知的 UDF 库实现")])],-1),U={href:"https://iotdb.apache.org/zh/UserGuide/Master/UDF-Library/Quick-Start.html",target:"_blank",rel:"noopener noreferrer"},x=e('<h3 id="q-a" tabindex="-1"><a class="header-anchor" href="#q-a"><span>Q&amp;A</span></a></h3><p>Q1: 如何修改已经注册的 UDF?</p><p>A1: 假设 UDF 的名称为<code>example</code>,全类名为<code>org.apache.iotdb.udf.UDTFExample</code>,由<code>example.jar</code>引入</p><ol><li>首先卸载已经注册的<code>example</code>函数,执行<code>DROP FUNCTION example</code></li><li>删除 <code>iotdb-server-0.13.0-SNAPSHOT-all-bin/ext/udf</code> 目录下的<code>example.jar</code></li><li>修改<code>org.apache.iotdb.udf.UDTFExample</code>中的逻辑,重新打包,JAR 包的名字可以仍然为<code>example.jar</code></li><li>将新的 JAR 包上传至 <code>iotdb-server-0.13.0-SNAPSHOT-all-bin/ext/udf</code> 目录下</li><li>装载新的 UDF,执行<code>CREATE FUNCTION example AS &quot;org.apache.iotdb.udf.UDTFExample&quot;</code></li></ol>',4);function T(S,E){const t=o("ExternalLinkIcon"),p=o("RouteLink");return l(),i("div",null,[r,s("p",null,[n("如果您使用 "),s("a",k,[n("Maven"),a(t)]),n(",可以从 "),s("a",m,[n("Maven 库"),a(t)]),n(" 中搜索下面示例中的依赖。请注意选择和目标 IoTDB 服务器版本相同的依赖版本。")]),v,s("p",null,[n("如果您使用 "),s("a",g,[n("Maven"),a(t)]),n(",可以参考我们编写的示例项目** udf-example**。您可以在 "),s("a",b,[n("这里"),a(t)]),n(" 
找到它。")]),f,s("p",null,[n("更多用户权限相关的内容,请参考 "),a(p,{to:"/zh/UserGuide/V0.13.x/Administration-Management/Administration.html"},{default:u(()=>[n("权限管理语句")]),_:1}),n("。")]),h,s("p",null,[n("当您准备好源代码、测试用例和使用说明后,就可以将 UDF 贡献到 IoTDB 社区了。在 "),s("a",y,[n("Github"),a(t)]),n(" 上面提交 Pull Request (PR) 即可。具体提交方式见:"),s("a",w,[n("Pull Request Guide"),a(t)]),n("。")]),D,F,s("ul",null,[s("li",null,[s("a",U,[n("IoTDB-Quality"),a(t)]),n(",一个关于数据质量的 UDF 库实现,包括数据画像、数据质量评估与修复等一系列函数。")])]),x])}const q=c(d,[["render",T],["__file","UDF-User-Defined-Function.html.vue"]]),R=JSON.parse('{"path":"/zh/UserGuide/V0.13.x/Process-Data/UDF-User-Defined-Function.html","title":"数据处理","lang":"zh-CN","frontmatter":{"description":"数据处理 用户定义函数 (UDF) UDF(User Defined Function)即用户自定义函数。IoTDB 提供多种内建函数来满足您的计算需求,同时您还可以通过创建自定义函数来满足更多的计算需求。 根据此文档,您将会很快学会 UDF 的编写、注册、使用等操作。 UDF 类型 IoTDB 支持两种类型的 UDF 函数,如下表所示。 UDF 依赖...","head":[["link",{"rel":"alternate","hreflang":"en-us","href":"https://iotdb.apache.org/UserGuide/V0.13.x/Process-Data/UDF-User-Defined-Function.html"}],["meta",{"property":"og:url","content":"https://iotdb.apache.org/zh/UserGuide/V0.13.x/Process-Data/UDF-User-Defined-Function.html"}],["meta",{"property":"og:site_name","content":"IoTDB Website"}],["meta",{"property":"og:title","content":"数据处理"}],["meta",{"property":"og:description","content":"数据处理 用户定义函数 (UDF) UDF(User Defined Function)即用户自定义函数。IoTDB 提供多种内建函数来满足您的计算需求,同时您还可以通过创建自定义函数来满足更多的计算需求。 根据此文档,您将会很快学会 UDF 的编写、注册、使用等操作。 UDF 类型 IoTDB 支持两种类型的 UDF 函数,如下表所示。 UDF 
依赖..."}],["meta",{"property":"og:type","content":"article"}],["meta",{"property":"og:locale","content":"zh-CN"}],["meta",{"property":"og:locale:alternate","content":"en-US"}],["meta",{"property":"og:updated_time","content":"2023-07-10T03:11:17.000Z"}],["meta",{"property":"article:modified_time","content":"2023-07-10T03:11:17.000Z"}],["script",{"type":"application/ld+json"},"{\\"@context\\":\\"https://schema.org\\",\\"@type\\":\\"Article\\",\\"headline\\":\\"数据处理\\",\\"image\\":[\\"\\"],\\"dateModified\\":\\"2023-07-10T03:11:17.000Z\\",\\"author\\":[]}"]]},"headers":[{"level":2,"title":"用户定义函数 (UDF)","slug":"用户定义函数-udf","link":"#用户定义函数-udf","children":[{"level":3,"title":"UDF 类型","slug":"udf-类型","link":"#udf-类型","children":[]},{"level":3,"title":"UDF 依赖","slug":"udf-依赖","link":"#udf-依赖","children":[]},{"level":3,"title":"UDTF(User Defined Timeseries Generating Function)","slug":"udtf-user-defined-timeseries-generating-function","link":"#udtf-user-defined-timeseries-generating-function","children":[]},{"level":3,"title":"完整 Maven 项目示例","slug":"完整-maven-项目示例","link":"#完整-maven-项目示例","children":[]},{"level":3,"title":"贡献 UDF","slug":"贡献-udf","link":"#贡献-udf","children":[]},{"level":3,"title":"已知的 UDF 库实现","slug":"已知的-udf-库实现","link":"#已知的-udf-库实现","children":[]},{"level":3,"title":"Q&A","slug":"q-a","link":"#q-a","children":[]}]}],"git":{"createdTime":1688958677000,"updatedTime":1688958677000,"contributors":[{"name":"CritasWang","email":"critas@outlook.com","commits":1}]},"readingTime":{"minutes":17.61,"words":5282},"filePathRelative":"zh/UserGuide/V0.13.x/Process-Data/UDF-User-Defined-Function.md","localizedDate":"2023年7月10日","autoDesc":true}');export{q as comp,R as data};