blob: 31d25c427e277fbe0d8089f17a50bf76661b2130 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Apache Flink: Advanced Flink Application Patterns Vol.2: Dynamic Updates of Application Logic</title>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Bootstrap -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/flink.css">
<link rel="stylesheet" href="/css/syntax.css">
<!-- Blog RSS feed -->
<link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" />
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<!-- We need to load Jquery in the header for custom google analytics event tracking-->
<script src="/js/jquery.min.js"></script>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Main content. -->
<div class="container">
<div class="row">
<div id="sidebar" class="col-sm-3">
<!-- Top navbar. -->
<nav class="navbar navbar-default">
<!-- The logo. -->
<div class="navbar-header">
<!-- Mobile menu toggle: collapses/expands the element targeted by data-target. -->
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false">
<!-- Text alternative for the icon-only button; .sr-only keeps it visually hidden. -->
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<div class="navbar-logo">
<a href="/">
<img alt="Apache Flink" src="/img/flink-header-logo.svg" width="147px" height="73px">
</a>
</div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-main">
<!-- First menu section explains visitors what Flink is -->
<!-- What is Stream Processing? -->
<!--
<li><a href="/streamprocessing1.html">What is Stream Processing?</a></li>
-->
<!-- What is Flink? -->
<li><a href="/flink-architecture.html">What is Apache Flink?</a></li>
<!-- What is Stateful Functions? -->
<li><a href="/stateful-functions.html">What is Stateful Functions?</a></li>
<!-- Use cases -->
<li><a href="/usecases.html">Use Cases</a></li>
<!-- Powered by -->
<li><a href="/poweredby.html">Powered By</a></li>
&nbsp;
<!-- Second menu section aims to support Flink users -->
<!-- Downloads -->
<li><a href="/downloads.html">Downloads</a></li>
<!-- Getting Started -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Getting Started<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/getting-started/index.html" target="_blank">With Flink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1/getting-started/project-setup.html" target="_blank">With Flink Stateful Functions <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="/training.html">Training Course</a></li>
</ul>
</li>
<!-- Documentation -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10" target="_blank">Flink 1.10 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-docs-master" target="_blank">Flink Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1" target="_blank">Flink Stateful Functions 2.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-master" target="_blank">Flink Stateful Functions Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
</ul>
</li>
<!-- getting help -->
<li><a href="/gettinghelp.html">Getting Help</a></li>
<!-- Blog -->
<li class="active"><a href="/blog/"><b>Flink Blog</b></a></li>
<!-- Flink-packages -->
<li>
<a href="https://flink-packages.org" target="_blank">flink-packages.org <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Third menu section aim to support community and contributors -->
<!-- Community -->
<li><a href="/community.html">Community &amp; Project Info</a></li>
<!-- Roadmap -->
<li><a href="/roadmap.html">Roadmap</a></li>
<!-- Contribute -->
<li><a href="/contributing/how-to-contribute.html">How to Contribute</a></li>
<!-- GitHub -->
<li>
<a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Language Switcher -->
<li>
<!-- link to the Chinese home page when current is blog page -->
<a href="/zh">中文版</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-bottom">
<hr />
<!-- Twitter -->
<li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<!-- Visualizer -->
<li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<hr />
<li><a href="https://apache.org" target="_blank">Apache Software Foundation <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li>
<style>
.smalllinks:link {
display: inline-block !important; background: none; padding-top: 0px; padding-bottom: 0px; padding-right: 0px; min-width: 75px;
}
</style>
<a class="smalllinks" href="https://www.apache.org/licenses/" target="_blank">License</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/security/" target="_blank">Security</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Donate</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</nav>
</div>
<div class="col-sm-9">
<div class="row-fluid">
<div class="col-sm-12">
<div class="row">
<h1>Advanced Flink Application Patterns Vol.2: Dynamic Updates of Application Logic</h1>
<p><i></i></p>
<article>
<p>24 Mar 2020 Alexander Fedulov (<a href="https://twitter.com/alex_fedulov">@alex_fedulov</a>)</p>
<p>In the <a href="https://flink.apache.org/news/2020/01/15/demo-fraud-detection.html">first article</a> of the series, we gave a high-level description of the objectives and required functionality of a Fraud Detection engine. We also described how to make data partitioning in Apache Flink customizable based on modifiable rules instead of using a hardcoded <code>KeysExtractor</code> implementation.</p>
<p>We intentionally omitted details of how the applied rules are initialized and what possibilities exist for updating them at runtime. In this post, we will address exactly these details. You will learn how the approach to data partitioning described in <a href="https://flink.apache.org/news/2020/01/15/demo-fraud-detection.html">Part 1</a> can be applied in combination with a dynamic configuration. These two patterns, when used together, can eliminate the need to recompile the code and redeploy your Flink job for a wide range of modifications of the business logic.</p>
<h2 id="rules-broadcasting">Rules Broadcasting</h2>
<p>Let’s first have a look at the <a href="https://flink.apache.org/news/2020/01/15/demo-fraud-detection.html#dynamic-data-partitioning">previously-defined</a> data-processing pipeline:</p>
<div class="highlight"><pre><code class="language-java"><span class="n">DataStream</span><span class="o">&lt;</span><span class="n">Alert</span><span class="o">&gt;</span> <span class="n">alerts</span> <span class="o">=</span>
<span class="n">transactions</span>
<span class="o">.</span><span class="na">process</span><span class="o">(</span><span class="k">new</span> <span class="nf">DynamicKeyFunction</span><span class="o">())</span>
<span class="o">.</span><span class="na">keyBy</span><span class="o">((</span><span class="n">keyed</span><span class="o">)</span> <span class="o">-&gt;</span> <span class="n">keyed</span><span class="o">.</span><span class="na">getKey</span><span class="o">())</span>
<span class="o">.</span><span class="na">process</span><span class="o">(</span><span class="k">new</span> <span class="nf">DynamicAlertFunction</span><span class="o">());</span></code></pre></div>
<p><code>DynamicKeyFunction</code> provides dynamic data partitioning while <code>DynamicAlertFunction</code> is responsible for executing the main logic of processing transactions and sending alert messages according to defined rules.</p>
<p>Vol.1 of this series simplified the use case and assumed that the applied set of rules is pre-initialized and accessible via the <code>List&lt;Rule&gt;</code> within <code>DynamicKeyFunction</code>.</p>
<div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kd">class</span> <span class="nc">DynamicKeyFunction</span>
<span class="kd">extends</span> <span class="n">ProcessFunction</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">,</span> <span class="n">Keyed</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">,</span> <span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;&gt;</span> <span class="o">{</span>
<span class="cm">/* Simplified */</span>
<span class="n">List</span><span class="o">&lt;</span><span class="n">Rule</span><span class="o">&gt;</span> <span class="n">rules</span> <span class="o">=</span> <span class="cm">/* Rules that are initialized somehow.*/</span><span class="o">;</span>
<span class="o">...</span>
<span class="o">}</span></code></pre></div>
<p>Adding rules to this list is obviously possible directly inside the code of the Flink Job at the stage of its initialization (Create a <code>List</code> object; use its <code>add</code> method). A major drawback of doing so is that it will require recompilation of the job with each rule modification. In a real Fraud Detection system, rules are expected to change on a frequent basis, making this approach unacceptable from the point of view of business and operational requirements. A different approach is needed.</p>
<p>Next, let’s take a look at a sample rule definition that we introduced in the previous post of the series:</p>
<center>
<img src="/img/blog/patterns-blog-2/rule-dsl.png" width="800px" alt="Figure 1: Rule definition" />
<br />
<i><small>Figure 1: Rule definition</small></i>
</center>
<p><br /></p>
<p>The previous post covered use of <code>groupingKeyNames</code> by <code>DynamicKeyFunction</code> to extract message keys. Parameters from the second part of this rule are used by <code>DynamicAlertFunction</code>: they define the actual logic of the performed operations and their parameters (such as the alert-triggering limit). This means that the same rule must be present in both <code>DynamicKeyFunction</code> and <code>DynamicAlertFunction</code>. To achieve this result, we will use the <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/dev/stream/state/broadcast_state.html">broadcast data distribution mechanism</a> of Apache Flink.</p>
<p>Figure 2 presents the final job graph of the system that we are building:</p>
<center>
<img src="/img/blog/patterns-blog-2/job-graph.png" width="800px" alt="Figure 2: Job Graph of the Fraud Detection Flink Job" />
<br />
<i><small>Figure 2: Job Graph of the Fraud Detection Flink Job</small></i>
</center>
<p><br /></p>
<p>The main blocks of the Transactions processing pipeline are:<br /></p>
<ul>
<li>
<p><strong>Transaction Source</strong> that consumes transaction messages from Kafka partitions in parallel. <br /></p>
</li>
<li>
<p><strong>Dynamic Key Function</strong> that performs data enrichment with a dynamic key. The subsequent <code>keyBy</code> hashes this dynamic key and partitions the data accordingly among all parallel instances of the following operator.</p>
</li>
<li>
<p><strong>Dynamic Alert Function</strong> that accumulates a data window and creates Alerts based on it.</p>
</li>
</ul>
<h2 id="data-exchange-inside-apache-flink">Data Exchange inside Apache Flink</h2>
<p>The job graph above also indicates various data exchange patterns between the operators. In order to understand how the broadcast pattern works, let’s take a short detour and discuss what methods of message propagation exist in Apache Flink’s distributed runtime.</p>
<ul>
<li>The <strong>FORWARD</strong> connection after the Transaction Source means that all data consumed by one of the parallel instances of the Transaction Source operator is transferred to exactly one instance of the subsequent <code>DynamicKeyFunction</code> operator. It also indicates the same level of parallelism of the two connected operators (12 in the above case). This communication pattern is illustrated in Figure 3. Orange circles represent transactions, and dotted rectangles depict parallel instances of the conjoined operators.</li>
</ul>
<center>
<img src="/img/blog/patterns-blog-2/forward.png" width="800px" alt="Figure 3: FORWARD message passing across operator instances" />
<br />
<i><small>Figure 3: FORWARD message passing across operator instances</small></i>
</center>
<p><br /></p>
<ul>
<li>The <strong>HASH</strong> connection between <code>DynamicKeyFunction</code> and <code>DynamicAlertFunction</code> means that for each message a hash code is calculated and messages are evenly distributed among available parallel instances of the next operator. Such a connection needs to be explicitly “requested” from Flink by using <code>keyBy</code>.</li>
</ul>
<center>
<img src="/img/blog/patterns-blog-2/hash.png" width="800px" alt="Figure 4: HASHED message passing across operator instances (via `keyBy`)" />
<br />
<i><small>Figure 4: HASHED message passing across operator instances (via <code>keyBy</code>)</small></i>
</center>
<p><br /></p>
<ul>
<li>A <strong>REBALANCE</strong> distribution is either caused by an explicit call to <code>rebalance()</code> or by a change of parallelism (12 -&gt; 1 in the case of the job graph from Figure 2). Calling <code>rebalance()</code> causes data to be repartitioned in a round-robin fashion and can help to mitigate data skew in certain scenarios.</li>
</ul>
<center>
<img src="/img/blog/patterns-blog-2/rebalance.png" width="800px" alt="Figure 5: REBALANCE message passing across operator instances" />
<br />
<i><small>Figure 5: REBALANCE message passing across operator instances</small></i>
</center>
<p><br /></p>
<p>The Fraud Detection job graph in Figure 2 contains an additional data source: <em>Rules Source</em>. It also consumes from Kafka. Rules are “mixed into” the main processing data flow through the <strong>BROADCAST</strong> channel. Unlike other methods of transmitting data between operators, such as <code>forward</code>, <code>hash</code> or <code>rebalance</code> that make each message available for processing in only one of the parallel instances of the receiving operator, <code>broadcast</code> makes each message available at the input of all of the parallel instances of the operator to which the <em>broadcast stream</em> is connected. This makes <code>broadcast</code> applicable to a wide range of tasks that need to affect the processing of all messages, regardless of their key or source partition.</p>
<center>
<img src="/img/blog/patterns-blog-2/broadcast.png" width="800px" alt="Figure 6: BROADCAST message passing across operator instances" />
<br />
<i><small>Figure 6: BROADCAST message passing across operator instances</small></i>
</center>
<p><br /></p>
<div class="alert alert-info">
<p><span class="label label-info" style="display: inline-block"><span class="glyphicon glyphicon-info-sign" aria-hidden="true"></span> Note</span>
There are actually a few more specialized data partitioning schemes in Flink which we did not mention here. If you want to find out more, please refer to Flink’s documentation on <strong><a href="https://ci.apache.org/projects/flink/flink-docs-stable/dev/stream/operators/#physical-partitioning">stream partitioning</a></strong>.</p>
</div>
<h2 id="broadcast-state-pattern">Broadcast State Pattern</h2>
<p>In order to make use of the Rules Source, we need to “connect” it to the main data stream:</p>
<div class="highlight"><pre><code class="language-java"><span class="c1">// Streams setup</span>
<span class="n">DataStream</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">&gt;</span> <span class="n">transactions</span> <span class="o">=</span> <span class="o">[...]</span>
<span class="n">DataStream</span><span class="o">&lt;</span><span class="n">Rule</span><span class="o">&gt;</span> <span class="n">rulesUpdateStream</span> <span class="o">=</span> <span class="o">[...]</span>
<span class="n">BroadcastStream</span><span class="o">&lt;</span><span class="n">Rule</span><span class="o">&gt;</span> <span class="n">rulesStream</span> <span class="o">=</span> <span class="n">rulesUpdateStream</span><span class="o">.</span><span class="na">broadcast</span><span class="o">(</span><span class="n">RULES_STATE_DESCRIPTOR</span><span class="o">);</span>
<span class="c1">// Processing pipeline setup</span>
<span class="n">DataStream</span><span class="o">&lt;</span><span class="n">Alert</span><span class="o">&gt;</span> <span class="n">alerts</span> <span class="o">=</span>
<span class="n">transactions</span>
<span class="o">.</span><span class="na">connect</span><span class="o">(</span><span class="n">rulesStream</span><span class="o">)</span>
<span class="o">.</span><span class="na">process</span><span class="o">(</span><span class="k">new</span> <span class="nf">DynamicKeyFunction</span><span class="o">())</span>
<span class="o">.</span><span class="na">keyBy</span><span class="o">((</span><span class="n">keyed</span><span class="o">)</span> <span class="o">-&gt;</span> <span class="n">keyed</span><span class="o">.</span><span class="na">getKey</span><span class="o">())</span>
<span class="o">.</span><span class="na">connect</span><span class="o">(</span><span class="n">rulesStream</span><span class="o">)</span>
<span class="o">.</span><span class="na">process</span><span class="o">(</span><span class="k">new</span> <span class="nf">DynamicAlertFunction</span><span class="o">())</span></code></pre></div>
<p>As you can see, the broadcast stream can be created from any regular stream by calling the <code>broadcast</code> method and specifying a state descriptor. Flink assumes that broadcasted data needs to be stored and retrieved while processing events of the main data flow and, therefore, always automatically creates a corresponding <em>broadcast state</em> from this state descriptor. This is different from any other Apache Flink state type in which you need to initialize it in the <code>open()</code> method of the processing function. Also note that broadcast state always has a key-value format (<code>MapState</code>).</p>
<div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kd">static</span> <span class="kd">final</span> <span class="n">MapStateDescriptor</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Rule</span><span class="o">&gt;</span> <span class="n">RULES_STATE_DESCRIPTOR</span> <span class="o">=</span>
<span class="k">new</span> <span class="n">MapStateDescriptor</span><span class="o">&lt;&gt;(</span><span class="s">&quot;rules&quot;</span><span class="o">,</span> <span class="n">Integer</span><span class="o">.</span><span class="na">class</span><span class="o">,</span> <span class="n">Rule</span><span class="o">.</span><span class="na">class</span><span class="o">);</span></code></pre></div>
<p>Connecting to <code>rulesStream</code> causes some changes in the signature of the processing functions. The previous article presented it in a slightly simplified way as a <code>ProcessFunction</code>. However, <code>DynamicKeyFunction</code> is actually a <code>BroadcastProcessFunction</code>.</p>
<div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kd">abstract</span> <span class="kd">class</span> <span class="nc">BroadcastProcessFunction</span><span class="o">&lt;</span><span class="n">IN1</span><span class="o">,</span> <span class="n">IN2</span><span class="o">,</span> <span class="n">OUT</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="kd">public</span> <span class="kd">abstract</span> <span class="kt">void</span> <span class="nf">processElement</span><span class="o">(</span><span class="n">IN1</span> <span class="n">value</span><span class="o">,</span>
<span class="n">ReadOnlyContext</span> <span class="n">ctx</span><span class="o">,</span>
<span class="n">Collector</span><span class="o">&lt;</span><span class="n">OUT</span><span class="o">&gt;</span> <span class="n">out</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span><span class="o">;</span>
<span class="kd">public</span> <span class="kd">abstract</span> <span class="kt">void</span> <span class="nf">processBroadcastElement</span><span class="o">(</span><span class="n">IN2</span> <span class="n">value</span><span class="o">,</span>
<span class="n">Context</span> <span class="n">ctx</span><span class="o">,</span>
<span class="n">Collector</span><span class="o">&lt;</span><span class="n">OUT</span><span class="o">&gt;</span> <span class="n">out</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span><span class="o">;</span>
<span class="o">}</span></code></pre></div>
<p>The difference is the addition of the <code>processBroadcastElement</code> method through which messages of the rules stream will arrive. The following new version of <code>DynamicKeyFunction</code> allows modifying the list of data-distribution keys at runtime through this stream:</p>
<div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kd">class</span> <span class="nc">DynamicKeyFunction</span>
<span class="kd">extends</span> <span class="n">BroadcastProcessFunction</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">,</span> <span class="n">Rule</span><span class="o">,</span> <span class="n">Keyed</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">,</span> <span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;&gt;</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="kt">void</span> <span class="nf">processBroadcastElement</span><span class="o">(</span><span class="n">Rule</span> <span class="n">rule</span><span class="o">,</span>
<span class="n">Context</span> <span class="n">ctx</span><span class="o">,</span>
<span class="n">Collector</span><span class="o">&lt;</span><span class="n">Keyed</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">,</span> <span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;&gt;</span> <span class="n">out</span><span class="o">)</span> <span class="o">{</span>
<span class="n">BroadcastState</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Rule</span><span class="o">&gt;</span> <span class="n">broadcastState</span> <span class="o">=</span> <span class="n">ctx</span><span class="o">.</span><span class="na">getBroadcastState</span><span class="o">(</span><span class="n">RULES_STATE_DESCRIPTOR</span><span class="o">);</span>
<span class="n">broadcastState</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">rule</span><span class="o">.</span><span class="na">getRuleId</span><span class="o">(),</span> <span class="n">rule</span><span class="o">);</span>
<span class="o">}</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="kt">void</span> <span class="nf">processElement</span><span class="o">(</span><span class="n">Transaction</span> <span class="n">event</span><span class="o">,</span>
<span class="n">ReadOnlyContext</span> <span class="n">ctx</span><span class="o">,</span>
<span class="n">Collector</span><span class="o">&lt;</span><span class="n">Keyed</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">,</span> <span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;&gt;</span> <span class="n">out</span><span class="o">){</span>
<span class="n">ReadOnlyBroadcastState</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Rule</span><span class="o">&gt;</span> <span class="n">rulesState</span> <span class="o">=</span>
<span class="n">ctx</span><span class="o">.</span><span class="na">getBroadcastState</span><span class="o">(</span><span class="n">RULES_STATE_DESCRIPTOR</span><span class="o">);</span>
<span class="k">for</span> <span class="o">(</span><span class="n">Map</span><span class="o">.</span><span class="na">Entry</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Rule</span><span class="o">&gt;</span> <span class="n">entry</span> <span class="o">:</span> <span class="n">rulesState</span><span class="o">.</span><span class="na">immutableEntries</span><span class="o">())</span> <span class="o">{</span>
<span class="kd">final</span> <span class="n">Rule</span> <span class="n">rule</span> <span class="o">=</span> <span class="n">entry</span><span class="o">.</span><span class="na">getValue</span><span class="o">();</span>
<span class="n">out</span><span class="o">.</span><span class="na">collect</span><span class="o">(</span>
<span class="k">new</span> <span class="n">Keyed</span><span class="o">&lt;&gt;(</span>
<span class="n">event</span><span class="o">,</span> <span class="n">KeysExtractor</span><span class="o">.</span><span class="na">getKey</span><span class="o">(</span><span class="n">rule</span><span class="o">.</span><span class="na">getGroupingKeyNames</span><span class="o">(),</span> <span class="n">event</span><span class="o">),</span> <span class="n">rule</span><span class="o">.</span><span class="na">getRuleId</span><span class="o">()));</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="o">}</span></code></pre></div>
<p>In the above code, <code>processElement()</code> receives Transactions, and <code>processBroadcastElement()</code> receives Rule updates. When a new rule is created, it is distributed as depicted in Figure 6 and saved in all parallel instances of the operator using <code>processBroadcastElement()</code>. We use a Rule’s ID as the key to store and reference individual rules. Instead of iterating over a hardcoded <code>List&lt;Rule&gt;</code>, we iterate over entries in the dynamically-updated broadcast state.</p>
<p><code>DynamicAlertFunction</code> follows the same logic with respect to storing the rules in the broadcast <code>MapState</code>. As described in <a href="https://flink.apache.org/news/2020/01/15/demo-fraud-detection.html">Part 1</a>, each message in the <code>processElement</code> input is intended to be processed by one specific rule and comes “pre-marked” with a corresponding ID by <code>DynamicKeyFunction</code>. All we need to do is retrieve the definition of the corresponding rule from <code>BroadcastState</code> by using the provided ID and process it according to the logic required by that rule. At this stage, we will also add messages to the internal function state in order to perform calculations on the required time window of data. We will consider how this is done in the final blog of the series about Fraud Detection.</p>
<h2 id="summary">Summary</h2>
<p>In this blog post, we continued our investigation of the use case of a Fraud Detection System built with Apache Flink. We looked into different ways in which data can be distributed between parallel operator instances and, most importantly, examined broadcast state. We demonstrated how dynamic partitioning — a pattern described in the <a href="https://flink.apache.org/news/2020/01/15/demo-fraud-detection.html">first part</a> of the series — can be combined and enhanced by the functionality provided by the broadcast state pattern. The ability to send dynamic updates at runtime is a powerful feature of Apache Flink that is applicable in a variety of other use cases, such as controlling state (cleanup/insert/fix), running A/B experiments or executing updates of ML model coefficients.</p>
</article>
</div>
<div class="row">
<div id="disqus_thread"></div>
<script type="text/javascript">
/* Loads the Disqus embed script for the forum named by disqus_shortname. */
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname
/* * * DON'T EDIT BELOW THIS LINE * * */
(function() {
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
// Explicit https: a protocol-relative URL would inherit file:// or http:// from the hosting page.
dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
// Append to <head> when present, otherwise fall back to <body>.
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
</div>
</div>
</div>
</div>
</div>
<hr />
<div class="row">
<div class="footer text-center col-sm-12">
<p>Copyright © 2014-2019 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
<p>Apache Flink, Flink®, Apache®, the squirrel logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p>
<p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
</div>
</div>
</div><!-- /.container -->
<!-- Include all compiled plugins (below), or include individual files as needed -->
<!-- Bootstrap JS version matches the Bootstrap CSS (3.4.1) referenced in the document head. -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.matchHeight/0.7.0/jquery.matchHeight-min.js"></script>
<script src="/js/codetabs.js"></script>
<script src="/js/stickysidebar.js"></script>
<!-- Google Analytics -->
<script>
// Google Analytics (analytics.js) async loader: defines a queuing ga() stub on window,
// then inserts a <script> element for analytics.js before the first script on the page.
// The script URL is pinned to https instead of being protocol-relative.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Queue tracker creation and the initial pageview hit.
ga('create', 'UA-52545728-1', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>