blob: dcb51b43a84d60323e22d537776fc6ec15299252 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Apache Flink: Advanced Flink Application Patterns Vol.1: Case Study of a Fraud Detection System</title>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Bootstrap -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/flink.css">
<link rel="stylesheet" href="/css/syntax.css">
<!-- Blog RSS feed -->
<link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" />
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<!-- We need to load Jquery in the header for custom google analytics event tracking-->
<script src="/js/jquery.min.js"></script>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Main content. -->
<div class="container">
<div class="row">
<div id="sidebar" class="col-sm-3">
<!-- Top navbar. -->
<nav class="navbar navbar-default">
<!-- The logo. -->
<div class="navbar-header">
<!-- Collapse toggle shown on small screens. The sr-only label gives this icon-only button an accessible name. -->
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<div class="navbar-logo">
<a href="/">
<img alt="Apache Flink" src="/img/flink-header-logo.svg" width="147px" height="73px">
</a>
</div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-main">
<!-- First menu section explains visitors what Flink is -->
<!-- What is Stream Processing? -->
<!--
<li><a href="/streamprocessing1.html">What is Stream Processing?</a></li>
-->
<!-- What is Flink? -->
<li><a href="/flink-architecture.html">What is Apache Flink?</a></li>
<!-- What is Stateful Functions? -->
<li><a href="/stateful-functions.html">What is Stateful Functions?</a></li>
<!-- Use cases -->
<li><a href="/usecases.html">Use Cases</a></li>
<!-- Powered by -->
<li><a href="/poweredby.html">Powered By</a></li>
&nbsp;
<!-- Second menu section aims to support Flink users -->
<!-- Downloads -->
<li><a href="/downloads.html">Downloads</a></li>
<!-- Getting Started -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Getting Started<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.11/getting-started/index.html" target="_blank">With Flink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1/getting-started/project-setup.html" target="_blank">With Flink Stateful Functions <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="/training.html">Training Course</a></li>
</ul>
</li>
<!-- Documentation -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.11" target="_blank">Flink 1.11 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-docs-master" target="_blank">Flink Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1" target="_blank">Flink Stateful Functions 2.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-master" target="_blank">Flink Stateful Functions Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
</ul>
</li>
<!-- getting help -->
<li><a href="/gettinghelp.html">Getting Help</a></li>
<!-- Blog -->
<li class="active"><a href="/blog/"><b>Flink Blog</b></a></li>
<!-- Flink-packages -->
<li>
<a href="https://flink-packages.org" target="_blank">flink-packages.org <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Third menu section aim to support community and contributors -->
<!-- Community -->
<li><a href="/community.html">Community &amp; Project Info</a></li>
<!-- Roadmap -->
<li><a href="/roadmap.html">Roadmap</a></li>
<!-- Contribute -->
<li><a href="/contributing/how-to-contribute.html">How to Contribute</a></li>
<!-- GitHub -->
<li>
<a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Language Switcher -->
<li>
<!-- link to the Chinese home page when current is blog page -->
<a href="/zh">中文版</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-bottom">
<hr />
<!-- Twitter -->
<li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<!-- Visualizer -->
<li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<hr />
<li><a href="https://apache.org" target="_blank">Apache Software Foundation <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li>
<style>
.smalllinks:link {
display: inline-block !important; background: none; padding-top: 0px; padding-bottom: 0px; padding-right: 0px; min-width: 75px;
}
</style>
<a class="smalllinks" href="https://www.apache.org/licenses/" target="_blank">License</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/security/" target="_blank">Security</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Donate</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</nav>
</div>
<div class="col-sm-9">
<div class="row-fluid">
<div class="col-sm-12">
<div class="row">
<h1>Advanced Flink Application Patterns Vol.1: Case Study of a Fraud Detection System</h1>
<p><i></i></p>
<article>
<p>15 Jan 2020 Alexander Fedulov (<a href="https://twitter.com/alex_fedulov">@alex_fedulov</a>)</p>
<p>In this series of blog posts you will learn about three powerful Flink patterns for building streaming applications:</p>
<ul>
<li><a href="/news/2020/03/24/demo-fraud-detection-2.html">Dynamic updates of application logic</a></li>
<li>Dynamic data partitioning (shuffle), controlled at runtime</li>
<li>Low latency alerting based on custom windowing logic (without using the window API)</li>
</ul>
<p>These patterns expand the possibilities of what is achievable with statically defined data flows and provide the building blocks to fulfill complex business requirements.</p>
<p><strong>Dynamic updates of application logic</strong> allow Flink jobs to change at runtime, without downtime from stopping and resubmitting the code.<br />
<br />
<strong>Dynamic data partitioning</strong> provides the ability to change how events are distributed and grouped by Flink at runtime. Such functionality often becomes a natural requirement when building jobs with dynamically reconfigurable application logic.<br />
<br />
<strong>Custom window management</strong> demonstrates how you can utilize the low-level <a href="https://ci.apache.org/projects/flink/flink-docs-stable/dev/stream/operators/process_function.html">process function API</a> when the native <a href="https://ci.apache.org/projects/flink/flink-docs-stable/dev/stream/operators/windows.html">window API</a> does not exactly match your requirements. Specifically, you will learn how to implement low-latency alerting on windows and how to limit state growth with timers.</p>
<p>These patterns build on top of core Flink functionality; however, they might not be immediately apparent from the framework’s documentation, as explaining and presenting the motivation behind them is not always trivial without a concrete use case. That is why we will showcase these patterns with a practical example that offers a real-world usage scenario for Apache Flink — a <em>Fraud Detection</em> engine.
We hope that this series will place these powerful approaches into your tool belt and enable you to take on new and exciting tasks.</p>
<p>In the first blog post of the series we will look at the high-level architecture of the demo application, describe its components and their interactions. We will then deep dive into the implementation details of the first pattern in the series - <strong>dynamic data partitioning</strong>.</p>
<p>You will be able to run the full Fraud Detection Demo application locally and look into the details of the implementation by using the accompanying GitHub repository.</p>
<h3 id="fraud-detection-demo">Fraud Detection Demo</h3>
<p>The full source code for our fraud detection demo is open source and available online. To run it locally, check out the following repository and follow the steps in the README:</p>
<p><a href="https://github.com/afedulov/fraud-detection-demo">https://github.com/afedulov/fraud-detection-demo</a></p>
<p>You will see the demo is a self-contained application - it only requires <code>docker</code> and <code>docker-compose</code> to be built from sources and includes the following components:</p>
<ul>
<li>Apache Kafka (message broker) with ZooKeeper</li>
<li>Apache Flink (<a href="https://ci.apache.org/projects/flink/flink-docs-stable/concepts/glossary.html#flink-application-cluster">application cluster</a>)</li>
<li>Fraud Detection Web App</li>
</ul>
<p>The high-level goal of the Fraud Detection engine is to consume a stream of financial transactions and evaluate them against a set of rules. These rules are subject to frequent changes and tweaks. In a real production system, it is important to be able to add and remove them at runtime, without incurring an expensive penalty of stopping and restarting the job.</p>
<p>When you navigate to the demo URL in your browser, you will be presented with the following UI:</p>
<center>
<img src="/img/blog/2019-11-19-demo-fraud-detection/ui.png" width="800px" alt="Figure 1: Demo UI" />
<br />
<i><small>Figure 1: Fraud Detection Demo UI</small></i>
</center>
<p><br /></p>
<p>On the left side, you can see a visual representation of financial transactions flowing through the system after you click the “Start” button. The slider at the top allows you to control the number of generated transactions per second. The middle section is devoted to managing the rules evaluated by Flink. From here, you can create new rules as well as issue control commands, such as clearing Flink’s state.</p>
<p>Out of the box, the demo comes with a set of predefined sample rules. You can click the <em>Start</em> button and, after some time, you will observe alerts displayed in the right section of the UI. These alerts are the result of Flink evaluating the generated transaction stream against the predefined rules.</p>
<p>Our sample fraud detection system consists of three main components:</p>
<ol>
<li>Frontend (React)</li>
<li>Backend (SpringBoot)</li>
<li>Fraud Detection application (Apache Flink)</li>
</ol>
<p>Interactions between the main elements are depicted in <em>Figure 2</em>.</p>
<center>
<img src="/img/blog/2019-11-19-demo-fraud-detection/architecture.png" width="800px" alt="Figure 2: Demo Components" />
<br />
<i><small>Figure 2: Fraud Detection Demo Components</small></i>
</center>
<p><br /></p>
<p>The Backend exposes a REST API to the Frontend for creating/deleting rules as well as issuing control commands for managing the demo execution. It then relays those Frontend actions to Flink by sending them via a “Control” Kafka topic. The Backend additionally includes a <em>Transactions Generator</em> component, which sends an emulated stream of money transfer events to Flink via a separate “Transactions” topic. Alerts generated by Flink are consumed by the Backend from the “Alerts” topic and relayed to the UI via WebSockets.</p>
<p>Now that you are familiar with the overall layout and the goal of our Fraud Detection engine, let’s go into the details of what is required to implement such a system.</p>
<h3 id="dynamic-data-partitioning">Dynamic Data Partitioning</h3>
<p>The first pattern we will look into is Dynamic Data Partitioning.</p>
<p>If you have used Flink’s DataStream API in the past, you are undoubtedly familiar with the <strong>keyBy</strong> method. Keying a stream shuffles all the records such that elements with the same key are assigned to the same partition. This means all records with the same key are processed by the same physical instance of the next operator.</p>
<p>In a typical streaming application, the choice of key is fixed, determined by some static field within the elements. For instance, when building a simple window-based aggregation of a stream of transactions, we might always group by the transaction’s account ID.</p>
<div class="highlight"><pre><code class="language-java"><span class="n">DataStream</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">&gt;</span> <span class="n">input</span> <span class="o">=</span> <span class="c1">// [...]</span>
<span class="n">DataStream</span><span class="o">&lt;...&gt;</span> <span class="n">windowed</span> <span class="o">=</span> <span class="n">input</span>
<span class="o">.</span><span class="na">keyBy</span><span class="o">(</span><span class="nl">Transaction:</span><span class="o">:</span><span class="n">getAccountId</span><span class="o">)</span>
<span class="o">.</span><span class="na">window</span><span class="o">(</span><span class="cm">/*window specification*/</span><span class="o">);</span></code></pre></div>
<p>This approach is the main building block for achieving horizontal scalability in a wide range of use cases. However, in the case of an application striving to provide flexibility in business logic at runtime, this is not enough.
To understand why this is the case, let us start with articulating a realistic sample rule definition for our fraud detection system in the form of a functional requirement:</p>
<p><em>“Whenever the <strong>sum</strong> of the accumulated <strong>payment amount</strong> from the same <strong>payer</strong> to the same <strong>beneficiary</strong> within the <strong>duration of a week</strong> is <strong>greater</strong> than <strong>1 000 000 $</strong> - fire an alert.”</em></p>
<p>In this formulation we can spot a number of parameters that we would like to be able to specify in a newly-submitted rule and possibly even later modify or tweak at runtime:</p>
<ol>
<li>Aggregation field (payment amount)</li>
<li>Grouping fields (payer + beneficiary)</li>
<li>Aggregation function (sum)</li>
<li>Window duration (1 week)</li>
<li>Limit (1 000 000)</li>
<li>Limit operator (greater)</li>
</ol>
<p>Accordingly, we will use the following simple JSON format to define the aforementioned parameters:</p>
<div class="highlight"><pre><code class="language-json"><span class="p">{</span>
<span class="nt">&quot;ruleId&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
<span class="nt">&quot;ruleState&quot;</span><span class="p">:</span> <span class="s2">&quot;ACTIVE&quot;</span><span class="p">,</span>
<span class="nt">&quot;groupingKeyNames&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;payerId&quot;</span><span class="p">,</span> <span class="s2">&quot;beneficiaryId&quot;</span><span class="p">],</span>
<span class="nt">&quot;aggregateFieldName&quot;</span><span class="p">:</span> <span class="s2">&quot;paymentAmount&quot;</span><span class="p">,</span>
<span class="nt">&quot;aggregatorFunctionType&quot;</span><span class="p">:</span> <span class="s2">&quot;SUM&quot;</span><span class="p">,</span>
<span class="nt">&quot;limitOperatorType&quot;</span><span class="p">:</span> <span class="s2">&quot;GREATER&quot;</span><span class="p">,</span>
<span class="nt">&quot;limit&quot;</span><span class="p">:</span> <span class="mi">1000000</span><span class="p">,</span>
<span class="nt">&quot;windowMinutes&quot;</span><span class="p">:</span> <span class="mi">10080</span>
<span class="p">}</span></code></pre></div>
<p>At this point, it is important to understand that <strong><code>groupingKeyNames</code></strong> determine the actual physical grouping of events - all Transactions with the same values of specified parameters (e.g. <em>payer #25 -&gt; beneficiary #12</em>) have to be aggregated in the same physical instance of the evaluating operator. Naturally, the process of distributing data in such a way in Flink’s API is realised by a <code>keyBy()</code> function.</p>
<p>Most examples in Flink’s <code>keyBy()</code> <a href="https://ci.apache.org/projects/flink/flink-docs-stable/dev/api_concepts.html#define-keys-using-field-expressions">documentation</a> use a hard-coded <code>KeySelector</code>, which extracts specific, fixed fields of the events. However, to support the desired flexibility, we have to extract them in a more dynamic fashion based on the specifications of the rules. For this, we will have to use one additional operator that prepares every event for dispatching to the correct aggregating instance.</p>
<p>On a high level, our main processing pipeline looks like this:</p>
<div class="highlight"><pre><code class="language-java"><span class="n">DataStream</span><span class="o">&lt;</span><span class="n">Alert</span><span class="o">&gt;</span> <span class="n">alerts</span> <span class="o">=</span>
<span class="n">transactions</span>
<span class="o">.</span><span class="na">process</span><span class="o">(</span><span class="k">new</span> <span class="nf">DynamicKeyFunction</span><span class="o">())</span>
<span class="o">.</span><span class="na">keyBy</span><span class="o">(</span><span class="cm">/* some key selector */</span><span class="o">)</span>
<span class="o">.</span><span class="na">process</span><span class="o">(</span><span class="cm">/* actual calculations and alerting */</span><span class="o">);</span></code></pre></div>
<p>We have previously established that each rule defines a <strong><code>groupingKeyNames</code></strong> parameter that specifies which combination of fields will be used for the incoming events’ grouping. Each rule might use an arbitrary combination of these fields. At the same time, every incoming event potentially needs to be evaluated against multiple rules. This implies that events might simultaneously need to be present at multiple parallel instances of evaluating operators that correspond to different rules and hence will need to be forked. Ensuring that events are dispatched in this way is the purpose of <code>DynamicKeyFunction()</code>.</p>
<center>
<img src="/img/blog/2019-11-19-demo-fraud-detection/shuffle_function_1.png" width="800px" alt="Figure 3: Forking events with Dynamic Key Function" />
<br />
<i><small>Figure 3: Forking events with Dynamic Key Function</small></i>
</center>
<p><br /></p>
<p><code>DynamicKeyFunction</code> iterates over a set of defined rules and prepares every event to be processed by a <code>keyBy()</code> function by extracting the required grouping keys:</p>
<div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kd">class</span> <span class="nc">DynamicKeyFunction</span>
<span class="kd">extends</span> <span class="n">ProcessFunction</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">,</span> <span class="n">Keyed</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">,</span> <span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;&gt;</span> <span class="o">{</span>
<span class="o">...</span>
<span class="cm">/* Simplified */</span>
<span class="n">List</span><span class="o">&lt;</span><span class="n">Rule</span><span class="o">&gt;</span> <span class="n">rules</span> <span class="o">=</span> <span class="cm">/* Rules that are initialized somehow.</span>
<span class="cm"> Details will be discussed in a future blog post. */</span><span class="o">;</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="kt">void</span> <span class="nf">processElement</span><span class="o">(</span>
<span class="n">Transaction</span> <span class="n">event</span><span class="o">,</span>
<span class="n">Context</span> <span class="n">ctx</span><span class="o">,</span>
<span class="n">Collector</span><span class="o">&lt;</span><span class="n">Keyed</span><span class="o">&lt;</span><span class="n">Transaction</span><span class="o">,</span> <span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;&gt;</span> <span class="n">out</span><span class="o">)</span> <span class="o">{</span>
<span class="k">for</span> <span class="o">(</span><span class="n">Rule</span> <span class="n">rule</span> <span class="o">:</span> <span class="n">rules</span><span class="o">)</span> <span class="o">{</span>
<span class="n">out</span><span class="o">.</span><span class="na">collect</span><span class="o">(</span>
<span class="k">new</span> <span class="n">Keyed</span><span class="o">&lt;&gt;(</span>
<span class="n">event</span><span class="o">,</span>
<span class="n">KeysExtractor</span><span class="o">.</span><span class="na">getKey</span><span class="o">(</span><span class="n">rule</span><span class="o">.</span><span class="na">getGroupingKeyNames</span><span class="o">(),</span> <span class="n">event</span><span class="o">),</span>
<span class="n">rule</span><span class="o">.</span><span class="na">getRuleId</span><span class="o">()));</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="o">...</span>
<span class="o">}</span></code></pre></div>
<p><code>KeysExtractor.getKey()</code> uses reflection to extract the required values of <code>groupingKeyNames</code> fields from events and combines them as a single concatenated String key, e.g. <code>"{payerId=25;beneficiaryId=12}"</code>. Flink will calculate the hash of this key and assign the processing of this particular combination to a specific server in the cluster. This will allow tracking all transactions between <em>payer #25</em> and <em>beneficiary #12</em> and evaluating defined rules within the desired time window.</p>
<p>Notice that a wrapper class <code>Keyed</code> with the following signature was introduced as the output type of <code>DynamicKeyFunction</code>:</p>
<div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kd">class</span> <span class="nc">Keyed</span><span class="o">&lt;</span><span class="n">IN</span><span class="o">,</span> <span class="n">KEY</span><span class="o">,</span> <span class="n">ID</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="kd">private</span> <span class="n">IN</span> <span class="n">wrapped</span><span class="o">;</span>
<span class="kd">private</span> <span class="n">KEY</span> <span class="n">key</span><span class="o">;</span>
<span class="kd">private</span> <span class="n">ID</span> <span class="n">id</span><span class="o">;</span>
<span class="o">...</span>
<span class="kd">public</span> <span class="n">KEY</span> <span class="nf">getKey</span><span class="o">(){</span>
<span class="k">return</span> <span class="n">key</span><span class="o">;</span>
<span class="o">}</span>
<span class="o">}</span></code></pre></div>
<p>Fields of this POJO carry the following information: <code>wrapped</code> is the original transaction event, <code>key</code> is the result of using <code>KeysExtractor</code> and <code>id</code> is the ID of the Rule that caused the dispatch of the event (according to the rule-specific grouping logic).</p>
<p>Events of this type will be the input to the <code>keyBy()</code> function in the main processing pipeline and allow the use of a simple lambda-expression as a <a href="https://ci.apache.org/projects/flink/flink-docs-stable/dev/api_concepts.html#define-keys-using-key-selector-functions"><code>KeySelector</code></a> for the final step of implementing dynamic data shuffle.</p>
<div class="highlight"><pre><code class="language-java"><span class="n">DataStream</span><span class="o">&lt;</span><span class="n">Alert</span><span class="o">&gt;</span> <span class="n">alerts</span> <span class="o">=</span>
<span class="n">transactions</span>
<span class="o">.</span><span class="na">process</span><span class="o">(</span><span class="k">new</span> <span class="nf">DynamicKeyFunction</span><span class="o">())</span>
<span class="o">.</span><span class="na">keyBy</span><span class="o">((</span><span class="n">keyed</span><span class="o">)</span> <span class="o">-&gt;</span> <span class="n">keyed</span><span class="o">.</span><span class="na">getKey</span><span class="o">())</span>
<span class="o">.</span><span class="na">process</span><span class="o">(</span><span class="k">new</span> <span class="nf">DynamicAlertFunction</span><span class="o">());</span></code></pre></div>
<p>By applying <code>DynamicKeyFunction</code> we are implicitly copying events for performing parallel per-rule evaluation within a Flink cluster. By doing so, we achieve an important property - horizontal scalability of rules’ processing. Our system will be capable of handling more rules by adding more servers to the cluster, i.e. increasing the parallelism. This property is achieved at the cost of data duplication, which might become an issue depending on the specific set of parameters, such as incoming data rate, available network bandwidth, event payload size etc. In a real-life scenario, additional optimizations can be applied, such as combined evaluation of rules which have the same <code>groupingKeyNames</code>, or a filtering layer, which would strip events of all the fields that are not required for processing of a particular rule.</p>
<h3 id="summary">Summary:</h3>
<p>In this blog post, we have discussed the motivation behind supporting dynamic, runtime changes to a Flink application by looking at a sample use case - a Fraud Detection engine. We have described the overall architecture and interactions between its components as well as provided references for building and running a demo Fraud Detection application in a dockerized setup. We then showed the details of implementing a <strong>dynamic data partitioning pattern</strong> as the first underlying building block to enable flexible runtime configurations.</p>
<p>To remain focused on describing the core mechanics of the pattern, we kept the complexity of the DSL and the underlying rules engine to a minimum. Going forward, it is easy to imagine adding extensions such as allowing more sophisticated rule definitions, including filtering of certain events, logical rules chaining, and other more advanced functionality.</p>
<p>In the second part of this series, we will describe how the rules make their way into the running Fraud Detection engine. Additionally, we will go over the implementation details of the main processing function of the pipeline - <em>DynamicAlertFunction()</em>.</p>
<center>
<img src="/img/blog/2019-11-19-demo-fraud-detection/end-to-end.png" width="800px" alt="Figure 4: End-to-end pipeline" />
<br />
<i><small>Figure 4: End-to-end pipeline</small></i>
</center>
<p><br /></p>
<p>In the <a href="/news/2020/03/24/demo-fraud-detection-2.html">next article</a>, we will see how Flink’s broadcast streams can be utilized to help steer the processing within the Fraud Detection engine at runtime (Dynamic Application Updates pattern).</p>
</article>
</div>
<div class="row">
<div id="disqus_thread"></div>
<script type="text/javascript">
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
// NOTE(review): kept as a global `var` because the remote embed script presumably reads it — confirm before scoping it.
var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname
/* * * DON'T EDIT BELOW THIS LINE * * */
// Builds an async <script> element for the forum's embed.js and appends it to <head>,
// falling back to <body> when the document has no <head> element.
(function() {
var dsq = document.createElement('script');
dsq.async = true;
// Explicit https: a protocol-relative URL inherits the page's scheme and fails when the page is opened via file://.
dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
</div>
</div>
</div>
</div>
</div>
<hr />
<div class="row">
<div class="footer text-center col-sm-12">
<p>Copyright © 2014-2019 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
<p>Apache Flink, Flink®, Apache®, the squirrel logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p>
<p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
</div>
</div>
</div><!-- /.container -->
<!-- Include all compiled plugins (below), or include individual files as needed -->
<!-- Bootstrap JS is pinned to the same release (3.4.1) as the Bootstrap stylesheet loaded in <head>. -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.matchHeight/0.7.0/jquery.matchHeight-min.js"></script>
<script src="/js/codetabs.js"></script>
<script src="/js/stickysidebar.js"></script>
<!-- Google Analytics -->
<script>
// Analytics loader. Registers window.ga as a command queue (calls made before the library
// arrives are buffered in ga.q), stamps the current time in ga.l, then creates an async
// <script> for the library URL and inserts it before the first <script> element on the page.
// The URL uses explicit https rather than a protocol-relative form, which fails under file://.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Queue the tracker creation for this property ID, then queue a page-view hit.
ga('create', 'UA-52545728-1', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>