blob: 3e513bc544c1e567a7501374edf64a41d00ecae8 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Apache Flink: Memory Management Improvements with Apache Flink 1.10</title>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Bootstrap -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/flink.css">
<link rel="stylesheet" href="/css/syntax.css">
<!-- Blog RSS feed -->
<link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" />
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<!-- We need to load Jquery in the header for custom google analytics event tracking-->
<script src="/js/jquery.min.js"></script>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Main content. -->
<div class="container">
<div class="row">
<div id="sidebar" class="col-sm-3">
<!-- Top navbar. -->
<nav class="navbar navbar-default">
<!-- The logo. -->
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<div class="navbar-logo">
<a href="/">
<img alt="Apache Flink" src="/img/flink-header-logo.svg" width="147px" height="73px">
</a>
</div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-main">
<!-- First menu section explains visitors what Flink is -->
<!-- What is Stream Processing? -->
<!--
<li><a href="/streamprocessing1.html">What is Stream Processing?</a></li>
-->
<!-- What is Flink? -->
<li><a href="/flink-architecture.html">What is Apache Flink?</a></li>
<ul class="nav navbar-nav navbar-subnav">
<li >
<a href="/flink-architecture.html">Architecture</a>
</li>
<li >
<a href="/flink-applications.html">Applications</a>
</li>
<li >
<a href="/flink-operations.html">Operations</a>
</li>
</ul>
<!-- What is Stateful Functions? -->
<li><a href="/stateful-functions.html">What is Stateful Functions?</a></li>
<!-- Use cases -->
<li><a href="/usecases.html">Use Cases</a></li>
<!-- Powered by -->
<li><a href="/poweredby.html">Powered By</a></li>
&nbsp;
<!-- Second menu section aims to support Flink users -->
<!-- Downloads -->
<li><a href="/downloads.html">Downloads</a></li>
<!-- Getting Started -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Getting Started<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.11/getting-started/index.html" target="_blank">With Flink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1/getting-started/project-setup.html" target="_blank">With Flink Stateful Functions <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="/training.html">Training Course</a></li>
</ul>
</li>
<!-- Documentation -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.11" target="_blank">Flink 1.11 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-docs-master" target="_blank">Flink Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1" target="_blank">Flink Stateful Functions 2.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-master" target="_blank">Flink Stateful Functions Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
</ul>
</li>
<!-- getting help -->
<li><a href="/gettinghelp.html">Getting Help</a></li>
<!-- Blog -->
<li class="active"><a href="/blog/"><b>Flink Blog</b></a></li>
<!-- Flink-packages -->
<li>
<a href="https://flink-packages.org" target="_blank">flink-packages.org <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Third menu section aim to support community and contributors -->
<!-- Community -->
<li><a href="/community.html">Community &amp; Project Info</a></li>
<!-- Roadmap -->
<li><a href="/roadmap.html">Roadmap</a></li>
<!-- Contribute -->
<li><a href="/contributing/how-to-contribute.html">How to Contribute</a></li>
<!-- GitHub -->
<li>
<a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Language Switcher -->
<li>
<!-- link to the Chinese home page when current is blog page -->
<a href="/zh">中文版</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-bottom">
<hr />
<!-- Twitter -->
<li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<!-- Visualizer -->
<li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<hr />
<li><a href="https://apache.org" target="_blank">Apache Software Foundation <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li>
<style>
.smalllinks:link {
display: inline-block !important; background: none; padding-top: 0px; padding-bottom: 0px; padding-right: 0px; min-width: 75px;
}
</style>
<a class="smalllinks" href="https://www.apache.org/licenses/" target="_blank">License</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/security/" target="_blank">Security</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Donate</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</nav>
</div>
<div class="col-sm-9">
<div class="row-fluid">
<div class="col-sm-12">
<div class="row">
<h1>Memory Management Improvements with Apache Flink 1.10</h1>
<p><i></i></p>
<article>
<p>21 Apr 2020 Andrey Zagrebin </p>
<p>Apache Flink 1.10 comes with significant changes to the memory model of the Task Managers and configuration options for your Flink applications. These recently-introduced changes make Flink more adaptable to all kinds of deployment environments (e.g. Kubernetes, Yarn, Mesos), providing strict control over its memory consumption. In this post, we describe Flink’s memory model, as it stands in Flink 1.10, how to set up and manage memory consumption of your Flink applications and the recent changes the community implemented in the latest Apache Flink release.</p>
<h2 id="introduction-to-flinks-memory-model">Introduction to Flink’s memory model</h2>
<p>Having a clear understanding of Apache Flink’s memory model allows you to manage resources for the various workloads more efficiently. The following diagram illustrates the main memory components in Flink:</p>
<center>
<img src="/img/blog/2020-04-21-memory-management-improvements-flink-1.10/total-process-memory.svg" width="400px" alt="Flink: Total Process Memory" />
<br />
<i><small>Flink: Total Process Memory</small></i>
</center>
<p><br /></p>
<p>The Task Manager process is a JVM process. On a high level, its memory consists of the <em>JVM Heap</em> and <em>Off-Heap</em> memory. These types of memory are consumed by Flink directly or by JVM for its specific purposes (i.e. metaspace etc.). There are two major memory consumers within Flink: the user code of job operator tasks and the framework itself consuming memory for internal data structures, network buffers, etc.</p>
<p><strong>Please note that</strong> the user code has direct access to all memory types: <em>JVM Heap, Direct</em> and <em>Native memory</em>. Therefore, Flink cannot really control its allocation and usage. There are however two types of Off-Heap memory which are consumed by tasks and controlled explicitly by Flink:</p>
<ul>
<li><em>Managed Memory</em> (Off-Heap)</li>
<li><em>Network Buffers</em></li>
</ul>
<p>The latter is part of the <em>JVM Direct Memory</em>, allocated for user record data exchange between operator tasks.</p>
<h2 id="how-to-set-up-flink-memory">How to set up Flink memory</h2>
<p>With the latest release of Flink 1.10 and in order to provide better user experience, the framework comes with both high-level and fine-grained tuning of memory components. There are essentially three alternatives to setting up memory in Task Managers.</p>
<p>The first two — and simplest — alternatives are configuring one of the two following options for total memory available for the JVM process of the Task Manager:</p>
<ul>
<li><em>Total Process Memory</em>: total memory consumed by the Flink Java application (including user code) and by the JVM to run the whole process.</li>
<li><em>Total Flink Memory</em>: only memory consumed by the Flink Java application, including user code but excluding memory allocated by JVM to run it</li>
</ul>
<p>It is advisable to configure the <em>Total Flink Memory</em> for standalone deployments where explicitly declaring how much memory is given to Flink is a common practice, while the outer <em>JVM overhead</em> is of little interest. For the cases of deploying Flink in containerized environments (such as <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/deployment/kubernetes.html">Kubernetes</a>, <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/deployment/yarn_setup.html">Yarn</a> or <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/deployment/mesos.html">Mesos</a>), the <em>Total Process Memory</em> option is recommended instead, because it becomes the size for the total memory of the requested container. Containerized environments usually strictly enforce this memory limit.</p>
<p>If you want more fine-grained control over the size of <em>JVM Heap</em> and <em>Managed Memory</em> (Off-Heap), there is also a second alternative to configure both <em><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_setup.html#task-operator-heap-memory">Task Heap</a></em> and <em><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_setup.html#managed-memory">Managed Memory</a></em>. This alternative gives a clear separation between the heap memory and any other memory types.</p>
<p>In line with the community’s efforts to <a href="https://flink.apache.org/news/2019/02/13/unified-batch-streaming-blink.html">unify batch and stream processing</a>, this model works universally for both scenarios. It allows sharing the <em>JVM Heap</em> memory between the user code of operator tasks in any workload and the heap state backend in stream processing scenarios. In a similar way, the <em>Managed Memory</em> can be used for batch spilling and for the RocksDB state backend in streaming.</p>
<p>The remaining memory components are automatically adjusted either based on their default values or additionally configured parameters. Flink also checks the overall consistency. You can find more information about the different memory components in the corresponding <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_detail.html">documentation</a>. Additionally, you can try different configuration options with the <a href="https://docs.google.com/spreadsheets/d/1mJaMkMPfDJJ-w6nMXALYmTc4XxiV30P5U7DzgwLkSoE/edit#gid=0">configuration spreadsheet</a> of <a href="https://cwiki.apache.org/confluence/display/FLINK/FLIP-49%3A+Unified+Memory+Configuration+for+TaskExecutors">FLIP-49</a> and check the corresponding results for your individual case.</p>
<p>If you are migrating from a Flink version older than 1.10, we suggest following the steps in the <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_migration.html">migration guide</a> of the Flink documentation.</p>
<h2 id="other-components">Other components</h2>
<p>While configuring Flink’s memory, the size of different memory components can either be fixed with the value of the respective option or tuned using multiple options. Below we provide some more insight about the memory setup.</p>
<h3 id="fractions-of-the-total-flink-memory">Fractions of the Total Flink Memory</h3>
<p>This method allows a proportional breakdown of the <em>Total Flink Memory</em> where the <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_setup.html#managed-memory">Managed Memory</a> (if not set explicitly) and <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_detail.html#capped-fractionated-components">Network Buffers</a> can take certain fractions of it. The remaining memory is then assigned to the <a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_setup.html#task-operator-heap-memory">Task Heap</a> (if not set explicitly) and other fixed <em>JVM Heap</em> and <em>Off-Heap components</em>. The following picture represents an example of such a setup:</p>
<center>
<img src="/img/blog/2020-04-21-memory-management-improvements-flink-1.10/flink-memory-setup.svg" width="800px" alt="Flink: Example of Memory Setup" />
<br />
<i><small>Flink: Example of Memory Setup</small></i>
</center>
<p><br /></p>
<p><strong>Please note that</strong></p>
<ul>
<li>Flink will verify that the size of the derived <em>Network Memory</em> is between its minimum and maximum value, otherwise Flink’s startup will fail. The maximum and minimum limits have default values which can be overwritten by the respective configuration options.</li>
<li>In general, the configured fractions are treated by Flink as hints. Under certain scenarios, the derived value might not match the fraction. For example, if the <em>Total Flink Memory</em> and the <em>Task Heap</em> are configured to fixed values, the <em>Managed Memory</em> will get a certain fraction and the <em>Network Memory</em> will get the remaining memory which might not exactly match its fraction.</li>
</ul>
<h3 id="more-hints-to-control-the-container-memory-limit">More hints to control the container memory limit</h3>
<p>The heap and direct memory usage are managed by the JVM. There are also many other possible sources of native memory consumption in Apache Flink or its user applications which are not managed by Flink or the JVM. Controlling their limits is often difficult which complicates debugging of potential memory leaks. If Flink’s process allocates too much memory in an unmanaged way, it can often result in killing Task Manager containers in containerized environments. In this case, it may be hard to understand which type of memory consumption has exceeded its limit. Flink 1.10 introduces some specific tuning options to clearly represent such components. Although Flink cannot always enforce strict limits and borders among them, the idea here is to explicitly plan the memory usage. Below we provide some examples of how memory setup can prevent containers exceeding their memory limit:</p>
<ul>
<li>
<p><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_tuning.html#rocksdb-state-backend">RocksDB state cannot grow too big</a>. The memory consumption of RocksDB state backend is accounted for in the <em>Managed Memory</em>. RocksDB respects its limit by default (only since Flink 1.10). You can increase the <em>Managed Memory</em> size to improve RocksDB’s performance or decrease it to save resources.</p>
</li>
<li>
<p><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_setup.html#configure-off-heap-memory-direct-or-native">User code or its dependencies consume significant off-heap memory</a>. Tuning the <em>Task Off-Heap</em> option can assign additional direct or native memory to the user code or any of its dependencies. Flink cannot control native allocations but it sets the limit for <em>JVM Direct</em> memory allocations. The <em>Direct</em> memory limit is enforced by the JVM.</p>
</li>
<li>
<p><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_detail.html#jvm-parameters">JVM metaspace requires additional memory</a>. If you encounter <code>OutOfMemoryError: Metaspace</code>, Flink provides an option to increase its limit and the JVM will ensure that it is not exceeded.</p>
</li>
<li>
<p><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/ops/memory/mem_detail.html#capped-fractionated-components">JVM requires more internal memory</a>. There is no direct control over certain types of JVM process allocations but Flink provides <em>JVM Overhead</em> options. The options allow declaring an additional amount of memory, anticipated for those allocations and not covered by other options.</p>
</li>
</ul>
<h2 id="conclusion">Conclusion</h2>
<p>The latest Flink release (Flink 1.10) introduces some significant changes to Flink’s memory configuration, making it possible to manage your application memory and debug Flink significantly better than before. Future developments in this area also include adopting a similar memory model for the job manager process in <a href="https://cwiki.apache.org/confluence/display/FLINK/FLIP+116%3A+Unified+Memory+Configuration+for+Job+Managers">FLIP-116</a>, so stay tuned for more additions and features in upcoming releases. If you have any suggestions or questions for the community, we encourage you to sign up to the Apache Flink <a href="https://flink.apache.org/community.html#mailing-lists">mailing lists</a> and become part of the discussion there.</p>
</article>
</div>
<div class="row">
<div id="disqus_thread"></div>
<script type="text/javascript">
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname
/* * * DON'T EDIT BELOW THIS LINE * * */
(function() {
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
</div>
</div>
</div>
</div>
</div>
<hr />
<div class="row">
<div class="footer text-center col-sm-12">
<p>Copyright © 2014-2019 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
<p>Apache Flink, Flink®, Apache®, the squirrel logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p>
<p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
</div>
</div>
</div><!-- /.container -->
<!-- Include all compiled plugins (below), or include individual files as needed -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.matchHeight/0.7.0/jquery.matchHeight-min.js"></script>
<script src="/js/codetabs.js"></script>
<script src="/js/stickysidebar.js"></script>
<!-- Google Analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-52545728-1', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>