blob: 0640494a0409ebecc537cd0b7ad0a937f755e8a1 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
<link rel="stylesheet" href="/bootstrap/css/bootstrap.min.css">
<script src="/bootstrap/js/bootstrap.bundle.min.js"></script>
<link rel="stylesheet" type="text/css" href="/font-awesome/css/font-awesome.min.css">
<script src="/js/anchor.min.js"></script>
<script src="/js/flink.js"></script>
<link rel="canonical" href="https://flink.apache.org/2020/09/01/memory-management-improvements-for-flinks-jobmanager-in-apache-flink-1.11/">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="Apache Flink 1.11 comes with significant changes to the memory model of Flink’s JobManager and configuration options for your Flink clusters. These recently-introduced changes make Flink adaptable to all kinds of deployment environments (e.g. Kubernetes, Yarn, Mesos), providing better control over its memory consumption.
The previous blog post focused on the memory model of the TaskManagers and how it was improved in Flink 1.10. This post addresses the same topic but for the JobManager instead.">
<meta name="theme-color" content="#FFFFFF"><meta property="og:title" content="Memory Management improvements for Flink’s JobManager in Apache Flink 1.11" />
<meta property="og:description" content="Apache Flink 1.11 comes with significant changes to the memory model of Flink’s JobManager and configuration options for your Flink clusters. These recently-introduced changes make Flink adaptable to all kinds of deployment environments (e.g. Kubernetes, Yarn, Mesos), providing better control over its memory consumption.
The previous blog post focused on the memory model of the TaskManagers and how it was improved in Flink 1.10. This post addresses the same topic but for the JobManager instead." />
<meta property="og:type" content="article" />
<meta property="og:url" content="https://flink.apache.org/2020/09/01/memory-management-improvements-for-flinks-jobmanager-in-apache-flink-1.11/" /><meta property="article:section" content="posts" />
<meta property="article:published_time" content="2020-09-01T15:30:00+00:00" />
<meta property="article:modified_time" content="2020-09-01T15:30:00+00:00" />
<title>Memory Management improvements for Flink’s JobManager in Apache Flink 1.11 | Apache Flink</title>
<link rel="manifest" href="/manifest.json">
<link rel="icon" href="/favicon.png" type="image/x-icon">
<link rel="stylesheet" href="/book.min.22eceb4d17baa9cdc0f57345edd6f215a40474022dfee39b63befb5fb3c596b5.css" integrity="sha256-IuzrTRe6qc3A9XNF7dbyFaQEdAIt/uObY777X7PFlrU=">
<script defer src="/en.search.min.2698f0d1b683dae4d6cb071668b310a55ebcf1c48d11410a015a51d90105b53e.js" integrity="sha256-Jpjw0baD2uTWywcWaLMQpV688cSNEUEKAVpR2QEFtT4="></script>
<!--
Made with Book Theme
https://github.com/alex-shpak/hugo-book
-->
<meta name="generator" content="Hugo 0.124.1">
<script>
// Matomo analytics bootstrap. Tracker commands are queued on the global
// `_paq` array; an existing `window._paq` is reused if one is already defined.
var _paq = window._paq = window._paq || [];

// Queue the page-level commands in order.
_paq.push(['disableCookies']);
_paq.push(['setDomains', ['*.flink.apache.org', '*.nightlies.apache.org/flink']]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);

(function () {
  // Protocol-relative base shared by the tracking endpoint and the tracker script.
  var trackerBase = '//analytics.apache.org/';

  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '1']);

  // Build an async <script> for matomo.js and insert it before the first
  // <script> element found in the document.
  var loader = document.createElement('script');
  var firstScript = document.getElementsByTagName('script')[0];
  loader.async = true;
  loader.src = trackerBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
</head>
<body dir="ltr">
<header>
<nav class="navbar navbar-expand-xl">
<div class="container-fluid">
<a class="navbar-brand" href="/">
<img src="/img/logo/png/100/flink_squirrel_100_color.png" alt="Apache Flink" height="47" width="47" class="d-inline-block align-text-middle">
<span>Apache Flink</span>
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
<i class="fa fa-bars navbar-toggler-icon"></i>
</button>
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">About</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/what-is-flink/flink-architecture/">Architecture</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-applications/">Applications</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-operations/">Operations</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/use-cases/">Use Cases</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/powered-by/">Powered By</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/roadmap/">Roadmap</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/community/">Community &amp; Project Info</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/security/">Security</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/special-thanks/">Special Thanks</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Getting Started</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/try-flink/local_installation/">With Flink<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/docs/try-flink-kubernetes-operator/quick-start/">With Flink Kubernetes Operator<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable/docs/get-started/introduction/">With Flink CDC<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/docs/try-flink-ml/quick-start/">With Flink ML<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/getting-started/project-setup.html">With Flink Stateful Functions<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/learn-flink/overview/">Training Course<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Documentation</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/">Flink 1.19 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-master/">Flink Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/">Kubernetes Operator 1.8 (latest)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-main">Kubernetes Operator Main (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable">CDC 3.0 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-master">CDC Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/">ML 2.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-master">ML Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/">Stateful Functions 3.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-master">Stateful Functions Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">How to Contribute</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/how-to-contribute/overview/">Overview</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-code/">Contribute Code</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/reviewing-prs/">Review Pull Requests</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/code-style-and-quality-preamble/">Code Style and Quality Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-documentation/">Contribute Documentation</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/documentation-style-guide/">Documentation Style Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/improve-website/">Contribute to the Website</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/getting-help/">Getting Help</a>
</li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link" href="/posts/">Flink Blog</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/downloads/">Downloads</a>
</li>
</ul>
<div class="book-search">
<div class="book-search-spinner hidden">
<i class="fa fa-refresh fa-spin"></i>
</div>
<form class="search-bar d-flex" onsubmit="return false;">
<input type="text" id="book-search-input" placeholder="Search" aria-label="Search" maxlength="64" data-hotkeys="s/">
<i class="fa fa-search search"></i>
<i class="fa fa-circle-o-notch fa-spin spinner"></i>
</form>
<div class="book-search-spinner hidden"></div>
<ul id="book-search-results"></ul>
</div>
</div>
</div>
</nav>
<div class="navbar-clearfix"></div>
</header>
<main class="flex">
<section class="container book-page">
<article class="markdown">
<h1>
<a href="/2020/09/01/memory-management-improvements-for-flinks-jobmanager-in-apache-flink-1.11/">Memory Management improvements for Flink’s JobManager in Apache Flink 1.11</a>
</h1>
September 1, 2020 -
Andrey Zagrebin
<p>Apache Flink 1.11 comes with significant changes to the memory model of Flink’s JobManager and configuration options for your Flink clusters.
These recently-introduced changes make Flink adaptable to all kinds of deployment environments (e.g. Kubernetes, Yarn, Mesos),
providing better control over its memory consumption.</p>
<p>The <a href="https://flink.apache.org/news/2020/04/21/memory-management-improvements-flink-1.10.html">previous blog post</a>
focused on the memory model of the TaskManagers and how it was improved in Flink 1.10. This post addresses the same topic but for the JobManager instead.
Flink 1.11 unifies the memory model of Flink’s processes. The newly-introduced memory model of the JobManager follows a similar approach to that of the TaskManagers;
it is simpler and has fewer components and tuning knobs. This post might consequently seem very similar to our previous story on Flink’s memory
but aims at providing a complete overview of Flink’s JobManager memory model as of Flink 1.11. Read on for a full list of updates and changes below!</p>
<h2 id="introduction-to-flinks-process-memory-model">
Introduction to Flink’s process memory model
<a class="anchor" href="#introduction-to-flinks-process-memory-model">#</a>
</h2>
<p>Having a clear understanding of Apache Flink’s process memory model allows you to manage resources for the various workloads more efficiently.
The following diagram illustrates the main memory components of a Flink process:</p>
<center>
<img src="/img/blog/2020-09-01-flink-1.11-memory-management-improvements/total-process-memory-flink-1.11.png" width="400" alt="Diagram of the main memory components of a Flink process (Total Process Memory)"/>
<br/>
<i><small>Flink: Total Process Memory</small></i>
</center>
<br/>
<p>The JobManager process is a JVM process. On a high level, its memory consists of the JVM Heap and Off-Heap memory.
These types of memory are consumed by Flink directly or by the JVM for its specific purposes (e.g. metaspace).
There are two major memory consumers within the JobManager process: the framework itself consuming memory for internal data structures, network communication, etc.
and the user code which runs within the JobManager process, e.g. in certain batch sources or during checkpoint completion callbacks.</p>
<div class="alert alert-info" markdown="1">
<span class="label label-info" style="display: inline-block"><span class="glyphicon glyphicon-info-sign" aria-hidden="true"></span> Note</span>
Please note that the user code has direct access to all memory types: JVM Heap, Direct and Native memory. Therefore, Flink cannot really control its allocation and usage.
</div>
<h2 id="how-to-set-up-jobmanager-memory">
How to set up JobManager memory
<a class="anchor" href="#how-to-set-up-jobmanager-memory">#</a>
</h2>
<p>With the release of Flink 1.11 and in order to provide a better user experience, the Flink community introduced three alternatives to setting up memory in JobManagers.</p>
<p>The first two — and simplest — alternatives are configuring one of the two following options for total memory available for the JVM process of the JobManager:</p>
<ul>
<li><strong><em>Total Process Memory:</em></strong> total memory consumed by the Flink Java application (including user code) and by the JVM to run the whole process.</li>
<li><strong><em>Total Flink Memory:</em></strong> only the memory consumed by the Flink Java application, including user code but excluding any memory allocated by the JVM to run it.</li>
</ul>
<p>It is advisable to configure the <em>Total Flink Memory</em> for standalone deployments where explicitly declaring how much memory is given to Flink is a common practice,
while the outer JVM overhead is of little interest. For the cases of deploying Flink in containerized environments
(such as <a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/deployment/kubernetes.html">Kubernetes</a>,
<a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/deployment/yarn_setup.html">Yarn</a> or
<a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/deployment/mesos.html">Mesos</a>),
the <em>Total Process Memory</em> option is recommended instead, because it becomes the size for the total memory of the requested container.
Containerized environments usually strictly enforce this memory limit.</p>
<p>If you want more fine-grained control over the size of the <em>JVM Heap</em>, there is also the third alternative of configuring it directly.
This alternative gives a clear separation between the heap memory and any other memory types.</p>
<p>The remaining memory components will be automatically adjusted either based on their default values or additionally-configured parameters.
Apache Flink also checks the overall consistency. You can find more information about the different memory components in the corresponding
<a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/memory/mem_setup_jobmanager.html">documentation</a>.
You can try different configuration options with the <a href="https://docs.google.com/spreadsheets/d/1mJaMkMPfDJJ-w6nMXALYmTc4XxiV30P5U7DzgwLkSoE/edit#gid=605121894">configuration spreadsheet</a>
(you have to make a copy of the spreadsheet to edit it) of <a href="https://cwiki.apache.org/confluence/display/FLINK/FLIP-116%3A&#43;Unified&#43;Memory&#43;Configuration&#43;for&#43;Job&#43;Managers">FLIP-116</a>
and check the corresponding results for your individual case.</p>
<p>If you are migrating from a Flink version older than 1.11, we suggest following the steps in the
<a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/memory/mem_migration.html#migrate-job-manager-memory-configuration">migration guide</a> of the Flink documentation.</p>
<p>Additionally, you can configure separately the <a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/memory/mem_setup_jobmanager.html#configure-off-heap-memory">Off-heap memory</a>
(<em>JVM direct and non-direct memory</em>) as well as the <a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/memory/mem_setup_jobmanager.html#detailed-configuration">JVM metaspace &amp; overhead</a>.
The <em>JVM overhead</em> is a <a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/memory/mem_setup.html#capped-fractionated-components">fraction</a> of the <em>Total Process Memory</em>.
The <em>JVM overhead</em> can be configured in a similar way as other fractions described in <a href="https://flink.apache.org/news/2020/04/21/memory-management-improvements-flink-1.10.html#fractions-of-the-total-flink-memory">our previous blog post</a>
about the TaskManager’s memory model.</p>
<h2 id="more-hints-to-control-the-container-memory-limit">
More hints to control the container memory limit
<a class="anchor" href="#more-hints-to-control-the-container-memory-limit">#</a>
</h2>
<p>The heap and direct memory usage are managed by the JVM. There are also many other possible sources of native memory consumption in Apache Flink or its user applications
which are not managed directly by Flink or the JVM. Controlling their limits is often difficult which complicates debugging of potential memory leaks.
If Flink’s process allocates too much memory in an unmanaged way, it can often result in killing its containers for containerized environments.
In this case, understanding which type of memory consumption has exceeded its limit might be difficult to grasp and resolve.
Flink 1.11 introduces some specific tuning options to clearly represent such components for the JobManager’s process.
Although Flink cannot always enforce strict limits and borders among them, the idea here is to explicitly plan the memory usage.
Below we provide some examples of how memory setup can prevent containers from exceeding their memory limit:</p>
<ul>
<li>
<p><strong><a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/memory/mem_setup_jobmanager.html#configure-off-heap-memory">User code or its dependencies consume significant off-heap memory</a>.</strong>
Tuning the <em>Off-heap</em> option can assign additional direct or native memory to the user code or any of its dependencies.
Flink cannot control native allocations but it sets the limit for <em>JVM Direct</em> memory allocations. The Direct memory limit is enforced by the JVM.</p>
</li>
<li>
<p><strong><a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/memory/mem_setup_jobmanager.html#detailed-configuration">JVM metaspace requires additional memory</a>.</strong>
If you encounter <code>OutOfMemoryError: Metaspace</code>, Flink provides an option to increase its default limit and the JVM will ensure that it is not exceeded.
The metaspace size of a Flink JVM process is always explicitly set in contrast to the default JVM settings where it is not limited.</p>
</li>
<li>
<p><strong><a href="//nightlies.apache.org/flink/flink-docs-release-1.11/ops/memory/mem_setup_jobmanager.html#detailed-configuration">JVM requires more internal memory</a>.</strong>
There is no direct control over certain types of JVM process allocations but Flink provides <em>JVM Overhead</em> options.
The <em>JVM Overhead</em> options allow declaring an additional amount of memory, anticipated for those allocations and not covered by other options.</p>
</li>
</ul>
<h2 id="conclusion">
Conclusion
<a class="anchor" href="#conclusion">#</a>
</h2>
<p>The latest Flink release (<a href="https://flink.apache.org/downloads.html#apache-flink-1111">Flink 1.11</a>) introduces some notable changes to the memory configuration of Flink’s JobManager,
making its memory management significantly easier than before. Stay tuned for more additions and features in upcoming releases.
If you have any suggestions or questions for the Flink community, we encourage you to sign up to the Apache Flink <a href="https://flink.apache.org/community.html#mailing-lists">mailing lists</a>
and become part of the discussion.</p>
</article>
<div class="edit-this-page">
<p>
<a href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Translation+Specifications">Want to contribute translation?</a>
</p>
<p>
<a href="//github.com/apache/flink-web/edit/asf-site/docs/content/posts/2020-09-01-flink-1.11-memory-management-improvements.md">
Edit This Page<i class="fa fa-edit fa-fw"></i>
</a>
</p>
</div>
</section>
<aside class="book-toc">
<nav id="TableOfContents"><h3>On This Page <a href="javascript:void(0)" class="toc" onclick="collapseToc()"><i class="fa fa-times" aria-hidden="true"></i></a></h3>
<ul>
<li>
<ul>
<li><a href="#introduction-to-flinks-process-memory-model">Introduction to Flink’s process memory model</a></li>
<li><a href="#how-to-set-up-jobmanager-memory">How to set up JobManager memory</a></li>
<li><a href="#more-hints-to-control-the-container-memory-limit">More hints to control the container memory limit</a></li>
<li><a href="#conclusion">Conclusion</a></li>
</ul>
</li>
</ul>
</nav>
</aside>
<aside class="expand-toc hidden">
<a class="toc" onclick="expandToc()" href="javascript:void(0)">
<i class="fa fa-bars" aria-hidden="true"></i>
</a>
</aside>
</main>
<footer>
<div class="separator"></div>
<div class="panels">
<div class="wrapper">
<div class="panel">
<ul>
<li>
<a href="https://flink-packages.org/">flink-packages.org</a>
</li>
<li>
<a href="https://www.apache.org/">Apache Software Foundation</a>
</li>
<li>
<a href="https://www.apache.org/licenses/">License</a>
</li>
<li>
<a href="/zh/">
<i class="fa fa-globe" aria-hidden="true"></i>&nbsp;中文版
</a>
</li>
</ul>
</div>
<div class="panel">
<ul>
<li>
<a href="/what-is-flink/security">Security</a>
</li>
<li>
<a href="https://www.apache.org/foundation/sponsorship.html">Donate</a>
</li>
<li>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</li>
</ul>
</div>
<div class="panel icons">
<div>
<a href="/posts">
<div class="icon flink-blog-icon"></div>
<span>Flink blog</span>
</a>
</div>
<div>
<a href="https://github.com/apache/flink">
<div class="icon flink-github-icon"></div>
<span>Github</span>
</a>
</div>
<div>
<a href="https://twitter.com/apacheflink">
<div class="icon flink-twitter-icon"></div>
<span>Twitter</span>
</a>
</div>
</div>
</div>
</div>
<hr/>
<div class="container disclaimer">
<p>The contents of this website are © 2024 Apache Software Foundation under the terms of the Apache License v2. Apache Flink, Flink, and the Flink logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</footer>
</body>
</html>