blob: 6239eec4116f5a037bad700f8109f6674f19679a [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
<link rel="stylesheet" href="/bootstrap/css/bootstrap.min.css">
<script src="/bootstrap/js/bootstrap.bundle.min.js"></script>
<link rel="stylesheet" type="text/css" href="/font-awesome/css/font-awesome.min.css">
<script src="/js/anchor.min.js"></script>
<script src="/js/flink.js"></script>
<link rel="canonical" href="https://flink.apache.org/2015/09/16/off-heap-memory-in-apache-flink-and-the-curious-jit-compiler/">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="Running data-intensive code in the JVM and making it well-behaved is tricky. Systems that put billions of data objects naively onto the JVM heap face unpredictable OutOfMemoryErrors and Garbage Collection stalls. Of course, you still want to keep your data in memory as much as possible, for speed and responsiveness of the processing applications. In that context, &ldquo;off-heap&rdquo; has become almost something like a magic word to solve these problems.">
<meta name="theme-color" content="#FFFFFF"><meta property="og:title" content="Off-heap Memory in Apache Flink and the curious JIT compiler" />
<meta property="og:description" content="Running data-intensive code in the JVM and making it well-behaved is tricky. Systems that put billions of data objects naively onto the JVM heap face unpredictable OutOfMemoryErrors and Garbage Collection stalls. Of course, you still want to keep your data in memory as much as possible, for speed and responsiveness of the processing applications. In that context, &ldquo;off-heap&rdquo; has become almost something like a magic word to solve these problems." />
<meta property="og:type" content="article" />
<meta property="og:url" content="https://flink.apache.org/2015/09/16/off-heap-memory-in-apache-flink-and-the-curious-jit-compiler/" /><meta property="article:section" content="posts" />
<meta property="article:published_time" content="2015-09-16T08:00:00+00:00" />
<meta property="article:modified_time" content="2015-09-16T08:00:00+00:00" />
<title>Off-heap Memory in Apache Flink and the curious JIT compiler | Apache Flink</title>
<link rel="manifest" href="/manifest.json">
<link rel="icon" href="/favicon.png" type="image/png">
<link rel="stylesheet" href="/book.min.22eceb4d17baa9cdc0f57345edd6f215a40474022dfee39b63befb5fb3c596b5.css" integrity="sha256-IuzrTRe6qc3A9XNF7dbyFaQEdAIt/uObY777X7PFlrU=">
<script defer src="/en.search.min.2698f0d1b683dae4d6cb071668b310a55ebcf1c48d11410a015a51d90105b53e.js" integrity="sha256-Jpjw0baD2uTWywcWaLMQpV688cSNEUEKAVpR2QEFtT4="></script>
<!--
Made with Book Theme
https://github.com/alex-shpak/hugo-book
-->
<meta name="generator" content="Hugo 0.124.1">
<script>
var _paq = window._paq = window._paq || [];
_paq.push(['disableCookies']);
_paq.push(["setDomains", ["*.flink.apache.org","*.nightlies.apache.org/flink"]]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="//analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '1']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
</head>
<body dir="ltr">
<header>
<nav class="navbar navbar-expand-xl">
<div class="container-fluid">
<a class="navbar-brand" href="/">
<img src="/img/logo/png/100/flink_squirrel_100_color.png" alt="Apache Flink" height="47" width="47" class="d-inline-block align-text-middle">
<span>Apache Flink</span>
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
<i class="fa fa-bars navbar-toggler-icon"></i>
</button>
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">About</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/what-is-flink/flink-architecture/">Architecture</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-applications/">Applications</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-operations/">Operations</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/use-cases/">Use Cases</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/powered-by/">Powered By</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/roadmap/">Roadmap</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/community/">Community & Project Info</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/security/">Security</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/special-thanks/">Special Thanks</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Getting Started</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/try-flink/local_installation/">With Flink<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/docs/try-flink-kubernetes-operator/quick-start/">With Flink Kubernetes Operator<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable/docs/get-started/introduction/">With Flink CDC<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/docs/try-flink-ml/quick-start/">With Flink ML<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/getting-started/project-setup.html">With Flink Stateful Functions<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/learn-flink/overview/">Training Course<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Documentation</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/">Flink 1.19 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-master/">Flink Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/">Kubernetes Operator 1.8 (latest)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-main">Kubernetes Operator Main (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable">CDC 3.0 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-master">CDC Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/">ML 2.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-master">ML Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/">Stateful Functions 3.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-master">Stateful Functions Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">How to Contribute</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/how-to-contribute/overview/">Overview</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-code/">Contribute Code</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/reviewing-prs/">Review Pull Requests</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/code-style-and-quality-preamble/">Code Style and Quality Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-documentation/">Contribute Documentation</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/documentation-style-guide/">Documentation Style Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/improve-website/">Contribute to the Website</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/getting-help/">Getting Help</a>
</li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link" href="/posts/">Flink Blog</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/downloads/">Downloads</a>
</li>
</ul>
<div class="book-search">
<div class="book-search-spinner hidden">
<i class="fa fa-refresh fa-spin"></i>
</div>
<form class="search-bar d-flex" onsubmit="return false;">
<input type="text" id="book-search-input" placeholder="Search" aria-label="Search" maxlength="64" data-hotkeys="s/">
<i class="fa fa-search search"></i>
<i class="fa fa-circle-o-notch fa-spin spinner"></i>
</form>
<div class="book-search-spinner hidden"></div>
<ul id="book-search-results"></ul>
</div>
</div>
</div>
</nav>
<div class="navbar-clearfix"></div>
</header>
<main class="flex">
<section class="container book-page">
<article class="markdown">
<h1>
<a href="/2015/09/16/off-heap-memory-in-apache-flink-and-the-curious-jit-compiler/">Off-heap Memory in Apache Flink and the curious JIT compiler</a>
</h1>
September 16, 2015 -
<p><p>Running data-intensive code in the JVM and making it well-behaved is tricky. Systems that put billions of data objects naively onto the JVM heap face unpredictable OutOfMemoryErrors and Garbage Collection stalls. Of course, you still want to keep your data in memory as much as possible, for speed and responsiveness of the processing applications. In that context, &ldquo;off-heap&rdquo; has become almost something like a magic word to solve these problems.</p>
<p>In this blog post, we will look at how Flink exploits off-heap memory. The feature is part of the upcoming release, but you can try it out with the latest nightly builds. We will also give a few interesting insights into the behavior of Java&rsquo;s JIT compiler for highly optimized methods and loops.</p>
<h2 id="recap-memory-management-in-flink">
Recap: Memory Management in Flink
<a class="anchor" href="#recap-memory-management-in-flink">#</a>
</h2>
<p>To understand Flink’s approach to off-heap memory, we need to recap Flink’s approach to custom managed memory. We have written an <a href="/news/2015/05/11/Juggling-with-Bits-and-Bytes.html">earlier blog post about how Flink manages JVM memory itself</a>.</p>
<p>As a summary, the core part is that Flink implements its algorithms not against Java objects, arrays, or lists, but actually against a data structure similar to <code>java.nio.ByteBuffer</code>. Flink uses its own specialized version, called <a href="https://github.com/apache/flink/blob/release-0.9.1-rc1/flink-core/src/main/java/org/apache/flink/core/memory/MemorySegment.java"><code>MemorySegment</code></a>, on which algorithms put and get ints, longs, byte arrays, etc. at specific positions, and compare and copy memory. The memory segments are held and distributed by a central component (called <code>MemoryManager</code>) from which algorithms request segments according to their calculated memory budgets.</p>
<p>Don&rsquo;t believe that this can be fast? Have a look at the <a href="/news/2015/05/11/Juggling-with-Bits-and-Bytes.html">benchmarks in the earlier blogpost</a>, which show that it is actually often much faster than working on objects, due to better control over data layout (cache efficiency, data size), and reducing the pressure on Java&rsquo;s Garbage Collector.</p>
<p>This form of memory management has been in Flink for a long time. Anecdotally, the first public demo of Flink&rsquo;s predecessor project <em>Stratosphere</em>, at the VLDB conference in 2010, was running its programs with custom managed memory (although I believe few attendees were aware of that).</p>
<h2 id="why-actually-bother-with-off-heap-memory">
Why actually bother with off-heap memory?
<a class="anchor" href="#why-actually-bother-with-off-heap-memory">#</a>
</h2>
<p>Given that Flink has a sophisticated level of managing on-heap memory, why do we even bother with off-heap memory? It is true that <em>&ldquo;out of memory&rdquo;</em> has been much less of a problem for Flink because of its heap memory management techniques. Nonetheless, there are a few good reasons to offer the possibility to move Flink&rsquo;s managed memory out of the JVM heap:</p>
<ul>
<li>
<p>Very large JVMs (100s of GBytes heap memory) tend to be tricky. It takes a long time to start them (allocate and initialize heap) and garbage collection stalls can be huge (minutes). While newer incremental garbage collectors (like G1) mitigate this problem to some extent, an even better solution is to just make the heap much smaller and allocate Flink&rsquo;s managed memory chunks outside the heap.</p>
</li>
<li>
<p>I/O and network efficiency: In many cases, we write MemorySegments to disk (spilling) or to the network (data transfer). Off-heap memory can be written/transferred with zero copies, while heap memory always incurs an additional memory copy.</p>
</li>
<li>
<p>Off-heap memory can actually be owned by other processes. That way, cached data survives process crashes (due to user code exceptions) and can be used for recovery. Flink does not exploit that, yet, but it is interesting future work.</p>
</li>
</ul>
<p>The opposite question is also valid. Why should Flink ever not use off-heap memory?</p>
<ul>
<li>
<p>On-heap is easier and interplays better with tools. Some container environments and monitoring tools get confused when the monitored heap size does not remotely reflect the amount of memory used by the process.</p>
</li>
<li>
<p>Short-lived memory segments are cheaper on the heap. Flink sometimes needs to allocate some short-lived buffers, which is cheaper on the heap than off-heap.</p>
</li>
<li>
<p>Some operations are actually a bit faster on heap memory (or the JIT compiler understands them better).</p>
</li>
</ul>
<h2 id="the-off-heap-memory-implementation">
The off-heap Memory Implementation
<a class="anchor" href="#the-off-heap-memory-implementation">#</a>
</h2>
<p>Given that all memory intensive internal algorithms are already implemented against the <code>MemorySegment</code>, our implementation to switch to off-heap memory is actually trivial. You can compare it to replacing all <code>ByteBuffer.allocate(numBytes)</code> calls with <code>ByteBuffer.allocateDirect(numBytes)</code>. In Flink&rsquo;s case it meant that we made the <code>MemorySegment</code> abstract and added the <code>HeapMemorySegment</code> and <code>OffHeapMemorySegment</code> subclasses. The <code>OffHeapMemorySegment</code> takes the off-heap memory pointer from a <code>java.nio.DirectByteBuffer</code> and implements its specialized access methods using <code>sun.misc.Unsafe</code>. We also made a few adjustments to the startup scripts and the deployment code to make sure that the JVM is permitted enough off-heap memory (direct memory, <em>-XX:MaxDirectMemorySize</em>).</p>
<p>In practice we had to go one step further, to make the implementation perform well. While the <code>ByteBuffer</code> is used in I/O code paths to compose headers and move bulk memory into place, the MemorySegment is part of the innermost loops of many algorithms (sorting, hash tables, &hellip;). That means that the access methods have to be as fast as possible.</p>
<h2 id="understanding-the-jit-and-tuning-the-implementation">
Understanding the JIT and tuning the implementation
<a class="anchor" href="#understanding-the-jit-and-tuning-the-implementation">#</a>
</h2>
<p>The <code>MemorySegment</code> was (before our change) a standalone class, it was <em>final</em> (had no subclasses). Via <em>Class Hierarchy Analysis (CHA)</em>, the JIT compiler was able to determine that all of the accessor method calls go to one specific implementation. That way, all method calls can be perfectly de-virtualized and inlined, which is essential to performance, and the basis for all further optimizations (like vectorization of the calling loop).</p>
<p>With two different memory segments loaded at the same time, the JIT compiler cannot perform the same level of optimization any more, which results in a noticeable difference in performance: A slowdown of about 2.7 x in the following example:</p>
<pre tabindex="0"><code>Writing 100000 x 32768 bytes to 32768 bytes segment:
HeapMemorySegment (standalone) : 1,441 msecs
OffHeapMemorySegment (standalone) : 1,628 msecs
HeapMemorySegment (subclass) : 3,841 msecs
OffHeapMemorySegment (subclass) : 3,847 msecs
</code></pre><p>To get back to the original performance, we explored two approaches:</p>
<h3 id="approach-1-make-sure-that-only-one-memory-segment-implementation-is-ever-loaded">
Approach 1: Make sure that only one memory segment implementation is ever loaded.
<a class="anchor" href="#approach-1-make-sure-that-only-one-memory-segment-implementation-is-ever-loaded">#</a>
</h3>
<p>We re-structured the code a bit to make sure that all places that produce long-lived and short-lived memory segments instantiate the same MemorySegment subclass (Heap- or Off-Heap segment). Using factories rather than directly instantiating the memory segment classes, this was straightforward.</p>
<p>Experiments (see appendix) showed that the JIT compiler properly detects this (via hierarchy analysis) and that it can perform the same level of aggressive optimization as before, when there was only one <code>MemorySegment</code> class.</p>
<h3 id="approach-2-write-one-segment-that-handles-both-heap-and-off-heap-memory">
Approach 2: Write one segment that handles both heap and off-heap memory
<a class="anchor" href="#approach-2-write-one-segment-that-handles-both-heap-and-off-heap-memory">#</a>
</h3>
<p>We created a class <code>HybridMemorySegment</code> which handles transparently both heap- and off-heap memory. It can be initialized either with a byte array (heap memory), or with a pointer to a memory region outside the heap (off-heap memory).</p>
<p>Fortunately, there is a nice trick to do this without introducing code branches and specialized handling of the two different memory types. The trick is based on the way that the <code>sun.misc.Unsafe</code> methods interpret object references. To illustrate this, we take the method that gets a long integer from a memory position:</p>
<pre tabindex="0"><code>sun.misc.Unsafe.getLong(Object reference, long offset)
</code></pre><p>The method accepts an object reference, takes its memory address, and adds the offset to obtain a pointer. It then fetches the eight bytes at the address pointed to and interprets them as a long integer. Since the method accepts <em>null</em> as the reference (and interprets it as <em>zero</em>), one can write a method that fetches a long integer seamlessly from heap and off-heap memory as follows:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-java" data-lang="java"><span class="line"><span class="cl"><span class="kd">public</span><span class="w"> </span><span class="kd">class</span> <span class="nc">HybridMemorySegment</span><span class="w"> </span><span class="p">{</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="kd">private</span><span class="w"> </span><span class="kd">final</span><span class="w"> </span><span class="kt">byte</span><span class="o">[]</span><span class="w"> </span><span class="n">heapMemory</span><span class="p">;</span><span class="w"> </span><span class="c1">// non-null in heap case, null in off-heap case</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="kd">private</span><span class="w"> </span><span class="kd">final</span><span class="w"> </span><span class="kt">long</span><span class="w"> </span><span class="n">address</span><span class="p">;</span><span class="w"> </span><span class="c1">// may be absolute, or relative to byte[]</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="c1">// method of interest</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="kd">public</span><span class="w"> </span><span class="kt">long</span><span class="w"> </span><span class="nf">getLong</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">pos</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">UNSAFE</span><span class="p">.</span><span class="na">getLong</span><span class="p">(</span><span class="n">heapMemory</span><span class="p">,</span><span class="w"> </span><span class="n">address</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">pos</span><span class="p">);</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="p">}</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="c1">// initialize for heap memory</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="kd">public</span><span class="w"> </span><span class="nf">HybridMemorySegment</span><span class="p">(</span><span class="kt">byte</span><span class="o">[]</span><span class="w"> </span><span class="n">heapMemory</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">this</span><span class="p">.</span><span class="na">heapMemory</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">heapMemory</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">this</span><span class="p">.</span><span class="na">address</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">UNSAFE</span><span class="p">.</span><span class="na">arrayBaseOffset</span><span class="p">(</span><span class="kt">byte</span><span class="o">[]</span><span class="p">.</span><span class="na">class</span><span class="p">);</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="p">}</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="c1">// initialize for off-heap memory</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="kd">public</span><span class="w"> </span><span class="nf">HybridMemorySegment</span><span class="p">(</span><span class="kt">long</span><span class="w"> </span><span class="n">offheapPointer</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">this</span><span class="p">.</span><span class="na">heapMemory</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="kc">null</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">this</span><span class="p">.</span><span class="na">address</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">offheapPointer</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="p">}</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">}</span><span class="w">
</span></span></span></code></pre></div><p>To check whether both cases (heap and off-heap) really result in the same code paths (no hidden branches inside the <code>Unsafe.getLong(Object, long)</code> method) one can check out the C++ source code of <code>sun.misc.Unsafe</code>, available here: <a href="http://hg.openjdk.java.net/jdk8/jdk8/hotspot/file/tip/src/share/vm/prims/unsafe.cpp">http://hg.openjdk.java.net/jdk8/jdk8/hotspot/file/tip/src/share/vm/prims/unsafe.cpp</a></p>
<p>Of particular interest is the macro in line 155, which is the base of all GET methods. Tracing the function calls (many are no-ops), one can see that both variants of Unsafe’s <code>getLong()</code> result in the same code:
Either <code>0 + absolutePointer</code> or <code>objectRefAddress + offset</code>.</p>
<h2 id="summary">
Summary
<a class="anchor" href="#summary">#</a>
</h2>
<p>We ended up choosing a combination of both techniques:</p>
<ul>
<li>
<p>For off-heap memory, we use the <code>HybridMemorySegment</code> from approach (2) which can represent both heap and off-heap memory. That way, the same class represents the long-lived off-heap memory as the short-lived temporary buffers allocated (or wrapped) on the heap.</p>
</li>
<li>
<p>We follow approach (1) and use factories to make sure that only one segment implementation is ever loaded, which gives peak performance. We can exploit the performance benefits of the <code>HeapMemorySegment</code> on individual byte operations, and we have a mechanism in place to add further implementations of <code>MemorySegment</code> for the case that Oracle really removes <code>sun.misc.Unsafe</code> in future Java versions.</p>
</li>
</ul>
<p>The final code can be found in the Flink repository, under <a href="https://github.com/apache/flink/tree/master/flink-core/src/main/java/org/apache/flink/core/memory">https://github.com/apache/flink/tree/master/flink-core/src/main/java/org/apache/flink/core/memory</a></p>
<p>Detailed micro benchmarks are in the appendix. A summary of the findings is as follows:</p>
<ul>
<li>
<p>The <code>HybridMemorySegment</code> performs equally well in heap and off-heap memory, as is to be expected (the code paths are the same)</p>
</li>
<li>
<p>The <code>HeapMemorySegment</code> is quite a bit faster in reading individual bytes, not so much at writing them. Access to a <em>byte[]</em> is after all a bit cheaper than an invocation of a <code>sun.misc.Unsafe</code> method, even when JIT-ed.</p>
</li>
<li>
<p>The abstract class <code>MemorySegment</code> (with its subclasses <code>HeapMemorySegment</code> and <code>HybridMemorySegment</code>) performs as well as any specialized non-abstract class, as long as only one subclass is loaded. When both are loaded, performance may suffer by a factor of 2.7 x on certain operations.</p>
</li>
<li>
<p>How badly the performance degrades in cases where both MemorySegment subclasses are loaded seems to depend a lot on which subclass is loaded and operated on before and after which. Sometimes, performance is affected more than other times. It seems to be an artifact of the JIT’s code profiling and how heavily it performs optimistic specialization towards certain subclasses.</p>
</li>
</ul>
<p>There is still a bit of mystery left, specifically why sometimes code is faster when it performs more checks (has more instructions and an additional branch). Even though the branch is perfectly predictable, this seems counter-intuitive. The only explanation that we could come up with is that the branch optimizations (such as optimistic elimination etc) result in code that does better register allocation (for whatever reason, maybe the intermediate instructions just fit the allocation algorithm better).</p>
<h2 id="tldr">
tl;dr
<a class="anchor" href="#tldr">#</a>
</h2>
<ul>
<li>
<p>Off-heap memory in Flink complements the already very fast on-heap memory management. It improves the scalability to very large heap sizes and reduces memory copies for network and disk I/O.</p>
</li>
<li>
<p>Flink’s already present memory management infrastructure made the addition of off-heap memory simple. Off-heap memory is not only used for caching data, Flink can actually sort data off-heap and build hash tables off-heap.</p>
</li>
<li>
<p>We play a few nice tricks in the implementation to make sure the code is as friendly as possible to the JIT compiler and processor, so that the managed memory accesses are as fast as possible.</p>
</li>
<li>
<p>Understanding the JVM’s JIT compiler is tough - one needs a lot of (randomized) micro benchmarking to examine its behavior.</p>
</li>
</ul>
<hr>
<h2 id="appendix-detailed-micro-benchmarks">
Appendix: Detailed Micro Benchmarks
<a class="anchor" href="#appendix-detailed-micro-benchmarks">#</a>
</h2>
<p>These microbenchmarks test the performance of the different memory segment implementations on various operations.</p>
<p>Each experiment tests the different implementations multiple times in different orders, to balance the advantage/disadvantage of the JIT compiler specializing towards certain code paths. All experiments were run 5x, discarding the fastest and slowest run, and then averaged. This compensates for the delay before the JIT kicks in.</p>
<p>My setup:</p>
<ul>
<li>Oracle Java 8 (1.8.0_25)</li>
<li>4 GBytes JVM heap (the experiments need 1.4 GBytes Heap + 1 GBytes direct memory)</li>
<li>Intel Core i7-4700MQ CPU, 2.40GHz (4 cores, 8 hardware contexts)</li>
</ul>
<p>The tested implementations are</p>
<table class="table">
<thead>
<tr>
<th>Type</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code> <em>(exclusive)</em></td>
<td>The case where it is the only loaded MemorySegment subclass.</td>
</tr>
<tr>
<td><code>HeapMemorySegment</code> <em>(mixed)</em></td>
<td>The case where both the HeapMemorySegment and the HybridMemorySegment are loaded.</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code> <em>(heap-exclusive)</em></td>
<td>Backed by heap memory, and the case where it is the only loaded MemorySegment class.</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code> <em>(heap-mixed)</em></td>
<td>Backed by heap memory, and the case where both the HeapMemorySegment and the HybridMemorySegment are loaded.</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code> <em>(off-heap-exclusive)</em></td>
<td>Backed by off-heap memory, and the case where it is the only loaded MemorySegment class.</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code> <em>(off-heap-mixed)</em></td>
<td>Backed by off-heap memory, and the case where both the HeapMemorySegment and the HybridMemorySegment are loaded.</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>Has no class hierarchy and virtual methods at all.</td>
</tr>
<tr>
<td><code>PureHybridSegment</code> <em>(heap)</em></td>
<td>Has no class hierarchy and virtual methods at all, backed by heap memory.</td>
</tr>
<tr>
<td><code>PureHybridSegment</code> <em>(off-heap)</em></td>
<td>Has no class hierarchy and virtual methods at all, backed by off-heap memory.</td>
</tr>
</tbody>
</table>
<div class="small">
<h3 id="byte-accesses">Byte accesses</h3>
<p><strong>Writing 100000 x 32768 bytes to 32768 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, exclusive</td>
<td>1,441 msecs</td>
</tr>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>3,841 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, exclusive</td>
<td>1,626 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, exclusive</td>
<td>1,628 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>3,848 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>3,847 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>1,442 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>1,623 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>1,620 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Reading 100000 x 32768 bytes from 32768 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, exclusive</td>
<td>1,326 msecs</td>
</tr>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>1,378 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, exclusive</td>
<td>2,029 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, exclusive</td>
<td>2,030 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>2,047 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>2,049 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>1,331 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>2,030 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>2,030 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Writing 10 x 1073741824 bytes to 1073741824 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, exclusive</td>
<td>5,602 msecs</td>
</tr>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>12,570 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, exclusive</td>
<td>5,691 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, exclusive</td>
<td>5,691 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>12,566 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>12,556 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>5,599 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>5,687 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>5,681 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Reading 10 x 1073741824 bytes from 1073741824 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, exclusive</td>
<td>4,243 msecs</td>
</tr>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>4,265 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, exclusive</td>
<td>6,730 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, exclusive</td>
<td>6,725 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>6,933 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>6,926 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>4,247 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>6,919 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>6,916 msecs</td>
</tr>
</tbody>
</table>
<h3 id="byte-array-accesses">Byte Array accesses</h3>
<p><strong>Writing 100000 x 32 byte[1024] to 32768 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>164 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>163 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>163 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>165 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>182 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>176 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Reading 100000 x 32 byte[1024] from 32768 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>157 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>155 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>162 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>161 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>175 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>179 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Writing 10 x 1048576 byte[1024] to 1073741824 bytes segment</strong> </p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>1,164 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>1,173 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>1,157 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>1,169 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>1,174 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>1,166 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Reading 10 x 1048576 byte[1024] from 1073741824 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>854 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>853 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>854 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>857 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>896 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>887 msecs</td>
</tr>
</tbody>
</table>
<h3 id="long-integer-accesses">Long integer accesses</h3>
<p><em>(note that the heap and off-heap segments use the same or comparable code for this)</em></p>
<p><strong>Writing 100000 x 4096 longs to 32768 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>221 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>222 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>221 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>194 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>220 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>221 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Reading 100000 x 4096 longs from 32768 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>233 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>232 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>231 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>232 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>232 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>233 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Writing 10 x 134217728 longs to 1073741824 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>1,120 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>1,120 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>1,115 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>1,148 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>1,116 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>1,113 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Reading 10 x 134217728 longs from 1073741824 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>1,097 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>1,099 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>1,093 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>917 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>1,105 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>1,097 msecs</td>
</tr>
</tbody>
</table>
<h3 id="integer-accesses">Integer accesses</h3>
<p><em>(note that the heap and off-heap segments use the same or comparable code for this)</em></p>
<p><strong>Writing 100000 x 8192 ints to 32768 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>578 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>580 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>576 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>624 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>576 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>578 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Reading 100000 x 8192 ints from 32768 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>464 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>464 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>465 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>463 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>464 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>463 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Writing 10 x 268435456 ints to 1073741824 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>2,187 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>2,161 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>2,152 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>2,770 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>2,161 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>2,157 msecs</td>
</tr>
</tbody>
</table>
<p><strong>Reading 10 x 268435456 ints from 1073741824 bytes segment</strong></p>
<table class="table">
<thead>
<tr>
<th>Segment</th>
<th>Time</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>HeapMemorySegment</code>, mixed</td>
<td>1,782 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, heap, mixed</td>
<td>1,783 msecs</td>
</tr>
<tr>
<td><code>HybridMemorySegment</code>, off-heap, mixed</td>
<td>1,774 msecs</td>
</tr>
<tr>
<td><code>PureHeapSegment</code></td>
<td>1,501 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, heap</td>
<td>1,774 msecs</td>
</tr>
<tr>
<td><code>PureHybridSegment</code>, off-heap</td>
<td>1,771 msecs</td>
</tr>
</tbody>
</table>
</div>
</p>
</article>
<div class="edit-this-page">
<p>
<a href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Translation+Specifications">Want to contribute translation?</a>
</p>
<p>
<a href="//github.com/apache/flink-web/edit/asf-site/docs/content/posts/2015-09-16-off-heap-memory.md">
Edit This Page<i class="fa fa-edit fa-fw"></i>
</a>
</p>
</div>
</section>
<aside class="book-toc">
<nav id="TableOfContents"><h3>On This Page <a href="javascript:void(0)" class="toc" onclick="collapseToc()"><i class="fa fa-times" aria-hidden="true"></i></a></h3>
<ul>
<li>
<ul>
<li><a href="#recap-memory-management-in-flink">Recap: Memory Management in Flink</a></li>
<li><a href="#why-actually-bother-with-off-heap-memory">Why actually bother with off-heap memory?</a></li>
<li><a href="#the-off-heap-memory-implementation">The off-heap Memory Implementation</a></li>
<li><a href="#understanding-the-jit-and-tuning-the-implementation">Understanding the JIT and tuning the implementation</a>
<ul>
<li><a href="#approach-1-make-sure-that-only-one-memory-segment-implementation-is-ever-loaded">Approach 1: Make sure that only one memory segment implementation is ever loaded.</a></li>
<li><a href="#approach-2-write-one-segment-that-handles-both-heap-and-off-heap-memory">Approach 2: Write one segment that handles both heap and off-heap memory</a></li>
</ul>
</li>
<li><a href="#summary">Summary</a></li>
<li><a href="#tldr">tl;dr</a></li>
<li><a href="#appendix-detailed-micro-benchmarks">Appendix: Detailed Micro Benchmarks</a></li>
</ul>
</li>
</ul>
</nav>
</aside>
<aside class="expand-toc hidden">
<a class="toc" onclick="expandToc()" href="javascript:void(0)">
<i class="fa fa-bars" aria-hidden="true"></i>
</a>
</aside>
</main>
<footer>
<div class="separator"></div>
<div class="panels">
<div class="wrapper">
<div class="panel">
<ul>
<li>
<a href="https://flink-packages.org/">flink-packages.org</a>
</li>
<li>
<a href="https://www.apache.org/">Apache Software Foundation</a>
</li>
<li>
<a href="https://www.apache.org/licenses/">License</a>
</li>
<li>
<a href="/zh/">
<i class="fa fa-globe" aria-hidden="true"></i>&nbsp;中文版
</a>
</li>
</ul>
</div>
<div class="panel">
<ul>
<li>
<a href="/what-is-flink/security">Security</a>
</li>
<li>
<a href="https://www.apache.org/foundation/sponsorship.html">Donate</a>
</li>
<li>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</li>
</ul>
</div>
<div class="panel icons">
<div>
<a href="/posts">
<div class="icon flink-blog-icon"></div>
<span>Flink blog</span>
</a>
</div>
<div>
<a href="https://github.com/apache/flink">
<div class="icon flink-github-icon"></div>
<span>GitHub</span>
</a>
</div>
<div>
<a href="https://twitter.com/apacheflink">
<div class="icon flink-twitter-icon"></div>
<span>Twitter</span>
</a>
</div>
</div>
</div>
</div>
<hr/>
<div class="container disclaimer">
<p>The contents of this website are © 2024 Apache Software Foundation under the terms of the Apache License v2. Apache Flink, Flink, and the Flink logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</footer>
</body>
</html>