blob: 297e3d03b0bfc83333805616203a918ca97c1d8f [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Encoding, IE rendering mode and viewport. These three meta tags have to stay ahead of all other head content. -->
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Apache Flink: Features</title>
  <!-- Favicon, declared under both rel spellings -->
  <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico">
  <link rel="icon" type="image/x-icon" href="/favicon.ico">
  <!-- Stylesheets: Bootstrap from the CDN first, then the site's own rules and the code-highlighting theme -->
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
  <link rel="stylesheet" href="/css/flink.css">
  <link rel="stylesheet" href="/css/syntax.css">
  <!-- Feed autodiscovery for the blog -->
  <link rel="alternate" type="application/rss+xml" href="/blog/feed.xml" title="Apache Flink Blog: RSS feed">
  <!-- IE8 and older only: HTML5 element shim plus Respond.js for media queries. -->
  <!-- Note that Respond.js does not work when the page is opened via file:// -->
  <!--[if lt IE 9]>
  <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
  <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
  <![endif]-->
</head>
<body>
<!-- Top navbar. -->
<nav class="navbar navbar-default navbar-fixed-top">
<div class="container">
<!-- The logo. -->
<div class="navbar-header">
  <!-- Collapse toggle for the link list (#bs-example-navbar-collapse-1). The three icon bars carry no text, -->
  <!-- so the visually hidden label gives the button an accessible name. -->
  <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false" aria-controls="bs-example-navbar-collapse-1">
    <span class="sr-only">Toggle navigation</span>
    <span class="icon-bar"></span>
    <span class="icon-bar"></span>
    <span class="icon-bar"></span>
  </button>
  <!-- Logo linking back to the site root. width/height are unit-less pixel counts, as HTML requires. -->
  <div class="navbar-logo">
    <a href="/"><img alt="Apache Flink" src="/img/navbar-brand-logo.jpg" width="78" height="40"></a>
  </div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav">
  <!-- Overview -->
  <li><a href="/index.html">Overview</a></li>
  <!-- Quickstart: links into the 0.9 release documentation -->
  <li class="dropdown">
    <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Quickstart <span class="caret"></span></a>
    <ul class="dropdown-menu" role="menu">
      <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/setup_quickstart.html">Setup</a></li>
      <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/java_api_quickstart.html">Java API</a></li>
      <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/scala_api_quickstart.html">Scala API</a></li>
      <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/run_example_quickstart.html">Run Step-by-Step Example</a></li>
    </ul>
  </li>
  <!-- Features (the current page, hence "active") -->
  <li class="active"><a href="/features.html">Features</a></li>
  <!-- Downloads -->
  <li><a href="/downloads.html">Downloads</a></li>
  <!-- Documentation. The toggle uses href="#" like the other dropdown toggles; an empty href resolves to the current page URL. -->
  <li class="dropdown">
    <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Documentation <span class="caret"></span></a>
    <ul class="dropdown-menu" role="menu">
      <!-- Latest stable release -->
      <li role="presentation" class="dropdown-header"><strong>Latest Release</strong> (Stable)</li>
      <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9">0.9.0 Documentation</a></li>
      <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/api/java" class="active">0.9.0 Javadocs</a></li>
      <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/api/scala/index.html" class="active">0.9.0 ScalaDocs</a></li>
      <!-- Snapshot docs -->
      <li class="divider"></li>
      <li role="presentation" class="dropdown-header"><strong>Snapshot</strong> (Development)</li>
      <li><a href="http://ci.apache.org/projects/flink/flink-docs-master">0.10 Documentation</a></li>
      <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/java" class="active">0.10 Javadocs</a></li>
      <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/scala/index.html" class="active">0.10 ScalaDocs</a></li>
      <!-- Wiki (external); the icon is decorative, so it is hidden from assistive technology -->
      <li class="divider"></li>
      <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window" aria-hidden="true"></span></small> Wiki</a></li>
    </ul>
  </li>
  <!-- FAQ -->
  <li><a href="/faq.html">FAQ</a></li>
</ul>
<ul class="nav navbar-nav navbar-right">
  <!-- Blog -->
  <li class="hidden-md hidden-sm"><a href="/blog/">Blog</a></li>
  <!-- Community dropdown: anchors into /community.html plus the contributor guides -->
  <li class="dropdown hidden-md hidden-sm">
    <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Community <span class="caret"></span></a>
    <ul class="dropdown-menu" role="menu">
      <!-- Community -->
      <li role="presentation" class="dropdown-header"><strong>Community</strong></li>
      <li><a href="/community.html#mailing-lists">Mailing Lists</a></li>
      <li><a href="/community.html#irc">IRC</a></li>
      <li><a href="/community.html#stack-overflow">Stack Overflow</a></li>
      <li><a href="/community.html#issue-tracker">Issue Tracker</a></li>
      <li><a href="/community.html#source-code">Source Code</a></li>
      <li><a href="/community.html#people">People</a></li>
      <!-- Contribute -->
      <li class="divider"></li>
      <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li>
      <li><a href="/how-to-contribute.html">How to Contribute</a></li>
      <li><a href="/coding-guidelines.html">Coding Guidelines</a></li>
    </ul>
  </li>
  <!-- Project dropdown. The glyphicons next to the external links are decorative, so they are hidden from assistive technology. -->
  <li class="dropdown hidden-md hidden-sm">
    <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Project <span class="caret"></span></a>
    <ul class="dropdown-menu" role="menu">
      <!-- Project -->
      <li role="presentation" class="dropdown-header"><strong>Project</strong></li>
      <li><a href="/material.html">Material</a></li>
      <li><a href="https://twitter.com/apacheflink"><small><span class="glyphicon glyphicon-new-window" aria-hidden="true"></span></small> Twitter</a></li>
      <li><a href="https://github.com/apache/flink"><small><span class="glyphicon glyphicon-new-window" aria-hidden="true"></span></small> GitHub</a></li>
      <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window" aria-hidden="true"></span></small> Wiki</a></li>
    </ul>
  </li>
</ul>
</div><!-- /.navbar-collapse -->
</div><!-- /.container -->
</nav>
<!-- Main content. -->
<div class="container">
<div class="row">
<div class="col-sm-10 col-sm-offset-1">
<!-- ============================================= -->
<!-- Streaming                                     -->
<!-- ============================================= -->
<!-- Centered group heading that introduces the streaming feature sections. -->
<hr>
<div class="row" style="padding: 0 0 0 0">
  <div class="col-sm-12" style="text-align: center;">
    <h1><b>Streaming</b></h1>
  </div>
</div>
<hr>
<!-- Feature "High Performance": full-width heading, lead sentence, then the performance figure -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="performance"><i>High Performance</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-12">
    <p class="lead">Flink's data streaming runtime achieves high throughput rates and low latencies with little configuration.</p>
  </div>
</div>
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12 img-column">
    <img alt="Performance of data streaming applications" src="/img/features/streaming_performance.png" style="width:75%">
  </div>
</div>
<hr>
<!-- Feature "Exactly-once Semantics": heading row, then text (left) beside the illustration (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="exactly_once"><i>Exactly-once Semantics for Stateful Computations</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-6">
    <p class="lead">Streaming applications can maintain custom state during their computation.</p>
    <p class="lead">Flink's checkpointing mechanism ensures <i>exactly once</i> semantics for the state in the presence of failures.</p>
  </div>
  <div class="col-sm-6 img-column">
    <img alt="Exactly-once Semantics for Stateful Computations" src="/img/features/exactly_once_state.png" style="width:50%">
  </div>
</div>
<hr>
<!-- Feature "Continuous Streaming Model": heading row, then text (left) beside the illustration (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="streaming_model"><i>Continuous Streaming Model with Flow Control</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-6">
    <p class="lead">Data streaming applications are executed with continuous (long lived) operators.</p>
    <p class="lead">Flink's streaming runtime has natural flow control: Slow downstream operators backpressure faster upstream operators.</p>
  </div>
  <div class="col-sm-6 img-column">
    <img alt="Continuous Streaming Model" src="/img/features/continuous_streams.png" style="width:60%">
  </div>
</div>
<hr>
<!-- Feature "Lightweight Distributed Snapshots": heading row, then text (left) beside the illustration (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="snapshots"><i>Fault-tolerance via Lightweight Distributed Snapshots</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-6">
    <p class="lead">Flink's fault tolerance mechanism is based on Chandy-Lamport distributed snapshots.</p>
    <p class="lead">The mechanism is lightweight, allowing the system to maintain high throughput rates and provide strong consistency guarantees at the same time.</p>
  </div>
  <div class="col-sm-6 img-column">
    <img alt="Lightweight Distributed Snapshots" src="/img/features/distributed_snapshots.png" style="width:40%">
  </div>
</div>
<hr>
<!-- ============================================= -->
<!-- Batch                                         -->
<!-- ============================================= -->
<!-- Centered group heading that introduces the batch/runtime feature sections. -->
<div class="row" style="padding: 0 0 0 0">
  <div class="col-sm-12" style="text-align: center;">
    <h1><b>Batch and Streaming in One System</b></h1>
  </div>
</div>
<hr>
<!-- Feature "One Runtime": heading row, then text (left) beside the illustration (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="one_runtime"><i>One Runtime for Streaming and Batch Processing</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-6">
    <p class="lead">Flink uses one common runtime for data streaming applications and batch processing applications.</p>
    <p class="lead">Batch processing applications run efficiently as special cases of stream processing applications.</p>
  </div>
  <div class="col-sm-6 img-column">
    <img alt="Unified Runtime for Batch and Stream Data Analysis" src="/img/features/one_runtime.png" style="width:50%">
  </div>
</div>
<hr>
<!-- Feature "Memory Management": heading row, then text (left) beside the illustration (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="memory_management"><i>Memory Management</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-6">
    <p class="lead">Flink implements its own memory management inside the JVM.</p>
    <p class="lead">Applications scale to data sizes beyond main memory and experience less garbage collection overhead.</p>
  </div>
  <div class="col-sm-6 img-column">
    <img alt="Managed JVM Heap" src="/img/features/memory_heap_division.png" style="width:50%">
  </div>
</div>
<hr>
<!-- Feature "Iterations": heading row, then text (left) beside the performance figure (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="iterations"><i>Iterations and Delta Iterations</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-6">
    <p class="lead">Flink has dedicated support for iterative computations (as in machine learning and graph analysis).</p>
    <p class="lead">Delta iterations can exploit computational dependencies for faster convergence.</p>
  </div>
  <div class="col-sm-6 img-column">
    <img alt="Performance of iterations and delta iterations" src="/img/features/iterations.png" style="width:75%">
  </div>
</div>
<hr>
<!-- Feature "Program Optimizer": heading row, then text (left) beside the illustration (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="optimizer"><i>Program Optimizer</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-6">
    <p class="lead">Batch programs are automatically optimized to exploit situations where expensive operations (like shuffles and sorts) can be avoided, and when intermediate data should be cached.</p>
  </div>
  <div class="col-sm-6 img-column">
    <img alt="Optimizer choosing between different execution strategies" src="/img/features/optimizer_choice.png" style="width:100%">
  </div>
</div>
<hr>
<!-- ============================================= -->
<!-- APIs and Libraries                            -->
<!-- ============================================= -->
<!-- Centered group heading that introduces the API and library sections. -->
<div class="row" style="padding: 0 0 0 0">
  <div class="col-sm-12" style="text-align: center;">
    <h1><b>APIs and Libraries</b></h1>
  </div>
</div>
<hr>
<!-- Batch Processing API: heading row, then description (left) beside a PageRank code sample (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="batch_api"><i>Batch Processing Applications</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-5">
    <p class="lead">Flink's <i>DataSet</i> API lets you write beautiful type-safe and maintainable code in Java or Scala. It supports a wide range of data types beyond key/value pairs, and a wealth of operators.</p>
    <p class="lead">The example shows the core loop of the PageRank algorithm for graphs.</p>
  </div>
  <div class="col-sm-7">
    <!-- Syntax-highlighted Scala sample. Whitespace inside <pre> is rendered verbatim, so the leading spaces -->
    <!-- on the lines below are the sample's own indentation. Field names follow the two case classes: -->
    <!-- the join matches Page.pageId against Adjacency.id, and the page's key is read as page.pageId. -->
    <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">case</span> <span class="k">class</span> <span class="nc">Page</span><span class="o">(</span><span class="n">pageId</span><span class="k">:</span> <span class="kt">Long</span><span class="o">,</span> <span class="n">rank</span><span class="k">:</span> <span class="kt">Double</span><span class="o">)</span>
<span class="k">case</span> <span class="k">class</span> <span class="nc">Adjacency</span><span class="o">(</span><span class="n">id</span><span class="k">:</span> <span class="kt">Long</span><span class="o">,</span> <span class="n">neighbors</span><span class="k">:</span> <span class="kt">Array</span><span class="o">[</span><span class="kt">Long</span><span class="o">])</span>
<span class="k">val</span> <span class="n">result</span> <span class="k">=</span> <span class="n">initialRanks</span><span class="o">.</span><span class="n">iterate</span><span class="o">(</span><span class="mi">30</span><span class="o">)</span> <span class="o">{</span> <span class="n">pages</span> <span class="k">=&gt;</span>
  <span class="n">pages</span><span class="o">.</span><span class="n">join</span><span class="o">(</span><span class="n">adjacency</span><span class="o">).</span><span class="n">where</span><span class="o">(</span><span class="s">&quot;pageId&quot;</span><span class="o">).</span><span class="n">equalTo</span><span class="o">(</span><span class="s">&quot;id&quot;</span><span class="o">)</span> <span class="o">{</span>
    <span class="o">(</span><span class="n">page</span><span class="o">,</span> <span class="n">adj</span><span class="o">,</span> <span class="n">out</span> <span class="k">:</span> <span class="kt">Collector</span><span class="o">[</span><span class="kt">Page</span><span class="o">])</span> <span class="k">=&gt;</span> <span class="o">{</span>
      <span class="n">out</span><span class="o">.</span><span class="n">collect</span><span class="o">(</span><span class="nc">Page</span><span class="o">(</span><span class="n">page</span><span class="o">.</span><span class="n">pageId</span><span class="o">,</span> <span class="mf">0.15</span> <span class="o">/</span> <span class="n">numPages</span><span class="o">))</span>
      <span class="k">for</span> <span class="o">(</span><span class="n">n</span> <span class="k">&lt;-</span> <span class="n">adj</span><span class="o">.</span><span class="n">neighbors</span><span class="o">)</span> <span class="o">{</span>
        <span class="n">out</span><span class="o">.</span><span class="n">collect</span><span class="o">(</span><span class="nc">Page</span><span class="o">(</span><span class="n">n</span><span class="o">,</span> <span class="mf">0.85</span><span class="o">*</span><span class="n">page</span><span class="o">.</span><span class="n">rank</span><span class="o">/</span><span class="n">adj</span><span class="o">.</span><span class="n">neighbors</span><span class="o">.</span><span class="n">length</span><span class="o">))</span>
      <span class="o">}</span>
    <span class="o">}</span>
  <span class="o">}</span>
  <span class="o">.</span><span class="n">groupBy</span><span class="o">(</span><span class="s">&quot;pageId&quot;</span><span class="o">).</span><span class="n">sum</span><span class="o">(</span><span class="s">&quot;rank&quot;</span><span class="o">)</span>
<span class="o">}</span></code></pre></div>
  </div>
</div>
<hr>
<!-- Data Streaming API: heading row, then description (left) beside a windowed word-count sample (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="streaming_api"><i>Streaming Data Applications</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-5">
    <p class="lead">The <i>DataStream</i> API supports functional transformations on data streams, with user-defined state, and flexible windows.</p>
    <p class="lead">The example shows how to compute a sliding histogram of word occurrences of a data stream of texts.</p>
  </div>
  <div class="col-sm-7">
    <p class="lead">WindowWordCount in Flink's DataStream API</p>
    <!-- Syntax-highlighted Scala sample. Whitespace inside <pre> is rendered verbatim, so the leading spaces -->
    <!-- on the lines below are the sample's own indentation. The pipeline starts from the declared value "texts". -->
    <div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">case</span> <span class="k">class</span> <span class="nc">Word</span><span class="o">(</span><span class="n">word</span><span class="k">:</span> <span class="kt">String</span><span class="o">,</span> <span class="n">freq</span><span class="k">:</span> <span class="kt">Long</span><span class="o">)</span>
<span class="k">val</span> <span class="n">texts</span><span class="k">:</span> <span class="kt">DataStream</span><span class="o">[</span><span class="kt">String</span><span class="o">]</span> <span class="k">=</span> <span class="o">...</span>
<span class="k">val</span> <span class="n">counts</span> <span class="k">=</span> <span class="n">texts</span>
  <span class="o">.</span><span class="n">flatMap</span> <span class="o">{</span> <span class="n">line</span> <span class="k">=&gt;</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="o">(</span><span class="s">&quot;\\W+&quot;</span><span class="o">)</span> <span class="o">}</span>
  <span class="o">.</span><span class="n">map</span> <span class="o">{</span> <span class="n">token</span> <span class="k">=&gt;</span> <span class="nc">Word</span><span class="o">(</span><span class="n">token</span><span class="o">,</span> <span class="mi">1</span><span class="o">)</span> <span class="o">}</span>
  <span class="o">.</span><span class="n">groupBy</span><span class="o">(</span><span class="s">&quot;word&quot;</span><span class="o">)</span>
  <span class="o">.</span><span class="n">window</span><span class="o">(</span><span class="nc">Time</span><span class="o">.</span><span class="n">of</span><span class="o">(</span><span class="mi">5</span><span class="o">,</span> <span class="nc">SECONDS</span><span class="o">)).</span><span class="n">every</span><span class="o">(</span><span class="nc">Time</span><span class="o">.</span><span class="n">of</span><span class="o">(</span><span class="mi">1</span><span class="o">,</span> <span class="nc">SECONDS</span><span class="o">))</span>
  <span class="o">.</span><span class="n">sum</span><span class="o">(</span><span class="s">&quot;freq&quot;</span><span class="o">)</span></code></pre></div>
  </div>
</div>
<hr>
<!-- Feature "Library Ecosystem": heading row, then text (left) beside the stack diagram (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="libraries"><i>Library Ecosystem</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-6">
    <p class="lead">Flink's stack offers libraries with high-level APIs for different use cases: Machine Learning, Graph Analytics, and Relational Data Processing.</p>
    <p class="lead">The libraries are currently in <i>beta</i> status and are heavily developed.</p>
  </div>
  <div class="col-sm-6 img-column">
    <img alt="Flink Stack with Libraries" src="/img/features/stack.png" style="width:60%">
  </div>
</div>
<hr>
<!-- ============================================= -->
<!-- Ecosystem                                     -->
<!-- ============================================= -->
<!-- Centered group heading that introduces the ecosystem section. -->
<div class="row" style="padding: 0 0 0 0">
  <div class="col-sm-12" style="text-align: center;">
    <h1><b>Ecosystem</b></h1>
  </div>
</div>
<hr>
<!-- Feature "Broad Integration": heading row, then text (left) beside the project logos (right) -->
<div class="row" style="padding: 0 0 2em 0">
  <div class="col-sm-12">
    <h1 id="ecosystem"><i>Broad Integration</i></h1>
  </div>
</div>
<div class="row">
  <div class="col-sm-6">
    <p class="lead">Flink is integrated with many other projects in the open-source data processing ecosystem.</p>
    <p class="lead">Flink runs on YARN, works with HDFS, streams data from Kafka, can execute Hadoop program code, and connects to various other data storage systems.</p>
  </div>
  <div class="col-sm-6 img-column">
    <img alt="Other projects that Flink is integrated with" src="/img/features/ecosystem_logos.png" style="width:75%">
  </div>
</div>
</div>
</div>
<hr>
<!-- Page footer: copyright line, trademark notice, and links to the privacy policy and blog feed -->
<div class="footer text-center">
  <p>Copyright © 2014-2015 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
  <p>Apache Flink, Apache, and the Apache feather logo are trademarks of The Apache Software Foundation.</p>
  <p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
</div>
</div><!-- /.container -->
<!-- jQuery (necessary for Bootstrap's JavaScript plugins, so it is loaded first) -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
<!-- Bootstrap: all compiled plugins in one file (individual plugin files could be included instead) -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
<!-- Site-local script -->
<script src="/js/codetabs.js"></script>
<!-- Google Analytics -->
<!-- The loader defines window.ga as a stub that queues its arguments, then injects analytics.js as an async script. -->
<!-- The library URL is an explicit https:// URL rather than protocol-relative, so it never inherits the page's scheme -->
<!-- (plain http, or file:// when the page is opened from disk). -->
<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
  ga('create', 'UA-52545728-1', 'auto');
  ga('send', 'pageview');
</script>
</body>
</html>