blob: 18ee8b0de7e67bc414a43492efbcc499cf99b2ca [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Apache Flink: Flink 2015: A year in review, and a lookout to 2016</title>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Bootstrap -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/flink.css">
<link rel="stylesheet" href="/css/syntax.css">
<!-- Blog RSS feed -->
<link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" />
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<!-- We need to load jQuery in the header for custom Google Analytics event tracking -->
<script src="/js/jquery.min.js"></script>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Main content. -->
<div class="container">
<div class="row">
<div id="sidebar" class="col-sm-3">
<!-- Top navbar. -->
<nav class="navbar navbar-default">
<!-- The logo. -->
<div class="navbar-header">
<!-- Collapse toggle for the sidebar navigation. The three icon bars carry no text,
     so a screen-reader-only label gives the button an accessible name;
     aria-expanded exposes the initial (collapsed) state. -->
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<div class="navbar-logo">
<a href="/">
<img alt="Apache Flink" src="/img/flink-header-logo.svg" width="147px" height="73px">
</a>
</div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-main">
<!-- First menu section explains to visitors what Flink is -->
<!-- What is Stream Processing? -->
<!--
<li><a href="/streamprocessing1.html">What is Stream Processing?</a></li>
-->
<!-- What is Flink? -->
<li><a href="/flink-architecture.html">What is Apache Flink?</a></li>
<!-- What is Stateful Functions? -->
<li><a href="/stateful-functions.html">What is Stateful Functions?</a></li>
<!-- Use cases -->
<li><a href="/usecases.html">Use Cases</a></li>
<!-- Powered by -->
<li><a href="/poweredby.html">Powered By</a></li>
&nbsp;
<!-- Second menu section aims to support Flink users -->
<!-- Downloads -->
<li><a href="/downloads.html">Downloads</a></li>
<!-- Getting Started -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Getting Started<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/getting-started/index.html" target="_blank">With Flink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1/getting-started/project-setup.html" target="_blank">With Flink Stateful Functions <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="/training.html">Training Course</a></li>
</ul>
</li>
<!-- Documentation -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10" target="_blank">Flink 1.10 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-docs-master" target="_blank">Flink Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1" target="_blank">Flink Stateful Functions 2.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-master" target="_blank">Flink Stateful Functions Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
</ul>
</li>
<!-- getting help -->
<li><a href="/gettinghelp.html">Getting Help</a></li>
<!-- Blog -->
<li class="active"><a href="/blog/"><b>Flink Blog</b></a></li>
<!-- Flink-packages -->
<li>
<a href="https://flink-packages.org" target="_blank">flink-packages.org <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Third menu section aims to support community and contributors -->
<!-- Community -->
<li><a href="/community.html">Community &amp; Project Info</a></li>
<!-- Roadmap -->
<li><a href="/roadmap.html">Roadmap</a></li>
<!-- Contribute -->
<li><a href="/contributing/how-to-contribute.html">How to Contribute</a></li>
<!-- GitHub -->
<li>
<a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Language Switcher -->
<li>
<!-- link to the Chinese home page when current is blog page -->
<a href="/zh">中文版</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-bottom">
<hr />
<!-- Twitter -->
<li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<!-- Visualizer -->
<li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<hr />
<li><a href="https://apache.org" target="_blank">Apache Software Foundation <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li>
<style>
/* Compact styling for the inline footer links (License, Security, Donate, Thanks)
   that share the "smalllinks" class in this list item. */
.smalllinks:link {
  display: inline-block !important;
  background: none;
  padding-top: 0px;
  padding-bottom: 0px;
  padding-right: 0px;
  min-width: 75px;
}
</style>
<a class="smalllinks" href="https://www.apache.org/licenses/" target="_blank">License</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/security/" target="_blank">Security</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Donate</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</nav>
</div>
<div class="col-sm-9">
<div class="row-fluid">
<div class="col-sm-12">
<div class="row">
<h1>Flink 2015: A year in review, and a lookout to 2016</h1>
<p><i></i></p>
<article>
<p>18 Dec 2015 by Robert Metzger (<a href="https://twitter.com/rmetzger_">@rmetzger_</a>)</p>
<p>With 2015 ending, we thought that this would be a good time to reflect
on the amazing work done by the Flink community over this past year,
and how much this community has grown.</p>
<p>Overall, we have seen Flink grow in terms of functionality from an
engine to one of the most complete open-source stream processing
frameworks available. The community grew from a relatively small and
geographically focused team to a truly global one, and one of the largest
big data communities in the Apache Software Foundation.</p>
<p>We will also look at some interesting stats, including that the
busiest days for Flink are Mondays (who would have thought :-).</p>
<h1 id="community-growth">Community growth</h1>
<p>Let us start with some simple statistics from <a href="https://github.com/apache/flink">Flink’s
GitHub repository</a>. During 2015, the
Flink community <strong>doubled</strong> in size, from about 75 contributors to
over 150. Forks of the repository more than <strong>tripled</strong> from 160 in
February 2015 to 544 in December 2015, and the number of stars of the
repository almost tripled from 289 to 813.</p>
<center>
<img src="/img/blog/community-growth.png" alt="Growth of the Flink community during 2015" style="height:400px;margin:15px" />
</center>
<p>Although Flink started out geographically in Berlin, Germany, the
community is by now spread all around the globe, with many
contributors from North America, Europe, and Asia. A simple search at
meetup.com for groups that mention Flink as a focus area reveals <a href="http://apache-flink.meetup.com/">16
meetups around the globe</a>:</p>
<center>
<img src="/img/blog/meetup-map.png" alt="Map of Flink meetups around the globe" style="height:400px;margin:15px" />
</center>
<h1 id="flink-forward-2015">Flink Forward 2015</h1>
<p>One of the highlights of the year for Flink was undoubtedly the <a href="http://2015.flink-forward.org/">Flink
Forward</a> conference, the first conference
on Apache Flink that was held in October in Berlin. More than 250
participants (roughly half based outside Germany where the conference
was held) attended more than 33 technical talks from organizations
including Google, MongoDB, Bouygues Telecom, NFLabs, Euranova, RedHat,
IBM, Huawei, Intel, Ericsson, Capital One, Zalando, Amadeus, the Otto
Group, and ResearchGate. If you have not yet watched their talks,
check out the <a href="http://2015.flink-forward.org/?post_type=day">slides</a> and
<a href="https://www.youtube.com/playlist?list=PLDX4T_cnKjD31JeWR1aMOi9LXPRQ6nyHO">videos</a>
from Flink Forward.</p>
<center>
<img src="/img/blog/ff-speakers.png" alt="Speakers at Flink Forward 2015" style="height:400px;margin:15px" />
</center>
<h1 id="media-coverage">Media coverage</h1>
<p>And of course, interest in Flink was picked up by the tech
media. During 2015, articles about Flink appeared in
<a href="http://www.infoq.com/Apache-Flink/news/">InfoQ</a>,
<a href="http://www.zdnet.com/article/five-open-source-big-data-projects-to-watch/">ZDNet</a>,
<a href="http://www.datanami.com/tag/apache-flink/">Datanami</a>,
<a href="http://www.infoworld.com/article/2919602/hadoop/flink-hadoops-new-contender-for-mapreduce-spark.html">Infoworld</a>
(including being one of the <a href="http://www.infoworld.com/article/2982429/open-source-tools/bossie-awards-2015-the-best-open-source-big-data-tools.html">best open source big data tools of
2015</a>),
the <a href="http://blogs.gartner.com/nick-heudecker/apache-flink-offers-a-challenge-to-spark/">Gartner
blog</a>,
<a href="http://dataconomy.com/tag/apache-flink/">Dataconomy</a>,
<a href="http://sdtimes.com/tag/apache-flink/">SDTimes</a>, the <a href="https://www.mapr.com/blog/apache-flink-new-way-handle-streaming-data">MapR
blog</a>,
<a href="http://www.kdnuggets.com/2015/08/apache-flink-stream-processing.html">KDnuggets</a>,
and
<a href="http://www.hadoopsphere.com/2015/02/distributed-data-processing-with-apache.html">HadoopSphere</a>.</p>
<center>
<img src="/img/blog/appeared-in.png" alt="Media outlets in which Flink appeared during 2015" style="height:400px;margin:15px" />
</center>
<p>It is interesting to see that Hadoop Summit EMEA 2016 had a whopping
17 (!) talks submitted that mention Flink in their
title and abstract:</p>
<center>
<img src="/img/blog/hadoop-summit.png" alt="Hadoop Summit EMEA 2016 talk submissions that mention Flink" style="height:400px;margin:15px" />
</center>
<h1 id="fun-with-stats-when-do-committers-commit">Fun with stats: when do committers commit?</h1>
<p>To get some deeper insight on what is happening in the Flink
community, let us do some analytics on the git log of the project :-)
The easiest thing we can do is count the number of commits at the
repository in 2015. Running</p>
<div class="highlight"><pre><code>git log --pretty=oneline --after=1/1/2015 | wc -l
</code></pre></div>
<p>on the Flink repository yields a total of <strong>2203 commits</strong> in 2015.</p>
<p>To dig deeper, we will use an open source tool called gitstats that
will give us some interesting statistics on the committer
behavior. You can also create these yourself and see many more by
following four easy steps:</p>
<ol>
<li>Download gitstats from the <a href="http://gitstats.sourceforge.net/">project homepage</a>. E.g., on OS X with homebrew, type</li>
</ol>
<div class="highlight"><pre><code>brew install --HEAD homebrew/head-only/gitstats
</code></pre></div>
<ol start="2">
<li>Clone the Apache Flink git repository:</li>
</ol>
<div class="highlight"><pre><code>git clone git@github.com:apache/flink.git
</code></pre></div>
<ol start="3">
<li>Generate the statistics</li>
</ol>
<div class="highlight"><pre><code>gitstats flink/ flink-stats/
</code></pre></div>
<ol start="4">
<li>View all the statistics as an html page using your favorite browser (e.g., chrome):</li>
</ol>
<div class="highlight"><pre><code>chrome flink-stats/index.html
</code></pre></div>
<p>First, we can see a steady growth of lines of code in Flink since the
initial Apache incubator project. During 2015, the codebase almost
<strong>doubled</strong> from 500,000 LOC to 900,000 LOC.</p>
<center>
<img src="/img/blog/code-growth.png" alt="Growth of lines of code in Flink over time" style="height:400px;margin:15px" />
</center>
<p>It is interesting to see when committers commit. For Flink, Monday
afternoons are by far the most popular times to commit to the
repository:</p>
<center>
<img src="/img/blog/commit-stats.png" alt="Statistics on when Flink committers commit" style="height:400px;margin:15px" />
</center>
<h1 id="feature-timeline">Feature timeline</h1>
<p>So, what were the major features added to Flink and the Flink
ecosystem during 2015? Here is a (non-exhaustive) chronological list:</p>
<center>
<img src="/img/blog/feature-timeline.png" alt="Timeline of major features added to Flink during 2015" style="height:400px;margin:15px" />
</center>
<h1 id="roadmap-for-2016">Roadmap for 2016</h1>
<p>With 2015 coming to a close, the Flink community has already started
discussing Flink’s roadmap for the future. Some highlights
are:</p>
<ul>
<li>
<p><strong>Runtime scaling of streaming jobs:</strong> streaming jobs are running
forever, and need to react to a changing environment. Runtime
scaling means dynamically increasing and decreasing the
parallelism of a job to sustain certain SLAs, or react to changing
input throughput.</p>
</li>
<li>
<p><strong>SQL queries for static data sets and streams:</strong> building on top of
Flink’s Table API, users should be able to write SQL
queries for static data sets, as well as SQL queries on data
streams that continuously produce new results.</p>
</li>
<li>
<p><strong>Streaming operators backed by managed memory:</strong> currently,
streaming operators like user-defined state and windows are backed
by JVM heap objects. Moving those to Flink managed memory will add
the ability to spill to disk, GC efficiency, as well as better
control over memory utilization.</p>
</li>
<li>
<p><strong>Library for detecting temporal event patterns:</strong> a common use case
for stream processing is detecting patterns in an event stream
with timestamps. Flink makes this possible with its support for
event time, so many of these operators can be surfaced in the form
of a library.</p>
</li>
<li>
<p><strong>Support for Apache Mesos, and resource-dynamic YARN support:</strong>
support for both Mesos and YARN, including dynamic allocation and
release of resources for more resource elasticity (for both batch
and stream processing).</p>
</li>
<li>
<p><strong>Security:</strong> encrypt both the messages exchanged between
TaskManagers and JobManager, as well as the connections for data
exchange between workers.</p>
</li>
<li>
<p><strong>More streaming connectors, more runtime metrics, and continuous
DataStream API enhancements:</strong> add support for more sources and
sinks (e.g., Amazon Kinesis, Cassandra, Flume, etc.), expose more
metrics to the user, and provide continuous improvements to the
DataStream API.</p>
</li>
</ul>
<p>If you are interested in these features, we highly encourage you to
take a look at the <a href="https://docs.google.com/document/d/1ExmtVpeVVT3TIhO1JoBpC5JKXm-778DAD7eqw5GANwE/edit">current
draft</a>,
and <a href="https://mail-archives.apache.org/mod_mbox/flink-dev/201512.mbox/browser">join the
discussion</a>
on the Flink mailing lists.</p>
</article>
</div>
<div class="row">
<div id="disqus_thread"></div>
<script type="text/javascript">
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname
/* * * DON'T EDIT BELOW THIS LINE * * */
(function() {
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
</div>
</div>
</div>
</div>
</div>
<hr />
<div class="row">
<div class="footer text-center col-sm-12">
<p>Copyright © 2014-2019 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
<p>Apache Flink, Flink®, Apache®, the squirrel logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p>
<p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
</div>
</div>
</div><!-- /.container -->
<!-- Include all compiled plugins (below), or include individual files as needed -->
<!-- Bootstrap JS is kept at the same version (3.4.1) as the Bootstrap stylesheet loaded in the document head -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.matchHeight/0.7.0/jquery.matchHeight-min.js"></script>
<script src="/js/codetabs.js"></script>
<script src="/js/stickysidebar.js"></script>
<!-- Google Analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-52545728-1', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>