blob: b340c1a196f34bd2fcbe1b7abed25d5de6f49fcd [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Apache Flink: Announcing Flink 0.9.0-milestone1 preview release</title>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Bootstrap -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/flink.css">
<link rel="stylesheet" href="/css/syntax.css">
<!-- Blog RSS feed -->
<link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" />
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Top navbar. -->
<nav class="navbar navbar-default navbar-fixed-top">
<div class="container">
<!-- The logo. -->
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-label="Toggle navigation">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<div class="navbar-logo">
<a href="/"><img alt="Apache Flink" src="/img/navbar-brand-logo.jpg" width="78" height="40"></a>
</div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav">
<!-- Overview -->
<li><a href="/index.html">Overview</a></li>
<!-- Quickstart -->
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Quickstart <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/setup_quickstart.html">Setup</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/java_api_quickstart.html">Java API</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/scala_api_quickstart.html">Scala API</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/run_example_quickstart.html">Run Step-by-Step Example</a></li>
</ul>
</li>
<!-- Features -->
<li><a href="/features.html">Features</a></li>
<!-- Downloads -->
<li><a href="/downloads.html">Downloads</a></li>
<!-- Documentation -->
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Documentation <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<!-- Latest stable release -->
<li role="presentation" class="dropdown-header"><strong>Latest Release</strong> (Stable)</li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9">0.9.0 Documentation</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/api/java" class="active">0.9.0 Javadocs</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/api/scala/index.html" class="active">0.9.0 ScalaDocs</a></li>
<!-- Snapshot docs -->
<li class="divider"></li>
<li role="presentation" class="dropdown-header"><strong>Snapshot</strong> (Development)</li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-master">0.10 Documentation</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/java" class="active">0.10 Javadocs</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/scala/index.html" class="active">0.10 ScalaDocs</a></li>
<!-- Wiki -->
<li class="divider"></li>
<li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li>
</ul>
</li>
<!-- FAQ -->
<li><a href="/faq.html">FAQ</a></li>
</ul>
<ul class="nav navbar-nav navbar-right">
<!-- Blog -->
<li class=" active hidden-md hidden-sm"><a href="/blog/">Blog</a></li>
<li class="dropdown hidden-md hidden-sm">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Community <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<!-- Community -->
<li role="presentation" class="dropdown-header"><strong>Community</strong></li>
<li><a href="/community.html#mailing-lists">Mailing Lists</a></li>
<li><a href="/community.html#irc">IRC</a></li>
<li><a href="/community.html#stack-overflow">Stack Overflow</a></li>
<li><a href="/community.html#issue-tracker">Issue Tracker</a></li>
<li><a href="/community.html#source-code">Source Code</a></li>
<li><a href="/community.html#people">People</a></li>
<!-- Contribute -->
<li class="divider"></li>
<li role="presentation" class="dropdown-header"><strong>Contribute</strong></li>
<li><a href="/how-to-contribute.html">How to Contribute</a></li>
<li><a href="/coding-guidelines.html">Coding Guidelines</a></li>
</ul>
</li>
<li class="dropdown hidden-md hidden-sm">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Project <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<!-- Project -->
<li role="presentation" class="dropdown-header"><strong>Project</strong></li>
<li><a href="/material.html">Material</a></li>
<li><a href="https://twitter.com/apacheflink"><small><span class="glyphicon glyphicon-new-window"></span></small> Twitter</a></li>
<li><a href="https://github.com/apache/flink"><small><span class="glyphicon glyphicon-new-window"></span></small> GitHub</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li>
</ul>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</div><!-- /.container -->
</nav>
<!-- Main content. -->
<div class="container">
<div class="row">
<div class="col-sm-8 col-sm-offset-2">
<div class="row">
<h1>Announcing Flink 0.9.0-milestone1 preview release</h1>
<article>
<p>13 Apr 2015</p>
<p>The Apache Flink community is pleased to announce the availability of
the 0.9.0-milestone-1 release. The release is a preview of the
upcoming 0.9.0 release. It contains many new features which will be
available in the upcoming 0.9 release. Interested users are encouraged
to try it out and give feedback. As the version number indicates, this
release is a preview release that contains known issues.</p>
<p>You can download the release
<a href="http://flink.apache.org/downloads.html#preview">here</a> and check out the
latest documentation
<a href="http://ci.apache.org/projects/flink/flink-docs-master/">here</a>. Feedback
through the Flink <a href="http://flink.apache.org/community.html#mailing-lists">mailing
lists</a> is, as
always, very welcome!</p>
<h2 id="new-features">New Features</h2>
<h3 id="table-api">Table API</h3>
<p>Flink’s new Table API offers a higher-level abstraction for
interacting with structured data sources. The Table API allows users
to execute logical, SQL-like queries on distributed data sets while
allowing them to freely mix declarative queries with regular Flink
operators. Here is an example that groups and joins two tables:</p>
<div class="highlight"><pre><code class="language-scala"><span class="k">val</span> <span class="n">clickCounts</span> <span class="k">=</span> <span class="n">clicks</span>
<span class="o">.</span><span class="n">groupBy</span><span class="o">(</span><span class="-Symbol">&#39;user</span><span class="o">).</span><span class="n">select</span><span class="o">(</span><span class="-Symbol">&#39;userId</span><span class="o">,</span> <span class="-Symbol">&#39;url</span><span class="o">.</span><span class="n">count</span> <span class="n">as</span> <span class="-Symbol">&#39;count</span><span class="o">)</span>
<span class="k">val</span> <span class="n">activeUsers</span> <span class="k">=</span> <span class="n">users</span><span class="o">.</span><span class="n">join</span><span class="o">(</span><span class="n">clickCounts</span><span class="o">)</span>
<span class="o">.</span><span class="n">where</span><span class="o">(</span><span class="-Symbol">&#39;id</span> <span class="o">===</span> <span class="-Symbol">&#39;userId</span> <span class="o">&amp;&amp;</span> <span class="-Symbol">&#39;count</span> <span class="o">&gt;</span> <span class="mi">10</span><span class="o">).</span><span class="n">select</span><span class="o">(</span><span class="-Symbol">&#39;username</span><span class="o">,</span> <span class="-Symbol">&#39;count</span><span class="o">,</span> <span class="o">...)</span></code></pre></div>
<p>Tables consist of logical attributes that can be selected by name
rather than physical Java and Scala data types. This alleviates a lot
of boilerplate code for common ETL tasks and raises the abstraction
for Flink programs. Tables are available for both static and streaming
data sources (DataSet and DataStream APIs).</p>
<p>Check out the Table guide for Java and Scala
<a href="http://ci.apache.org/projects/flink/flink-docs-master/libs/table.html">here</a>.</p>
<h3 id="gelly-graph-processing-api">Gelly Graph Processing API</h3>
<p>Gelly is a Java Graph API for Flink. It contains a set of utilities
for graph analysis, support for iterative graph processing and a
library of graph algorithms. Gelly exposes a Graph data structure that
wraps DataSets for vertices and edges, as well as methods for creating
graphs from DataSets, graph transformations and utilities (e.g., in-
and out-degrees of vertices), neighborhood aggregations, iterative
vertex-centric graph processing, as well as a library of common graph
algorithms, including PageRank, SSSP, label propagation, and community
detection.</p>
<p>Gelly internally builds on top of Flink’s <a href="http://ci.apache.org/projects/flink/flink-docs-master/apis/iterations.html">delta
iterations</a>. Iterative
graph algorithms are executed leveraging mutable state, achieving
performance similar to that of specialized graph processing systems.</p>
<p>Gelly will eventually subsume Spargel, Flink’s Pregel-like API. Check
out the Gelly guide
<a href="http://ci.apache.org/projects/flink/flink-docs-master/libs/gelly_guide.html">here</a>.</p>
<h3 id="flink-machine-learning-library">Flink Machine Learning Library</h3>
<p>This release includes the first version of Flink’s Machine Learning
library. The library’s pipeline approach, which has been strongly
inspired by scikit-learn’s abstraction of transformers and estimators,
makes it easy to quickly set up a data processing pipeline and to get
your job done.</p>
<p>Flink distinguishes between transformers and learners. Transformers
are components which transform your input data into a new format
allowing you to extract features, cleanse your data or to sample from
it. Learners on the other hand constitute the components which take
your input data and train a model on it. The model you obtain from the
learner can then be evaluated and used to make predictions on unseen
data.</p>
<p>Currently, the machine learning library contains transformers and
learners to do multiple tasks. The library supports multiple linear
regression using a stochastic gradient implementation to scale to
large data sizes. Furthermore, it includes an alternating least
squares (ALS) implementation to factorize large matrices. The matrix
factorization can be used to do collaborative filtering. An
implementation of the communication efficient distributed dual
coordinate ascent (CoCoA) algorithm is the latest addition to the
library. The CoCoA algorithm can be used to train distributed
soft-margin SVMs.</p>
<h3 id="flink-on-yarn-leveraging-apache-tez">Flink on YARN leveraging Apache Tez</h3>
<p>We are introducing a new execution mode for Flink to be able to run
restricted Flink programs on top of <a href="http://tez.apache.org">Apache
Tez</a>. This mode retains Flink’s APIs,
optimizer, as well as Flink’s runtime operators, but instead of
wrapping those in Flink tasks that are executed by Flink TaskManagers,
it wraps them in Tez runtime tasks and builds a Tez DAG that
represents the program.</p>
<p>By using Flink on Tez, users have an additional choice for an
execution platform for Flink programs. While Flink’s distributed
runtime favors low latency, streaming shuffles, and iterative
algorithms, Tez focuses on scalability and elastic resource usage in
shared YARN clusters.</p>
<p>Get started with Flink on Tez
<a href="http://ci.apache.org/projects/flink/flink-docs-master/setup/flink_on_tez.html">here</a>.</p>
<h3 id="reworked-distributed-runtime-on-akka">Reworked Distributed Runtime on Akka</h3>
<p>Flink’s RPC system has been replaced by the widely adopted
<a href="http://akka.io">Akka</a> framework. Akka’s concurrency model offers the
right abstraction to develop a fast as well as robust distributed
system. By using Akka’s own failure detection mechanism the stability
of Flink’s runtime is significantly improved, because the system can
now react in proper form to node outages. Furthermore, Akka improves
Flink’s scalability by introducing asynchronous messages to the
system. These asynchronous messages allow Flink to be run on many more
nodes than before.</p>
<h3 id="exactly-once-processing-on-kafka-streaming-sources">Exactly-once processing on Kafka Streaming Sources</h3>
<p>This release introduces stream processing with exactly-once delivery
guarantees for Flink streaming programs that analyze streaming sources
that are persisted by <a href="http://kafka.apache.org">Apache Kafka</a>. The
system is internally tracking the Kafka offsets to ensure that Flink
can pick up data from Kafka where it left off in case of a failure.</p>
<p>Read
<a href="http://ci.apache.org/projects/flink/flink-docs-master/apis/streaming_guide.html#apache-kafka">here</a>
on how to use the persistent Kafka source.</p>
<h3 id="improved-yarn-support">Improved YARN support</h3>
<p>Flink’s YARN client contains several improvements, such as a detached
mode for starting a YARN session in the background, the ability to
submit a single Flink job to a YARN cluster without starting a
session, including a “fire and forget” mode. Flink is now also able to
reallocate failed YARN containers to maintain the size of the
requested cluster. This feature allows users to implement fault-tolerant
setups on top of YARN. There is also an internal Java API to deploy
and control a running YARN cluster. This is being used by system
integrators to easily control Flink on YARN within their Hadoop 2
cluster.</p>
<p>See the YARN docs
<a href="http://ci.apache.org/projects/flink/flink-docs-master/setup/yarn_setup.html">here</a>.</p>
<h2 id="more-improvements-and-fixes">More Improvements and Fixes</h2>
<ul>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1605">FLINK-1605</a>:
Flink is not exposing its Guava and ASM dependencies to Maven
projects depending on Flink. We use the maven-shade-plugin to
relocate these dependencies into our own namespace. This allows
users to use any Guava or ASM version.</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1417">FLINK-1417</a>:
Automatic recognition and registration of Java Types at Kryo and the
internal serializers: Flink has its own type handling and
serialization framework falling back to Kryo for types that it cannot
handle. To get the best performance Flink is automatically registering
all types a user is using in their program with Kryo. Flink also
registers serializers for Protocol Buffers, Thrift, Avro and Joda-Time
automatically. Users can also manually register serializers with Kryo
(<a href="https://issues.apache.org/jira/browse/FLINK-1399">FLINK-1399</a>).</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1296">FLINK-1296</a>: Add
support for sorting very large records</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1679">FLINK-1679</a>:
“degreeOfParallelism” methods renamed to “parallelism”</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1501">FLINK-1501</a>: Add
metrics library for monitoring TaskManagers</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1760">FLINK-1760</a>: Add
support for building Flink with Scala 2.11</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1648">FLINK-1648</a>: Add
a mode where the system automatically sets the parallelism to the
available task slots</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1622">FLINK-1622</a>: Add
groupCombine operator</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1589">FLINK-1589</a>: Add
option to pass Configuration to LocalExecutor</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1504">FLINK-1504</a>: Add
support for accessing secured HDFS clusters in standalone mode</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1478">FLINK-1478</a>: Add
strictly local input split assignment</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1512">FLINK-1512</a>: Add
CsvReader for reading into POJOs.</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1461">FLINK-1461</a>: Add
sortPartition operator</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1450">FLINK-1450</a>: Add
Fold operator to the Streaming api</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1389">FLINK-1389</a>:
Allow setting custom file extensions for files created by the
FileOutputFormat</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1236">FLINK-1236</a>: Add
support for localization of Hadoop Input Splits</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1179">FLINK-1179</a>: Add
button to JobManager web interface to request stack trace of a
TaskManager</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1105">FLINK-1105</a>: Add
support for locally sorted output</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1688">FLINK-1688</a>: Add
socket sink</p>
</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/FLINK-1436">FLINK-1436</a>:
Improve usability of command line interface</p>
</li>
</ul>
</article>
</div>
<div class="row">
<div id="disqus_thread"></div>
<script type="text/javascript">
/* Disqus comment-thread loader. */
// Forum shortname used to build the embed URL below. Declared with `var` at
// the top level of the script, so it is a global; kept that way in case the
// downloaded embed script reads it (NOTE(review): confirm against Disqus docs).
var disqus_shortname = 'stratosphere-eu';
(function () {
  // Build an async <script> element pointing at this forum's embed.js.
  var dsq = document.createElement('script');
  dsq.async = true;
  // Use an explicit https:// scheme instead of a protocol-relative URL, so the
  // script is always fetched over TLS and still resolves when the page is
  // opened from a non-http(s) origin such as file://.
  dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
  // Append to <head> when one exists, otherwise fall back to <body>.
  (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
</div>
</div>
</div>
<hr />
<div class="footer text-center">
<p>Copyright © 2014-2015 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
<p>Apache Flink, Apache, and the Apache feather logo are trademarks of The Apache Software Foundation.</p>
<p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
</div>
</div><!-- /.container -->
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
<!-- Include all compiled plugins (below), or include individual files as needed -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
<script src="/js/codetabs.js"></script>
<!-- Google Analytics -->
<script>
// Google Analytics loader. Records the global name 'ga' in
// window.GoogleAnalyticsObject, installs a stub window.ga (unless one already
// exists) that pushes each call's arguments onto ga.q, stamps the current time
// in ga.l, and inserts an async <script> for analytics.js before the first
// <script> element in the document.
// The script URL uses an explicit https:// scheme rather than a
// protocol-relative one, so it is always fetched over TLS and still resolves
// when the page is opened from a non-http(s) origin such as file://.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Queue a 'create' command for property UA-52545728-1 with the 'auto' setting.
ga('create', 'UA-52545728-1', 'auto');
// Queue a 'send' command of type 'pageview' for this page load.
ga('send', 'pageview');
</script>
</body>
</html>