| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> |
| <title>Apache Flink: Announcing Flink 0.9.0-milestone1 preview release</title> |
| <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> |
| <link rel="icon" href="/favicon.ico" type="image/x-icon"> |
| |
| <!-- Bootstrap --> |
| <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> |
| <link rel="stylesheet" href="/css/flink.css"> |
| <link rel="stylesheet" href="/css/syntax.css"> |
| |
| <!-- Blog RSS feed --> |
| <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> |
| |
| <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> |
| <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> |
| <!--[if lt IE 9]> |
| <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> |
| <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> |
| <![endif]--> |
| </head> |
| <body> |
| |
| |
| <!-- Top navbar. --> |
| <nav class="navbar navbar-default navbar-fixed-top"> |
| <div class="container"> |
| <!-- The logo. --> |
| <div class="navbar-header"> |
| <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| </button> |
| <div class="navbar-logo"> |
| <a href="/"><img alt="Apache Flink" src="/img/navbar-brand-logo.jpg" width="78" height="40"></a> |
| </div> |
| </div><!-- /.navbar-header --> |
| |
| <!-- The navigation links. --> |
| <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> |
| <ul class="nav navbar-nav"> |
| <!-- Overview --> |
| <li><a href="/index.html">Overview</a></li> |
| |
| <!-- Quickstart --> |
| <li class="dropdown"> |
| <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Quickstart <span class="caret"></span></a> |
| <ul class="dropdown-menu" role="menu"> |
| <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/setup_quickstart.html">Setup</a></li> |
| <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/java_api_quickstart.html">Java API</a></li> |
| <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/scala_api_quickstart.html">Scala API</a></li> |
| <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/run_example_quickstart.html">Run Step-by-Step Example</a></li> |
| </ul> |
| </li> |
| |
| <!-- Features --> |
| <li><a href="/features.html">Features</a></li> |
| |
| <!-- Downloads --> |
| <li><a href="/downloads.html">Downloads</a></li> |
| |
| <!-- Documentation --> |
| <li class="dropdown"> |
| <a href="" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Documentation <span class="caret"></span></a> |
| <ul class="dropdown-menu" role="menu"> |
| <!-- Latest stable release --> |
| <li role="presentation" class="dropdown-header"><strong>Latest Release</strong> (Stable)</li> |
| <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9">0.9.0 Documentation</a></li> |
| <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/api/java" class="active">0.9.0 Javadocs</a></li> |
| <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/api/scala/index.html" class="active">0.9.0 ScalaDocs</a></li> |
| |
| <!-- Snapshot docs --> |
| <li class="divider"></li> |
| <li role="presentation" class="dropdown-header"><strong>Snapshot</strong> (Development)</li> |
| <li><a href="http://ci.apache.org/projects/flink/flink-docs-master">0.10 Documentation</a></li> |
| <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/java" class="active">0.10 Javadocs</a></li> |
| <li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/scala/index.html" class="active">0.10 ScalaDocs</a></li> |
| |
| <!-- Wiki --> |
| <li class="divider"></li> |
| <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li> |
| </ul> |
| </li> |
| |
| <!-- FAQ --> |
| <li><a href="/faq.html">FAQ</a></li> |
| </ul> |
| |
| <ul class="nav navbar-nav navbar-right"> |
| <!-- Blog --> |
| <li class=" active hidden-md hidden-sm"><a href="/blog/">Blog</a></li> |
| |
| <li class="dropdown hidden-md hidden-sm"> |
| <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Community <span class="caret"></span></a> |
| <ul class="dropdown-menu" role="menu"> |
| <!-- Community --> |
| <li role="presentation" class="dropdown-header"><strong>Community</strong></li> |
| <li><a href="/community.html#mailing-lists">Mailing Lists</a></li> |
| <li><a href="/community.html#irc">IRC</a></li> |
| <li><a href="/community.html#stack-overflow">Stack Overflow</a></li> |
| <li><a href="/community.html#issue-tracker">Issue Tracker</a></li> |
| <li><a href="/community.html#source-code">Source Code</a></li> |
| <li><a href="/community.html#people">People</a></li> |
| |
| <!-- Contribute --> |
| <li class="divider"></li> |
| <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li> |
| <li><a href="/how-to-contribute.html">How to Contribute</a></li> |
| <li><a href="/coding-guidelines.html">Coding Guidelines</a></li> |
| </ul> |
| </li> |
| |
| <li class="dropdown hidden-md hidden-sm"> |
| <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Project <span class="caret"></span></a> |
| <ul class="dropdown-menu" role="menu"> |
| <!-- Project --> |
| <li role="presentation" class="dropdown-header"><strong>Project</strong></li> |
| <li><a href="/material.html">Material</a></li> |
| <li><a href="https://twitter.com/apacheflink"><small><span class="glyphicon glyphicon-new-window"></span></small> Twitter</a></li> |
| <li><a href="https://github.com/apache/flink"><small><span class="glyphicon glyphicon-new-window"></span></small> GitHub</a></li> |
| <li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li> |
| </ul> |
| </li> |
| </ul> |
| </div><!-- /.navbar-collapse --> |
| </div><!-- /.container --> |
| </nav> |
| |
| |
| <!-- Main content. --> |
| <div class="container"> |
| |
| |
| <div class="row"> |
| <div class="col-sm-8 col-sm-offset-2"> |
| <div class="row"> |
| <h1>Announcing Flink 0.9.0-milestone1 preview release</h1> |
| |
| <article> |
| <p>13 Apr 2015</p> |
| |
| <p>The Apache Flink community is pleased to announce the availability of |
| the 0.9.0-milestone-1 release. The release is a preview of the |
| upcoming 0.9.0 release. It contains many new features which will be |
| available in the upcoming 0.9 release. Interested users are encouraged |
| to try it out and give feedback. As the version number indicates, this |
| release is a preview release that contains known issues.</p> |
| |
| <p>You can download the release |
| <a href="http://flink.apache.org/downloads.html#preview">here</a> and check out the |
| latest documentation |
| <a href="http://ci.apache.org/projects/flink/flink-docs-master/">here</a>. Feedback |
| through the Flink <a href="http://flink.apache.org/community.html#mailing-lists">mailing |
| lists</a> is, as |
| always, very welcome!</p> |
| |
| <h2 id="new-features">New Features</h2> |
| |
| <h3 id="table-api">Table API</h3> |
| |
| <p>Flink’s new Table API offers a higher-level abstraction for |
| interacting with structured data sources. The Table API allows users |
| to execute logical, SQL-like queries on distributed data sets while |
| allowing them to freely mix declarative queries with regular Flink |
| operators. Here is an example that groups and joins two tables:</p> |
| |
| <div class="highlight"><pre><code class="language-scala"><span class="k">val</span> <span class="n">clickCounts</span> <span class="k">=</span> <span class="n">clicks</span> |
| <span class="o">.</span><span class="n">groupBy</span><span class="o">(</span><span class="-Symbol">'user</span><span class="o">).</span><span class="n">select</span><span class="o">(</span><span class="-Symbol">'userId</span><span class="o">,</span> <span class="-Symbol">'url</span><span class="o">.</span><span class="n">count</span> <span class="n">as</span> <span class="-Symbol">'count</span><span class="o">)</span> |
| |
| <span class="k">val</span> <span class="n">activeUsers</span> <span class="k">=</span> <span class="n">users</span><span class="o">.</span><span class="n">join</span><span class="o">(</span><span class="n">clickCounts</span><span class="o">)</span> |
| <span class="o">.</span><span class="n">where</span><span class="o">(</span><span class="-Symbol">'id</span> <span class="o">===</span> <span class="-Symbol">'userId</span> <span class="o">&amp;&amp;</span> <span class="-Symbol">'count</span> <span class="o">&gt;</span> <span class="mi">10</span><span class="o">).</span><span class="n">select</span><span class="o">(</span><span class="-Symbol">'username</span><span class="o">,</span> <span class="-Symbol">'count</span><span class="o">,</span> <span class="o">...)</span></code></pre></div> |
| |
| <p>Tables consist of logical attributes that can be selected by name |
| rather than physical Java and Scala data types. This alleviates a lot |
| of boilerplate code for common ETL tasks and raises the abstraction |
| for Flink programs. Tables are available for both static and streaming |
| data sources (DataSet and DataStream APIs).</p> |
| |
| <p>Check out the Table guide for Java and Scala |
| <a href="http://ci.apache.org/projects/flink/flink-docs-master/libs/table.html">here</a>.</p> |
| |
| <h3 id="gelly-graph-processing-api">Gelly Graph Processing API</h3> |
| |
| <p>Gelly is a Java Graph API for Flink. It contains a set of utilities |
| for graph analysis, support for iterative graph processing and a |
| library of graph algorithms. Gelly exposes a Graph data structure that |
| wraps DataSets for vertices and edges, as well as methods for creating |
| graphs from DataSets, graph transformations and utilities (e.g., in- |
| and out-degrees of vertices), neighborhood aggregations, iterative |
| vertex-centric graph processing, as well as a library of common graph |
| algorithms, including PageRank, SSSP, label propagation, and community |
| detection.</p> |
| |
| <p>Gelly internally builds on top of Flink’s <a href="http://ci.apache.org/projects/flink/flink-docs-master/apis/iterations.html">delta |
| iterations</a>. Iterative |
| graph algorithms are executed leveraging mutable state, achieving |
| performance similar to that of specialized graph processing systems.</p> |
| |
| <p>Gelly will eventually subsume Spargel, Flink’s Pregel-like API. Check |
| out the Gelly guide |
| <a href="http://ci.apache.org/projects/flink/flink-docs-master/libs/gelly_guide.html">here</a>.</p> |
| |
| <h3 id="flink-machine-learning-library">Flink Machine Learning Library</h3> |
| |
| <p>This release includes the first version of Flink’s Machine Learning |
| library. The library’s pipeline approach, which has been strongly |
| inspired by scikit-learn’s abstraction of transformers and estimators, |
| makes it easy to quickly set up a data processing pipeline and to get |
| your job done.</p> |
| |
| <p>Flink distinguishes between transformers and learners. Transformers |
| are components which transform your input data into a new format |
| allowing you to extract features, cleanse your data or to sample from |
| it. Learners, on the other hand, constitute the components which take |
| your input data and train a model on it. The model you obtain from the |
| learner can then be evaluated and used to make predictions on unseen |
| data.</p> |
| |
| <p>Currently, the machine learning library contains transformers and |
| learners to do multiple tasks. The library supports multiple linear |
| regression using a stochastic gradient implementation to scale to |
| large data sizes. Furthermore, it includes an alternating least |
| squares (ALS) implementation to factorize large matrices. The matrix |
| factorization can be used to do collaborative filtering. An |
| implementation of the communication efficient distributed dual |
| coordinate ascent (CoCoA) algorithm is the latest addition to the |
| library. The CoCoA algorithm can be used to train distributed |
| soft-margin SVMs.</p> |
| |
| <h3 id="flink-on-yarn-leveraging-apache-tez">Flink on YARN leveraging Apache Tez</h3> |
| |
| <p>We are introducing a new execution mode for Flink to be able to run |
| restricted Flink programs on top of <a href="http://tez.apache.org">Apache |
| Tez</a>. This mode retains Flink’s APIs, |
| optimizer, as well as Flink’s runtime operators, but instead of |
| wrapping those in Flink tasks that are executed by Flink TaskManagers, |
| it wraps them in Tez runtime tasks and builds a Tez DAG that |
| represents the program.</p> |
| |
| <p>By using Flink on Tez, users have an additional choice for an |
| execution platform for Flink programs. While Flink’s distributed |
| runtime favors low latency, streaming shuffles, and iterative |
| algorithms, Tez focuses on scalability and elastic resource usage in |
| shared YARN clusters.</p> |
| |
| <p>Get started with Flink on Tez |
| <a href="http://ci.apache.org/projects/flink/flink-docs-master/setup/flink_on_tez.html">here</a>.</p> |
| |
| <h3 id="reworked-distributed-runtime-on-akka">Reworked Distributed Runtime on Akka</h3> |
| |
| <p>Flink’s RPC system has been replaced by the widely adopted |
| <a href="http://akka.io">Akka</a> framework. Akka’s concurrency model offers the |
| right abstraction to develop a fast as well as robust distributed |
| system. By using Akka’s own failure detection mechanism the stability |
| of Flink’s runtime is significantly improved, because the system can |
| now react properly to node outages. Furthermore, Akka improves |
| Flink’s scalability by introducing asynchronous messages to the |
| system. These asynchronous messages allow Flink to be run on many more |
| nodes than before.</p> |
| |
| <h3 id="exactly-once-processing-on-kafka-streaming-sources">Exactly-once processing on Kafka Streaming Sources</h3> |
| |
| <p>This release introduces stream processing with exactly-once delivery |
| guarantees for Flink streaming programs that analyze streaming sources |
| that are persisted by <a href="http://kafka.apache.org">Apache Kafka</a>. The |
| system is internally tracking the Kafka offsets to ensure that Flink |
| can pick up data from Kafka where it left off in case of a failure.</p> |
| |
| <p>Read |
| <a href="http://ci.apache.org/projects/flink/flink-docs-master/apis/streaming_guide.html#apache-kafka">here</a> |
| on how to use the persistent Kafka source.</p> |
| |
| <h3 id="improved-yarn-support">Improved YARN support</h3> |
| |
| <p>Flink’s YARN client contains several improvements, such as a detached |
| mode for starting a YARN session in the background, the ability to |
| submit a single Flink job to a YARN cluster without starting a |
| session, including a “fire and forget” mode. Flink is now also able to |
| reallocate failed YARN containers to maintain the size of the |
| requested cluster. This feature allows implementing fault-tolerant |
| setups on top of YARN. There is also an internal Java API to deploy |
| and control a running YARN cluster. This is being used by system |
| integrators to easily control Flink on YARN within their Hadoop 2 |
| cluster.</p> |
| |
| <p>See the YARN docs |
| <a href="http://ci.apache.org/projects/flink/flink-docs-master/setup/yarn_setup.html">here</a>.</p> |
| |
| <h2 id="more-improvements-and-fixes">More Improvements and Fixes</h2> |
| |
| <ul> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1605">FLINK-1605</a>: |
| Flink is not exposing its Guava and ASM dependencies to Maven |
| projects depending on Flink. We use the maven-shade-plugin to |
| relocate these dependencies into our own namespace. This allows |
| users to use any Guava or ASM version.</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1417">FLINK-1417</a>: |
| Automatic recognition and registration of Java Types at Kryo and the |
| internal serializers: Flink has its own type handling and |
| serialization framework falling back to Kryo for types that it cannot |
| handle. To get the best performance Flink is automatically registering |
| all types a user is using in their program with Kryo. Flink also |
| registers serializers for Protocol Buffers, Thrift, Avro and Joda-Time |
| automatically. Users can also manually register serializers with Kryo |
| (<a href="https://issues.apache.org/jira/browse/FLINK-1399">FLINK-1399</a>).</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1296">FLINK-1296</a>: Add |
| support for sorting very large records</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1679">FLINK-1679</a>: |
| “degreeOfParallelism” methods renamed to “parallelism”</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1501">FLINK-1501</a>: Add |
| metrics library for monitoring TaskManagers</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1760">FLINK-1760</a>: Add |
| support for building Flink with Scala 2.11</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1648">FLINK-1648</a>: Add |
| a mode where the system automatically sets the parallelism to the |
| available task slots</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1622">FLINK-1622</a>: Add |
| groupCombine operator</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1589">FLINK-1589</a>: Add |
| option to pass Configuration to LocalExecutor</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1504">FLINK-1504</a>: Add |
| support for accessing secured HDFS clusters in standalone mode</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1478">FLINK-1478</a>: Add |
| strictly local input split assignment</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1512">FLINK-1512</a>: Add |
| CsvReader for reading into POJOs.</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1461">FLINK-1461</a>: Add |
| sortPartition operator</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1450">FLINK-1450</a>: Add |
| Fold operator to the Streaming API</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1389">FLINK-1389</a>: |
| Allow setting custom file extensions for files created by the |
| FileOutputFormat</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1236">FLINK-1236</a>: Add |
| support for localization of Hadoop Input Splits</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1179">FLINK-1179</a>: Add |
| button to JobManager web interface to request stack trace of a |
| TaskManager</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1105">FLINK-1105</a>: Add |
| support for locally sorted output</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1688">FLINK-1688</a>: Add |
| socket sink</p> |
| </li> |
| <li> |
| <p><a href="https://issues.apache.org/jira/browse/FLINK-1436">FLINK-1436</a>: |
| Improve usability of command line interface</p> |
| </li> |
| </ul> |
| |
| </article> |
| </div> |
| |
| <div class="row"> |
| <div id="disqus_thread"></div> |
| <script type="text/javascript"> |
| /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ |
| var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname |
| |
| /* * * DON'T EDIT BELOW THIS LINE * * */ |
| (function() { |
| var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; |
| dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; |
| (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); |
| })(); |
| </script> |
| </div> |
| </div> |
| </div> |
| |
| <hr /> |
| <div class="footer text-center"> |
| <p>Copyright © 2014-2015 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p> |
| <p>Apache Flink, Apache, and the Apache feather logo are trademarks of The Apache Software Foundation.</p> |
| <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> |
| </div> |
| |
| </div><!-- /.container --> |
| |
| <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> |
| <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> |
| <!-- Include all compiled plugins (below), or include individual files as needed --> |
| <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> |
| <script src="/js/codetabs.js"></script> |
| |
| <!-- Google Analytics --> |
| <script> |
| (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
| m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); |
| |
| ga('create', 'UA-52545728-1', 'auto'); |
| ga('send', 'pageview'); |
| </script> |
| </body> |
| </html> |