blob: 6abb7979d50c2f4fce797573aa127716de0f9e5c [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Apache Flink: February 2015 in the Flink community</title>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Bootstrap -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/flink.css">
<link rel="stylesheet" href="/css/syntax.css">
<!-- Blog RSS feed -->
<link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" />
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Top navbar. -->
<nav class="navbar navbar-default navbar-fixed-top">
<div class="container">
<!-- The logo. -->
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<div class="navbar-logo">
<a href="/"><img alt="Apache Flink" src="/img/navbar-brand-logo.jpg" width="78" height="40"></a>
</div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav">
<!-- Overview -->
<li><a href="/index.html">Overview</a></li>
<!-- Quickstart -->
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Quickstart <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/setup_quickstart.html">Setup</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/java_api_quickstart.html">Java API</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/scala_api_quickstart.html">Scala API</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/quickstart/run_example_quickstart.html">Run Step-by-Step Example</a></li>
</ul>
</li>
<!-- Features -->
<li><a href="/features.html">Features</a></li>
<!-- Downloads -->
<li><a href="/downloads.html">Downloads</a></li>
<!-- Documentation -->
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Documentation <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<!-- Latest stable release -->
<li role="presentation" class="dropdown-header"><strong>Latest Release</strong> (Stable)</li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9">0.9.0 Documentation</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/api/java" class="active">0.9.0 Javadocs</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/api/scala/index.html" class="active">0.9.0 ScalaDocs</a></li>
<!-- Snapshot docs -->
<li class="divider"></li>
<li role="presentation" class="dropdown-header"><strong>Snapshot</strong> (Development)</li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-master">0.10 Documentation</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/java" class="active">0.10 Javadocs</a></li>
<li><a href="http://ci.apache.org/projects/flink/flink-docs-master/api/scala/index.html" class="active">0.10 ScalaDocs</a></li>
<!-- Wiki -->
<li class="divider"></li>
<li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li>
</ul>
</li>
<!-- FAQ -->
<li><a href="/faq.html">FAQ</a></li>
</ul>
<ul class="nav navbar-nav navbar-right">
<!-- Blog -->
<li class=" active hidden-md hidden-sm"><a href="/blog/">Blog</a></li>
<li class="dropdown hidden-md hidden-sm">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Community <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<!-- Community -->
<li role="presentation" class="dropdown-header"><strong>Community</strong></li>
<li><a href="/community.html#mailing-lists">Mailing Lists</a></li>
<li><a href="/community.html#irc">IRC</a></li>
<li><a href="/community.html#stack-overflow">Stack Overflow</a></li>
<li><a href="/community.html#issue-tracker">Issue Tracker</a></li>
<li><a href="/community.html#source-code">Source Code</a></li>
<li><a href="/community.html#people">People</a></li>
<!-- Contribute -->
<li class="divider"></li>
<li role="presentation" class="dropdown-header"><strong>Contribute</strong></li>
<li><a href="/how-to-contribute.html">How to Contribute</a></li>
<li><a href="/coding-guidelines.html">Coding Guidelines</a></li>
</ul>
</li>
<li class="dropdown hidden-md hidden-sm">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Project <span class="caret"></span></a>
<ul class="dropdown-menu" role="menu">
<!-- Project -->
<li role="presentation" class="dropdown-header"><strong>Project</strong></li>
<li><a href="/material.html">Material</a></li>
<li><a href="https://twitter.com/apacheflink"><small><span class="glyphicon glyphicon-new-window"></span></small> Twitter</a></li>
<li><a href="https://github.com/apache/flink"><small><span class="glyphicon glyphicon-new-window"></span></small> GitHub</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home"><small><span class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li>
</ul>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</div><!-- /.container -->
</nav>
<!-- Main content. -->
<div class="container">
<div class="row">
<div class="col-sm-8 col-sm-offset-2">
<div class="row">
<h1>February 2015 in the Flink community</h1>
<article>
<p>02 Mar 2015</p>
<p>February might be the shortest month of the year, but this does not
mean that the Flink community has not been busy adding features to the
system and fixing bugs. Here’s a rundown of the activity in the Flink
community last month.</p>
<h3 id="release">0.8.1 release</h3>
<p>Flink 0.8.1 was released. This bugfixing release resolves a total of 22 issues.</p>
<h3 id="new-committer">New committer</h3>
<p><a href="https://github.com/mxm">Max Michels</a> has been voted a committer by the Flink PMC.</p>
<h3 id="flink-adapter-for-apache-samoa">Flink adapter for Apache SAMOA</h3>
<p><a href="http://samoa.incubator.apache.org">Apache SAMOA (incubating)</a> is a
distributed streaming machine learning (ML) framework with a
programming abstraction for distributed streaming ML algorithms. SAMOA
runs on a variety of backend engines, currently Apache Storm and
Apache S4. A <a href="https://github.com/apache/incubator-samoa/pull/11">pull
request</a> is
available at the SAMOA repository that adds a Flink adapter for SAMOA.</p>
<h3 id="easy-flink-deployment-on-google-compute-cloud">Easy Flink deployment on Google Compute Cloud</h3>
<p>Flink is now integrated in bdutil, Google’s open source tool for
creating and configuring (Hadoop) clusters in Google Compute
Engine. Deployment of Flink clusters is now supported starting with
<a href="https://groups.google.com/forum/#!topic/gcp-hadoop-announce/uVJ_6y9cGKM">bdutil
1.2.0</a>.</p>
<h3 id="flink-on-the-web">Flink on the Web</h3>
<p>A new blog post on <a href="http://flink.apache.org/news/2015/02/09/streaming-example.html">Flink
Streaming</a>
was published on the blog. Flink was mentioned in several articles on
the web. Here are some examples:</p>
<ul>
<li>
<p><a href="http://dataconomy.com/how-flink-became-an-apache-top-level-project/">How Flink became an Apache Top-Level Project</a></p>
</li>
<li>
<p><a href="https://www.linkedin.com/pulse/stale-synchronous-parallelism-new-frontier-apache-flink-nam-luc-tran?utm_content=buffer461af&amp;utm_medium=social&amp;utm_source=linkedin.com&amp;utm_campaign=buffer">Stale Synchronous Parallelism: The new frontier for Apache Flink?</a></p>
</li>
<li>
<p><a href="http://www.hadoopsphere.com/2015/02/distributed-data-processing-with-apache.html">Distributed data processing with Apache Flink</a></p>
</li>
<li>
<p><a href="http://www.hadoopsphere.com/2015/02/ciao-latency-hallo-speed.html">Ciao latency, hello speed</a></p>
</li>
</ul>
<h2 id="in-the-flink-master">In the Flink master</h2>
<p>The following features have now been merged into Flink’s master repository.</p>
<h3 id="gelly">Gelly</h3>
<p>Gelly, Flink’s Graph API, allows users to manipulate graph-shaped data
directly. Here is, for example, a calculation of shortest paths in a
graph:</p>
<div class="highlight"><pre><code class="language-java" data-lang="java"><span class="n">Graph</span><span class="o">&lt;</span><span class="n">Long</span><span class="o">,</span> <span class="n">Double</span><span class="o">,</span> <span class="n">Double</span><span class="o">&gt;</span> <span class="n">graph</span> <span class="o">=</span> <span class="n">Graph</span><span class="o">.</span><span class="na">fromDataSet</span><span class="o">(</span><span class="n">vertices</span><span class="o">,</span> <span class="n">edges</span><span class="o">,</span> <span class="n">env</span><span class="o">);</span>
<span class="n">DataSet</span><span class="o">&lt;</span><span class="n">Vertex</span><span class="o">&lt;</span><span class="n">Long</span><span class="o">,</span> <span class="n">Double</span><span class="o">&gt;&gt;</span> <span class="n">singleSourceShortestPaths</span> <span class="o">=</span> <span class="n">graph</span>
<span class="o">.</span><span class="na">run</span><span class="o">(</span><span class="k">new</span> <span class="n">SingleSourceShortestPaths</span><span class="o">&lt;</span><span class="n">Long</span><span class="o">&gt;(</span><span class="n">srcVertexId</span><span class="o">,</span>
<span class="n">maxIterations</span><span class="o">)).</span><span class="na">getVertices</span><span class="o">();</span></code></pre></div>
<p>See more Gelly examples
<a href="https://github.com/apache/flink/tree/master/flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example">here</a>.</p>
<h3 id="flink-expressions">Flink Expressions</h3>
<p>The newly merged
<a href="https://github.com/apache/flink/tree/master/flink-staging/flink-table">flink-table</a>
module is the first step in Flink’s roadmap towards logical queries
and SQL support. Here’s a preview of how you can read two CSV files,
assign a logical schema to them, and apply transformations like filters and
joins using logical attributes rather than physical data types.</p>
<div class="highlight"><pre><code class="language-scala" data-lang="scala"><span class="k">val</span> <span class="n">customers</span> <span class="k">=</span> <span class="n">getCustomerDataSet</span><span class="o">(</span><span class="n">env</span><span class="o">)</span>
<span class="o">.</span><span class="n">as</span><span class="o">(</span><span class="-Symbol">&#39;id</span><span class="o">,</span> <span class="-Symbol">&#39;mktSegment</span><span class="o">)</span>
<span class="o">.</span><span class="n">filter</span><span class="o">(</span> <span class="-Symbol">&#39;mktSegment</span> <span class="o">===</span> <span class="s">&quot;AUTOMOBILE&quot;</span> <span class="o">)</span>
<span class="k">val</span> <span class="n">orders</span> <span class="k">=</span> <span class="n">getOrdersDataSet</span><span class="o">(</span><span class="n">env</span><span class="o">)</span>
<span class="o">.</span><span class="n">filter</span><span class="o">(</span> <span class="n">o</span> <span class="k">=&gt;</span> <span class="n">dateFormat</span><span class="o">.</span><span class="n">parse</span><span class="o">(</span><span class="n">o</span><span class="o">.</span><span class="n">orderDate</span><span class="o">).</span><span class="n">before</span><span class="o">(</span><span class="n">date</span><span class="o">)</span> <span class="o">)</span>
<span class="o">.</span><span class="n">as</span><span class="o">(</span><span class="-Symbol">&#39;orderId</span><span class="o">,</span> <span class="-Symbol">&#39;custId</span><span class="o">,</span> <span class="-Symbol">&#39;orderDate</span><span class="o">,</span> <span class="-Symbol">&#39;shipPrio</span><span class="o">)</span>
<span class="k">val</span> <span class="n">items</span> <span class="k">=</span>
<span class="n">orders</span><span class="o">.</span><span class="n">join</span><span class="o">(</span><span class="n">customers</span><span class="o">)</span>
<span class="o">.</span><span class="n">where</span><span class="o">(</span><span class="-Symbol">&#39;custId</span> <span class="o">===</span> <span class="-Symbol">&#39;id</span><span class="o">)</span>
<span class="o">.</span><span class="n">select</span><span class="o">(</span><span class="-Symbol">&#39;orderId</span><span class="o">,</span> <span class="-Symbol">&#39;orderDate</span><span class="o">,</span> <span class="-Symbol">&#39;shipPrio</span><span class="o">)</span></code></pre></div>
<h3 id="access-to-hcatalog-tables">Access to HCatalog tables</h3>
<p>With the <a href="https://github.com/apache/flink/tree/master/flink-staging/flink-hcatalog">flink-hcatalog
module</a>,
you can now conveniently access HCatalog/Hive tables. The module
supports projection (selection and order of fields) and partition
filters.</p>
<h3 id="access-to-secured-yarn-clustershdfs">Access to secured YARN clusters/HDFS</h3>
<p>With this change users can access Kerberos secured YARN (and HDFS)
Hadoop clusters. Also, basic support for accessing secured HDFS with
a standalone Flink setup is now available.</p>
</article>
</div>
<div class="row">
<div id="disqus_thread"></div>
<script type="text/javascript">
/*
 * Disqus comment loader.
 *
 * Declares the forum shortname as a global and then injects an asynchronous
 * <script> element that fetches `embed.js` from that forum's Disqus subdomain.
 * NOTE(review): the shortname is kept as a global `var` because the loaded
 * embed script presumably reads it, and the thread presumably renders into
 * the element with id "disqus_thread" -- confirm against the Disqus docs.
 */
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname
/* * * DON'T EDIT BELOW THIS LINE * * */
(function() {
// Build the loader element; `async` keeps it from blocking page parsing.
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
// Use an explicit https:// URL rather than a protocol-relative one: a
// protocol-relative URL inherits the page's scheme, so it fails when the page
// is opened via file:// and loads over plain HTTP when the page is on HTTP.
dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
// Attach to <head> when present, otherwise fall back to <body>.
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
</div>
</div>
</div>
<hr />
<div class="footer text-center">
<p>Copyright © 2014-2015 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
<p>Apache Flink, Apache, and the Apache feather logo are trademarks of The Apache Software Foundation.</p>
<p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
</div>
</div><!-- /.container -->
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
<!-- Include all compiled plugins (below), or include individual files as needed -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
<script src="/js/codetabs.js"></script>
<!-- Google Analytics -->
<script>
/*
 * Google Analytics (analytics.js) bootstrap snippet.
 *
 * The immediately-invoked function is called with
 * (window, document, 'script', <analytics.js URL>, 'ga') and:
 *   - stores the name 'ga' in window.GoogleAnalyticsObject;
 *   - defines window.ga, unless it already exists, as a stub that appends
 *     each call's arguments to the queue ga.q;
 *   - records the current time (as a number) in ga.l;
 *   - creates an async <script> element for the URL and inserts it before
 *     the first <script> element in the document.
 * The two ga(...) calls below it then enqueue the tracker creation and a
 * pageview hit.
 *
 * The script URL is an explicit https:// URL rather than a protocol-relative
 * one, so the library is never requested over plain HTTP and the request
 * does not break when the page is opened via file://.
 */
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Create the tracker for this property with automatic cookie-domain configuration.
ga('create', 'UA-52545728-1', 'auto');
// Report a pageview for the current page.
ga('send', 'pageview');
</script>
</body>
</html>