blob: 29574653f2af993f94ced1f0121b087bb43f7687 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Apache Flink: Flink on Zeppelin Notebooks for Interactive Data Analysis - Part 2</title>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Bootstrap -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/flink.css">
<link rel="stylesheet" href="/css/syntax.css">
<!-- Blog RSS feed -->
<link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" />
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<!-- We need to load Jquery in the header for custom google analytics event tracking-->
<script src="/js/jquery.min.js"></script>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Main content. -->
<div class="container">
<div class="row">
<div id="sidebar" class="col-sm-3">
<!-- Top navbar. -->
<nav class="navbar navbar-default">
<!-- The logo. -->
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<div class="navbar-logo">
<a href="/">
<img alt="Apache Flink" src="/img/flink-header-logo.svg" width="147px" height="73px">
</a>
</div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-main">
<!-- First menu section explains visitors what Flink is -->
<!-- What is Stream Processing? -->
<!--
<li><a href="/streamprocessing1.html">What is Stream Processing?</a></li>
-->
<!-- What is Flink? -->
<li><a href="/flink-architecture.html">What is Apache Flink?</a></li>
<ul class="nav navbar-nav navbar-subnav">
<li >
<a href="/flink-architecture.html">Architecture</a>
</li>
<li >
<a href="/flink-applications.html">Applications</a>
</li>
<li >
<a href="/flink-operations.html">Operations</a>
</li>
</ul>
<!-- What is Stateful Functions? -->
<li><a href="/stateful-functions.html">What is Stateful Functions?</a></li>
<!-- Use cases -->
<li><a href="/usecases.html">Use Cases</a></li>
<!-- Powered by -->
<li><a href="/poweredby.html">Powered By</a></li>
&nbsp;
<!-- Second menu section aims to support Flink users -->
<!-- Downloads -->
<li><a href="/downloads.html">Downloads</a></li>
<!-- Getting Started -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Getting Started<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/getting-started/index.html" target="_blank">With Flink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1/getting-started/project-setup.html" target="_blank">With Flink Stateful Functions <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="/training.html">Training Course</a></li>
</ul>
</li>
<!-- Documentation -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10" target="_blank">Flink 1.10 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-docs-master" target="_blank">Flink Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1" target="_blank">Flink Stateful Functions 2.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-master" target="_blank">Flink Stateful Functions Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
</ul>
</li>
<!-- getting help -->
<li><a href="/gettinghelp.html">Getting Help</a></li>
<!-- Blog -->
<li><a href="/blog/"><b>Flink Blog</b></a></li>
<!-- Flink-packages -->
<li>
<a href="https://flink-packages.org" target="_blank">flink-packages.org <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Third menu section aim to support community and contributors -->
<!-- Community -->
<li><a href="/community.html">Community &amp; Project Info</a></li>
<!-- Roadmap -->
<li><a href="/roadmap.html">Roadmap</a></li>
<!-- Contribute -->
<li><a href="/contributing/how-to-contribute.html">How to Contribute</a></li>
<!-- GitHub -->
<li>
<a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Language Switcher -->
<li>
<a href="/zh/ecosystem/2020/06/23/flink-on-zeppelin-part2.html">中文版</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-bottom">
<hr />
<!-- Twitter -->
<li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<!-- Visualizer -->
<li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<hr />
<li><a href="https://apache.org" target="_blank">Apache Software Foundation <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li>
<style>
.smalllinks:link {
display: inline-block !important; background: none; padding-top: 0px; padding-bottom: 0px; padding-right: 0px; min-width: 75px;
}
</style>
<a class="smalllinks" href="https://www.apache.org/licenses/" target="_blank">License</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/security/" target="_blank">Security</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Donate</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</nav>
</div>
<div class="col-sm-9">
<div class="row-fluid">
<div class="col-sm-12">
<div class="row">
<h1>Flink on Zeppelin Notebooks for Interactive Data Analysis - Part 2</h1>
<p><i></i></p>
<article>
<p>23 Jun 2020 Jeff Zhang (<a href="https://twitter.com/zjffdu">@zjffdu</a>)</p>
<p>In a previous post, we introduced the basics of Flink on Zeppelin and how to do Streaming ETL. In this second part of the “Flink on Zeppelin” series of posts, I will share how to
perform streaming data visualization via Flink on Zeppelin and how to use Apache Flink UDFs in Zeppelin.</p>
<h1 id="streaming-data-visualization">Streaming Data Visualization</h1>
<p>With <a href="https://zeppelin.apache.org/">Zeppelin</a>, you can build a real time streaming dashboard without writing any line of javascript/html/css code.</p>
<p>Overall, Zeppelin supports 3 kinds of streaming data analytics:</p>
<ul>
<li>Single Mode</li>
<li>Update Mode</li>
<li>Append Mode</li>
</ul>
<h3 id="single-mode">Single Mode</h3>
<p>Single mode is used for cases when the result of a SQL statement is always one row, such as the following example.
The output format is translated in HTML, and you can specify a paragraph local property template for the final output content template.
And you can use <code>{i}</code> as placeholder for the {i}th column of the result.</p>
<center>
<img src="/img/blog/2020-06-23-flink-on-zeppelin-part2/flink_single_mode.gif" width="80%" alt="Single Mode" />
</center>
<h3 id="update-mode">Update Mode</h3>
<p>Update mode is suitable for the cases when the output format is more than one row,
and will always be continuously updated. Here’s one example where we use <code>GROUP BY</code>.</p>
<center>
<img src="/img/blog/2020-06-23-flink-on-zeppelin-part2/flink_update_mode.gif" width="80%" alt="Update Mode" />
</center>
<h3 id="append-mode">Append Mode</h3>
<p>Append mode is suitable for the cases when the output data is always appended.
For instance, the example below uses a tumble window.</p>
<center>
<img src="/img/blog/2020-06-23-flink-on-zeppelin-part2/flink_append_mode.gif" width="80%" alt="Append Mode" />
</center>
<h1 id="udf">UDF</h1>
<p>SQL is a very powerful language, especially in expressing data flow. But most of the time, you need to handle complicated business logic that cannot be expressed by SQL.
In these cases UDFs (user-defined functions) come particularly handy. In Zeppelin, you can write Scala or Python UDFs, while you can also import Scala, Python and Java UDFs.
Here are 2 examples of Scala and Python UDFs:</p>
<ul>
<li>Scala UDF</li>
</ul>
<div class="highlight"><pre><code class="language-scala"><span class="o">%</span><span class="n">flink</span>
<span class="k">class</span> <span class="nc">ScalaUpper</span> <span class="k">extends</span> <span class="nc">ScalarFunction</span> <span class="o">{</span>
<span class="k">def</span> <span class="n">eval</span><span class="o">(</span><span class="n">str</span><span class="k">:</span> <span class="kt">String</span><span class="o">)</span> <span class="k">=</span> <span class="n">str</span><span class="o">.</span><span class="n">toUpperCase</span>
<span class="o">}</span>
<span class="n">btenv</span><span class="o">.</span><span class="n">registerFunction</span><span class="o">(</span><span class="s">&quot;scala_upper&quot;</span><span class="o">,</span> <span class="k">new</span> <span class="nc">ScalaUpper</span><span class="o">())</span></code></pre></div>
<ul>
<li>Python UDF</li>
</ul>
<div class="highlight"><pre><code class="language-python"><span class="o">%</span><span class="n">flink</span><span class="o">.</span><span class="n">pyflink</span>
<span class="k">class</span> <span class="nc">PythonUpper</span><span class="p">(</span><span class="n">ScalarFunction</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">eval</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">s</span><span class="p">):</span>
<span class="k">return</span> <span class="n">s</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
<span class="n">bt_env</span><span class="o">.</span><span class="n">register_function</span><span class="p">(</span><span class="s">&quot;python_upper&quot;</span><span class="p">,</span> <span class="n">udf</span><span class="p">(</span><span class="n">PythonUpper</span><span class="p">(),</span> <span class="n">DataTypes</span><span class="o">.</span><span class="n">STRING</span><span class="p">(),</span> <span class="n">DataTypes</span><span class="o">.</span><span class="n">STRING</span><span class="p">()))</span></code></pre></div>
<p>After you define the UDFs, you can use them directly in SQL:</p>
<ul>
<li>Use Scala UDF in SQL</li>
</ul>
<center>
<img src="/img/blog/2020-06-23-flink-on-zeppelin-part2/flink_scala_udf.png" width="100%" alt="Scala UDF" />
</center>
<ul>
<li>Use Python UDF in SQL</li>
</ul>
<center>
<img src="/img/blog/2020-06-23-flink-on-zeppelin-part2/flink_python_udf.png" width="100%" alt="Python UDF" />
</center>
<h1 id="summary">Summary</h1>
<p>In this post, we explained how to perform streaming data visualization via Flink on Zeppelin and how to use UDFs.
Besides that, you can do more in Zeppelin with Flink, such as batch processing, Hive integration and more.
You can check the following articles for more details and here’s a list of <a href="https://www.youtube.com/watch?v=YxPo0Fosjjg&amp;list=PL4oy12nnS7FFtg3KV1iS5vDb0pTz12VcX">Flink on Zeppelin tutorial videos</a> for your reference.</p>
<h1 id="references">References</h1>
<ul>
<li><a href="http://zeppelin.apache.org">Apache Zeppelin official website</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-1-get-started-2591aaa6aa47">Part 1</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-2-batch-711731df5ad9">Part 2</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-3-streaming-5fca1e16754">Part 3</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-4-advanced-usage-998b74908cd9">Part 4</a></li>
<li><a href="https://www.youtube.com/watch?v=YxPo0Fosjjg&amp;list=PL4oy12nnS7FFtg3KV1iS5vDb0pTz12VcX">Flink on Zeppelin tutorial videos</a></li>
</ul>
</article>
</div>
<div class="row">
<div id="disqus_thread"></div>
<script type="text/javascript">
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname
/* * * DON'T EDIT BELOW THIS LINE * * */
(function() {
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
</div>
</div>
</div>
</div>
</div>
<hr />
<div class="row">
<div class="footer text-center col-sm-12">
<p>Copyright © 2014-2019 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
<p>Apache Flink, Flink®, Apache®, the squirrel logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p>
<p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
</div>
</div>
</div><!-- /.container -->
<!-- Include all compiled plugins (below), or include individual files as needed -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.matchHeight/0.7.0/jquery.matchHeight-min.js"></script>
<script src="/js/codetabs.js"></script>
<script src="/js/stickysidebar.js"></script>
<!-- Google Analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-52545728-1', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>