blob: 2d08eea75c6925d189ea33601ce2ef06d3d0d95c [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Apache Flink: Flink on Zeppelin Notebooks for Interactive Data Analysis - Part 1</title>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Bootstrap -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/flink.css">
<link rel="stylesheet" href="/css/syntax.css">
<!-- Blog RSS feed -->
<link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" />
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<!-- We need to load jQuery in the header for custom Google Analytics event tracking -->
<script src="/js/jquery.min.js"></script>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Main content. -->
<div class="container">
<div class="row">
<div id="sidebar" class="col-sm-3">
<!-- Top navbar. -->
<nav class="navbar navbar-default">
<!-- The logo. -->
<div class="navbar-header">
<!-- Collapse toggle for the sidebar navigation (shown on narrow viewports).
     The button contains only decorative icon bars, so a visually hidden
     label supplies its accessible name; aria-expanded gives assistive
     technology the initial (collapsed) state of the target menu. -->
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<div class="navbar-logo">
<a href="/">
<img alt="Apache Flink" src="/img/flink-header-logo.svg" width="147" height="73">
</a>
</div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-main">
<!-- First menu section explains to visitors what Flink is -->
<!-- What is Stream Processing? -->
<!--
<li><a href="/streamprocessing1.html">What is Stream Processing?</a></li>
-->
<!-- What is Flink? -->
<li><a href="/flink-architecture.html">What is Apache Flink?</a></li>
<ul class="nav navbar-nav navbar-subnav">
<li >
<a href="/flink-architecture.html">Architecture</a>
</li>
<li >
<a href="/flink-applications.html">Applications</a>
</li>
<li >
<a href="/flink-operations.html">Operations</a>
</li>
</ul>
<!-- What is Stateful Functions? -->
<li><a href="/stateful-functions.html">What is Stateful Functions?</a></li>
<!-- Use cases -->
<li><a href="/usecases.html">Use Cases</a></li>
<!-- Powered by -->
<li><a href="/poweredby.html">Powered By</a></li>
&nbsp;
<!-- Second menu section aims to support Flink users -->
<!-- Downloads -->
<li><a href="/downloads.html">Downloads</a></li>
<!-- Getting Started -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Getting Started<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10/getting-started/index.html" target="_blank">With Flink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1/getting-started/project-setup.html" target="_blank">With Flink Stateful Functions <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="/training.html">Training Course</a></li>
</ul>
</li>
<!-- Documentation -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.10" target="_blank">Flink 1.10 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-docs-master" target="_blank">Flink Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1" target="_blank">Flink Stateful Functions 2.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-master" target="_blank">Flink Stateful Functions Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
</ul>
</li>
<!-- getting help -->
<li><a href="/gettinghelp.html">Getting Help</a></li>
<!-- Blog -->
<li class="active"><a href="/blog/"><b>Flink Blog</b></a></li>
<!-- Flink-packages -->
<li>
<a href="https://flink-packages.org" target="_blank">flink-packages.org <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Third menu section aims to support community and contributors -->
<!-- Community -->
<li><a href="/community.html">Community &amp; Project Info</a></li>
<!-- Roadmap -->
<li><a href="/roadmap.html">Roadmap</a></li>
<!-- Contribute -->
<li><a href="/contributing/how-to-contribute.html">How to Contribute</a></li>
<!-- GitHub -->
<li>
<a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Language Switcher -->
<li>
<!-- link to the Chinese home page when current is blog page -->
<a href="/zh">中文版</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-bottom">
<hr />
<!-- Twitter -->
<li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<!-- Visualizer -->
<li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<hr />
<li><a href="https://apache.org" target="_blank">Apache Software Foundation <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li>
<style>
.smalllinks:link {
display: inline-block !important; background: none; padding-top: 0px; padding-bottom: 0px; padding-right: 0px; min-width: 75px;
}
</style>
<a class="smalllinks" href="https://www.apache.org/licenses/" target="_blank">License</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/security/" target="_blank">Security</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Donate</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</nav>
</div>
<div class="col-sm-9">
<div class="row-fluid">
<div class="col-sm-12">
<div class="row">
<h1>Flink on Zeppelin Notebooks for Interactive Data Analysis - Part 1</h1>
<p><i></i></p>
<article>
<p>15 Jun 2020 Jeff Zhang (<a href="https://twitter.com/zjffdu">@zjffdu</a>)</p>
<p>The latest release of <a href="https://zeppelin.apache.org/">Apache Zeppelin</a> comes with a redesigned interpreter for Apache Flink (only Flink 1.10+ is supported moving forward)
that allows developers to use Flink directly on Zeppelin notebooks for interactive data analysis. I wrote two posts about how to use Flink in Zeppelin. This is part 1, where I explain how the Flink interpreter in Zeppelin works
and provide a tutorial for running streaming ETL with Flink on Zeppelin.</p>
<h1 id="the-flink-interpreter-in-zeppelin-09">The Flink Interpreter in Zeppelin 0.9</h1>
<p>The Flink interpreter can be accessed and configured from Zeppelin’s interpreter settings page.
The interpreter has been refactored so that Flink users can now take advantage of Zeppelin to write Flink applications in three languages,
namely Scala, Python (PyFlink) and SQL (for both batch &amp; streaming executions).
Zeppelin 0.9 now comes with the Flink interpreter group, consisting of the following five interpreters:</p>
<ul>
<li>%flink - Provides a Scala environment</li>
<li>%flink.pyflink - Provides a Python environment</li>
<li>%flink.ipyflink - Provides an IPython environment</li>
<li>%flink.ssql - Provides a streaming SQL environment</li>
<li>%flink.bsql - Provides a batch SQL environment</li>
</ul>
<p>Not only has the interpreter been extended to support writing Flink applications in three languages, but the available execution modes for Flink have also been extended and now include:</p>
<ul>
<li>Running Flink in Local Mode</li>
<li>Running Flink in Remote Mode</li>
<li>Running Flink in Yarn Mode</li>
</ul>
<p>You can find more information about how to get started with Zeppelin and all the execution modes for Flink applications in the <a href="https://github.com/apache/zeppelin/tree/master/notebook/Flink%20Tutorial">Flink Tutorial notebooks</a> in the Zeppelin repository.</p>
<h1 id="flink-on-zeppelin-for-stream-processing">Flink on Zeppelin for Stream processing</h1>
<p>Performing stream processing jobs with Apache Flink on Zeppelin allows you to run most major streaming use cases,
such as streaming ETL and real time data analytics, with the use of Flink SQL and specific UDFs.
Below we showcase how you can execute streaming ETL using Flink on Zeppelin:</p>
<p>You can use Flink SQL to perform streaming ETL by following the steps below
(for the full tutorial, please refer to the <a href="https://github.com/apache/zeppelin/blob/master/notebook/Flink%20Tutorial/4.%20Streaming%20ETL_2EYD56B9B.zpln">Flink Tutorial/Streaming ETL tutorial</a> of the Zeppelin distribution):</p>
<ul>
<li>Step 1. Create a source table to represent the source data.</li>
</ul>
<center>
<img src="/img/blog/2020-06-15-flink-on-zeppelin/create_source.png" width="80%" alt="Create Source Table" />
</center>
<ul>
<li>Step 2. Create a sink table to represent the processed data.</li>
</ul>
<center>
<img src="/img/blog/2020-06-15-flink-on-zeppelin/create_sink.png" width="80%" alt="Create Sink Table" />
</center>
<ul>
<li>Step 3. After creating the source and sink tables, we can use them in an INSERT statement to trigger the stream processing job, as follows:</li>
</ul>
<center>
<img src="/img/blog/2020-06-15-flink-on-zeppelin/etl.png" width="80%" alt="ETL" />
</center>
<ul>
<li>Step 4. After initiating the streaming job, you can use another SQL statement to query the sink table to verify the results of your job. Here you can see the top 10 records which will be refreshed every 3 seconds.</li>
</ul>
<center>
<img src="/img/blog/2020-06-15-flink-on-zeppelin/preview.png" width="80%" alt="Preview" />
</center>
<h1 id="summary">Summary</h1>
<p>In this post, we explained how the redesigned Flink interpreter works in Zeppelin 0.9.0 and provided some examples for performing streaming ETL jobs with
Flink and Zeppelin. In the next post, I will talk about how to do streaming data visualization via Flink on Zeppelin.
Besides that, you can find an additional <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-2-batch-711731df5ad9">tutorial for batch processing with Flink on Zeppelin</a> as well as using Flink on Zeppelin for
more advanced operations like resource isolation, job concurrency &amp; parallelism, multiple Hadoop &amp; Hive environments and more in our series of posts on Medium.
And here’s a list of <a href="https://www.youtube.com/watch?v=YxPo0Fosjjg&amp;list=PL4oy12nnS7FFtg3KV1iS5vDb0pTz12VcX">Flink on Zeppelin tutorial videos</a> for your reference.</p>
<h1 id="references">References</h1>
<ul>
<li><a href="https://zeppelin.apache.org">Apache Zeppelin official website</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-1-get-started-2591aaa6aa47">Part 1</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-2-batch-711731df5ad9">Part 2</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-3-streaming-5fca1e16754">Part 3</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-4-advanced-usage-998b74908cd9">Part 4</a></li>
<li><a href="https://www.youtube.com/watch?v=YxPo0Fosjjg&amp;list=PL4oy12nnS7FFtg3KV1iS5vDb0pTz12VcX">Flink on Zeppelin tutorial videos</a></li>
</ul>
</article>
</div>
<div class="row">
<div id="disqus_thread"></div>
<script>
/*
 * Disqus comment thread loader.
 *
 * Builds the embed script URL from the forum shortname and appends the
 * script element to <head> (falling back to <body>), so the embed is
 * fetched asynchronously instead of blocking page rendering.
 */

// Disqus configuration variable: the forum shortname. Declared with a
// top-level `var` so it stays a global, exactly as in the original snippet.
var disqus_shortname = 'stratosphere-eu';

(function() {
  var dsq = document.createElement('script');
  dsq.async = true;
  // Explicit https: a protocol-relative URL ('//...') would inherit http:
  // from an insecurely served page and fail outright under file://.
  dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
  (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
</div>
</div>
</div>
</div>
</div>
<hr />
<div class="row">
<div class="footer text-center col-sm-12">
<p>Copyright © 2014-2019 <a href="https://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
<p>Apache Flink, Flink®, Apache®, the squirrel logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p>
<p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
</div>
</div>
</div><!-- /.container -->
<!-- Include all compiled plugins (below), or include individual files as needed -->
<!-- Bootstrap JS is pinned to the same release as the Bootstrap CSS loaded in <head> (3.4.1),
     so the plugins and the stylesheet they rely on come from one version. -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.matchHeight/0.7.0/jquery.matchHeight-min.js"></script>
<!-- Site-local scripts -->
<script src="/js/codetabs.js"></script>
<script src="/js/stickysidebar.js"></script>
<!-- Google Analytics -->
<script>
/*
 * Google Analytics (analytics.js) loader.
 *
 * The IIFE installs a stub `window.ga` that queues its arguments in `ga.q`
 * and records a timestamp in `ga.l`. It then creates an async <script>
 * element for the library and inserts it before the first <script> in the
 * document. The two ga(...) calls below are therefore queued by the stub
 * until the library has loaded.
 *
 * The library URL uses an explicit https: scheme rather than a
 * protocol-relative one, so it is never fetched over plain http.
 */
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Register the tracker for this property, then record the page view.
ga('create', 'UA-52545728-1', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>