blob: e0dfad45c9ba7a84900588a9182df64b36fe25b3 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" dir=ZgotmplZ>
<head>
<link rel="stylesheet" href="/bootstrap/css/bootstrap.min.css">
<script src="/bootstrap/js/bootstrap.bundle.min.js"></script>
<link rel="stylesheet" type="text/css" href="/font-awesome/css/font-awesome.min.css">
<script src="/js/anchor.min.js"></script>
<script src="/js/flink.js"></script>
<link rel="canonical" href="https://flink.apache.org/2020/06/23/flink-on-zeppelin-notebooks-for-interactive-data-analysis-part-2/">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="In a previous post, we introduced the basics of Flink on Zeppelin and how to do Streaming ETL. In this second part of the &ldquo;Flink on Zeppelin&rdquo; series of posts, I will share how to perform streaming data visualization via Flink on Zeppelin and how to use Apache Flink UDFs in Zeppelin.
Streaming Data Visualization # With Zeppelin, you can build a real time streaming dashboard without writing any line of javascript/html/css code.">
<meta name="theme-color" content="#FFFFFF"><meta property="og:title" content="Flink on Zeppelin Notebooks for Interactive Data Analysis - Part 2" />
<meta property="og:description" content="In a previous post, we introduced the basics of Flink on Zeppelin and how to do Streaming ETL. In this second part of the &ldquo;Flink on Zeppelin&rdquo; series of posts, I will share how to perform streaming data visualization via Flink on Zeppelin and how to use Apache Flink UDFs in Zeppelin.
Streaming Data Visualization # With Zeppelin, you can build a real time streaming dashboard without writing any line of javascript/html/css code." />
<meta property="og:type" content="article" />
<meta property="og:url" content="https://flink.apache.org/2020/06/23/flink-on-zeppelin-notebooks-for-interactive-data-analysis-part-2/" /><meta property="article:section" content="posts" />
<meta property="article:published_time" content="2020-06-23T08:00:00+00:00" />
<meta property="article:modified_time" content="2020-06-23T08:00:00+00:00" />
<title>Flink on Zeppelin Notebooks for Interactive Data Analysis - Part 2 | Apache Flink</title>
<link rel="manifest" href="/manifest.json">
<link rel="icon" href="/favicon.png" type="image/x-icon">
<link rel="stylesheet" href="/book.min.22eceb4d17baa9cdc0f57345edd6f215a40474022dfee39b63befb5fb3c596b5.css" integrity="sha256-IuzrTRe6qc3A9XNF7dbyFaQEdAIt/uObY777X7PFlrU=">
<script defer src="/en.search.min.2698f0d1b683dae4d6cb071668b310a55ebcf1c48d11410a015a51d90105b53e.js" integrity="sha256-Jpjw0baD2uTWywcWaLMQpV688cSNEUEKAVpR2QEFtT4="></script>
<!--
Made with Book Theme
https://github.com/alex-shpak/hugo-book
-->
<meta name="generator" content="Hugo 0.124.1">
<script>
var _paq = window._paq = window._paq || [];
_paq.push(['disableCookies']);
_paq.push(["setDomains", ["*.flink.apache.org","*.nightlies.apache.org/flink"]]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="//analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '1']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
</head>
<body dir=ZgotmplZ>
<header>
<nav class="navbar navbar-expand-xl">
<div class="container-fluid">
<a class="navbar-brand" href="/">
<img src="/img/logo/png/100/flink_squirrel_100_color.png" alt="Apache Flink" height="47" width="47" class="d-inline-block align-text-middle">
<span>Apache Flink</span>
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
<i class="fa fa-bars navbar-toggler-icon"></i>
</button>
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">About</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/what-is-flink/flink-architecture/">Architecture</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-applications/">Applications</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-operations/">Operations</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/use-cases/">Use Cases</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/powered-by/">Powered By</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/roadmap/">Roadmap</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/community/">Community & Project Info</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/security/">Security</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/special-thanks/">Special Thanks</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Getting Started</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/try-flink/local_installation/">With Flink<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/docs/try-flink-kubernetes-operator/quick-start/">With Flink Kubernetes Operator<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable/docs/get-started/introduction/">With Flink CDC<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/docs/try-flink-ml/quick-start/">With Flink ML<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/getting-started/project-setup.html">With Flink Stateful Functions<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/learn-flink/overview/">Training Course<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Documentation</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/">Flink 1.19 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-master/">Flink Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/">Kubernetes Operator 1.8 (latest)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-main">Kubernetes Operator Main (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable">CDC 3.0 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-master">CDC Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/">ML 2.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-master">ML Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/">Stateful Functions 3.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-master">Stateful Functions Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">How to Contribute</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/how-to-contribute/overview/">Overview</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-code/">Contribute Code</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/reviewing-prs/">Review Pull Requests</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/code-style-and-quality-preamble/">Code Style and Quality Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-documentation/">Contribute Documentation</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/documentation-style-guide/">Documentation Style Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/improve-website/">Contribute to the Website</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/getting-help/">Getting Help</a>
</li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link" href="/posts/">Flink Blog</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/downloads/">Downloads</a>
</li>
</ul>
<div class="book-search">
<div class="book-search-spinner hidden">
<i class="fa fa-refresh fa-spin"></i>
</div>
<form class="search-bar d-flex" onsubmit="return false;"su>
<input type="text" id="book-search-input" placeholder="Search" aria-label="Search" maxlength="64" data-hotkeys="s/">
<i class="fa fa-search search"></i>
<i class="fa fa-circle-o-notch fa-spin spinner"></i>
</form>
<div class="book-search-spinner hidden"></div>
<ul id="book-search-results"></ul>
</div>
</div>
</div>
</nav>
<div class="navbar-clearfix"></div>
</header>
<main class="flex">
<section class="container book-page">
<article class="markdown">
<h1>
<a href="/2020/06/23/flink-on-zeppelin-notebooks-for-interactive-data-analysis-part-2/">Flink on Zeppelin Notebooks for Interactive Data Analysis - Part 2</a>
</h1>
June 23, 2020 -
Jeff Zhang
<a href="https://twitter.com/zjffdu">(@zjffdu)</a>
<p><p>In a previous post, we introduced the basics of Flink on Zeppelin and how to do Streaming ETL. In this second part of the &ldquo;Flink on Zeppelin&rdquo; series of posts, I will share how to
perform streaming data visualization via Flink on Zeppelin and how to use Apache Flink UDFs in Zeppelin.</p>
<h1 id="streaming-data-visualization">
Streaming Data Visualization
<a class="anchor" href="#streaming-data-visualization">#</a>
</h1>
<p>With <a href="https://zeppelin.apache.org/">Zeppelin</a>, you can build a real time streaming dashboard without writing any line of javascript/html/css code.</p>
<p>Overall, Zeppelin supports 3 kinds of streaming data analytics:</p>
<ul>
<li>Single Mode</li>
<li>Update Mode</li>
<li>Append Mode</li>
</ul>
<h3 id="single-mode">
Single Mode
<a class="anchor" href="#single-mode">#</a>
</h3>
<p>Single mode is used for cases when the result of a SQL statement is always one row, such as the following example.
The output format is translated in HTML, and you can specify a paragraph local property template for the final output content template.
And you can use <code>{i}</code> as placeholder for the {i}th column of the result.</p>
<center>
<img src="/img/blog/2020-06-23-flink-on-zeppelin-part2/flink_single_mode.gif" width="80%" alt="Single Mode"/>
</center>
<h3 id="update-mode">
Update Mode
<a class="anchor" href="#update-mode">#</a>
</h3>
<p>Update mode is suitable for the cases when the output format is more than one row,
and will always be continuously updated. Here’s one example where we use <code>GROUP BY</code>.</p>
<center>
<img src="/img/blog/2020-06-23-flink-on-zeppelin-part2/flink_update_mode.gif" width="80%" alt="Update Mode"/>
</center>
<h3 id="append-mode">
Append Mode
<a class="anchor" href="#append-mode">#</a>
</h3>
<p>Append mode is suitable for the cases when the output data is always appended.
For instance, the example below uses a tumble window.</p>
<center>
<img src="/img/blog/2020-06-23-flink-on-zeppelin-part2/flink_append_mode.gif" width="80%" alt="Append Mode"/>
</center>
<h1 id="udf">
UDF
<a class="anchor" href="#udf">#</a>
</h1>
<p>SQL is a very powerful language, especially in expressing data flow. But most of the time, you need to handle complicated business logic that cannot be expressed by SQL.
In these cases UDFs (user-defined functions) come particularly handy. In Zeppelin, you can write Scala or Python UDFs, while you can also import Scala, Python and Java UDFs.
Here are 2 examples of Scala and Python UDFs:</p>
<ul>
<li>Scala UDF</li>
</ul>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-scala" data-lang="scala"><span class="line"><span class="cl"><span class="o">%</span><span class="n">flink</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">class</span> <span class="nc">ScalaUpper</span> <span class="k">extends</span> <span class="nc">ScalarFunction</span> <span class="o">{</span>
</span></span><span class="line"><span class="cl"><span class="k">def</span> <span class="n">eval</span><span class="o">(</span><span class="n">str</span><span class="k">:</span> <span class="kt">String</span><span class="o">)</span> <span class="k">=</span> <span class="n">str</span><span class="o">.</span><span class="n">toUpperCase</span>
</span></span><span class="line"><span class="cl"><span class="o">}</span>
</span></span><span class="line"><span class="cl"><span class="n">btenv</span><span class="o">.</span><span class="n">registerFunction</span><span class="o">(</span><span class="s">&#34;scala_upper&#34;</span><span class="o">,</span> <span class="k">new</span> <span class="nc">ScalaUpper</span><span class="o">())</span>
</span></span></code></pre></div><ul>
<li>Python UDF</li>
</ul>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="o">%</span><span class="n">flink</span><span class="o">.</span><span class="n">pyflink</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="k">class</span> <span class="nc">PythonUpper</span><span class="p">(</span><span class="n">ScalarFunction</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"><span class="k">def</span> <span class="nf">eval</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">s</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="n">s</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="n">bt_env</span><span class="o">.</span><span class="n">register_function</span><span class="p">(</span><span class="s2">&#34;python_upper&#34;</span><span class="p">,</span> <span class="n">udf</span><span class="p">(</span><span class="n">PythonUpper</span><span class="p">(),</span> <span class="n">DataTypes</span><span class="o">.</span><span class="n">STRING</span><span class="p">(),</span> <span class="n">DataTypes</span><span class="o">.</span><span class="n">STRING</span><span class="p">()))</span>
</span></span></code></pre></div><p>After you define the UDFs, you can use them directly in SQL:</p>
<ul>
<li>Use Scala UDF in SQL</li>
</ul>
<center>
<img src="/img/blog/2020-06-23-flink-on-zeppelin-part2/flink_scala_udf.png" width="100%" alt="Scala UDF"/>
</center>
<ul>
<li>Use Python UDF in SQL</li>
</ul>
<center>
<img src="/img/blog/2020-06-23-flink-on-zeppelin-part2/flink_python_udf.png" width="100%" alt="Python UDF"/>
</center>
<h1 id="summary">
Summary
<a class="anchor" href="#summary">#</a>
</h1>
<p>In this post, we explained how to perform streaming data visualization via Flink on Zeppelin and how to use UDFs.
Besides that, you can do more in Zeppelin with Flink, such as batch processing, Hive integration and more.
You can check the following articles for more details and here&rsquo;s a list of <a href="https://www.youtube.com/watch?v=YxPo0Fosjjg&amp;list=PL4oy12nnS7FFtg3KV1iS5vDb0pTz12VcX">Flink on Zeppelin tutorial videos</a> for your reference.</p>
<h1 id="references">
References
<a class="anchor" href="#references">#</a>
</h1>
<ul>
<li><a href="http://zeppelin.apache.org">Apache Zeppelin official website</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-1-get-started-2591aaa6aa47">Part 1</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-2-batch-711731df5ad9">Part 2</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-3-streaming-5fca1e16754">Part 3</a></li>
<li>Flink on Zeppelin tutorials - <a href="https://medium.com/@zjffdu/flink-on-zeppelin-part-4-advanced-usage-998b74908cd9">Part 4</a></li>
<li><a href="https://www.youtube.com/watch?v=YxPo0Fosjjg&amp;list=PL4oy12nnS7FFtg3KV1iS5vDb0pTz12VcX">Flink on Zeppelin tutorial videos</a></li>
</ul>
</p>
</article>
<div class="edit-this-page">
<p>
<a href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Translation+Specifications">Want to contribute translation?</a>
</p>
<p>
<a href="//github.com/apache/flink-web/edit/asf-site/docs/content/posts/2020-06-23-flink-on-zeppelin-part2.md">
Edit This Page<i class="fa fa-edit fa-fw"></i>
</a>
</p>
</div>
</section>
<aside class="book-toc">
<nav id="TableOfContents"><h3>On This Page <a href="javascript:void(0)" class="toc" onclick="collapseToc()"><i class="fa fa-times" aria-hidden="true"></i></a></h3>
<ul>
<li><a href="#streaming-data-visualization">Streaming Data Visualization</a>
<ul>
<li>
<ul>
<li><a href="#single-mode">Single Mode</a></li>
<li><a href="#update-mode">Update Mode</a></li>
<li><a href="#append-mode">Append Mode</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#udf">UDF</a></li>
<li><a href="#summary">Summary</a></li>
<li><a href="#references">References</a></li>
</ul>
</nav>
</aside>
<aside class="expand-toc hidden">
<a class="toc" onclick="expandToc()" href="javascript:void(0)">
<i class="fa fa-bars" aria-hidden="true"></i>
</a>
</aside>
</main>
<footer>
<div class="separator"></div>
<div class="panels">
<div class="wrapper">
<div class="panel">
<ul>
<li>
<a href="https://flink-packages.org/">flink-packages.org</a>
</li>
<li>
<a href="https://www.apache.org/">Apache Software Foundation</a>
</li>
<li>
<a href="https://www.apache.org/licenses/">License</a>
</li>
<li>
<a href="/zh/">
<i class="fa fa-globe" aria-hidden="true"></i>&nbsp;中文版
</a>
</li>
</ul>
</div>
<div class="panel">
<ul>
<li>
<a href="/what-is-flink/security">Security</a-->
</li>
<li>
<a href="https://www.apache.org/foundation/sponsorship.html">Donate</a>
</li>
<li>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</li>
</ul>
</div>
<div class="panel icons">
<div>
<a href="/posts">
<div class="icon flink-blog-icon"></div>
<span>Flink blog</span>
</a>
</div>
<div>
<a href="https://github.com/apache/flink">
<div class="icon flink-github-icon"></div>
<span>Github</span>
</a>
</div>
<div>
<a href="https://twitter.com/apacheflink">
<div class="icon flink-twitter-icon"></div>
<span>Twitter</span>
</a>
</div>
</div>
</div>
</div>
<hr/>
<div class="container disclaimer">
<p>The contents of this website are © 2024 Apache Software Foundation under the terms of the Apache License v2. Apache Flink, Flink, and the Flink logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</footer>
</body>
</html>