blob: 5192481ca492ff6b93b3659ac8937c280aeedc92 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
<link rel="stylesheet" href="/bootstrap/css/bootstrap.min.css">
<script src="/bootstrap/js/bootstrap.bundle.min.js"></script>
<link rel="stylesheet" type="text/css" href="/font-awesome/css/font-awesome.min.css">
<script src="/js/anchor.min.js"></script>
<script src="/js/flink.js"></script>
<link rel="canonical" href="https://flink.apache.org/2022/07/12/apache-flink-ml-2.1.0-release-announcement/">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="The Apache Flink community is excited to announce the release of Flink ML 2.1.0! This release focuses on improving Flink ML&rsquo;s infrastructure, such as Python SDK, memory management, and benchmark framework, to facilitate the development of performant, memory-safe, and easy-to-use algorithm libraries. We validated the enhanced infrastructure by implementing, benchmarking, and optimizing 10 new algorithms in Flink ML, and confirmed that Flink ML can meet or exceed the performance of selected algorithms from alternative popular ML libraries.">
<meta name="theme-color" content="#FFFFFF"><meta property="og:title" content="Apache Flink ML 2.1.0 Release Announcement" />
<meta property="og:description" content="The Apache Flink community is excited to announce the release of Flink ML 2.1.0! This release focuses on improving Flink ML&rsquo;s infrastructure, such as Python SDK, memory management, and benchmark framework, to facilitate the development of performant, memory-safe, and easy-to-use algorithm libraries. We validated the enhanced infrastructure by implementing, benchmarking, and optimizing 10 new algorithms in Flink ML, and confirmed that Flink ML can meet or exceed the performance of selected algorithms from alternative popular ML libraries." />
<meta property="og:type" content="article" />
<meta property="og:url" content="https://flink.apache.org/2022/07/12/apache-flink-ml-2.1.0-release-announcement/" /><meta property="article:section" content="posts" />
<meta property="article:published_time" content="2022-07-12T08:00:00+00:00" />
<meta property="article:modified_time" content="2022-07-12T08:00:00+00:00" />
<title>Apache Flink ML 2.1.0 Release Announcement | Apache Flink</title>
<link rel="manifest" href="/manifest.json">
<link rel="icon" href="/favicon.png" type="image/png">
<link rel="stylesheet" href="/book.min.22eceb4d17baa9cdc0f57345edd6f215a40474022dfee39b63befb5fb3c596b5.css" integrity="sha256-IuzrTRe6qc3A9XNF7dbyFaQEdAIt/uObY777X7PFlrU=">
<script defer src="/en.search.min.baf635ab0e127f80152dd1da4b524a5dea67cb9cc0feb21710b5188ada9c15c1.js" integrity="sha256-uvY1qw4Sf4AVLdHaS1JKXepny5zA/rIXELUYitqcFcE="></script>
<!--
Made with Book Theme
https://github.com/alex-shpak/hugo-book
-->
<meta name="generator" content="Hugo 0.124.1">
<script>
// Matomo page-view tracking for the Flink website.
// Commands are queued on the global `_paq` array (reusing an existing queue
// if one is already defined on `window`); the tracker script injected below
// is presumably what consumes this queue once it has loaded.
var _paq = window._paq = window._paq || [];
// 'disableCookies' is queued before 'trackPageView' so it is processed first.
_paq.push(['disableCookies']);
_paq.push(["setDomains", ["*.flink.apache.org","*.nightlies.apache.org/flink"]]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
  // Absolute https:// base URL instead of a protocol-relative "//" one, so the
  // tracker is always requested over TLS and still resolves when the page is
  // opened from file:// or served over plain http.
  var u="https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', u+'matomo.php']);
  _paq.push(['setSiteId', '1']);
  // Insert an async <script> for matomo.js ahead of the first <script> element
  // in the document so loading it does not block parsing.
  var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
  g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
</head>
<body dir="ltr">
<header>
<nav class="navbar navbar-expand-xl">
<div class="container-fluid">
<a class="navbar-brand" href="/">
<img src="/img/logo/png/100/flink_squirrel_100_color.png" alt="Apache Flink" height="47" width="47" class="d-inline-block align-text-middle">
<span>Apache Flink</span>
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
<i class="fa fa-bars navbar-toggler-icon"></i>
</button>
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">About</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/what-is-flink/flink-architecture/">Architecture</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-applications/">Applications</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-operations/">Operations</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/use-cases/">Use Cases</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/powered-by/">Powered By</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/roadmap/">Roadmap</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/community/">Community &amp; Project Info</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/security/">Security</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/special-thanks/">Special Thanks</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Getting Started</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/try-flink/local_installation/">With Flink<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/docs/try-flink-kubernetes-operator/quick-start/">With Flink Kubernetes Operator<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable/docs/get-started/introduction/">With Flink CDC<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/docs/try-flink-ml/quick-start/">With Flink ML<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/getting-started/project-setup.html">With Flink Stateful Functions<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/learn-flink/overview/">Training Course<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Documentation</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/">Flink 1.19 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-master/">Flink Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/">Kubernetes Operator 1.8 (latest)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-main">Kubernetes Operator Main (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable">CDC 3.0 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-master">CDC Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/">ML 2.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-master">ML Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/">Stateful Functions 3.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-master">Stateful Functions Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">How to Contribute</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/how-to-contribute/overview/">Overview</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-code/">Contribute Code</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/reviewing-prs/">Review Pull Requests</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/code-style-and-quality-preamble/">Code Style and Quality Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-documentation/">Contribute Documentation</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/documentation-style-guide/">Documentation Style Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/improve-website/">Contribute to the Website</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/getting-help/">Getting Help</a>
</li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link" href="/posts/">Flink Blog</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/downloads/">Downloads</a>
</li>
</ul>
<div class="book-search">
<div class="book-search-spinner hidden">
<i class="fa fa-refresh fa-spin"></i>
</div>
<form class="search-bar d-flex" onsubmit="return false;">
<input type="text" id="book-search-input" placeholder="Search" aria-label="Search" maxlength="64" data-hotkeys="s/">
<i class="fa fa-search search"></i>
<i class="fa fa-circle-o-notch fa-spin spinner"></i>
</form>
<div class="book-search-spinner hidden"></div>
<ul id="book-search-results"></ul>
</div>
</div>
</div>
</nav>
<div class="navbar-clearfix"></div>
</header>
<main class="flex">
<section class="container book-page">
<article class="markdown">
<h1>
<a href="/2022/07/12/apache-flink-ml-2.1.0-release-announcement/">Apache Flink ML 2.1.0 Release Announcement</a>
</h1>
July 12, 2022 -
Zhipeng Zhang
Dong Lin
<p>The Apache Flink community is excited to announce the release of Flink ML 2.1.0!
This release focuses on improving Flink ML&rsquo;s infrastructure, such as Python SDK,
memory management, and benchmark framework, to facilitate the development of
performant, memory-safe, and easy-to-use algorithm libraries. We validated the
enhanced infrastructure by implementing, benchmarking, and optimizing 10 new
algorithms in Flink ML, and confirmed that Flink ML can meet or exceed the
performance of selected algorithms from alternative popular ML libraries.
In addition, this release added example Python and Java programs for each
algorithm in the library to help users learn and use Flink ML.</p>
<p>With the improvements and performance benchmarks made in this release, we
believe Flink ML&rsquo;s infrastructure is ready for use by the interested developers
in the community to build performant pythonic machine learning libraries.</p>
<p>We encourage you to <a href="https://flink.apache.org/downloads.html">download the release</a>
and share your feedback with the community through the Flink
<a href="https://flink.apache.org/community.html#mailing-lists">mailing lists</a> or
<a href="https://issues.apache.org/jira/browse/flink">JIRA</a>! We hope you like the new
release and we’d be eager to learn about your experience with it.</p>
<h1 id="notable-features">
Notable Features
<a class="anchor" href="#notable-features">#</a>
</h1>
<h2 id="api-and-infrastructure">
API and Infrastructure
<a class="anchor" href="#api-and-infrastructure">#</a>
</h2>
<h3 id="supporting-fine-grained-per-operator-memory-management">
Supporting fine-grained per-operator memory management
<a class="anchor" href="#supporting-fine-grained-per-operator-memory-management">#</a>
</h3>
<p>Before this release, algorithm operators with internal states (e.g. the training
data to be replayed for each round of iteration) store state data using the
state-backend API (e.g. ListState). Such an operator either needs to store all
data in memory, which risks OOM, or it needs to always store data on disk.
In the latter case, it needs to read and de-serialize all data from disks
repeatedly in each round of iteration even if the data can fit in RAM, leading
to sub-optimal performance when the training data size is small. This makes it
hard for developers to write performant and memory-safe operators.</p>
<p>This release enhances the Flink ML infrastructure with the mechanism to specify
the amount of managed memory that an operator can consume. This allows algorithm
operators to write and read data from managed memory when the data size is below
the quota, and automatically spill any data that exceeds the memory quota to
disks to avoid OOM. Algorithm developers can use this mechanism to achieve
optimal algorithm performance as input data size varies. Please feel free to
check out the implementation of the KMeans operator for example.</p>
<h3 id="improved-infrastructure-for-developing-online-learning-algorithms">
Improved infrastructure for developing online learning algorithms
<a class="anchor" href="#improved-infrastructure-for-developing-online-learning-algorithms">#</a>
</h3>
<p>A key objective of Flink ML is to facilitate the development of online learning
applications. In the last release, we enhanced the Flink ML API with
setModelData() and getModelData(), which allow users of online learning
algorithms to transmit and persist model data as unbounded data streams.
This release continues the effort by improving and validating the infrastructure
needed to develop online learning algorithms.</p>
<p>Specifically, this release added two online learning algorithm prototypes (i.e.
OnlineKMeans and OnlineLogisticRegression) with tests covering the entire
lifecycle of using these algorithms. These two algorithms introduce concepts
such as global batch size and model version, together with metrics and APIs to
set and get those values. While the online algorithm prototypes have not been
optimized for prediction accuracy yet, this line of work is an important step
toward setting up best practices for building online learning algorithms in
Flink ML. We hope more contributors from the community can join this effort.</p>
<h3 id="algorithm-benchmark-framework">
Algorithm benchmark framework
<a class="anchor" href="#algorithm-benchmark-framework">#</a>
</h3>
<p>An easy-to-use benchmark framework is critical to developing and maintaining
performant algorithm libraries in Flink ML. This release added a benchmark
framework that provides APIs to write pluggable and reusable data generators,
takes benchmark configuration in JSON format, and outputs benchmark results in
JSON format to enable custom analysis. An off-the-shelf script is provided to
visualize benchmark results using Matplotlib. Feel free to check out this
<a href="https://github.com/apache/flink-ml/blob/release-2.1/flink-ml-benchmark/README.md">README</a>
for instructions on how to use this benchmark framework.</p>
<p>The benchmark framework currently supports evaluating algorithm throughput.
In a future release, we plan to support evaluating algorithm latency and
accuracy.</p>
<h2 id="python-sdk">
Python SDK
<a class="anchor" href="#python-sdk">#</a>
</h2>
<p>This release enhances the Python SDK so that operators in the Flink ML Python
library can invoke the corresponding operators in the Java library. The Python
operator is a thin wrapper around the Java operator and delivers the same
performance as the Java operator during execution. This capability significantly
improves developer velocity by allowing algorithm developers to maintain both
the Python and the Java libraries of algorithms without having to implement
those algorithms twice.</p>
<h2 id="algorithm-library">
Algorithm Library
<a class="anchor" href="#algorithm-library">#</a>
</h2>
<p>This release continues to extend the algorithm library in Flink ML, with the
focus on validating the functionalities and the performance of Flink ML
infrastructure using representative algorithms in different categories.</p>
<p>Below are the lists of algorithms newly supported in this release, grouped by
their categories:</p>
<ul>
<li>Feature engineering (MinMaxScaler, StringIndexer, VectorAssembler,
StandardScaler, Bucketizer)</li>
<li>Online learning (OnlineKMeans, OnlineLogisticRegression)</li>
<li>Regression (LinearRegression)</li>
<li>Classification (LinearSVC)</li>
<li>Evaluation (BinaryClassificationEvaluator)</li>
</ul>
<p>Example Python and Java programs for these algorithms are provided on the Apache
Flink ML <a href="https://nightlies.apache.org/flink/flink-ml-docs-release-2.1/">website</a> to
help users learn and try out Flink ML. And we also provided example benchmark
<a href="https://github.com/apache/flink-ml/tree/release-2.1/flink-ml-benchmark/src/main/resources">configuration files</a>
in the repo for users to validate Flink ML performance. Feel free to check out
this <a href="https://github.com/apache/flink-ml/blob/release-2.1/flink-ml-benchmark/README.md">README</a>
for instructions on how to run those benchmarks.</p>
<h1 id="upgrade-notes">
Upgrade Notes
<a class="anchor" href="#upgrade-notes">#</a>
</h1>
<p>Please review this note for a list of adjustments to make and issues to check
if you plan to upgrade to Flink ML 2.1.0.</p>
<p>This note discusses any critical information about incompatibilities and
breaking changes, performance changes, and any other changes that might impact
your production deployment of Flink ML.</p>
<ul>
<li>
<p><strong>Flink dependency is changed from 1.14 to 1.15</strong>.</p>
<p>This change introduces all the breaking changes listed in the Flink 1.15
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.15/release-notes/flink-1.15/">release notes</a>.</p>
</li>
</ul>
<h1 id="release-notes-and-resources">
Release Notes and Resources
<a class="anchor" href="#release-notes-and-resources">#</a>
</h1>
<p>Please take a look at the <a href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315522&amp;version=12351141">release notes</a>
for a detailed list of changes and new features.</p>
<p>The source artifacts are now available on the updated
<a href="https://flink.apache.org/downloads.html">Downloads page</a> of the Flink website,
and the most recent distribution of Flink ML Python package is available on
<a href="https://pypi.org/project/apache-flink-ml">PyPI</a>.</p>
<h1 id="list-of-contributors">
List of Contributors
<a class="anchor" href="#list-of-contributors">#</a>
</h1>
<p>The Apache Flink community would like to thank each one of the contributors
that have made this release possible:</p>
<p>Yunfeng Zhou, Zhipeng Zhang, huangxingbo, weibo, Dong Lin, Yun Gao, Jingsong Li
and mumuhhh.</p>
</article>
<div class="edit-this-page">
<p>
<a href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Translation+Specifications">Want to contribute translation?</a>
</p>
<p>
<a href="https://github.com/apache/flink-web/edit/asf-site/docs/content/posts/2022-07-12-release-ml-2.1.0.md">
Edit This Page<i class="fa fa-edit fa-fw"></i>
</a>
</p>
</div>
</section>
<aside class="book-toc">
<nav id="TableOfContents"><h3>On This Page <a href="javascript:void(0)" class="toc" onclick="collapseToc()"><i class="fa fa-times" aria-hidden="true"></i></a></h3>
<ul>
<li><a href="#notable-features">Notable Features</a>
<ul>
<li><a href="#api-and-infrastructure">API and Infrastructure</a>
<ul>
<li><a href="#supporting-fine-grained-per-operator-memory-management">Supporting fine-grained per-operator memory management</a></li>
<li><a href="#improved-infrastructure-for-developing-online-learning-algorithms">Improved infrastructure for developing online learning algorithms</a></li>
<li><a href="#algorithm-benchmark-framework">Algorithm benchmark framework</a></li>
</ul>
</li>
<li><a href="#python-sdk">Python SDK</a></li>
<li><a href="#algorithm-library">Algorithm Library</a></li>
</ul>
</li>
<li><a href="#upgrade-notes">Upgrade Notes</a></li>
<li><a href="#release-notes-and-resources">Release Notes and Resources</a></li>
<li><a href="#list-of-contributors">List of Contributors</a></li>
</ul>
</nav>
</aside>
<aside class="expand-toc hidden">
<a class="toc" onclick="expandToc()" href="javascript:void(0)">
<i class="fa fa-bars" aria-hidden="true"></i>
</a>
</aside>
</main>
<footer>
<div class="separator"></div>
<div class="panels">
<div class="wrapper">
<div class="panel">
<ul>
<li>
<a href="https://flink-packages.org/">flink-packages.org</a>
</li>
<li>
<a href="https://www.apache.org/">Apache Software Foundation</a>
</li>
<li>
<a href="https://www.apache.org/licenses/">License</a>
</li>
<li>
<a href="/zh/">
<i class="fa fa-globe" aria-hidden="true"></i>&nbsp;中文版
</a>
</li>
</ul>
</div>
<div class="panel">
<ul>
<li>
<a href="https://www.apache.org/security/">Security</a>
</li>
<li>
<a href="https://www.apache.org/foundation/sponsorship.html">Donate</a>
</li>
<li>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</li>
</ul>
</div>
<div class="panel icons">
<div>
<a href="/posts">
<div class="icon flink-blog-icon"></div>
<span>Flink blog</span>
</a>
</div>
<div>
<a href="https://github.com/apache/flink">
<div class="icon flink-github-icon"></div>
<span>GitHub</span>
</a>
</div>
<div>
<a href="https://twitter.com/apacheflink">
<div class="icon flink-twitter-icon"></div>
<span>Twitter</span>
</a>
</div>
</div>
</div>
</div>
<hr/>
<div class="container disclaimer">
<p>The contents of this website are © 2024 Apache Software Foundation under the terms of the Apache License v2. Apache Flink, Flink, and the Flink logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</footer>
</body>
</html>