<!-- blob: b5e90679e093fb1f4c4133b12874ec58d019ea85 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
<link rel="stylesheet" href="/bootstrap/css/bootstrap.min.css">
<script src="/bootstrap/js/bootstrap.bundle.min.js"></script>
<link rel="stylesheet" type="text/css" href="/font-awesome/css/font-awesome.min.css">
<script src="/js/anchor.min.js"></script>
<script src="/js/flink.js"></script>
<link rel="canonical" href="https://flink.apache.org/2023/04/19/apache-flink-ml-2.2.0-release-announcement/">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="The Apache Flink community is excited to announce the release of Flink ML 2.2.0! This release focuses on enriching Flink ML&rsquo;s feature engineering algorithms. The library now includes 33 feature engineering algorithms, making it a more comprehensive library for feature engineering tasks.
With the addition of these algorithms, we believe Flink ML library is ready for use in production jobs that require feature engineering capabilities, whose input can then be consumed by both offline and online machine learning tasks.">
<meta name="theme-color" content="#FFFFFF"><meta property="og:title" content="Apache Flink ML 2.2.0 Release Announcement" />
<meta property="og:description" content="The Apache Flink community is excited to announce the release of Flink ML 2.2.0! This release focuses on enriching Flink ML&rsquo;s feature engineering algorithms. The library now includes 33 feature engineering algorithms, making it a more comprehensive library for feature engineering tasks.
With the addition of these algorithms, we believe Flink ML library is ready for use in production jobs that require feature engineering capabilities, whose input can then be consumed by both offline and online machine learning tasks." />
<meta property="og:type" content="article" />
<meta property="og:url" content="https://flink.apache.org/2023/04/19/apache-flink-ml-2.2.0-release-announcement/" /><meta property="article:section" content="posts" />
<meta property="article:published_time" content="2023-04-19T08:00:00+00:00" />
<meta property="article:modified_time" content="2023-04-19T08:00:00+00:00" />
<title>Apache Flink ML 2.2.0 Release Announcement | Apache Flink</title>
<link rel="manifest" href="/manifest.json">
<link rel="icon" href="/favicon.png" type="image/png">
<link rel="stylesheet" href="/book.min.22eceb4d17baa9cdc0f57345edd6f215a40474022dfee39b63befb5fb3c596b5.css" integrity="sha256-IuzrTRe6qc3A9XNF7dbyFaQEdAIt/uObY777X7PFlrU=">
<script defer src="/en.search.min.2698f0d1b683dae4d6cb071668b310a55ebcf1c48d11410a015a51d90105b53e.js" integrity="sha256-Jpjw0baD2uTWywcWaLMQpV688cSNEUEKAVpR2QEFtT4="></script>
<!--
Made with Book Theme
https://github.com/alex-shpak/hugo-book
-->
<meta name="generator" content="Hugo 0.124.1">
<script>
// Matomo analytics bootstrap: tracker commands are queued on the global
// `_paq` array, then the tracker script is injected asynchronously.
var _paq = window._paq = window._paq || [];
_paq.push(['disableCookies']);
_paq.push(["setDomains", ["*.flink.apache.org","*.nightlies.apache.org/flink"]]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
  // Base URL shared by the tracking endpoint and the tracker script.
  var trackerBase = "//analytics.apache.org/";
  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '1']);

  // Insert the loader <script> ahead of the first script element in the page.
  var firstScript = document.getElementsByTagName('script')[0];
  var loader = document.createElement('script');
  loader.async = true;
  loader.src = trackerBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
</head>
<body dir="ltr">
<header>
<nav class="navbar navbar-expand-xl">
<div class="container-fluid">
<a class="navbar-brand" href="/">
<img src="/img/logo/png/100/flink_squirrel_100_color.png" alt="Apache Flink" height="47" width="47" class="d-inline-block align-text-middle">
<span>Apache Flink</span>
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
<i class="fa fa-bars navbar-toggler-icon"></i>
</button>
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">About</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/what-is-flink/flink-architecture/">Architecture</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-applications/">Applications</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-operations/">Operations</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/use-cases/">Use Cases</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/powered-by/">Powered By</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/roadmap/">Roadmap</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/community/">Community &amp; Project Info</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/security/">Security</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/special-thanks/">Special Thanks</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Getting Started</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/try-flink/local_installation/">With Flink<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/docs/try-flink-kubernetes-operator/quick-start/">With Flink Kubernetes Operator<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable/docs/get-started/introduction/">With Flink CDC<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/docs/try-flink-ml/quick-start/">With Flink ML<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/getting-started/project-setup.html">With Flink Stateful Functions<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/learn-flink/overview/">Training Course<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Documentation</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/">Flink 1.19 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-master/">Flink Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/">Kubernetes Operator 1.8 (latest)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-main">Kubernetes Operator Main (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable">CDC 3.0 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-master">CDC Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/">ML 2.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-master">ML Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/">Stateful Functions 3.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-master">Stateful Functions Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">How to Contribute</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/how-to-contribute/overview/">Overview</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-code/">Contribute Code</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/reviewing-prs/">Review Pull Requests</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/code-style-and-quality-preamble/">Code Style and Quality Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-documentation/">Contribute Documentation</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/documentation-style-guide/">Documentation Style Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/improve-website/">Contribute to the Website</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/getting-help/">Getting Help</a>
</li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link" href="/posts/">Flink Blog</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/downloads/">Downloads</a>
</li>
</ul>
<div class="book-search">
<div class="book-search-spinner hidden">
<i class="fa fa-refresh fa-spin"></i>
</div>
<form class="search-bar d-flex" onsubmit="return false;">
<input type="text" id="book-search-input" placeholder="Search" aria-label="Search" maxlength="64" data-hotkeys="s/">
<i class="fa fa-search search"></i>
<i class="fa fa-circle-o-notch fa-spin spinner"></i>
</form>
<div class="book-search-spinner hidden"></div>
<ul id="book-search-results"></ul>
</div>
</div>
</div>
</nav>
<div class="navbar-clearfix"></div>
</header>
<main class="flex">
<section class="container book-page">
<article class="markdown">
<h1>
<a href="/2023/04/19/apache-flink-ml-2.2.0-release-announcement/">Apache Flink ML 2.2.0 Release Announcement</a>
</h1>
April 19, 2023 -
Dong Lin
<p>The Apache Flink community is excited to announce the release of Flink ML 2.2.0!
This release focuses on enriching Flink ML&rsquo;s feature engineering algorithms. The
library now includes 33 feature engineering algorithms, making it a more
comprehensive library for feature engineering tasks.</p>
<p>With the addition of these algorithms, we believe the Flink ML library is ready for
use in production jobs that require feature engineering capabilities, whose
input can then be consumed by both offline and online machine learning tasks.</p>
<p>We encourage you to <a href="https://flink.apache.org/downloads.html">download the
release</a> and share your feedback with
the community through the Flink <a href="https://flink.apache.org/community.html#mailing-lists">mailing
lists</a> or
<a href="https://issues.apache.org/jira/browse/flink">JIRA</a>! We hope you like the new
release and we’d be eager to learn about your experience with it.</p>
<h1 id="notable-features">
Notable Features
<a class="anchor" href="#notable-features">#</a>
</h1>
<h2 id="introduced-api-and-infrastructure-for-online-serving">
Introduced API and infrastructure for online serving
<a class="anchor" href="#introduced-api-and-infrastructure-for-online-serving">#</a>
</h2>
<p>In machine learning, one of the main goals of model training is to deploy the
trained model to perform online inference, where the model server must respond
to incoming requests with millisecond-level latency. However, prior releases of
Flink ML only supported nearline inference using the Flink runtime, which may
not meet the requirements of online inference use-cases.</p>
<p>With
<a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=240881268">FLIP-289</a>,
Flink ML now provides an API and infrastructure for users to load a
ModelServable from model data generated by an Estimator. This ModelServable can
be replicated across multiple model servers to process online inference requests
in parallel. As the ModelServable is effectively a UDF that does not rely on
Flink runtime, it can also be integrated as a UDF into other serving or
processing frameworks to serve the model trained by Flink ML.</p>
<p>As a first step, the LogisticRegressionModelServable has been added to serve the
logistic regression model online, and more servables will be added in the
future. This new feature enables Flink ML to be used for both offline and online
machine learning tasks, making it more versatile for a wider range of use cases.</p>
<h2 id="added-27-feature-engineering-algorithms">
Added 27 feature engineering algorithms
<a class="anchor" href="#added-27-feature-engineering-algorithms">#</a>
</h2>
<p>Flink ML 2.2.0 significantly expanded the coverage of feature engineering
algorithms, increasing the number from 6 to 33. Flink ML now covers 28 out of
the 33 feature engineering algorithms provided in Spark ML, making it a more
comprehensive library for feature engineering tasks.</p>
<p>Feature engineering is a critical step in modern AI infrastructures as it can
preprocess data not only for traditional machine learning algorithms like GBT
but also for deep learning algorithms and large language models like
Transformer, which are increasingly popular. With the addition of these
algorithms, we hope Flink ML can be more useful in machine-learning tasks for
Flink users.</p>
<p>All feature engineering algorithms can be easily accessed through the drop-down
list on the left side of
<a href="https://nightlies.apache.org/flink/flink-ml-docs-master/docs/operators/feature/binarizer/">this</a>
Flink ML page. For each algorithm, we have provided Python and Java examples to
demonstrate how to use them.</p>
<h2 id="added-two-production-validated-online-learning-algorithms">
Added two production-validated online learning algorithms
<a class="anchor" href="#added-two-production-validated-online-learning-algorithms">#</a>
</h2>
<p>Flink ML offers a significant advantage over other machine learning libraries in
terms of its ability to perform online learning using Flink&rsquo;s streaming runtime.
To leverage this strength, we implemented two online algorithms in Flink ML and
successfully used them in a production machine learning job at Alibaba.</p>
<p>This job involves dynamically clustering similar logs and detecting errors in
the logs to help site reliability engineers. By using OnlineStandardScaler and
AgglomerativeClustering to standardize and cluster logs in real-time, the job is
able to update models more frequently with a much simpler infrastructure setup.
We presented this work at <a href="https://flink-forward.org.cn/">Flink Forward Asia</a>
last year, and it will soon be integrated into the open-source project
<a href="https://github.com/alibaba/SREWorks">SREWorks</a>.</p>
<p>With these online algorithms, Flink ML provides users with the ability to
continuously update models using new data in real-time, resulting in more
accurate and up-to-date predictions. This can be particularly useful in use
cases where data is constantly streaming in, and it&rsquo;s important to make quick
decisions based on the latest available information.</p>
<h1 id="upgrade-notes">
Upgrade Notes
<a class="anchor" href="#upgrade-notes">#</a>
</h1>
<p>This release is fully backward compatible with Flink ML 2.1. Users should be
able to upgrade to Flink ML 2.2.0 without worrying about any incompatibilities
or breaking changes.</p>
<h1 id="release-notes-and-resources">
Release Notes and Resources
<a class="anchor" href="#release-notes-and-resources">#</a>
</h1>
<p>Please take a look at the <a href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315522&amp;version=12351884">release
notes</a>
for a detailed list of changes and new features.</p>
<p>The binary distribution and source artifacts are now available on the updated
<a href="https://flink.apache.org/downloads.html">Downloads page</a> of the Flink website,
and the most recent distribution of the Flink ML Python package is available on
<a href="https://pypi.org/project/apache-flink-ml">PyPI</a>.</p>
<h1 id="list-of-contributors">
List of Contributors
<a class="anchor" href="#list-of-contributors">#</a>
</h1>
<p>The Apache Flink community would like to thank each one of the contributors that
have made this release possible:</p>
<p>Zhipeng Zhang, Dong Lin, Fan Hong, JiangXin, Zsombor Chikan, huangxingbo,
taosiyuan163, vacaly, weibozhao, yunfengzhou-hub</p>
</article>
<div class="edit-this-page">
<p>
<a href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Translation+Specifications">Want to contribute translation?</a>
</p>
<p>
<a href="https://github.com/apache/flink-web/edit/asf-site/docs/content/posts/2023-04-19-release-ml-2.2.0.md">
Edit This Page<i class="fa fa-edit fa-fw"></i>
</a>
</p>
</div>
</section>
<aside class="book-toc">
<nav id="TableOfContents"><h3>On This Page <a href="javascript:void(0)" class="toc" onclick="collapseToc()"><i class="fa fa-times" aria-hidden="true"></i></a></h3>
<ul>
<li><a href="#notable-features">Notable Features</a>
<ul>
<li><a href="#introduced-api-and-infrastructure-for-online-serving">Introduced API and infrastructure for online serving</a></li>
<li><a href="#added-27-feature-engineering-algorithms">Added 27 feature engineering algorithms</a></li>
<li><a href="#added-two-production-validated-online-learning-algorithms">Added two production-validated online learning algorithms</a></li>
</ul>
</li>
<li><a href="#upgrade-notes">Upgrade Notes</a></li>
<li><a href="#release-notes-and-resources">Release Notes and Resources</a></li>
<li><a href="#list-of-contributors">List of Contributors</a></li>
</ul>
</nav>
</aside>
<aside class="expand-toc hidden">
<a class="toc" onclick="expandToc()" href="javascript:void(0)">
<i class="fa fa-bars" aria-hidden="true"></i>
</a>
</aside>
</main>
<footer>
<div class="separator"></div>
<div class="panels">
<div class="wrapper">
<div class="panel">
<ul>
<li>
<a href="https://flink-packages.org/">flink-packages.org</a>
</li>
<li>
<a href="https://www.apache.org/">Apache Software Foundation</a>
</li>
<li>
<a href="https://www.apache.org/licenses/">License</a>
</li>
<li>
<a href="/zh/">
<i class="fa fa-globe" aria-hidden="true"></i>&nbsp;中文版
</a>
</li>
</ul>
</div>
<div class="panel">
<ul>
<li>
<a href="/what-is-flink/security">Security</a>
</li>
<li>
<a href="https://www.apache.org/foundation/sponsorship.html">Donate</a>
</li>
<li>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</li>
</ul>
</div>
<div class="panel icons">
<div>
<a href="/posts">
<div class="icon flink-blog-icon"></div>
<span>Flink blog</span>
</a>
</div>
<div>
<a href="https://github.com/apache/flink">
<div class="icon flink-github-icon"></div>
<span>GitHub</span>
</a>
</div>
<div>
<a href="https://twitter.com/apacheflink">
<div class="icon flink-twitter-icon"></div>
<span>Twitter</span>
</a>
</div>
</div>
</div>
</div>
<hr/>
<div class="container disclaimer">
<p>The contents of this website are © 2024 Apache Software Foundation under the terms of the Apache License v2. Apache Flink, Flink, and the Flink logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</footer>
</body>
</html>