<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
<link rel="stylesheet" href="/bootstrap/css/bootstrap.min.css">
<script src="/bootstrap/js/bootstrap.bundle.min.js"></script>
<link rel="stylesheet" type="text/css" href="/font-awesome/css/font-awesome.min.css">
<script src="/js/anchor.min.js"></script>
<script src="/js/flink.js"></script>
<link rel="canonical" href="https://flink.apache.org/2021/05/03/apache-flink-1.13.0-release-announcement/">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="The Apache Flink community is excited to announce the release of Flink 1.13.0! More than 200 contributors worked on over 1,000 issues for this new version.
The release brings us a big step forward in one of our major efforts: Making Stream Processing Applications as natural and as simple to manage as any other application. The new reactive scaling mode means that scaling streaming applications in and out now works like in any other application by just changing the number of parallel processes.">
<meta name="theme-color" content="#FFFFFF"><meta property="og:title" content="Apache Flink 1.13.0 Release Announcement" />
<meta property="og:description" content="The Apache Flink community is excited to announce the release of Flink 1.13.0! More than 200 contributors worked on over 1,000 issues for this new version.
The release brings us a big step forward in one of our major efforts: Making Stream Processing Applications as natural and as simple to manage as any other application. The new reactive scaling mode means that scaling streaming applications in and out now works like in any other application by just changing the number of parallel processes." />
<meta property="og:type" content="article" />
<meta property="og:url" content="https://flink.apache.org/2021/05/03/apache-flink-1.13.0-release-announcement/" /><meta property="article:section" content="posts" />
<meta property="article:published_time" content="2021-05-03T08:00:00+00:00" />
<meta property="article:modified_time" content="2021-05-03T08:00:00+00:00" />
<title>Apache Flink 1.13.0 Release Announcement | Apache Flink</title>
<link rel="manifest" href="/manifest.json">
<link rel="icon" href="/favicon.png" type="image/x-icon">
<link rel="stylesheet" href="/book.min.22eceb4d17baa9cdc0f57345edd6f215a40474022dfee39b63befb5fb3c596b5.css" integrity="sha256-IuzrTRe6qc3A9XNF7dbyFaQEdAIt/uObY777X7PFlrU=">
<script defer src="/en.search.min.2698f0d1b683dae4d6cb071668b310a55ebcf1c48d11410a015a51d90105b53e.js" integrity="sha256-Jpjw0baD2uTWywcWaLMQpV688cSNEUEKAVpR2QEFtT4="></script>
<!--
Made with Book Theme
https://github.com/alex-shpak/hugo-book
-->
<meta name="generator" content="Hugo 0.124.1">
<script>
var _paq = window._paq = window._paq || [];
_paq.push(['disableCookies']);
_paq.push(["setDomains", ["*.flink.apache.org","*.nightlies.apache.org/flink"]]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="//analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '1']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
</head>
<body dir="ltr">
<header>
<nav class="navbar navbar-expand-xl">
<div class="container-fluid">
<a class="navbar-brand" href="/">
<img src="/img/logo/png/100/flink_squirrel_100_color.png" alt="Apache Flink" height="47" width="47" class="d-inline-block align-text-middle">
<span>Apache Flink</span>
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
<i class="fa fa-bars navbar-toggler-icon"></i>
</button>
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">About</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/what-is-flink/flink-architecture/">Architecture</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-applications/">Applications</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-operations/">Operations</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/use-cases/">Use Cases</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/powered-by/">Powered By</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/roadmap/">Roadmap</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/community/">Community & Project Info</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/security/">Security</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/special-thanks/">Special Thanks</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Getting Started</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/try-flink/local_installation/">With Flink<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/docs/try-flink-kubernetes-operator/quick-start/">With Flink Kubernetes Operator<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable/docs/get-started/introduction/">With Flink CDC<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/docs/try-flink-ml/quick-start/">With Flink ML<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/getting-started/project-setup.html">With Flink Stateful Functions<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/learn-flink/overview/">Training Course<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Documentation</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/">Flink 1.19 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-master/">Flink Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/">Kubernetes Operator 1.8 (latest)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-main">Kubernetes Operator Main (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable">CDC 3.0 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-master">CDC Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/">ML 2.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-master">ML Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/">Stateful Functions 3.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-master">Stateful Functions Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">How to Contribute</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/how-to-contribute/overview/">Overview</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-code/">Contribute Code</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/reviewing-prs/">Review Pull Requests</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/code-style-and-quality-preamble/">Code Style and Quality Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-documentation/">Contribute Documentation</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/documentation-style-guide/">Documentation Style Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/improve-website/">Contribute to the Website</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/getting-help/">Getting Help</a>
</li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link" href="/posts/">Flink Blog</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/downloads/">Downloads</a>
</li>
</ul>
<div class="book-search">
<div class="book-search-spinner hidden">
<i class="fa fa-refresh fa-spin"></i>
</div>
<form class="search-bar d-flex" onsubmit="return false;"su>
<input type="text" id="book-search-input" placeholder="Search" aria-label="Search" maxlength="64" data-hotkeys="s/">
<i class="fa fa-search search"></i>
<i class="fa fa-circle-o-notch fa-spin spinner"></i>
</form>
<div class="book-search-spinner hidden"></div>
<ul id="book-search-results"></ul>
</div>
</div>
</div>
</nav>
<div class="navbar-clearfix"></div>
</header>
<main class="flex">
<section class="container book-page">
<article class="markdown">
<h1>
<a href="/2021/05/03/apache-flink-1.13.0-release-announcement/">Apache Flink 1.13.0 Release Announcement</a>
</h1>
May 3, 2021 -
Stephan Ewen
<a href="https://twitter.com/StephanEwen">(@StephanEwen)</a>
Dawid Wysakowicz
<a href="https://twitter.com/dwysakowicz">(@dwysakowicz)</a>
<p>The Apache Flink community is excited to announce the release of Flink 1.13.0! More than 200
contributors worked on over 1,000 issues for this new version.</p>
<p>The release brings us a big step forward in one of our major efforts: <strong>Making Stream Processing
Applications as natural and as simple to manage as any other application.</strong> The new <em>reactive scaling</em>
mode means that scaling streaming applications in and out now works like in any other application
by just changing the number of parallel processes.</p>
<p>The release also prominently features a <strong>series of improvements that help users better understand the performance of
applications.</strong> When the streams don&rsquo;t flow as fast as you&rsquo;d hope, these can help you to understand
why: Load and <em>backpressure visualization</em> to identify bottlenecks, <em>CPU flame graphs</em> to identify hot
code paths in your application, and <em>State Access Latencies</em> to see how the State Backends are keeping
up.</p>
<p>Beyond those features, the Flink community has added a ton of improvements all over the system,
some of which we discuss in this article. We hope you enjoy the new release and features.
Towards the end of the article, we describe changes to be aware of when upgrading
from earlier versions of Apache Flink.</p>
<p>We encourage you to <a href="https://flink.apache.org/downloads.html">download the release</a> and share your
feedback with the community through
the <a href="https://flink.apache.org/community.html#mailing-lists">Flink mailing lists</a>
or <a href="https://issues.apache.org/jira/projects/FLINK/summary">JIRA</a>.</p>
<hr>
<h1 id="notable-features">
Notable features
<a class="anchor" href="#notable-features">#</a>
</h1>
<h2 id="reactive-scaling">
Reactive scaling
<a class="anchor" href="#reactive-scaling">#</a>
</h2>
<p>Reactive scaling is the latest piece in Flink&rsquo;s initiative to make Stream Processing
Applications as natural and as simple to manage as any other application.</p>
<p>Flink has a dual nature when it comes to resource management and deployments: You can deploy
Flink applications onto resource orchestrators like Kubernetes or Yarn in such a way that Flink actively manages
the resources and allocates and releases workers as needed. That is especially useful for jobs and
applications that rapidly change their required resources, like batch applications and ad-hoc SQL
queries. The application parallelism rules; the number of workers follows. In the context of Flink
applications, we call this <em>active scaling</em>.</p>
<p>For long-running streaming applications, it is often a nicer model to just deploy them like any
other long-running application: The application doesn&rsquo;t really need to know that it runs on K8s,
EKS, Yarn, etc. and doesn&rsquo;t try to acquire a specific amount of workers; instead, it just uses the
number of workers that are given to it. The number of workers rules; the application parallelism
adjusts to that. In the context of Flink, we call that <em>reactive scaling</em>.</p>
<p>The <a href="//nightlies.apache.org/flink/flink-docs-release-1.13/docs/concepts/flink-architecture/#flink-application-execution">Application Deployment Mode</a>
started this effort, making deployments more application-like (by avoiding two separate deployment
steps to (1) start a cluster and (2) submit an application). The reactive scaling mode completes this,
and you no longer need extra tools (scripts, or a K8s operator) to keep the number
of workers and the application parallelism settings in sync.</p>
<p>You can now put an auto-scaler around Flink applications just like around any other typical
application, as long as you are mindful of the cost of rescaling when configuring the autoscaler: Stateful
streaming applications must move state around when scaling.</p>
<p>To try the reactive-scaling mode, add the <code>scheduler-mode: reactive</code> config entry and deploy
an application cluster (<a href="//nightlies.apache.org/flink/flink-docs-release-1.13/docs/deployment/resource-providers/standalone/overview/#application-mode">standalone</a> or <a href="//nightlies.apache.org/flink/flink-docs-release-1.13/docs/deployment/resource-providers/standalone/kubernetes/#deploy-application-cluster">Kubernetes</a>). Check out <a href="//nightlies.apache.org/flink/flink-docs-release-1.13/docs/deployment/elastic_scaling/#reactive-mode">the reactive scaling docs</a> for more details.</p>
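<p>As a minimal sketch of what that looks like in practice for a standalone cluster (the job class
name is illustrative), the configuration and launch commands could be:</p>
<pre tabindex="0"><code># flink-conf.yaml
scheduler-mode: reactive

# start an application cluster; the job's parallelism then follows the
# number of TaskManagers that register with it
./bin/standalone-job.sh start --job-classname com.example.MyStreamingJob
./bin/taskmanager.sh start
</code></pre>
<p>Adding or stopping TaskManagers (for example, via a horizontal autoscaler) is then all it takes
to rescale the job.</p>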
<h2 id="analyzing-application-performance">
Analyzing application performance
<a class="anchor" href="#analyzing-application-performance">#</a>
</h2>
<p>Like for any application, analyzing and understanding the performance of a Flink application
is critical. Often it is even more critical, because Flink applications are typically data-intensive
(processing high volumes of data) and are at the same time expected to provide results within
(near-) real-time latencies.</p>
<p>When an application doesn&rsquo;t keep up with the data rate anymore, or an application takes more
resources than you&rsquo;d expect it would, these new tools can help you track down the causes:</p>
<p><strong>Bottleneck detection, Back Pressure monitoring</strong></p>
<p>The first question during performance analysis is often: Which operation is the bottleneck?</p>
<p>To help answer that, Flink exposes metrics about the degree to which tasks are <em>busy</em> (doing work)
and <em>back-pressured</em> (have the capacity to do work but cannot because their successor operators
cannot accept more results). Candidates for bottlenecks are the busy operators whose predecessors
are back-pressured.</p>
<p>Flink 1.13 brings an improved back pressure metric system (using task mailbox timings rather than
thread stack sampling), and a reworked graphical representation of the job&rsquo;s dataflow with color-coding
and ratios for busyness and backpressure.</p>
<figure style="align-content: center">
<img src="/img/blog/2021-05-03-release-1.13.0/bottleneck.png" style="width: 900px"/>
</figure>
<p><strong>CPU flame graphs in Web UI</strong></p>
<p>The next question during performance analysis is typically: What part of work in the bottlenecked
operator is expensive?</p>
<p>One visually effective means to investigate that is <em>Flame Graphs</em>. They help answer questions like:</p>
<ul>
<li>
<p>Which methods are currently consuming CPU resources?</p>
</li>
<li>
<p>How does one method&rsquo;s CPU consumption compare to other methods?</p>
</li>
<li>
<p>Which series of calls on the stack led to executing a particular method?</p>
</li>
</ul>
<p>Flame Graphs are constructed by repeatedly sampling the thread stack traces. Every method call is
represented by a bar, where the length of the bar is proportional to the number of times it is present
in the samples. When enabled, the graphs are shown in a new UI component for the selected operator.</p>
<figure style="align-content: center">
<img src="/img/blog/2021-05-03-release-1.13.0/7.png" style="display: block; margin-left: auto; margin-right: auto; width: 600px"/>
</figure>
<p>The <a href="//nightlies.apache.org/flink/flink-docs-release-1.13/docs/ops/debugging/flame_graphs">Flame Graphs documentation</a>
contains more details and instructions for enabling this feature.</p>
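<p>The feature is behind a configuration flag. A minimal sketch, assuming the option name from the
Flink 1.13 configuration reference:</p>
<pre tabindex="0"><code># flink-conf.yaml -- opt in to the (experimental) flame graph feature
rest.flamegraph.enabled: true
</code></pre>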
<p><strong>Access Latency Metrics for State</strong></p>
<p>Another possible performance bottleneck can be the state backend, especially when your state is larger
than the main memory available to Flink and you are using the <a href="//nightlies.apache.org/flink/flink-docs-release-1.13/docs/ops/state/state_backends/#the-embeddedrocksdbstatebackend">RocksDB state backend</a>.</p>
<p>That&rsquo;s not saying RocksDB is slow (we love RocksDB!), but it has some requirements to achieve
good performance. For example, it is easy to accidentally <a href="https://www.ververica.com/blog/the-impact-of-disks-on-rocksdb-state-backend-in-flink-a-case-study">starve RocksDB&rsquo;s demand for IOPs on cloud setups with
the wrong type of disk resources</a>.</p>
<p>On top of the CPU flame graphs, the new <em>state backend latency metrics</em> can help you understand whether
your state backend is responsive. For example, if you see that RocksDB state accesses start to take
milliseconds, you probably need to look into your memory and I/O configuration.
These metrics can be activated by setting the <code>state.backend.rocksdb.latency-track-enabled</code> option.
The metrics are sampled, and their collection should have a marginal impact on the RocksDB state
backend performance.</p>
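<p>A minimal sketch, using the option named above:</p>
<pre tabindex="0"><code># flink-conf.yaml -- sample state-access latencies of the state backend
state.backend.rocksdb.latency-track-enabled: true
</code></pre>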
<h2 id="switching-state-backend-with-savepoints">
Switching State Backend with savepoints
<a class="anchor" href="#switching-state-backend-with-savepoints">#</a>
</h2>
<p>You can now change the state backend of a Flink application when resuming from a savepoint.
That means the application&rsquo;s state is no longer locked into the state backend that was used when
the application was initially started.</p>
<p>This makes it possible, for example, to initially start with the HashMap State Backend (pure
in-memory in JVM Heap) and later switch to the RocksDB State Backend, once the state grows
too large.</p>
<p>Under the hood, Flink now has a canonical savepoint format, which all state backends use when
creating a data snapshot for a savepoint.</p>
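<p>A typical switch might look like the following sketch (the job ID, savepoint paths, and jar name
are illustrative):</p>
<pre tabindex="0"><code># take a savepoint and stop the running job
./bin/flink stop --savepointPath /tmp/flink-savepoints &lt;jobId&gt;

# change the backend in flink-conf.yaml, e.g.
#   state.backend: rocksdb

# resume from the savepoint with the new backend
./bin/flink run -s /tmp/flink-savepoints/savepoint-xxxx my-job.jar
</code></pre>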
<h2 id="user-specified-pod-templates-for-kubernetes-deployments">
User-specified pod templates for Kubernetes deployments
<a class="anchor" href="#user-specified-pod-templates-for-kubernetes-deployments">#</a>
</h2>
<p>The <a href="//nightlies.apache.org/flink/flink-docs-release-1.13/docs/deployment/resource-providers/native_kubernetes/">native Kubernetes deployment</a>
(where Flink actively talks to K8s to start and stop pods) now supports <em>custom pod templates</em>.</p>
<p>With those templates, users can set up and configure the JobManagers and TaskManagers pods in a
Kubernetes-y way, with flexibility beyond the configuration options that are directly built into
Flink&rsquo;s Kubernetes integration.</p>
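<p>As a sketch, a template that adds a log-shipping sidecar could look like the following, passed to
Flink via the <code>kubernetes.pod-template-file</code> option (the file name and sidecar image are
illustrative):</p>
<pre tabindex="0"><code># pod-template.yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod-template
spec:
  containers:
    # Flink merges its own settings into the container named flink-main-container
    - name: flink-main-container
      volumeMounts:
        - mountPath: /opt/flink/log
          name: flink-logs
    # an illustrative sidecar that ships the log files
    - name: log-shipper
      image: fluent/fluent-bit
      volumeMounts:
        - mountPath: /flink-logs
          name: flink-logs
  volumes:
    - name: flink-logs
      emptyDir: {}
</code></pre>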
<h2 id="unaligned-checkpoints---production-ready">
Unaligned Checkpoints - production-ready
<a class="anchor" href="#unaligned-checkpoints---production-ready">#</a>
</h2>
<p>Unaligned Checkpoints have matured to the point where we encourage all users to try them out
if they see issues with their application under backpressure.</p>
<p>In particular, these changes make Unaligned Checkpoints easier to use:</p>
<ul>
<li>
<p>You can now rescale applications from unaligned checkpoints. This comes in handy if your
application needs to be scaled from a retained checkpoint because you cannot (afford to) create
a savepoint.</p>
</li>
<li>
<p>Enabling unaligned checkpoints is cheaper for applications that are not back-pressured.
Unaligned checkpoints can now trigger adaptively with a timeout, meaning a checkpoint starts
as an aligned checkpoint (not storing any in-flight events) and falls back to an unaligned
checkpoint (storing some in-flight events), if the alignment phase takes longer than a certain
time.</p>
</li>
</ul>
<p>Find out more about how to enable unaligned checkpoints in the <a href="//nightlies.apache.org/flink/flink-docs-release-1.13/docs/ops/state/checkpoints/#unaligned-checkpoints">Checkpointing Documentation</a>.</p>
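<p>For example, both behaviors can be switched on in the configuration (keys as in the Flink 1.13
docs; the timeout value is illustrative):</p>
<pre tabindex="0"><code># flink-conf.yaml
execution.checkpointing.unaligned: true
# start each checkpoint aligned and fall back to unaligned
# only if the alignment phase exceeds this timeout
execution.checkpointing.alignment-timeout: 30 s
</code></pre>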
<h2 id="machine-learning-library-moving-to-a-separate-repository">
Machine Learning Library moving to a separate repository
<a class="anchor" href="#machine-learning-library-moving-to-a-separate-repository">#</a>
</h2>
<p>To accelerate the development of Flink&rsquo;s Machine Learning efforts (streaming, batch, and
unified machine learning), the effort has moved to the new repository <a href="https://github.com/apache/flink-ml">flink-ml</a>
under the Flink project. Here we follow an approach similar to the <em>Stateful Functions</em> effort,
where a separate repository has helped speed up development by allowing for more lightweight
contribution workflows and separate release cycles.</p>
<p>Stay tuned for more updates on the Machine Learning efforts, like the interplay with
<a href="https://github.com/alibaba/Alink">Alink</a> (a suite of common Machine Learning algorithms on Flink)
or the <a href="https://github.com/alibaba/flink-ai-extended">Flink &amp; TensorFlow integration</a>.</p>
<h1 id="notable-sql--table-api-improvements">
Notable SQL &amp; Table API improvements
<a class="anchor" href="#notable-sql--table-api-improvements">#</a>
</h1>
<p>Like in previous releases, SQL and the Table API remain an area of major development.</p>
<h2 id="windows-via-table-valued-functions">
Windows via Table-valued functions
<a class="anchor" href="#windows-via-table-valued-functions">#</a>
</h2>
<p>Defining time windows is one of the most frequent operations in streaming SQL queries.
Flink 1.13 introduces a new way to define windows: via <em>Table-valued Functions</em>.
This approach is both more expressive (lets you define new types of windows) and fully
in line with the SQL standard.</p>
<p>Flink 1.13 supports <em>TUMBLE</em> and <em>HOP</em> windows in the new syntax; <em>SESSION</em> windows will
follow in a subsequent release. To demonstrate the increased expressiveness, consider the two examples
below.</p>
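<p>For reference, the basic <em>TUMBLE</em> window in the new syntax looks as follows (a sketch
against the same <code>Bid</code> table used in the examples):</p>
<pre tabindex="0"><code class="language-sql" data-lang="sql">SELECT window_start, window_end, SUM(price) AS total_price
  FROM TABLE(TUMBLE(TABLE Bid, DESCRIPTOR(bidtime), INTERVAL '10' MINUTES))
GROUP BY window_start, window_end;
</code></pre>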
<p>The first is the new <em>CUMULATE</em> window function, which assigns windows with an expanding step size until the maximum
window size is reached:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-sql" data-lang="sql"><span class="line"><span class="cl"><span class="k">SELECT</span><span class="w"> </span><span class="n">window_time</span><span class="p">,</span><span class="w"> </span><span class="n">window_start</span><span class="p">,</span><span class="w"> </span><span class="n">window_end</span><span class="p">,</span><span class="w"> </span><span class="k">SUM</span><span class="p">(</span><span class="n">price</span><span class="p">)</span><span class="w"> </span><span class="k">AS</span><span class="w"> </span><span class="n">total_price</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">FROM</span><span class="w"> </span><span class="k">TABLE</span><span class="p">(</span><span class="n">CUMULATE</span><span class="p">(</span><span class="k">TABLE</span><span class="w"> </span><span class="n">Bid</span><span class="p">,</span><span class="w"> </span><span class="k">DESCRIPTOR</span><span class="p">(</span><span class="n">bidtime</span><span class="p">),</span><span class="w"> </span><span class="nb">INTERVAL</span><span class="w"> </span><span class="s1">&#39;2&#39;</span><span class="w"> </span><span class="n">MINUTES</span><span class="p">,</span><span class="w"> </span><span class="nb">INTERVAL</span><span class="w"> </span><span class="s1">&#39;10&#39;</span><span class="w"> </span><span class="n">MINUTES</span><span class="p">))</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="k">GROUP</span><span class="w"> </span><span class="k">BY</span><span class="w"> </span><span class="n">window_start</span><span class="p">,</span><span class="w"> </span><span class="n">window_end</span><span class="p">,</span><span class="w"> </span><span class="n">window_time</span><span class="p">;</span><span class="w">
</span></span></span></code></pre></div><p>You can reference the window start and window end time of the table-valued window functions,
making new types of constructs possible. Beyond regular windowed aggregations and windowed joins,
you can, for example, now express windowed Top-K aggregations:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-sql" data-lang="sql"><span class="line"><span class="cl"><span class="k">SELECT</span><span class="w"> </span><span class="n">window_time</span><span class="p">,</span><span class="w"> </span><span class="p">...</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">FROM</span><span class="w"> </span><span class="p">(</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">SELECT</span><span class="w"> </span><span class="o">*</span><span class="p">,</span><span class="w"> </span><span class="n">ROW_NUMBER</span><span class="p">()</span><span class="w"> </span><span class="n">OVER</span><span class="w"> </span><span class="p">(</span><span class="n">PARTITION</span><span class="w"> </span><span class="k">BY</span><span class="w"> </span><span class="n">window_start</span><span class="p">,</span><span class="w"> </span><span class="n">window_end</span><span class="w"> </span><span class="k">ORDER</span><span class="w"> </span><span class="k">BY</span><span class="w"> </span><span class="n">total_price</span><span class="w"> </span><span class="k">DESC</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="n">rank</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="k">FROM</span><span class="w"> </span><span class="n">t</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="p">)</span><span class="w"> </span><span class="k">WHERE</span><span class="w"> </span><span class="n">rank</span><span class="w"> </span><span class="o">&lt;=</span><span class="w"> </span><span class="mi">100</span><span class="p">;</span><span class="w">
</span></span></span></code></pre></div><h2 id="improved-interoperability-between-datastream-api-and-table-apisql">
Improved interoperability between DataStream API and Table API/SQL
<a class="anchor" href="#improved-interoperability-between-datastream-api-and-table-apisql">#</a>
</h2>
<p>This release radically simplifies mixing DataStream API and Table API programs.</p>
<p>The Table API is a great way to develop applications, with its declarative nature and its
many built-in functions. But sometimes, you need to <em>escape</em> to the DataStream API for its
expressiveness, flexibility, and explicit control over the state.</p>
<p>The new methods <code>StreamTableEnvironment.toDataStream()/.fromDataStream()</code> can model
a <code>DataStream</code> from the DataStream API as a table source or sink.
Notable improvements include:</p>
<ul>
<li>
<p>Automatic type conversion between the DataStream and Table API type systems.</p>
</li>
<li>
<p>Seamless integration of event time configurations; watermarks flow through boundaries for high
consistency.</p>
</li>
<li>
<p>The <code>Row</code> class (representing row events from the Table API) has received a major
overhaul (improving the behavior of the <code>toString()</code>/<code>hashCode()</code>/<code>equals()</code> methods) and now supports
accessing fields by name, with support for sparse representations.</p>
</li>
</ul>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-java" data-lang="java"><span class="line"><span class="cl"><span class="n">Table</span><span class="w"> </span><span class="n">table</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">tableEnv</span><span class="p">.</span><span class="na">fromDataStream</span><span class="p">(</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">dataStream</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">Schema</span><span class="p">.</span><span class="na">newBuilder</span><span class="p">()</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="p">.</span><span class="na">columnByMetadata</span><span class="p">(</span><span class="s">&#34;rowtime&#34;</span><span class="p">,</span><span class="w"> </span><span class="s">&#34;TIMESTAMP(3)&#34;</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="p">.</span><span class="na">watermark</span><span class="p">(</span><span class="s">&#34;rowtime&#34;</span><span class="p">,</span><span class="w"> </span><span class="s">&#34;SOURCE_WATERMARK()&#34;</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="p">.</span><span class="na">build</span><span class="p">());</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="n">DataStream</span><span class="o">&lt;</span><span class="n">Row</span><span class="o">&gt;</span><span class="w"> </span><span class="n">dataStream</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">tableEnv</span><span class="p">.</span><span class="na">toDataStream</span><span class="p">(</span><span class="n">table</span><span class="p">)</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="p">.</span><span class="na">keyBy</span><span class="p">(</span><span class="n">r</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">r</span><span class="p">.</span><span class="na">getField</span><span class="p">(</span><span class="s">&#34;user&#34;</span><span class="p">))</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="p">.</span><span class="na">window</span><span class="p">(...);</span><span class="w">
</span></span></span></code></pre></div><h2 id="sql-client-init-scripts-and-statement-sets">
SQL Client: Init scripts and Statement Sets
<a class="anchor" href="#sql-client-init-scripts-and-statement-sets">#</a>
</h2>
<p>The SQL Client is a convenient way to run and deploy SQL streaming and batch jobs directly
from the command line, without writing any code, or as part of a CI/CD workflow.</p>
<p>This release vastly improves the functionality of the SQL client. Almost all operations that
are available to Java applications (when programmatically launching queries from the
<code>TableEnvironment</code>) are now supported in the SQL Client and as SQL scripts.
That means SQL users need much less glue code for their SQL deployments.</p>
<p><strong>Easier Configuration and Code Sharing</strong></p>
<p>Support for configuring the SQL Client via YAML files is being discontinued. Instead, the client
accepts one or more <em>initialization scripts</em> to configure a session before the main SQL script
gets executed.</p>
<p>These init scripts would typically be shared across teams/deployments and could be used for
loading common catalogs, applying common configuration settings, or defining standard views.</p>
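<p>For illustration, a hypothetical <code>init1.sql</code> could contain (the catalog name and
settings are illustrative):</p>
<pre tabindex="0"><code class="language-sql" data-lang="sql">-- init1.sql: a shared initialization script
CREATE CATALOG common_catalog WITH ('type' = 'hive');
USE CATALOG common_catalog;
SET pipeline.max-parallelism = 128;
</code></pre>
<p>The client is then started with the init scripts and the main SQL script:</p>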
<pre tabindex="0"><code>./sql-client.sh -i init1.sql init2.sql -f sqljob.sql
</code></pre><p><strong>More config options</strong></p>
<p>A greater set of recognized config options and improved <code>SET</code>/<code>RESET</code> commands make it easier to
define and control the execution from within the SQL client and SQL scripts.</p>
<p><strong>Multi-query Support with Statement Sets</strong></p>
<p>Multi-query execution lets you execute multiple SQL queries (or statements) as a single Flink job.
This is particularly useful for streaming SQL queries that run indefinitely.</p>
<p><em>Statement Sets</em> are the mechanism for grouping queries that should be executed together.</p>
<p>The following is an example of a SQL script that can be run via the SQL client. It sets up and
configures the environment and executes multiple queries. The script captures end-to-end the
queries and all environment setup and configuration work, making it a self-contained deployment
artifact.</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-sql" data-lang="sql"><span class="line"><span class="cl"><span class="c1">-- set up a catalog
</span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">CREATE</span><span class="w"> </span><span class="k">CATALOG</span><span class="w"> </span><span class="n">hive_catalog</span><span class="w"> </span><span class="k">WITH</span><span class="w"> </span><span class="p">(</span><span class="s1">&#39;type&#39;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s1">&#39;hive&#39;</span><span class="p">);</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="n">USE</span><span class="w"> </span><span class="k">CATALOG</span><span class="w"> </span><span class="n">hive_catalog</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="c1">-- or use temporary objects
</span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">CREATE</span><span class="w"> </span><span class="k">TEMPORARY</span><span class="w"> </span><span class="k">TABLE</span><span class="w"> </span><span class="n">clicks</span><span class="w"> </span><span class="p">(</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">user_id</span><span class="w"> </span><span class="nb">BIGINT</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">page_id</span><span class="w"> </span><span class="nb">BIGINT</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="n">viewtime</span><span class="w"> </span><span class="k">TIMESTAMP</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">)</span><span class="w"> </span><span class="k">WITH</span><span class="w"> </span><span class="p">(</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="s1">&#39;connector&#39;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s1">&#39;kafka&#39;</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="s1">&#39;topic&#39;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s1">&#39;clicks&#39;</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="s1">&#39;properties.bootstrap.servers&#39;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s1">&#39;...&#39;</span><span class="p">,</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"> </span><span class="s1">&#39;format&#39;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s1">&#39;avro&#39;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="p">);</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="c1">-- set the execution mode for jobs
</span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">SET</span><span class="w"> </span><span class="n">execution</span><span class="p">.</span><span class="n">runtime</span><span class="o">-</span><span class="k">mode</span><span class="o">=</span><span class="n">streaming</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="c1">-- set the sync/async mode for INSERT INTOs
</span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">SET</span><span class="w"> </span><span class="k">table</span><span class="p">.</span><span class="n">dml</span><span class="o">-</span><span class="n">sync</span><span class="o">=</span><span class="k">false</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="c1">-- set the job&#39;s parallelism
</span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">SET</span><span class="w"> </span><span class="n">parallelism</span><span class="p">.</span><span class="k">default</span><span class="o">=</span><span class="mi">10</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="c1">-- set the job name
</span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">SET</span><span class="w"> </span><span class="n">pipeline</span><span class="p">.</span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_flink_job</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="c1">-- restore state from the specific savepoint path
</span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">SET</span><span class="w"> </span><span class="n">execution</span><span class="p">.</span><span class="n">savepoint</span><span class="p">.</span><span class="n">path</span><span class="o">=/</span><span class="n">tmp</span><span class="o">/</span><span class="n">flink</span><span class="o">-</span><span class="n">savepoints</span><span class="o">/</span><span class="n">savepoint</span><span class="o">-</span><span class="n">bb0dab</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="k">BEGIN</span><span class="w"> </span><span class="k">STATEMENT</span><span class="w"> </span><span class="k">SET</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="k">INSERT</span><span class="w"> </span><span class="k">INTO</span><span class="w"> </span><span class="n">pageview_pv_sink</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="k">SELECT</span><span class="w"> </span><span class="n">page_id</span><span class="p">,</span><span class="w"> </span><span class="k">count</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="k">FROM</span><span class="w"> </span><span class="n">clicks</span><span class="w"> </span><span class="k">GROUP</span><span class="w"> </span><span class="k">BY</span><span class="w"> </span><span class="n">page_id</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="k">INSERT</span><span class="w"> </span><span class="k">INTO</span><span class="w"> </span><span class="n">pageview_uv_sink</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="k">SELECT</span><span class="w"> </span><span class="n">page_id</span><span class="p">,</span><span class="w"> </span><span class="k">count</span><span class="p">(</span><span class="k">distinct</span><span class="w"> </span><span class="n">user_id</span><span class="p">)</span><span class="w"> </span><span class="k">FROM</span><span class="w"> </span><span class="n">clicks</span><span class="w"> </span><span class="k">GROUP</span><span class="w"> </span><span class="k">BY</span><span class="w"> </span><span class="n">page_id</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="k">END</span><span class="p">;</span><span class="w">
</span></span></span></code></pre></div><h2 id="hive-query-syntax-compatibility">
Hive query syntax compatibility
<a class="anchor" href="#hive-query-syntax-compatibility">#</a>
</h2>
<p>You can now write SQL queries against Flink using the Hive SQL syntax.
In addition to Hive&rsquo;s DDL dialect, Flink now also accepts the commonly-used Hive DML and DQL
dialects.</p>
<p>To use the Hive SQL dialect, set <code>table.sql-dialect</code> to <code>hive</code> and load the <code>HiveModule</code>.
The latter is important because Hive&rsquo;s built-in functions are required for proper syntax and
semantics compatibility. The following example illustrates that:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-sql" data-lang="sql"><span class="line"><span class="cl"><span class="k">CREATE</span><span class="w"> </span><span class="k">CATALOG</span><span class="w"> </span><span class="n">myhive</span><span class="w"> </span><span class="k">WITH</span><span class="w"> </span><span class="p">(</span><span class="s1">&#39;type&#39;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s1">&#39;hive&#39;</span><span class="p">);</span><span class="w"> </span><span class="c1">-- setup HiveCatalog
</span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="n">USE</span><span class="w"> </span><span class="k">CATALOG</span><span class="w"> </span><span class="n">myhive</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="k">LOAD</span><span class="w"> </span><span class="n">MODULE</span><span class="w"> </span><span class="n">hive</span><span class="p">;</span><span class="w"> </span><span class="c1">-- setup HiveModule
</span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="n">USE</span><span class="w"> </span><span class="n">MODULES</span><span class="w"> </span><span class="n">hive</span><span class="p">,</span><span class="n">core</span><span class="p">;</span><span class="w">
</span></span></span><span class="line"><span class="cl"><span class="w"></span><span class="k">SET</span><span class="w"> </span><span class="k">table</span><span class="p">.</span><span class="k">sql</span><span class="o">-</span><span class="n">dialect</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">hive</span><span class="p">;</span><span class="w"> </span><span class="c1">-- enable Hive dialect
</span></span></span><span class="line"><span class="cl"><span class="c1"></span><span class="k">SELECT</span><span class="w"> </span><span class="k">key</span><span class="p">,</span><span class="w"> </span><span class="n">value</span><span class="w"> </span><span class="k">FROM</span><span class="w"> </span><span class="n">src</span><span class="w"> </span><span class="k">CLUSTER</span><span class="w"> </span><span class="k">BY</span><span class="w"> </span><span class="k">key</span><span class="p">;</span><span class="w"> </span><span class="c1">-- run some Hive queries
</span></span></span></code></pre></div><p>Please note that the Hive dialect no longer supports Flink&rsquo;s SQL syntax for DML and DQL statements.
Switch back to the <code>default</code> dialect for Flink&rsquo;s syntax.</p>
<h2 id="improved-behavior-of-sql-time-functions">
Improved behavior of SQL time functions
<a class="anchor" href="#improved-behavior-of-sql-time-functions">#</a>
</h2>
<p>Working with time is a crucial element of any data processing. At the same time, handling different
time zones, dates, and times is an <a href="https://xkcd.com/1883/">incredibly delicate task</a> when working with data.</p>
<p>In Flink 1.13, we put much effort into simplifying the usage of time-related functions. We adjusted (made
more specific) the return types of functions such as <code>PROCTIME()</code>, <code>CURRENT_TIMESTAMP</code>, and <code>NOW()</code>.</p>
<p>Moreover, you can now also define an event time attribute on a <em>TIMESTAMP_LTZ</em> column, enabling
graceful window processing with support for Daylight Saving Time.</p>
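<p>A sketch of such a table definition (the schema and connector are illustrative):</p>
<pre tabindex="0"><code class="language-sql" data-lang="sql">CREATE TABLE user_actions (
  user_id BIGINT,
  action STRING,
  ts TIMESTAMP_LTZ(3),
  -- declare the TIMESTAMP_LTZ column as the event time attribute
  WATERMARK FOR ts AS ts - INTERVAL '5' SECOND
) WITH ('connector' = 'datagen');
</code></pre>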
<p>Please see the release notes for a complete list of changes.</p>
<hr>
<h1 id="notable-pyflink-improvements">
Notable PyFlink improvements
<a class="anchor" href="#notable-pyflink-improvements">#</a>
</h1>
<p>The general theme of this release in PyFlink is to bring the Python DataStream API and Table API
closer to feature parity with the Java/Scala APIs.</p>
<h3 id="stateful-operations-in-the-python-datastream-api">
Stateful operations in the Python DataStream API
<a class="anchor" href="#stateful-operations-in-the-python-datastream-api">#</a>
</h3>
<p>With Flink 1.13, Python programmers now also get to enjoy the full potential of Apache Flink&rsquo;s
stateful stream processing APIs. The rearchitected Python DataStream API, introduced in Flink 1.12,
now has full stateful capabilities, allowing users to remember information from events in the state
and act on it later.</p>
<p>That stateful processing capability is the basis of many of the more sophisticated processing
operations, which need to remember information across individual events (for example, Windowing
Operations).</p>
<p>This example shows a custom counting window implementation, using state:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="k">class</span> <span class="nc">CountWindowAverage</span><span class="p">(</span><span class="n">FlatMapFunction</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">window_size</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="bp">self</span><span class="o">.</span><span class="n">window_size</span> <span class="o">=</span> <span class="n">window_size</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">def</span> <span class="nf">open</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">runtime_context</span><span class="p">:</span> <span class="n">RuntimeContext</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="n">descriptor</span> <span class="o">=</span> <span class="n">ValueStateDescriptor</span><span class="p">(</span><span class="s2">&#34;average&#34;</span><span class="p">,</span> <span class="n">Types</span><span class="o">.</span><span class="n">TUPLE</span><span class="p">([</span><span class="n">Types</span><span class="o">.</span><span class="n">LONG</span><span class="p">(),</span> <span class="n">Types</span><span class="o">.</span><span class="n">LONG</span><span class="p">()]))</span>
</span></span><span class="line"><span class="cl"> <span class="bp">self</span><span class="o">.</span><span class="n">sum</span> <span class="o">=</span> <span class="n">runtime_context</span><span class="o">.</span><span class="n">get_state</span><span class="p">(</span><span class="n">descriptor</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"> <span class="k">def</span> <span class="nf">flat_map</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
</span></span><span class="line"><span class="cl"> <span class="n">current_sum</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sum</span><span class="o">.</span><span class="n">value</span><span class="p">()</span>
</span></span><span class="line"><span class="cl"> <span class="k">if</span> <span class="n">current_sum</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="n">current_sum</span> <span class="o">=</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
</span></span><span class="line"><span class="cl"> <span class="c1"># update the count</span>
</span></span><span class="line"><span class="cl"> <span class="n">current_sum</span> <span class="o">=</span> <span class="p">(</span><span class="n">current_sum</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">current_sum</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">+</span> <span class="n">value</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
</span></span><span class="line"><span class="cl"> <span class="c1"># if the count reaches window_size, emit the average and clear the state</span>
</span></span><span class="line"><span class="cl"> <span class="k">if</span> <span class="n">current_sum</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="bp">self</span><span class="o">.</span><span class="n">sum</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span>
</span></span><span class="line"><span class="cl"> <span class="k">yield</span> <span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">current_sum</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">//</span> <span class="n">current_sum</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
</span></span><span class="line"><span class="cl"> <span class="k">else</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="bp">self</span><span class="o">.</span><span class="n">sum</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">current_sum</span><span class="p">)</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="n">ds</span> <span class="o">=</span> <span class="o">...</span> <span class="c1"># type: DataStream</span>
</span></span><span class="line"><span class="cl"><span class="n">ds</span><span class="o">.</span><span class="n">key_by</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> \
</span></span><span class="line"><span class="cl"> <span class="o">.</span><span class="n">flat_map</span><span class="p">(</span><span class="n">CountWindowAverage</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
</span></span></code></pre></div><h3 id="user-defined-windows-in-the-pyflink-datastream-api">
User-defined Windows in the PyFlink DataStream API
<a class="anchor" href="#user-defined-windows-in-the-pyflink-datastream-api">#</a>
</h3>
<p>Flink 1.13 adds support for user-defined windows to the PyFlink DataStream API. Programs can now use
windows beyond the standard window definitions.</p>
<p>Because windows are at the heart of all programs that process unbounded streams (by splitting the
stream into &ldquo;buckets&rdquo; of bounded size), this greatly increases the expressiveness of the API.</p>
<h3 id="row-based-operation-in-the-pyflink-table-api">
Row-based operation in the PyFlink Table API
<a class="anchor" href="#row-based-operation-in-the-pyflink-table-api">#</a>
</h3>
<p>The Python Table API now supports row-based operations, i.e., custom transformation functions on rows.
These functions are an easy way to apply data transformations on tables beyond the built-in functions.</p>
<p>This is an example of using a <code>map()</code> operation in the Python Table API:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python"><span class="line"><span class="cl"><span class="nd">@udf</span><span class="p">(</span><span class="n">result_type</span><span class="o">=</span><span class="n">DataTypes</span><span class="o">.</span><span class="n">ROW</span><span class="p">(</span>
</span></span><span class="line"><span class="cl"> <span class="p">[</span><span class="n">DataTypes</span><span class="o">.</span><span class="n">FIELD</span><span class="p">(</span><span class="s2">&#34;c1&#34;</span><span class="p">,</span> <span class="n">DataTypes</span><span class="o">.</span><span class="n">BIGINT</span><span class="p">()),</span>
</span></span><span class="line"><span class="cl"> <span class="n">DataTypes</span><span class="o">.</span><span class="n">FIELD</span><span class="p">(</span><span class="s2">&#34;c2&#34;</span><span class="p">,</span> <span class="n">DataTypes</span><span class="o">.</span><span class="n">STRING</span><span class="p">())]))</span>
</span></span><span class="line"><span class="cl"><span class="k">def</span> <span class="nf">increment_column</span><span class="p">(</span><span class="n">r</span><span class="p">:</span> <span class="n">Row</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Row</span><span class="p">:</span>
</span></span><span class="line"><span class="cl"> <span class="k">return</span> <span class="n">Row</span><span class="p">(</span><span class="n">r</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">r</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
</span></span><span class="line"><span class="cl">
</span></span><span class="line"><span class="cl"><span class="n">table</span> <span class="o">=</span> <span class="o">...</span> <span class="c1"># type: Table</span>
</span></span><span class="line"><span class="cl"><span class="n">mapped_result</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">increment_column</span><span class="p">)</span>
</span></span></code></pre></div><p>In addition to <code>map()</code>, the API also supports <code>flat_map()</code>, <code>aggregate()</code>, <code>flat_aggregate()</code>,
and other row-based operations. This brings the Python Table API a big step closer to feature
parity with the Java Table API.</p>
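<p>Beyond <code>map()</code>, a <code>flat_map()</code> can emit zero or more rows per input row via a
user-defined table function. A minimal sketch (the column layout and the splitting logic are made up
for the example):</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python">from pyflink.common import Row
from pyflink.table import DataTypes
from pyflink.table.udf import udtf

@udtf(result_types=[DataTypes.BIGINT(), DataTypes.STRING()])
def split_column(r: Row):
    # emit one row per comma-separated token in the second column
    for s in r[1].split(","):
        yield r[0], s

table = ...  # type: Table
flat_mapped = table.flat_map(split_column)
</code></pre></div>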
<h3 id="batch-execution-mode-for-pyflink-datastream-programs">
Batch execution mode for PyFlink DataStream programs
<a class="anchor" href="#batch-execution-mode-for-pyflink-datastream-programs">#</a>
</h3>
<p>The PyFlink DataStream API now also supports the batch execution mode for bounded streams,
which was introduced for the Java DataStream API in Flink 1.12.</p>
<p>The batch execution mode simplifies operations and improves the performance of programs on bounded streams
by exploiting the bounded nature of the input to bypass state backends and checkpointing.</p>
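<p>Assuming the PyFlink API mirrors its Java counterpart here, the execution mode can be selected
programmatically (alternatively, it can be set via the <code>execution.runtime-mode</code> option
when submitting the job):</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python">from pyflink.datastream import StreamExecutionEnvironment, RuntimeExecutionMode

env = StreamExecutionEnvironment.get_execution_environment()
# run this job over its bounded input with batch semantics
env.set_runtime_execution_mode(RuntimeExecutionMode.BATCH)
</code></pre></div>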
<h1 id="other-improvements">
Other improvements
<a class="anchor" href="#other-improvements">#</a>
</h1>
<p><strong>Flink Documentation via Hugo</strong></p>
<p>The Flink Documentation has been migrated from Jekyll to Hugo. If you find something missing, please let us know.
We are also curious to hear if you like the new look &amp; feel.</p>
<p><strong>Exception histories in the Web UI</strong></p>
<p>The Flink Web UI now presents up to <em>n</em> of the most recent exceptions that caused a job to fail.
This helps debug scenarios where a root failure caused subsequent failures; the root failure
cause can be found in the exception history.</p>
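<p>A sketch of tuning the history size in <code>flink-conf.yaml</code>, assuming the
<code>web.exception-history-size</code> option and its default of 16:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"># keep the 32 most recent failure causes instead of the default 16
web.exception-history-size: 32
</code></pre></div>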
<p><strong>Better exception / failure-cause reporting for unsuccessful checkpoints</strong></p>
<p>Flink now provides statistics for checkpoints that failed or were aborted to make it easier
to determine the failure cause without having to analyze the logs.</p>
<p>Prior versions of Flink reported metrics (e.g., size of persisted data, trigger time)
only for checkpoints that succeeded.</p>
<p><strong>Exactly-once JDBC sink</strong></p>
<p>From Flink 1.13 on, the JDBC sink can guarantee exactly-once delivery of results for XA-compliant databases
by transactionally committing results on checkpoints. The target database must have (or be linked
to) an XA Transaction Manager.</p>
<p>The connector exists currently only for the <em>DataStream API</em>, and can be created through the
<code>JdbcSink.exactlyOnceSink(...)</code> method (or by instantiating the <code>JdbcXaSinkFunction</code> directly).</p>
<p><strong>PyFlink Table API supports User-Defined Aggregate Functions in Group Windows</strong></p>
<p>Group Windows in PyFlink&rsquo;s Table API now support both general Python User-defined Aggregate
Functions (UDAFs) and Pandas UDAFs. Such functions are critical to many analytics and ML training
programs.</p>
<p>Flink 1.13 improves upon previous releases, where these functions were only supported
in unbounded Group-by aggregations.</p>
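<p>A minimal sketch of a Pandas UDAF applied in a tumbling group window (the schema, with columns
<code>a</code>, <code>b</code> and <code>rowtime</code>, is assumed for the example):</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python">from pyflink.table import DataTypes
from pyflink.table.expressions import col, lit
from pyflink.table.udf import udaf
from pyflink.table.window import Tumble

# a Pandas UDAF receives the grouped values as a pandas.Series
@udaf(result_type=DataTypes.FLOAT(), func_type="pandas")
def mean_udaf(v):
    return v.mean()

table = ...  # type: Table
result = table \
    .window(Tumble.over(lit(1).hours).on(col("rowtime")).alias("w")) \
    .group_by(col("a"), col("w")) \
    .select(col("a"), mean_udaf(col("b")))
</code></pre></div>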
<p><strong>Improved Sort-Merge Shuffle for Batch Execution</strong></p>
<p>Flink 1.13 improves the memory stability and performance of the <em>sort-merge blocking shuffle</em>
for batch-executed programs, initially introduced in Flink 1.12 via <a href="https://cwiki.apache.org/confluence/display/FLINK/FLIP-148%3A&#43;Introduce&#43;Sort-Merge&#43;Based&#43;Blocking&#43;Shuffle&#43;to&#43;Flink">FLIP-148</a>.</p>
<p>Programs with high parallelism (in the thousands) should no longer frequently trigger <em>OutOfMemoryError: Direct Memory</em>.
Performance, especially on spinning disks, is improved through better I/O scheduling
and broadcast optimizations.</p>
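<p>A sketch of opting into the sort-merge shuffle in <code>flink-conf.yaml</code>, assuming the
option names documented for Flink 1.13:</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-yaml" data-lang="yaml"># use the sort-merge shuffle for all parallelisms (by default it only kicks in at very high ones)
taskmanager.network.sort-shuffle.min-parallelism: 1
# give the in-memory sort more network buffers before spilling, trading memory for performance
taskmanager.network.sort-shuffle.min-buffers: 2048
</code></pre></div>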
<p><strong>HBase connector supports async lookup and lookup cache</strong></p>
<p>The HBase Lookup Table Source now supports an <em>async lookup mode</em> and a lookup cache.
This greatly benefits the performance of Table/SQL jobs with lookup joins against HBase, while
reducing the I/O requests to HBase in the typical case.</p>
<p>In prior versions, the HBase Lookup Source only communicated synchronously, resulting in lower
pipeline utilization and throughput.</p>
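<p>A sketch of a DDL enabling both features (the table name, schema and connector version are made up
for the example; <code>table_env</code> is an existing <code>TableEnvironment</code>):</p>
<div class="highlight"><pre tabindex="0" class="chroma"><code class="language-python" data-lang="python">table_env.execute_sql("""
    CREATE TABLE customers (
        rowkey STRING,
        f1 ROW&lt;name STRING, city STRING&gt;,
        PRIMARY KEY (rowkey) NOT ENFORCED
    ) WITH (
        'connector' = 'hbase-2.2',
        'table-name' = 'customers',
        'zookeeper.quorum' = 'zk:2181',
        'lookup.async' = 'true',           -- new: asynchronous lookups
        'lookup.cache.max-rows' = '10000', -- new: bounded lookup cache
        'lookup.cache.ttl' = '10min'
    )
""")
</code></pre></div>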
<h1 id="changes-to-consider-when-upgrading-to-flink-113">
Changes to consider when upgrading to Flink 1.13
<a class="anchor" href="#changes-to-consider-when-upgrading-to-flink-113">#</a>
</h1>
<ul>
<li><a href="https://issues.apache.org/jira/browse/FLINK-21709">FLINK-21709</a> - The old planner of the Table &amp;
SQL API has been deprecated in Flink 1.13 and will be dropped in Flink 1.14.
The <em>Blink</em> engine has been the default planner for some releases now and will be the only one going forward.
That means that both the <code>BatchTableEnvironment</code> and SQL/DataSet interoperability are reaching
the end of life. Please use the unified <code>TableEnvironment</code> for batch and stream processing going forward.</li>
<li><a href="https://issues.apache.org/jira/browse/FLINK-22352">FLINK-22352</a> The community decided to deprecate
the Apache Mesos support for Apache Flink. It is subject to removal in the future. Users are
encouraged to switch to a different resource manager.</li>
<li><a href="https://issues.apache.org/jira/browse/FLINK-21935">FLINK-21935</a> - The <code>state.backend.async</code>
option is deprecated. Snapshots are always asynchronous now (as they were by default before) and
there is no option to configure a synchronous snapshot anymore.</li>
<li><a href="https://issues.apache.org/jira/browse/FLINK-17012">FLINK-17012</a> - The tasks&rsquo; <code>RUNNING</code> state was split
into two states: <code>INITIALIZING</code> and <code>RUNNING</code>. A task is <code>INITIALIZING</code> while it loads the checkpointed state,
and, in the case of unaligned checkpoints, until the checkpointed in-flight data has been recovered.
By making the state-restore phase explicit, this lets monitoring systems better determine
when tasks are really back to doing work.</li>
<li><a href="https://issues.apache.org/jira/browse/FLINK-21698">FLINK-21698</a> - The <em>CAST</em> operation between the
NUMERIC type and the TIMESTAMP type is problematic and therefore no longer supported: Statements like
<code>CAST(numeric AS TIMESTAMP(3))</code> will now fail. Please use <code>TO_TIMESTAMP(FROM_UNIXTIME(numeric))</code> instead.</li>
<li><a href="https://issues.apache.org/jira/browse/FLINK-22133">FLINK-22133</a> The unified source API for connectors
has a minor breaking change: The <code>SplitEnumerator.snapshotState()</code> method was adjusted to accept the
<em>Checkpoint ID</em> of the checkpoint for which the snapshot is created.</li>
<li><a href="https://issues.apache.org/jira/browse/FLINK-19463">FLINK-19463</a> - The old <code>StateBackend</code> interfaces were deprecated
as they had overloaded semantics which many users found confusing. This is a pure API change and does not affect
runtime characteristics of applications.
For full details on how to update existing pipelines, please see the <a href="//nightlies.apache.org/flink/flink-docs-release-1.13/docs/ops/state/state_backends/#migrating-from-legacy-backends">migration guide</a>.</li>
</ul>
<h1 id="resources">
Resources
<a class="anchor" href="#resources">#</a>
</h1>
<p>The binary distribution and source artifacts are now available on the updated <a href="/downloads.html">Downloads page</a>
of the Flink website, and the most recent distribution of PyFlink is available on <a href="https://pypi.org/project/apache-flink/">PyPI</a>.</p>
<p>Please review the <a href="//nightlies.apache.org/flink/flink-docs-release-1.13/release-notes/flink-1.13">release notes</a>
carefully if you plan to upgrade your setup to Flink 1.13. This version is API-compatible with
previous 1.x releases for APIs annotated with the <code>@Public</code> annotation.</p>
<p>You can also check the complete <a href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315522&amp;version=12349287">release changelog</a>
and <a href="//nightlies.apache.org/flink/flink-docs-release-1.13/">updated documentation</a> for a detailed list of changes and new features.</p>
<h1 id="list-of-contributors">
List of Contributors
<a class="anchor" href="#list-of-contributors">#</a>
</h1>
<p>The Apache Flink community would like to thank each one of the contributors that have
made this release possible:</p>
<p>acqua.csq, AkisAya, Alexander Fedulov, Aljoscha Krettek, Ammar Al-Batool, Andrey Zagrebin, anlen321,
Anton Kalashnikov, appleyuchi, Arvid Heise, Austin Cawley-Edwards, austin ce, azagrebin, blublinsky,
Brian Zhou, bytesmithing, caozhen1937, chen qin, Chesnay Schepler, Congxian Qiu, Cristian,
cxiiiiiii, Danny Chan, Danny Cranmer, David Anderson, Dawid Wysakowicz, dbgp2021, Dian Fu,
DinoZhang, dixingxing, Dong Lin, Dylan Forciea, est08zw, Etienne Chauchot, fanrui03, Flora Tao,
FLRNKS, fornaix, fuyli, George, Giacomo Gamba, GitHub, godfrey he, GuoWei Ma, Gyula Fora,
hackergin, hameizi, Haoyuan Ge, Harshvardhan Chauhan, Haseeb Asif, hehuiyuan, huangxiao, HuangXiao,
huangxingbo, HuangXingBo, humengyu2012, huzekang, Hwanju Kim, Ingo Bürk, I. Raleigh, Ivan, iyupeng,
Jack, Jane, Jark Wu, Jerry Wang, Jiangjie (Becket) Qin, JiangXin, Jiayi Liao, JieFang.He, Jie Wang,
jinfeng, Jingsong Lee, JingsongLi, Jing Zhang, Joao Boto, JohnTeslaa, Jun Qin, kanata163, kevin.cyj,
KevinyhZou, Kezhu Wang, klion26, Kostas Kloudas, kougazhang, Kurt Young, laughing, legendtkl,
leiqiang, Leonard Xu, liaojiayi, Lijie Wang, liming.1018, lincoln lee, lincoln-lil, liushouwei,
liuyufei, LM Kang, lometheus, luyb, Lyn Zhang, Maciej Obuchowski, Maciek Próchniak, mans2singh,
Marek Sabo, Matthias Pohl, meijie, Mika Naylor, Miklos Gergely, Mohit Paliwal, Moritz Manner,
morsapaes, Mulan, Nico Kruber, openopen2, paul8263, Paul Lam, Peidian li, pengkangjing, Peter Huang,
Piotr Nowojski, Qinghui Xu, Qingsheng Ren, Raghav Kumar Gautam, Rainie Li, Ricky Burnett, Rion
Williams, Robert Metzger, Roc Marshal, Roman, Roman Khachatryan, Ruguo,
Ruguo Yu, Rui Li, Sebastian Liu, Seth Wiesman, sharkdtu, sharkdtu(涂小刚), Shengkai, shizhengchao,
shouweikun, Shuo Cheng, simenliuxing, SteNicholas, Stephan Ewen, Suo Lu, sv3ndk, Svend Vanderveken,
taox, Terry Wang, Thelgis Kotsos, Thesharing, Thomas Weise, Till Rohrmann, Timo Walther, Ting Sun,
totoro, totorooo, TsReaper, Tzu-Li (Gordon) Tai, V1ncentzzZ, vthinkxie, wangfeifan, wangpeibin,
wangyang0918, wangyemao-github, Wei Zhong, Wenlong Lyu, wineandcheeze, wjc, xiaoHoly, Xintong Song,
xixingya, xmarker, Xue Wang, Yadong Xie, yangsanity, Yangze Guo, Yao Zhang, Yuan Mei, yulei0824, Yu
Li, Yun Gao, Yun Tang, yuruguo, yushujun, Yuval Itzchakov, yuzhao.cyz, zck, zhangjunfan,
zhangzhengqi3, zhao_wei_nan, zhaown, zhaoxing, Zhenghua Gao, Zhenqiu Huang, zhisheng, zhongqishang,
zhushang, zhuxiaoshang, Zhu Zhu, zjuwangg, zoucao, zoudan, 左元, 星, 肖佳文, 龙三</p>
</article>
<div class="edit-this-page">
<p>
<a href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Translation+Specifications">Want to contribute translation?</a>
</p>
<p>
<a href="//github.com/apache/flink-web/edit/asf-site/docs/content/posts/2021-05-03-release-1.13.0.md">
Edit This Page<i class="fa fa-edit fa-fw"></i>
</a>
</p>
</div>
</section>
<aside class="book-toc">
<nav id="TableOfContents"><h3>On This Page <a href="javascript:void(0)" class="toc" onclick="collapseToc()"><i class="fa fa-times" aria-hidden="true"></i></a></h3>
<ul>
<li><a href="#notable-features">Notable features</a>
<ul>
<li><a href="#reactive-scaling">Reactive scaling</a></li>
<li><a href="#analyzing-application-performance">Analyzing application performance</a></li>
<li><a href="#switching-state-backend-with-savepoints">Switching State Backend with savepoints</a></li>
<li><a href="#user-specified-pod-templates-for-kubernetes-deployments">User-specified pod templates for Kubernetes deployments</a></li>
<li><a href="#unaligned-checkpoints---production-ready">Unaligned Checkpoints - production-ready</a></li>
<li><a href="#machine-learning-library-moving-to-a-separate-repository">Machine Learning Library moving to a separate repository</a></li>
</ul>
</li>
<li><a href="#notable-sql--table-api-improvements">Notable SQL &amp; Table API improvements</a>
<ul>
<li><a href="#windows-via-table-valued-functions">Windows via Table-valued functions</a></li>
<li><a href="#improved-interoperability-between-datastream-api-and-table-apisql">Improved interoperability between DataStream API and Table API/SQL</a></li>
<li><a href="#sql-client-init-scripts-and-statement-sets">SQL Client: Init scripts and Statement Sets</a></li>
<li><a href="#hive-query-syntax-compatibility">Hive query syntax compatibility</a></li>
<li><a href="#improved-behavior-of-sql-time-functions">Improved behavior of SQL time functions</a></li>
</ul>
</li>
<li><a href="#notable-pyflink-improvements">Notable PyFlink improvements</a>
<ul>
<li>
<ul>
<li><a href="#stateful-operations-in-the-python-datastream-api">Stateful operations in the Python DataStream API</a></li>
<li><a href="#user-defined-windows-in-the-pyflink-datastream-api">User-defined Windows in the PyFlink DataStream API</a></li>
<li><a href="#row-based-operation-in-the-pyflink-table-api">Row-based operation in the PyFlink Table API</a></li>
<li><a href="#batch-execution-mode-for-pyflink-datastream-programs">Batch execution mode for PyFlink DataStream programs</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#other-improvements">Other improvements</a></li>
<li><a href="#changes-to-consider-when-upgrading-to-flink-113">Changes to consider when upgrading to Flink 1.13</a></li>
<li><a href="#resources">Resources</a></li>
<li><a href="#list-of-contributors">List of Contributors</a></li>
</ul>
</nav>
</aside>
<aside class="expand-toc hidden">
<a class="toc" onclick="expandToc()" href="javascript:void(0)">
<i class="fa fa-bars" aria-hidden="true"></i>
</a>
</aside>
</main>
<footer>
<div class="separator"></div>
<div class="panels">
<div class="wrapper">
<div class="panel">
<ul>
<li>
<a href="https://flink-packages.org/">flink-packages.org</a>
</li>
<li>
<a href="https://www.apache.org/">Apache Software Foundation</a>
</li>
<li>
<a href="https://www.apache.org/licenses/">License</a>
</li>
<li>
<a href="/zh/">
<i class="fa fa-globe" aria-hidden="true"></i>&nbsp;中文版
</a>
</li>
</ul>
</div>
<div class="panel">
<ul>
<li>
<a href="/what-is-flink/security">Security</a-->
</li>
<li>
<a href="https://www.apache.org/foundation/sponsorship.html">Donate</a>
</li>
<li>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</li>
</ul>
</div>
<div class="panel icons">
<div>
<a href="/posts">
<div class="icon flink-blog-icon"></div>
<span>Flink blog</span>
</a>
</div>
<div>
<a href="https://github.com/apache/flink">
<div class="icon flink-github-icon"></div>
<span>GitHub</span>
</a>
</div>
<div>
<a href="https://twitter.com/apacheflink">
<div class="icon flink-twitter-icon"></div>
<span>Twitter</span>
</a>
</div>
</div>
</div>
</div>
<hr/>
<div class="container disclaimer">
<p>The contents of this website are © 2024 Apache Software Foundation under the terms of the Apache License v2. Apache Flink, Flink, and the Flink logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</footer>
</body>
</html>