blob: f033f74a0dfb57c7650285f14dac05074d3004e1 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
<link rel="stylesheet" href="/bootstrap/css/bootstrap.min.css">
<script src="/bootstrap/js/bootstrap.bundle.min.js"></script>
<link rel="stylesheet" type="text/css" href="/font-awesome/css/font-awesome.min.css">
<script src="/js/anchor.min.js"></script>
<script src="/js/flink.js"></script>
<link rel="canonical" href="https://flink.apache.org/2023/03/23/announcing-the-release-of-apache-flink-1.17/">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="The Apache Flink PMC is pleased to announce Apache Flink release 1.17.0. Apache Flink is the leading stream processing standard, and the concept of unified stream and batch data processing is being successfully adopted in more and more companies. Thanks to our excellent community and contributors, Apache Flink continues to grow as a technology and remains one of the most active projects in the Apache Software Foundation. Flink 1.17 had 172 contributors enthusiastically participating and saw the completion of 7 FLIPs and 600&#43; issues, bringing many exciting new features and improvements to the community.">
<meta name="theme-color" content="#FFFFFF"><meta property="og:title" content="Announcing the Release of Apache Flink 1.17" />
<meta property="og:description" content="The Apache Flink PMC is pleased to announce Apache Flink release 1.17.0. Apache Flink is the leading stream processing standard, and the concept of unified stream and batch data processing is being successfully adopted in more and more companies. Thanks to our excellent community and contributors, Apache Flink continues to grow as a technology and remains one of the most active projects in the Apache Software Foundation. Flink 1.17 had 172 contributors enthusiastically participating and saw the completion of 7 FLIPs and 600&#43; issues, bringing many exciting new features and improvements to the community." />
<meta property="og:type" content="article" />
<meta property="og:url" content="https://flink.apache.org/2023/03/23/announcing-the-release-of-apache-flink-1.17/" /><meta property="article:section" content="posts" />
<meta property="article:published_time" content="2023-03-23T08:00:00+00:00" />
<meta property="article:modified_time" content="2023-03-23T08:00:00+00:00" />
<title>Announcing the Release of Apache Flink 1.17 | Apache Flink</title>
<link rel="manifest" href="/manifest.json">
<link rel="icon" href="/favicon.png" type="image/png">
<link rel="stylesheet" href="/book.min.22eceb4d17baa9cdc0f57345edd6f215a40474022dfee39b63befb5fb3c596b5.css" integrity="sha256-IuzrTRe6qc3A9XNF7dbyFaQEdAIt/uObY777X7PFlrU=">
<script defer src="/en.search.min.2698f0d1b683dae4d6cb071668b310a55ebcf1c48d11410a015a51d90105b53e.js" integrity="sha256-Jpjw0baD2uTWywcWaLMQpV688cSNEUEKAVpR2QEFtT4="></script>
<!--
Made with Book Theme
https://github.com/alex-shpak/hugo-book
-->
<meta name="generator" content="Hugo 0.124.1">
<script>
// Matomo (ASF-hosted) analytics bootstrap. Commands pushed onto the global
// _paq queue before matomo.js loads are replayed by the tracker on load.
var _paq = window._paq = window._paq || [];
// Privacy: do not set any tracking cookies on the client.
_paq.push(['disableCookies']);
// Treat these hosts as one site so cross-host links are not counted as outbound.
_paq.push(["setDomains", ["*.flink.apache.org","*.nightlies.apache.org/flink"]]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
// NOTE(review): protocol-relative URL — consider an explicit https:// scheme.
var u="//analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '1']);
// Asynchronously inject matomo.js ahead of the first existing <script> tag.
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
</head>
<body dir="ltr">
<header>
<nav class="navbar navbar-expand-xl">
<div class="container-fluid">
<a class="navbar-brand" href="/">
<img src="/img/logo/png/100/flink_squirrel_100_color.png" alt="Apache Flink" height="47" width="47" class="d-inline-block align-text-middle">
<span>Apache Flink</span>
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
<i class="fa fa-bars navbar-toggler-icon"></i>
</button>
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">About</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/what-is-flink/flink-architecture/">Architecture</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-applications/">Applications</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/flink-operations/">Operations</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/use-cases/">Use Cases</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/powered-by/">Powered By</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/roadmap/">Roadmap</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/community/">Community & Project Info</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/security/">Security</a>
</li>
<li>
<a class="dropdown-item" href="/what-is-flink/special-thanks/">Special Thanks</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Getting Started</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/try-flink/local_installation/">With Flink<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/docs/try-flink-kubernetes-operator/quick-start/">With Flink Kubernetes Operator<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable/docs/get-started/introduction/">With Flink CDC<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/docs/try-flink-ml/quick-start/">With Flink ML<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/getting-started/project-setup.html">With Flink Stateful Functions<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/docs/learn-flink/overview/">Training Course<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">Documentation</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-stable/">Flink 1.19 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-docs-master/">Flink Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-stable/">Kubernetes Operator 1.8 (latest)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-main">Kubernetes Operator Main (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-stable">CDC 3.0 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-cdc-docs-master">CDC Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-stable/">ML 2.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-ml-docs-master">ML Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-stable/">Stateful Functions 3.3 (stable)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="dropdown-item" href="https://nightlies.apache.org/flink/flink-statefun-docs-master">Stateful Functions Master (snapshot)<i class="link fa fa-external-link title" aria-hidden="true"></i>
</a>
</li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" role="button" data-bs-toggle="dropdown" aria-expanded="false">How to Contribute</a>
<ul class="dropdown-menu">
<li>
<a class="dropdown-item" href="/how-to-contribute/overview/">Overview</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-code/">Contribute Code</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/reviewing-prs/">Review Pull Requests</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/code-style-and-quality-preamble/">Code Style and Quality Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/contribute-documentation/">Contribute Documentation</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/documentation-style-guide/">Documentation Style Guide</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/improve-website/">Contribute to the Website</a>
</li>
<li>
<a class="dropdown-item" href="/how-to-contribute/getting-help/">Getting Help</a>
</li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link" href="/posts/">Flink Blog</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/downloads/">Downloads</a>
</li>
</ul>
<div class="book-search">
<div class="book-search-spinner hidden">
<i class="fa fa-refresh fa-spin"></i>
</div>
<form class="search-bar d-flex" onsubmit="return false;">
<input type="text" id="book-search-input" placeholder="Search" aria-label="Search" maxlength="64" data-hotkeys="s/">
<i class="fa fa-search search"></i>
<i class="fa fa-circle-o-notch fa-spin spinner"></i>
</form>
<div class="book-search-spinner hidden"></div>
<ul id="book-search-results"></ul>
</div>
</div>
</div>
</nav>
<div class="navbar-clearfix"></div>
</header>
<main class="flex">
<section class="container book-page">
<article class="markdown">
<h1>
<a href="/2023/03/23/announcing-the-release-of-apache-flink-1.17/">Announcing the Release of Apache Flink 1.17</a>
</h1>
March 23, 2023 -
Leonard Xu
<a href="https://twitter.com/Leonardxbj">(@Leonardxbj)</a>
<p>The Apache Flink PMC is pleased to announce Apache Flink release 1.17.0. Apache
Flink is the leading stream processing standard, and the concept of unified
stream and batch data processing is being successfully adopted in more and more
companies. Thanks to our excellent community and contributors, Apache Flink
continues to grow as a technology and remains one of the most active projects in
the Apache Software Foundation. Flink 1.17 had 172 contributors enthusiastically
participating and saw the completion of 7 FLIPs and 600+ issues, bringing many
exciting new features and improvements to the community.</p>
<h1 id="towards-streaming-warehouses">
Towards Streaming Warehouses
<a class="anchor" href="#towards-streaming-warehouses">#</a>
</h1>
<p>In order to achieve greater efficiency in the realm of <a href="https://www.alibabacloud.com/blog/more-than-computing-a-new-era-led-by-the-warehouse-architecture-of-apache-flink_598821">streaming
warehouse</a>,
Flink 1.17 contains substantial improvements to both the performance of batch
processing and the semantics of streaming processing. These improvements
represent a significant stride towards the creation of a more efficient and
streamlined data warehouse, capable of processing large quantities of data in
real-time.</p>
<p>For batch processing, this release includes several new features and
improvements:</p>
<ul>
<li><strong>Streaming Warehouse API:</strong>
<a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=235838061">FLIP-282</a>
introduces the new Delete and Update API in Flink SQL which only works in batch
mode. External storage systems like Flink Table Store can implement row-level
modification via this new API. The ALTER TABLE syntax is enhanced by including the
ability to ADD/MODIFY/DROP columns, primary keys, and watermarks, making it
easier for users to maintain their table schema.</li>
<li><strong>Batch Execution Improvements:</strong> Execution of batch workloads has been
significantly improved in Flink 1.17 in terms of performance, stability and
usability. Performance wise, a 26% TPC-DS improvement on 10T dataset is achieved
with strategy and operator optimizations, such as new join reordering and adaptive
local hash aggregation, Hive aggregate functions improvements, and the
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/ops/batch/batch_shuffle/#hybrid-shuffle">Hybrid Shuffle Mode</a>
enhancements. Stability wise,
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/speculative_execution/">Speculative Execution</a>
now supports all operators, and the
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/elastic_scaling/#adaptive-batch-scheduler">Adaptive Batch Scheduler</a>
is more robust against data skew. Usability wise, the tuning effort required
for batch workloads has been reduced. The Adaptive Batch Scheduler is now the default scheduler in
batch mode. The Hybrid Shuffle Mode is compatible with Speculative Execution
and the Adaptive Batch Scheduler, next to various configuration simplifications.</li>
<li><strong>SQL Client/Gateway:</strong> Apache Flink 1.17 introduces the &ldquo;gateway mode&rdquo; for
SQL Client, allowing users to submit SQL queries to a SQL Gateway for enhanced
functionality. Users can use SQL statements to manage job lifecycles,
including displaying job information and stopping running jobs. This provides
a powerful tool for managing Flink jobs.</li>
</ul>
<p>For stream processing, the following features and improvements are realized:</p>
<ul>
<li><strong>Streaming SQL Semantics:</strong> Non-deterministic operations may bring incorrect
results or exceptions which is a challenging topic in streaming SQL. Incorrect
optimization plans and functional issues have been fixed, and the experimental
feature of <a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/dev/table/sql/explain/#explaindetails">PLAN_ADVICE</a>
is introduced to inform of potential correctness risks and optimization
suggestions to SQL users.</li>
<li><strong>Checkpoint Improvements:</strong> The generic incremental checkpoint improvements
enhance the speed and stability of the checkpoint procedure, and the unaligned
checkpoint has improved stability under backpressure and is production-ready
in Flink 1.17. Users can manually trigger checkpoints with self-defined
checkpoint types while a job is running with the newly introduced REST
interface for triggering checkpoints.</li>
<li><strong>Watermark Alignment Enhancement:</strong> Efficient watermark processing directly
affects the execution efficiency of event time applications. In Flink 1.17,
<a href="https://cwiki.apache.org/confluence/display/FLINK/FLIP-217%3A&#43;Support&#43;watermark&#43;alignment&#43;of&#43;source&#43;splits">FLIP-217</a>
introduces an improvement to watermark alignment by aligning data emission
across splits within a source operator. This improvement results in more
efficient coordination of watermark progress in the source, which in turn
mitigates excessive buffering by downstream operators and enhances the overall
efficiency of streaming job execution.</li>
<li><strong>StateBackend Upgrade:</strong> The updated version of
<a href="https://github.com/ververica/frocksdb">FRocksDB</a> to 6.20.3-ververica-2.0
brings improvements to RocksDBStateBackend like sharing memory between slots,
and now supports Apple Silicon chipsets like the Mac M1.</li>
</ul>
<h1 id="batch-processing">
Batch processing
<a class="anchor" href="#batch-processing">#</a>
</h1>
<p>As a unified stream and batch data processing engine, Flink stands out
particularly in the field of stream processing. In order to improve its batch
processing capabilities, the community contributors put in a lot of effort into
improving Flink&rsquo;s batch performance and ecosystem in version 1.17. This makes it
easier for users to build a streaming warehouse based on Flink.</p>
<h2 id="speculative-execution">
Speculative Execution
<a class="anchor" href="#speculative-execution">#</a>
</h2>
<p><a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/speculative_execution/">Speculative Execution</a>
for sinks is now supported. Previously, Speculative
Execution was not enabled for sinks to avoid instability or incorrect results.
In Flink 1.17, the context of sinks is improved so that sinks, including <a href="https://github.com/apache/flink/blob/release-1.17/flink-core/src/main/java/org/apache/flink/api/connector/sink2/Sink.java">new
sinks</a>
and <a href="https://github.com/apache/flink/blob/release-1.17/flink-core/src/main/java/org/apache/flink/api/common/io/OutputFormat.java">OutputFormat
sinks</a>,
are aware of the number of attempts. With the number of attempts, sinks are able
to isolate the produced data of different attempts of the same subtask, even if
the attempts are running at the same time. The <em>FinalizeOnMaster</em> interface is
also improved so that OutputFormat sinks can see which attempts are finished and
then properly commit the written data. Once a sink can work well with concurrent
attempts, it can implement the decorative interface
<a href="https://github.com/apache/flink/blob/release-1.17/flink-core/src/main/java/org/apache/flink/api/common/SupportsConcurrentExecutionAttempts.java">SupportsConcurrentExecutionAttempts</a>
so that Speculative Execution is allowed to be performed on it. Some built in
sinks are enabled to do Speculative Execution, including DiscardingSink,
PrintSinkFunction, PrintSink, FileSink, FileSystemOutputFormat and
HiveTableSink.</p>
<p>The slow task detection is improved for Speculative Execution. Previously, it
only considered the execution time of tasks when deciding which tasks are slow.
It now takes the input data volume of tasks into account. Tasks which have a
longer execution time but consume more data may not be considered as slow. This
improvement helps to eliminate the negative impacts from data skew on slow task
detection.</p>
<h2 id="adaptive-batch-scheduler">
Adaptive Batch Scheduler
<a class="anchor" href="#adaptive-batch-scheduler">#</a>
</h2>
<p><a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/elastic_scaling/#adaptive-batch-scheduler">Adaptive Batch Scheduler</a>
is now used for batch jobs by default. This scheduler can automatically decide
a proper parallelism of each job vertex, based on how much data the vertex processes.
It is also the only scheduler which supports speculative execution.</p>
<p>The configuration of Adaptive Batch Scheduler is improved for ease of use. Users
no longer need to explicitly set the global default parallelism to -1 to enable
automatically deciding parallelism. Instead, the global default parallelism, if
set, will be used as the upper bound when deciding the parallelism. The keys of
Adaptive Batch Scheduler configuration options are also renamed to be easier to
understand.</p>
<p>The capabilities of Adaptive Batch Scheduler are also improved. It now supports
evenly distributing data to downstream tasks, based on fine-grained data
distribution information. The limitation that the decided parallelism of
vertices can only be a power of 2 is no longer needed and therefore removed.</p>
<h2 id="hybrid-shuffle-mode">
Hybrid Shuffle Mode
<a class="anchor" href="#hybrid-shuffle-mode">#</a>
</h2>
<p>Various important improvements to the Hybrid Shuffle Mode are available in this
release.</p>
<ul>
<li>Hybrid Shuffle Mode now supports Adaptive Batch Scheduler and Speculative
Execution.</li>
<li>Hybrid Shuffle Mode now supports reusing intermediate data when possible,
which brings significant performance improvements.</li>
<li>The stability is improved to avoid stability issues in large scale production.</li>
</ul>
<p>More details can be found at the
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/ops/batch/batch_shuffle/#hybrid-shuffle">Hybrid-Shuffle</a>
section of the documentation.</p>
<h2 id="tpc-ds-benchmark">
TPC-DS Benchmark
<a class="anchor" href="#tpc-ds-benchmark">#</a>
</h2>
<p>Starting with Flink 1.16, the performance of the Batch engine has continuously
been optimized. In Flink 1.16, dynamic partition pruning was introduced, but not
all TPC-DS queries could be optimized. In Flink 1.17, the algorithm has been
improved, and most of the TPC-DS results are now optimized. In Flink 1.17, a
dynamic programming join-reorder algorithm is introduced, which has a better
working, larger search space compared to the previous algorithm. The planner
can automatically select the appropriate join-reorder algorithm based on the
number of joins in a query, so that users no longer need to care about the join-reorder
algorithms. (Note: the join-reorder is disabled by default, and you need
to enable it when running TPC-DS.) In the operator layer, dynamic hash local
aggregation strategy is introduced, which can dynamically determine according to
the data distribution whether the local hash aggregation operation is needed to
improve performance. In the runtime layer, some unnecessary virtual function
calls are removed to speed up the execution. To summarize, Flink 1.17 has a 26%
performance improvement compared to Flink 1.16 on a 10T dataset for partitioned
tables.</p>
<center>
<img src="/img/blog/2023-03-23-release-1.17.0/tpc-ds-benchmark.png" style="width:90%;margin:15px">
</center>
<h2 id="sql-client--gateway">
SQL Client / Gateway
<a class="anchor" href="#sql-client--gateway">#</a>
</h2>
<p>Apache Flink 1.17 introduces a new feature called &ldquo;gateway mode&rdquo; for the SQL
Client, which enhances its functionality by allowing it to connect to a remote
gateway and submit SQL queries like it does in embedded mode. This new mode
offers users much more convenience when working with the SQL Gateway.</p>
<p>In addition, the SQL Client/SQL Gateway now provides new support for managing
job lifecycles through SQL statements. Users can use SQL statements to display
all job information stored in the JobManager and stop running jobs using their
unique job IDs. With this new feature, SQL Client/Gateway now has almost the
same functionality as Flink CLI, making it another powerful tool for managing
Flink jobs.</p>
<h2 id="sql-api">
SQL API
<a class="anchor" href="#sql-api">#</a>
</h2>
<p>Row-Level SQL Delete &amp; Update are becoming more and more important in modern big
data workflows. The use cases include deleting a set of rows for regulatory
compliance, updating a set of rows for data correction, etc. Many popular
engines such as Trino or Hive have supported them. In Flink 1.17, the new Delete &amp;
Update API is introduced in Flink, which works in batch mode and is exposed to
connectors. Now external storage systems can implement row-level modification via
this new API. Furthermore, the ALTER TABLE syntax is extended to include the
ability to ADD/MODIFY/DROP columns, primary keys, and watermarks. These enhanced
capabilities provide users with the flexibility to maintain their table schema
metadata according to their needs.</p>
<h2 id="hive-compatibility">
Hive Compatibility
<a class="anchor" href="#hive-compatibility">#</a>
</h2>
<p>Apache Flink 1.17 brings new improvements to the Hive table sink, making it more
efficient than ever before. In previous versions, the Hive table sink only
supported automatic file compaction in streaming mode, but not in batch mode. In
Flink 1.17, the Hive table sink can now automatically compact newly written files
in batch mode as well. This feature can greatly reduce the number of small files.
Also, for using Hive built-in functions via
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/connectors/table/hive/hive_functions/">HiveModule</a>,
Flink introduces several native Hive aggregation functions including
SUM/COUNT/AVG/MIN/MAX to HiveModule. These functions can be executed using the
hash-based aggregation operator and then bring significant performance
improvements.</p>
<h1 id="streaming-processing">
Streaming Processing
<a class="anchor" href="#streaming-processing">#</a>
</h1>
<p>In Flink 1.17, difficult Streaming SQL semantics and correctness issues are
addressed, checkpoint performance is optimized, watermark alignment is enhanced,
Streaming FileSink expands ABFS(Azure Blob Filesystem) support, Calcite and
FRocksDB have been upgraded to newer versions. These improvements further
enhance the capabilities of Flink in the field of stream processing.</p>
<h2 id="streaming-sql-semantics">
Streaming SQL Semantics
<a class="anchor" href="#streaming-sql-semantics">#</a>
</h2>
<p>In terms of correctness and semantic enhancement, Flink 1.17 introduces the
experimental feature
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/dev/table/sql/explain/#explaindetails">PLAN_ADVICE</a>
that detects potential correctness risks and provides optimization suggestions.
For example, if a <a href="https://nightlies.apache.org/flink/flink-docs-master/docs/dev/table/concepts/determinism/#3-determinism-in-streaming-processing">NDU (Non-deterministic
Updates)</a>
issue is detected by EXPLAIN PLAN_ADVICE, the optimizer will append the advice
at the end of the physical plan, the optimizer will then tag the advice id to the
relational node of related operations, and recommend that users update their
configurations accordingly. By providing users with this specific advice, the
optimizer can help them improve the accuracy and reliability of their query
results.</p>
<pre tabindex="0"><code>== Optimized Physical Plan With Advice ==
...
advice[1]: [WARNING] The column(s): day(generated by non-deterministic function: CURRENT_TIMESTAMP ) can not satisfy the determinism requirement for correctly processing update message(&#39;UB&#39;/&#39;UA&#39;/&#39;D&#39; in changelogMode, not &#39;I&#39; only), this usually happens when input node has no upsertKey(upsertKeys=[{}]) or current node outputs non-deterministic update messages. Please consider removing these non-deterministic columns or making them deterministic by using deterministic functions.
</code></pre><p>The PLAN_ADVICE also helps users improve the performance and efficiency of their
queries. For example, when a GroupAggregate operation is detected and can be
optimized to a more efficient local-global aggregation. By providing users with
this specific advice for optimization, the optimizer enables users to easily
improve the performance and efficiency of their queries.</p>
<pre tabindex="0"><code>== Optimized Physical Plan With Advice ==
...
advice[1]: [ADVICE] You might want to enable local-global two-phase optimization by configuring (&#39;table.optimizer.agg-phase-strategy&#39; to &#39;AUTO&#39;).
</code></pre><p>In addition, Flink 1.17 resolved several incorrect plan optimizations which led
to incorrect results reported in
<a href="https://issues.apache.org/jira/browse/FLINK-29849">FLINK-29849</a>,
<a href="https://issues.apache.org/jira/browse/FLINK-30006">FLINK-30006</a>, and
<a href="https://issues.apache.org/jira/browse/FLINK-30841">FLINK-30841</a>.</p>
<h2 id="checkpoint-improvements">
Checkpoint Improvements
<a class="anchor" href="#checkpoint-improvements">#</a>
</h2>
<p>Generic Incremental Checkpoint (GIC) aims to improve the speed and stability of
the checkpoint procedure. Some experimental results in the WordCount case are
shown as below. More details can be found in this <a href="https://www.ververica.com/blog/generic-log-based-incremental-checkpoint">blog
post</a>.</p>
<p>Table1: Benefits after enabling GIC in WordCount case</p>
<center>
<img src="/img/blog/2023-03-23-release-1.17.0/beneifts-after-enabling-gic-in-wordcount-case.png" style="width:90%;margin:15px">
</center>
<p>Table2: Costs after enabling GIC in WordCount case</p>
<center>
<img src="/img/blog/2023-03-23-release-1.17.0/costs-after-enabling-gic-in-wordcount-case.png" style="width:90%;margin:15px">
</center>
<p>Unaligned Checkpoints (UC) greatly increase the completed ratio of checkpoints
under backpressure. The previous UC implementation would write many small files
which may cause high load for the namenode of HDFS. In this release, this problem
is resolved to make UC more usable in the production environment.</p>
<p>In 1.17, a <a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/ops/rest_api/#jobs-jobid-checkpoints-1">REST
API</a>
is provided so that users can manually trigger checkpoints with a self-defined
checkpoint type while a job is running. For example, for a job running with
incremental checkpoint, users can trigger a full checkpoint periodically or
manually to break the incremental checkpoint chain to avoid referring to files
from a long time ago.</p>
<h2 id="watermark-alignment-support">
Watermark Alignment Support
<a class="anchor" href="#watermark-alignment-support">#</a>
</h2>
<p>In earlier versions, <a href="https://cwiki.apache.org/confluence/display/FLINK/FLIP-182%3A&#43;Support&#43;watermark&#43;alignment&#43;of&#43;FLIP-27&#43;Sources">FLIP-182</a>
proposed a solution called watermark alignment to tackle the issue of data skew
in event time applications caused by imbalanced sources. However, it had a
limitation that the source parallelism had to match the number of splits. This
was because a source operator with multiple splits might need to buffer a
considerable amount of data if one split emitted data faster than another. To
address this limitation, Flink 1.17 introduced <a href="https://cwiki.apache.org/confluence/display/FLINK/FLIP-217%3A&#43;Support&#43;watermark&#43;alignment&#43;of&#43;source&#43;splits">FLIP-217</a>,
which enhances watermark alignment to align data emission across splits within
a source operator while considering watermark boundaries. This enhancement
ensures more coordinated watermark progress in the source, preventing downstream
operators from buffering excessive data and improving the execution efficiency of
streaming jobs.</p>
<h2 id="streaming-filesink-expansion">
Streaming FileSink Expansion
<a class="anchor" href="#streaming-filesink-expansion">#</a>
</h2>
<p>Following the addition of ABFS support, the
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/connectors/datastream/filesystem/#file-sink">FileSink</a>
is now able to function in streaming mode with a total of five different
filesystems: HDFS, S3, OSS, ABFS, and Local. This expansion effectively covers
the majority of main filesystems, providing a comprehensive range of options and
increased versatility for users.</p>
<h2 id="rocksdbstatebackend-upgrade">
RocksDBStateBackend Upgrade
<a class="anchor" href="#rocksdbstatebackend-upgrade">#</a>
</h2>
<p>This release has updated the version of
<a href="https://github.com/ververica/frocksdb">FRocksDB</a> to 6.20.3-ververica-2.0 which
brings improvements for RocksDBStateBackend:</p>
<ol>
<li>Support building FRocksDB Java on Apple Silicon chipsets, such as Mac M1 and M2.</li>
<li>Improve the performance of the compaction filter by avoiding expensive ToString() calls.</li>
<li>Upgrade the ZLIB version of FRocksDB to avoid memory corruption.</li>
<li>Add the periodic_compaction_seconds option to RocksJava.</li>
</ol>
<p>Please see <a href="https://issues.apache.org/jira/browse/FLINK-30836">FLINK-30836</a> for
more details.</p>
<p>This release also widens the scope of sharing memory between slots to
TaskManager, which can help to increase the memory efficiency if the memory
usage of slots in a TaskManager is uneven. Furthermore, it can reduce the
overall memory consumption at the expense of resource isolation after tuning.
Read more about
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/docs/deployment/config/#state-backend-rocksdb-memory-fixed-per-tm">state.backend.rocksdb.memory.fixed-per-tm</a>
configuration.</p>
<h2 id="calcite-upgrade">
Calcite Upgrade
<a class="anchor" href="#calcite-upgrade">#</a>
</h2>
<p>Flink 1.17 is upgraded to <a href="https://calcite.apache.org/">Calcite</a> version 1.29.0
to improve the performance and efficiency of the Flink SQL system. Flink 1.16
uses Calcite 1.26.0 which has severe issues with RexNode simplification caused
by the SEARCH operator. This leads to wrong data from query optimization as
reported in <a href="https://issues.apache.org/jira/browse/CALCITE-4325">CALCITE-4325</a> and <a href="https://issues.apache.org/jira/browse/CALCITE-4352">
CALCITE-4352</a>. By upgrading
the version of Calcite, Flink can take advantage of its improved
performance and new features in Flink SQL processing. This resolves multiple
bugs and leads to faster query processing times.</p>
<h1 id="others">
Others
<a class="anchor" href="#others">#</a>
</h1>
<h2 id="pyflink">
PyFlink
<a class="anchor" href="#pyflink">#</a>
</h2>
<p>The Flink 1.17 release includes updates to PyFlink, the Python interface for
Apache Flink. Notable improvements include support for Python 3.10 and execution
capabilities on Apple Silicon chipsets, such as the Mac M1 and M2 computers.
Additionally, the release includes minor optimizations that enhance
cross-process communication stability between Java and Python processes, enable
the specification of data types of Python UDFs via strings to improve usability,
and support access to job parameters in Python UDFs. This release focuses on
improving PyFlink&rsquo;s functionality and usability, rather than introducing new
major features. However, these enhancements are expected to improve the user
experience and facilitate efficient data processing.</p>
<h2 id="daily-performance-benchmark">
Daily Performance Benchmark
<a class="anchor" href="#daily-performance-benchmark">#</a>
</h2>
<p>In Flink 1.17, daily performance monitoring has been integrated into the
<a href="https://apache-flink.slack.com/archives/C0471S0DFJ9">#flink-dev-benchmarks Slack channel</a>. This feature is crucial in quickly
identifying regressions and ensuring the quality of the code. Once a regression
is identified through the Slack channel or the <a href="http://codespeed.dak8s.net:8000">speed
center</a>, developers can refer to the guidance
provided in the <a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=115511847">Benchmark&rsquo;s
wiki</a>
to address the issue effectively. This feature helps the community take a
proactive approach in ensuring system performance, resulting in a better product
and increased user satisfaction.</p>
<h2 id="subtask-level-flame-graph">
Subtask Level Flame Graph
<a class="anchor" href="#subtask-level-flame-graph">#</a>
</h2>
<p>Starting with Flink 1.17, Flame Graph provides &ldquo;drill down&rdquo; visualizations to
the task level, which allows users to gain a more detailed understanding of the
performance of their tasks. This feature is a significant improvement over
previous versions of Flame Graph, as it empowers users to select a subtask of
interest and see the corresponding flame graph. By doing so, users can identify
specific areas where their tasks may be experiencing performance issues and take
steps to address them. This can lead to significant improvements in the overall
efficiency and effectiveness of their data processing pipelines.</p>
<center>
<img src="/img/blog/2023-03-23-release-1.17.0/subtask-level-flame-graph.png" style="width:90%;margin:15px">
</center>
<h2 id="generalized-delegation-token-support">
Generalized Delegation Token Support
<a class="anchor" href="#generalized-delegation-token-support">#</a>
</h2>
<p>Previously, Flink supported Kerberos authentication and Hadoop based tokens.
With
<a href="https://cwiki.apache.org/confluence/display/FLINK/FLIP-272%3A&#43;Generalized&#43;delegation&#43;token&#43;support">FLIP-272</a>
being finalized, Flink’s delegation token framework is generalized to make it
authentication protocol agnostic. This will allow contributors in the future
to add support for non-Hadoop compliant frameworks where the authentication
protocol is not based on Kerberos. Additionally,
<a href="https://cwiki.apache.org/confluence/display/FLINK/FLIP-211%3A&#43;Kerberos&#43;delegation&#43;token&#43;framework">FLIP-211</a>
was implemented which improves Flink&rsquo;s interactions with Kerberos: It reduces
the number of requests that are necessary to exchange delegation tokens in Flink.</p>
<h1 id="upgrade-notes">
Upgrade Notes
<a class="anchor" href="#upgrade-notes">#</a>
</h1>
<p>The Flink community tries to ensure that upgrades are as seamless as possible.
However, certain changes may require users to make adjustments to certain parts
of the program when upgrading to version 1.17. Please refer to the
<a href="https://nightlies.apache.org/flink/flink-docs-release-1.17/release-notes/flink-1.17/">release notes</a>
for a comprehensive list of adjustments to make and issues to check during the
upgrading process.</p>
<h1 id="list-of-contributors">
List of Contributors
<a class="anchor" href="#list-of-contributors">#</a>
</h1>
<p>The Apache Flink community would like to express gratitude to all the
contributors who made this release possible:</p>
<p>Ahmed Hamdy, Aitozi, Aleksandr Pilipenko, Alexander Fedulov, Alexander Preuß,
Anton Kalashnikov, Arvid Heise, Bo Cui, Brayno, Carlos Castro, ChangZhuo Chen
(陳昌倬), Chen Qin, Chesnay Schepler, Clemens, ConradJam, Danny Cranmer, Dawid
Wysakowicz, Dian Fu, Dong Lin, Dongjoon Hyun, Elphas Toringepi, Eric Xiao,
Fabian Paul, Ferenc Csaky, Gabor Somogyi, Gen Luo, Gunnar Morling, Gyula Fora,
Hangxiang Yu, Hong Liang Teoh, HuangXingBo, Jacky Lau, Jane Chan, Jark Wu,
Jiale, Jin, Jing Ge, Jinzhong Li, Joao Boto, John Roesler, Jun He, JunRuiLee,
Junrui Lee, Juntao Hu, Krzysztof Chmielewski, Leonard Xu, Licho, Lijie Wang,
Mark Canlas, Martijn Visser, MartijnVisser, Martin Liu, Marton Balassi, Mason
Chen, Matt, Matthias Pohl, Maximilian Michels, Mingliang Liu, Mulavar, Nico
Kruber, Noah, Paul Lin, Peter Huang, Piotr Nowojski, Qing Lim, QingWei,
Qingsheng Ren, Rakesh, Ran Tao, Robert Metzger, Roc Marshal, Roman Khachatryan,
Ron, Rui Fan, Ryan Skraba, Salva Alcántara, Samrat, Samrat Deb, Samrat002,
Sebastian Mattheis, Sergey Nuyanzin, Seth Saperstein, Shengkai, Shuiqiang Chen,
Smirnov Alexander, Sriram Ganesh, Steven van Rossum, Tartarus0zm, Timo Walther,
Venkata krishnan Sowrirajan, Wei Zhong, Weihua Hu, Weijie Guo, Xianxun Ye,
Xintong Song, Yash Mayya, YasuoStudyJava, Yu Chen, Yubin Li, Yufan Sheng, Yun
Gao, Yun Tang, Yuxin Tan, Zakelly, Zhanghao Chen, Zhenqiu Huang, Zhu Zhu,
ZmmBigdata, bzhaoopenstack, chengshuo.cs, chenxujun, chenyuzhi, chenyuzhi459,
chenzihao, dependabot[bot], fanrui, fengli, frankeshi, fredia, godfreyhe,
gongzhongqiang, harker2015, hehuiyuan, hiscat, huangxingbo, hunter-cloud09,
ifndef-SleePy, jeremyber-aws, jiangjiguang, jingge, kevin.cyj, kristoffSC, kurt,
laughingman7743, libowen, lincoln lee, lincoln.lil, liujiangang, liujingmao,
liuyongvs, liuzhuang2017, luoyuxia, mas-chen, moqimoqidea, muggleChen, noelo,
ouyangwulin, ramkrish86, saikikun, sammieliu, shihong90, shuiqiangchen,
snuyanzin, sunxia, sxnan, tison, todd5167, tonyzhu918, wangfeifan, wenbingshen,
xuyang, yiksanchan, yunfengzhou-hub, yunhong, yuxia Luo, yuzelin, zhangjingcun,
zhangmang, zhengyunhong.zyh, zhouli, zoucao, 沈嘉琦</p>
</article>
<div class="edit-this-page">
<p>
<a href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Translation+Specifications">Want to contribute translation?</a>
</p>
<p>
      <a href="https://github.com/apache/flink-web/edit/asf-site/docs/content/posts/2023-03-23-release-1.17.0.md">
Edit This Page<i class="fa fa-edit fa-fw"></i>
</a>
</p>
</div>
</section>
<aside class="book-toc">
<nav id="TableOfContents"><h3>On This Page <a href="javascript:void(0)" class="toc" onclick="collapseToc()"><i class="fa fa-times" aria-hidden="true"></i></a></h3>
<ul>
<li><a href="#towards-streaming-warehouses">Towards Streaming Warehouses</a></li>
<li><a href="#batch-processing">Batch processing</a>
<ul>
<li><a href="#speculative-execution">Speculative Execution</a></li>
<li><a href="#adaptive-batch-scheduler">Adaptive Batch Scheduler</a></li>
<li><a href="#hybrid-shuffle-mode">Hybrid Shuffle Mode</a></li>
<li><a href="#tpc-ds-benchmark">TPC-DS Benchmark</a></li>
<li><a href="#sql-client--gateway">SQL Client / Gateway</a></li>
<li><a href="#sql-api">SQL API</a></li>
<li><a href="#hive-compatibility">Hive Compatibility</a></li>
</ul>
</li>
<li><a href="#streaming-processing">Streaming Processing</a>
<ul>
<li><a href="#streaming-sql-semantics">Streaming SQL Semantics</a></li>
<li><a href="#checkpoint-improvements">Checkpoint Improvements</a></li>
<li><a href="#watermark-alignment-support">Watermark Alignment Support</a></li>
<li><a href="#streaming-filesink-expansion">Streaming FileSink Expansion</a></li>
<li><a href="#rocksdbstatebackend-upgrade">RocksDBStateBackend Upgrade</a></li>
<li><a href="#calcite-upgrade">Calcite Upgrade</a></li>
</ul>
</li>
<li><a href="#others">Others</a>
<ul>
<li><a href="#pyflink">PyFlink</a></li>
<li><a href="#daily-performance-benchmark">Daily Performance Benchmark</a></li>
<li><a href="#subtask-level-flame-graph">Subtask Level Flame Graph</a></li>
<li><a href="#generalized-delegation-token-support">Generalized Delegation Token Support</a></li>
</ul>
</li>
<li><a href="#upgrade-notes">Upgrade Notes</a></li>
<li><a href="#list-of-contributors">List of Contributors</a></li>
</ul>
</nav>
</aside>
<aside class="expand-toc hidden">
<a class="toc" onclick="expandToc()" href="javascript:void(0)">
<i class="fa fa-bars" aria-hidden="true"></i>
</a>
</aside>
</main>
<footer>
<div class="separator"></div>
<div class="panels">
<div class="wrapper">
<div class="panel">
<ul>
<li>
<a href="https://flink-packages.org/">flink-packages.org</a>
</li>
<li>
<a href="https://www.apache.org/">Apache Software Foundation</a>
</li>
<li>
<a href="https://www.apache.org/licenses/">License</a>
</li>
<li>
<a href="/zh/">
<i class="fa fa-globe" aria-hidden="true"></i>&nbsp;中文版
</a>
</li>
</ul>
</div>
<div class="panel">
<ul>
<li>
            <a href="/what-is-flink/security">Security</a>
</li>
<li>
<a href="https://www.apache.org/foundation/sponsorship.html">Donate</a>
</li>
<li>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</li>
</ul>
</div>
<div class="panel icons">
<div>
<a href="/posts">
<div class="icon flink-blog-icon"></div>
<span>Flink blog</span>
</a>
</div>
<div>
<a href="https://github.com/apache/flink">
<div class="icon flink-github-icon"></div>
<span>Github</span>
</a>
</div>
<div>
<a href="https://twitter.com/apacheflink">
<div class="icon flink-twitter-icon"></div>
<span>Twitter</span>
</a>
</div>
</div>
</div>
</div>
<hr/>
<div class="container disclaimer">
<p>The contents of this website are © 2024 Apache Software Foundation under the terms of the Apache License v2. Apache Flink, Flink, and the Flink logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</footer>
</body>
</html>