blob: dfe237f3b62b33dafc09ba510621ead60dfdee2b [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>
Spark Release 3.4.0 | Apache Spark
</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&family=Courier+Prime:wght@400;700&display=swap" rel="stylesheet">
<link href="/css/custom.css" rel="stylesheet">
<!-- Code highlighter CSS -->
<link href="/css/pygments-default.css" rel="stylesheet">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Matomo -->
<script>
// Matomo command queue: reuse the one already on window if present, otherwise start a new one.
var _paq = window._paq = window._paq || [];
/* Tracker setup calls such as "setCustomDimension" must be queued ahead of "trackPageView". */
_paq.push(["disableCookies"]);
_paq.push(["trackPageView"]);
_paq.push(["enableLinkTracking"]);
(function() {
  var baseUrl = "https://analytics.apache.org/";
  _paq.push(["setTrackerUrl", baseUrl + "matomo.php"]);
  _paq.push(["setSiteId", "40"]);
  // Insert an async <script> for matomo.js directly before the first script element in the document.
  var firstScript = document.getElementsByTagName("script")[0];
  var loader = document.createElement("script");
  loader.async = true;
  loader.src = baseUrl + "matomo.js";
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4" style="background: #1D6890;">
<a class="navbar-brand" href="/">
<img src="/images/spark-logo-rev.svg" alt="Apache Spark" width="141" height="72">
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarContent"
aria-controls="navbarContent" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse col-md-12 col-lg-auto pt-4" id="navbarContent">
<ul class="navbar-nav me-auto">
<li class="nav-item">
<a class="nav-link active" href="/downloads.html">Download</a>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="libraries" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Libraries
</a>
<ul class="dropdown-menu" aria-labelledby="libraries">
<li><a class="dropdown-item" href="/sql/">SQL and DataFrames</a></li>
<li><a class="dropdown-item" href="/spark-connect/">Spark Connect</a></li>
<li><a class="dropdown-item" href="/streaming/">Spark Streaming</a></li>
<li><a class="dropdown-item" href="/pandas-on-spark/">pandas on Spark</a></li>
<li><a class="dropdown-item" href="/mllib/">MLlib (machine learning)</a></li>
<li><a class="dropdown-item" href="/graphx/">GraphX (graph)</a></li>
<li>
<hr class="dropdown-divider">
</li>
<li><a class="dropdown-item" href="/third-party-projects.html">Third-Party Projects</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="documentation" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Documentation
</a>
<ul class="dropdown-menu" aria-labelledby="documentation">
<li><a class="dropdown-item" href="/docs/latest/">Latest Release</a></li>
<li><a class="dropdown-item" href="/documentation.html">Older Versions and Other Resources</a></li>
<li><a class="dropdown-item" href="/faq.html">Frequently Asked Questions</a></li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link active" href="/examples.html">Examples</a>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="community" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Community
</a>
<ul class="dropdown-menu" aria-labelledby="community">
<li><a class="dropdown-item" href="/community.html">Mailing Lists &amp; Resources</a></li>
<li><a class="dropdown-item" href="/contributing.html">Contributing to Spark</a></li>
<li><a class="dropdown-item" href="/improvement-proposals.html">Improvement Proposals (SPIP)</a>
</li>
<li><a class="dropdown-item" href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a>
</li>
<li><a class="dropdown-item" href="/powered-by.html">Powered By</a></li>
<li><a class="dropdown-item" href="/committers.html">Project Committers</a></li>
<li><a class="dropdown-item" href="/history.html">Project History</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="developers" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Developers
</a>
<ul class="dropdown-menu" aria-labelledby="developers">
<li><a class="dropdown-item" href="/developer-tools.html">Useful Developer Tools</a></li>
<li><a class="dropdown-item" href="/versioning-policy.html">Versioning Policy</a></li>
<li><a class="dropdown-item" href="/release-process.html">Release Process</a></li>
<li><a class="dropdown-item" href="/security.html">Security</a></li>
</ul>
</li>
</ul>
<ul class="navbar-nav ms-auto">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="apacheFoundation" role="button"
data-bs-toggle="dropdown" aria-expanded="false">
Apache Software Foundation
</a>
<ul class="dropdown-menu" aria-labelledby="apacheFoundation">
<li><a class="dropdown-item" href="https://www.apache.org/">Apache Homepage</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/licenses/">License</a></li>
<li><a class="dropdown-item"
href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/security/">Security</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/events/current-event">Event</a></li>
</ul>
</li>
</ul>
</div>
</nav>
<div class="container">
<div class="row mt-4">
<div class="col-12 col-md-9">
<h2>Spark Release 3.4.0</h2>
<p>Apache Spark 3.4.0 is the fifth release of the 3.x line. With tremendous contribution from the open-source community, this release managed to resolve in excess of 2,600 Jira tickets.</p>
<p>This release introduces a Python client for Spark Connect, augments Structured Streaming with async progress tracking and Python arbitrary stateful processing, increases Pandas API coverage and provides NumPy input support, simplifies the migration from traditional data warehouses by improving ANSI compliance and implementing dozens of new built-in functions, and boosts development productivity and debuggability with memory profiling.</p>
<p>To download Apache Spark 3.4.0, visit the <a href="https://spark.apache.org/downloads.html">downloads</a> page. You can consult JIRA for the <a href="https://s.apache.org/spark-3.4.0">detailed changes</a>. We have curated a list of high-level changes here, grouped by major modules.</p>
<h3 id="highlight">Highlight</h3>
<ul>
<li>Python client for Spark Connect (<a href="https://issues.apache.org/jira/browse/SPARK-39375">SPARK-39375</a>)</li>
<li>Implement support for DEFAULT values for columns in tables (<a href="https://issues.apache.org/jira/browse/SPARK-38334">SPARK-38334</a>)</li>
<li>Support TIMESTAMP WITHOUT TIMEZONE data type (<a href="https://issues.apache.org/jira/browse/SPARK-35662">SPARK-35662</a>)</li>
<li>Support &#8220;Lateral Column Alias References&#8221; (<a href="https://issues.apache.org/jira/browse/SPARK-27561">SPARK-27561</a>)</li>
<li>Harden SQLSTATE usage for error classes (<a href="https://issues.apache.org/jira/browse/SPARK-41994">SPARK-41994</a>)</li>
<li>Enable Bloom filter Joins by default (<a href="https://issues.apache.org/jira/browse/SPARK-38841">SPARK-38841</a>)</li>
<li>Better Spark UI scalability and Driver stability for large applications (<a href="https://issues.apache.org/jira/browse/SPARK-41053">SPARK-41053</a>)</li>
<li>Async Progress Tracking in Structured Streaming (<a href="https://issues.apache.org/jira/browse/SPARK-39591">SPARK-39591</a>)</li>
<li>Python Arbitrary Stateful Processing in Structured Streaming (<a href="https://issues.apache.org/jira/browse/SPARK-40434">SPARK-40434</a>)</li>
<li>Pandas API coverage improvements (<a href="https://issues.apache.org/jira/browse/SPARK-42882">SPARK-42882</a>) and NumPy input support in PySpark (<a href="https://issues.apache.org/jira/browse/SPARK-39405">SPARK-39405</a>)</li>
<li>Provide a memory profiler for PySpark user-defined functions (<a href="https://issues.apache.org/jira/browse/SPARK-40281">SPARK-40281</a>)</li>
<li>Implement PyTorch Distributor (<a href="https://issues.apache.org/jira/browse/SPARK-41589">SPARK-41589</a>)</li>
<li>Publish SBOM artifacts (<a href="https://issues.apache.org/jira/browse/SPARK-41893">SPARK-41893</a>)</li>
<li>Support IPv6-only environment (<a href="https://issues.apache.org/jira/browse/SPARK-39457">SPARK-39457</a>)</li>
<li>Customized K8s Scheduler (Apache YuniKorn and Volcano) GA (<a href="https://issues.apache.org/jira/browse/SPARK-42802">SPARK-42802</a>)</li>
</ul>
<h3 id="spark-sql">Spark SQL</h3>
<h4 id="features">Features</h4>
<ul>
<li>Implement support for DEFAULT values for columns in tables (<a href="https://issues.apache.org/jira/browse/SPARK-38334">SPARK-38334</a>)</li>
<li>Add Dataset.as(StructType) (<a href="https://issues.apache.org/jira/browse/SPARK-39625">SPARK-39625</a>)</li>
<li>Support parameterized SQL (<a href="https://issues.apache.org/jira/browse/SPARK-41271">SPARK-41271</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42702">SPARK-42702</a>)</li>
<li>Add unpivot / melt (<a href="https://issues.apache.org/jira/browse/SPARK-38864">SPARK-38864</a>, <a href="https://issues.apache.org/jira/browse/SPARK-39876">SPARK-39876</a>)</li>
<li>Support &#8220;lateral column alias references&#8221; (<a href="https://issues.apache.org/jira/browse/SPARK-27561">SPARK-27561</a>)</li>
<li>Support result offset clause (<a href="https://issues.apache.org/jira/browse/SPARK-28330">SPARK-28330</a>, <a href="https://issues.apache.org/jira/browse/SPARK-39159">SPARK-39159</a>)</li>
<li>Support Timestamp without time zone data type (<a href="https://issues.apache.org/jira/browse/SPARK-35662">SPARK-35662</a>)</li>
<li>Support scalar subquery in time travel (<a href="https://issues.apache.org/jira/browse/SPARK-39306">SPARK-39306</a>)</li>
<li>Make Catalog API be compatible with 3-layer-namespace (<a href="https://issues.apache.org/jira/browse/SPARK-39235">SPARK-39235</a>)</li>
<li>Support timestamp in seconds for TimeTravel using Dataframe options (<a href="https://issues.apache.org/jira/browse/SPARK-39633">SPARK-39633</a>)</li>
<li>Add SparkSession.config(Map) (<a href="https://issues.apache.org/jira/browse/SPARK-40163">SPARK-40163</a>)</li>
<li>Support changing session catalog&#8217;s default database (<a href="https://issues.apache.org/jira/browse/SPARK-35242">SPARK-35242</a>)</li>
<li>Protobuf support for Spark - from_protobuf AND to_protobuf (<a href="https://issues.apache.org/jira/browse/SPARK-40654">SPARK-40654</a>)</li>
<li>Add WHEN NOT MATCHED BY SOURCE clause to MERGE INTO (<a href="https://issues.apache.org/jira/browse/SPARK-40921">SPARK-40921</a>)</li>
<li>Relax ordering constraint for CREATE TABLE column options (<a href="https://issues.apache.org/jira/browse/SPARK-40944">SPARK-40944</a>)</li>
<li>SQL Equivalent for Dataframe overwrite command (<a href="https://issues.apache.org/jira/browse/SPARK-40956">SPARK-40956</a>)</li>
<li>Support Generate with no required child output to host outer references (<a href="https://issues.apache.org/jira/browse/SPARK-41441">SPARK-41441</a>)</li>
<li>ORDER BY ALL (<a href="https://issues.apache.org/jira/browse/SPARK-41637">SPARK-41637</a>)</li>
<li>GROUP BY ALL (<a href="https://issues.apache.org/jira/browse/SPARK-41635">SPARK-41635</a>)</li>
<li>Add flatMapSortedGroups and cogroupSorted (<a href="https://issues.apache.org/jira/browse/SPARK-38591">SPARK-38591</a>)</li>
<li>Support subqueries with correlated non-equality predicates (<a href="https://issues.apache.org/jira/browse/SPARK-36114">SPARK-36114</a>)</li>
<li>Support subqueries with correlation through UNION/INTERSECT/EXCEPT (<a href="https://issues.apache.org/jira/browse/SPARK-36124">SPARK-36124</a>)</li>
</ul>
<h4 id="ansi-compliance">ANSI Compliance</h4>
<ul>
<li>ANSI SQL mode: always return null on invalid access to map column (<a href="https://issues.apache.org/jira/browse/SPARK-40066">SPARK-40066</a>)</li>
<li>Support double quoted identifiers (<a href="https://issues.apache.org/jira/browse/SPARK-40585">SPARK-40585</a>)</li>
<li>ANSI SQL mode: Round/Bround should return an error on integer overflow (<a href="https://issues.apache.org/jira/browse/SPARK-42045">SPARK-42045</a>)</li>
<li>Support casting of integrals to ANSI intervals (<a href="https://issues.apache.org/jira/browse/SPARK-40008">SPARK-40008</a>)</li>
<li>Support cast of decimals to ANSI intervals (<a href="https://issues.apache.org/jira/browse/SPARK-40014">SPARK-40014</a>)</li>
<li>Return wider ANSI interval types from the percentile functions (<a href="https://issues.apache.org/jira/browse/SPARK-40151">SPARK-40151</a>)</li>
<li>Support cast of ANSI intervals to decimals (<a href="https://issues.apache.org/jira/browse/SPARK-39470">SPARK-39470</a>)</li>
<li>Support casting intervals to integrals in ANSI mode (<a href="https://issues.apache.org/jira/browse/SPARK-39451">SPARK-39451</a>)</li>
<li>Harden SQLSTATE usage for error classes (<a href="https://issues.apache.org/jira/browse/SPARK-41994">SPARK-41994</a>)</li>
</ul>
<h4 id="functions">Functions</h4>
<ul>
<li>Support table-valued generator functions in the FROM clause (<a href="https://issues.apache.org/jira/browse/SPARK-41594">SPARK-41594</a>)</li>
<li>Support ANSI Aggregate Function: REGR_SXY (<a href="https://issues.apache.org/jira/browse/SPARK-37681">SPARK-37681</a>)</li>
<li>Support ANSI Aggregate Function: REGR_R2 (<a href="https://issues.apache.org/jira/browse/SPARK-37641">SPARK-37641</a>)</li>
<li>Support ANSI Aggregate Function: REGR_SXX (<a href="https://issues.apache.org/jira/browse/SPARK-37672">SPARK-37672</a>)</li>
<li>Support ANSI Aggregate Function: REGR_SYY (<a href="https://issues.apache.org/jira/browse/SPARK-37702">SPARK-37702</a>)</li>
<li>Support ANSI Aggregate Function: REGR_SLOPE (<a href="https://issues.apache.org/jira/browse/SPARK-39230">SPARK-39230</a>)</li>
<li>Support ANSI Aggregate Function: REGR_INTERCEPT (<a href="https://issues.apache.org/jira/browse/SPARK-37623">SPARK-37623</a>)</li>
<li>Support ANSI aggregation function PERCENTILE_CONT as window function (<a href="https://issues.apache.org/jira/browse/SPARK-38219">SPARK-38219</a>)</li>
<li>Support ANSI Aggregation Function: PERCENTILE_DISC (<a href="https://issues.apache.org/jira/browse/SPARK-37691">SPARK-37691</a>)</li>
<li>Support SPLIT_PART function (<a href="https://issues.apache.org/jira/browse/SPARK-38063">SPARK-38063</a>)</li>
<li>Support TRY_AVG function (<a href="https://issues.apache.org/jira/browse/SPARK-38589">SPARK-38589</a>)</li>
<li>Support TRY_TO_BINARY function (<a href="https://issues.apache.org/jira/browse/SPARK-38590">SPARK-38590</a>)</li>
<li>Support the TO_NUMBER and TRY_TO_NUMBER SQL functions according to a new specification (<a href="https://issues.apache.org/jira/browse/SPARK-38796">SPARK-38796</a>)</li>
<li>Support ANSI general value specification and function - USER (<a href="https://issues.apache.org/jira/browse/SPARK-39138">SPARK-39138</a>)</li>
<li>Support TO_CHAR and TRY_TO_CHAR functions to format Decimal values as strings (<a href="https://issues.apache.org/jira/browse/SPARK-28516">SPARK-28516</a>)</li>
<li>Support ANY_VALUE aggregate function (<a href="https://issues.apache.org/jira/browse/SPARK-39213">SPARK-39213</a>)</li>
<li>Support EQUAL_NULL function (<a href="https://issues.apache.org/jira/browse/SPARK-39305">SPARK-39305</a>)</li>
<li>Support aggregate function MEDIAN (<a href="https://issues.apache.org/jira/browse/SPARK-39320">SPARK-39320</a>)</li>
<li>Support REGEXP_COUNT function (<a href="https://issues.apache.org/jira/browse/SPARK-39618">SPARK-39618</a>)</li>
<li>Support REGEXP_INSTR function (<a href="https://issues.apache.org/jira/browse/SPARK-39744">SPARK-39744</a>)</li>
<li>Support REGEXP_SUBSTR function (<a href="https://issues.apache.org/jira/browse/SPARK-39695">SPARK-39695</a>)</li>
<li>Support UNPIVOT function (<a href="https://issues.apache.org/jira/browse/SPARK-39876">SPARK-39876</a>)</li>
<li>Support TRY_TO_TIMESTAMP function (<a href="https://issues.apache.org/jira/browse/SPARK-39795">SPARK-39795</a>)</li>
<li>Support url encode/decode as built-in function and tidy up url-related functions (<a href="https://issues.apache.org/jira/browse/SPARK-39741">SPARK-39741</a>)</li>
<li>Support aggregate function MODE (<a href="https://issues.apache.org/jira/browse/SPARK-39808">SPARK-39808</a>)</li>
<li>Support GET function (<a href="https://issues.apache.org/jira/browse/SPARK-40109">SPARK-40109</a>)</li>
<li>Add function aliases: LEN, DATEPART, DATEADD, DATE_DIFF, CURDATE (<a href="https://issues.apache.org/jira/browse/SPARK-40352">SPARK-40352</a>)</li>
<li>Improve the TO_BINARY function (<a href="https://issues.apache.org/jira/browse/SPARK-40112">SPARK-40112</a>)</li>
<li>Support CURRENT_SCHEMA (<a href="https://issues.apache.org/jira/browse/SPARK-41323">SPARK-41323</a>)</li>
<li>Support data masking built-in function MASK (<a href="https://issues.apache.org/jira/browse/SPARK-40687">SPARK-40687</a>)</li>
<li>Support high-order function: ARRAY_COMPACT (<a href="https://issues.apache.org/jira/browse/SPARK-41235">SPARK-41235</a>)</li>
<li>Support ARRAY_APPEND function (<a href="https://issues.apache.org/jira/browse/SPARK-41232">SPARK-41232</a>)</li>
<li>Support ARRAY_INSERT function (<a href="https://issues.apache.org/jira/browse/SPARK-41234">SPARK-41234</a>)</li>
<li>Support LUHN_CHECK function (<a href="https://issues.apache.org/jira/browse/SPARK-42191">SPARK-42191</a>)</li>
<li>Support ARRAY_SORT(column, comparator) (<a href="https://issues.apache.org/jira/browse/SPARK-39925">SPARK-39925</a>)</li>
</ul>
<h4 id="data-sources">Data Sources</h4>
<ul>
<li>Support Column Stats in DS v2 (<a href="https://issues.apache.org/jira/browse/SPARK-41378">SPARK-41378</a>)</li>
<li>Storage Partitioned Join (SPJ) in DS v2 (<a href="https://issues.apache.org/jira/browse/SPARK-37375">SPARK-37375</a>)</li>
<li>Row-level operations in DS v2 (<a href="https://issues.apache.org/jira/browse/SPARK-35801">SPARK-35801</a>)</li>
<li>Add SupportsReportOrdering mix in interface for DS v2 Scan (<a href="https://issues.apache.org/jira/browse/SPARK-38647">SPARK-38647</a>)</li>
<li>Infer DATE type for CSV schema inference (<a href="https://issues.apache.org/jira/browse/SPARK-39469">SPARK-39469</a>)</li>
<li>Support driver metrics in DS v2 custom metric API (<a href="https://issues.apache.org/jira/browse/SPARK-39635">SPARK-39635</a>)</li>
<li>Distribution and ordering support DS v2 function in writing (<a href="https://issues.apache.org/jira/browse/SPARK-39607">SPARK-39607</a>)</li>
<li>StringEndsWith/Contains support push down to Parquet so that we can leverage dictionary filter (<a href="https://issues.apache.org/jira/browse/SPARK-39002">SPARK-39002</a>)</li>
<li>Support UDT in Spark Parquet vectorized reader (<a href="https://issues.apache.org/jira/browse/SPARK-39086">SPARK-39086</a>)</li>
<li>Extend METADATA column to support row indexes for Parquet files (<a href="https://issues.apache.org/jira/browse/SPARK-37980">SPARK-37980</a>)</li>
<li>Support reading parquet FIXED_LEN_BYTE_ARRAY type (<a href="https://issues.apache.org/jira/browse/SPARK-41096">SPARK-41096</a>)</li>
<li>Optimize the order of filtering predicates (<a href="https://issues.apache.org/jira/browse/SPARK-40045">SPARK-40045</a>)</li>
<li>Support CTE and temp table queries with MSSQL JDBC (<a href="https://issues.apache.org/jira/browse/SPARK-37259">SPARK-37259</a>)</li>
<li>Support ignoreCorruptFiles and ignoreMissingFiles in Data Source options (<a href="https://issues.apache.org/jira/browse/SPARK-38767">SPARK-38767</a>)</li>
<li>Pull out v1 write to WriteFiles (<a href="https://issues.apache.org/jira/browse/SPARK-41407">SPARK-41407</a>)</li>
<li>Add read-side char padding to cover external data files (<a href="https://issues.apache.org/jira/browse/SPARK-40697">SPARK-40697</a>)</li>
</ul>
<h4 id="query-optimization">Query Optimization</h4>
<ul>
<li>Merge non-correlated scalar subqueries (<a href="https://issues.apache.org/jira/browse/SPARK-34079">SPARK-34079</a>)</li>
<li>Enable Bloom filter Joins by default (<a href="https://issues.apache.org/jira/browse/SPARK-38841">SPARK-38841</a>)</li>
<li>Remove unnecessary distinct in aggregate expression by distinctKeys (<a href="https://issues.apache.org/jira/browse/SPARK-38832">SPARK-38832</a>)</li>
<li>Support predicate pushdown and column pruning for de-duped CTEs (<a href="https://issues.apache.org/jira/browse/SPARK-37670">SPARK-37670</a>)</li>
<li>Remove outer join if aggregate functions are duplicate agnostic on streamed side (<a href="https://issues.apache.org/jira/browse/SPARK-38886">SPARK-38886</a>)</li>
<li>Remove left/right outer join if only left/right side columns are selected and the join keys on the other side are unique (<a href="https://issues.apache.org/jira/browse/SPARK-39172">SPARK-39172</a>)</li>
<li>Optimize global Sort to RepartitionByExpression (<a href="https://issues.apache.org/jira/browse/SPARK-39911">SPARK-39911</a>)</li>
<li>Optimize TransposeWindow rule (<a href="https://issues.apache.org/jira/browse/SPARK-38034">SPARK-38034</a>)</li>
<li>Enhance EliminateSorts to support removing sorts via LocalLimit (<a href="https://issues.apache.org/jira/browse/SPARK-40050">SPARK-40050</a>)</li>
<li>Push local limit to both sides if join condition is empty (<a href="https://issues.apache.org/jira/browse/SPARK-40040">SPARK-40040</a>)</li>
<li>Add PushProjectionThroughLimit for Optimizer (<a href="https://issues.apache.org/jira/browse/SPARK-40501">SPARK-40501</a>)</li>
<li>Support PIVOT/UNPIVOT with join children (<a href="https://issues.apache.org/jira/browse/SPARK-41195">SPARK-41195</a>)</li>
<li>Support column pruning with multiple nondeterministic Filters (<a href="https://issues.apache.org/jira/browse/SPARK-41017">SPARK-41017</a>)</li>
<li>Improve output partitioning and ordering with AQE cache (<a href="https://issues.apache.org/jira/browse/SPARK-41048">SPARK-41048</a>)</li>
<li>Improve multi like performance by creating a balanced expression tree predicate (<a href="https://issues.apache.org/jira/browse/SPARK-41167">SPARK-41167</a>)</li>
<li>Remove the Sort if it is the child of RepartitionByExpression (<a href="https://issues.apache.org/jira/browse/SPARK-36703">SPARK-36703</a>)</li>
<li>Use available column statistics from completed query stages (<a href="https://issues.apache.org/jira/browse/SPARK-39991">SPARK-39991</a>)</li>
<li>Reuse expressions in WindowSpecDefinition (<a href="https://issues.apache.org/jira/browse/SPARK-41805">SPARK-41805</a>)</li>
<li>Improve AliasAwareOutputPartitioning and AliasAwareQueryOutputOrdering to take all aliases into account (<a href="https://issues.apache.org/jira/browse/SPARK-40086">SPARK-40086</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42049">SPARK-42049</a>)</li>
<li>Push down limit through Python UDFs (<a href="https://issues.apache.org/jira/browse/SPARK-42115">SPARK-42115</a>)</li>
</ul>
<h4 id="code-generation-and-query-execution">Code Generation and Query Execution</h4>
<ul>
<li>Make defaultJoin in BroadcastNestedLoopJoinExec run in parallel (<a href="https://issues.apache.org/jira/browse/SPARK-40487">SPARK-40487</a>)</li>
<li>Codegen Support for HiveGenericUDF (<a href="https://issues.apache.org/jira/browse/SPARK-42051">SPARK-42051</a>)</li>
<li>Introduce shuffle on SinglePartition (<a href="https://issues.apache.org/jira/browse/SPARK-41986">SPARK-41986</a>)</li>
<li>Make DPP support a pruning side that contains a Union (<a href="https://issues.apache.org/jira/browse/SPARK-39217">SPARK-39217</a>)</li>
</ul>
<h4 id="other-notable-changes">Other Notable Changes</h4>
<ul>
<li>Support Auto Partition Statistics Collection (<a href="https://issues.apache.org/jira/browse/SPARK-38573">SPARK-38573</a>)</li>
<li>Format error messages in the Thrift Server (<a href="https://issues.apache.org/jira/browse/SPARK-40098">SPARK-40098</a>)</li>
<li>Add an extension API to do plan normalization for caching (<a href="https://issues.apache.org/jira/browse/SPARK-41183">SPARK-41183</a>)</li>
<li>Refactor Spark types by introducing physical types (<a href="https://issues.apache.org/jira/browse/SPARK-41226">SPARK-41226</a>)</li>
<li>OneOf field support and recursion checks (<a href="https://issues.apache.org/jira/browse/SPARK-41396">SPARK-41396</a>)</li>
<li>Centralize the column resolution logic (<a href="https://issues.apache.org/jira/browse/SPARK-41405">SPARK-41405</a>)</li>
<li>Improve the plan change validation (<a href="https://issues.apache.org/jira/browse/SPARK-42081">SPARK-42081</a>)</li>
<li>Introduce SparkPath for typesafety (<a href="https://issues.apache.org/jira/browse/SPARK-41970">SPARK-41970</a>)</li>
<li>Throw Exception for db_name.view_name when creating temp view by Dataset API (<a href="https://issues.apache.org/jira/browse/SPARK-41090">SPARK-41090</a>)</li>
<li>Change the default value of argument of Mask function from -1 to NULL (<a href="https://issues.apache.org/jira/browse/SPARK-42070">SPARK-42070</a>)</li>
</ul>
<h3 id="spark-core">Spark Core</h3>
<h4 id="decommission">Decommission</h4>
<ul>
<li>Avoid unnecessary task rerun on decommissioned executor lost if shuffle data migrated (<a href="https://issues.apache.org/jira/browse/SPARK-41469">SPARK-41469</a>)</li>
<li>Ignore stage fetch failure caused by decommissioned executor (<a href="https://issues.apache.org/jira/browse/SPARK-40481">SPARK-40481</a>)</li>
<li>Enable spark.storage.decommission.(rdd|shuffle)Blocks.enabled by default (<a href="https://issues.apache.org/jira/browse/SPARK-40198">SPARK-40198</a>)</li>
<li>Add support for YARN decommissioning when ESS is disabled (<a href="https://issues.apache.org/jira/browse/SPARK-30835">SPARK-30835</a>)</li>
</ul>
<h4 id="scheduler">Scheduler</h4>
<ul>
<li>Make stage scheduling support local-cluster mode (<a href="https://issues.apache.org/jira/browse/SPARK-41949">SPARK-41949</a>)</li>
<li>Support stage level task resource profile for standalone cluster when dynamic allocation disabled (<a href="https://issues.apache.org/jira/browse/SPARK-39853">SPARK-39853</a>)</li>
<li>Delay onDisconnected to enable the Driver to receive ExecutorExitCode (<a href="https://issues.apache.org/jira/browse/SPARK-39957">SPARK-39957</a>)</li>
<li>Improve the speculation through the stage task metrics (<a href="https://issues.apache.org/jira/browse/SPARK-32170">SPARK-32170</a>)</li>
<li>Add stage level resource scheduling support for standalone cluster (<a href="https://issues.apache.org/jira/browse/SPARK-39062">SPARK-39062</a>)</li>
<li>Improve LaunchTask process to avoid Stage failures caused by fail-to-send LaunchTask messages (<a href="https://issues.apache.org/jira/browse/SPARK-39955">SPARK-39955</a>)</li>
</ul>
<h4 id="shuffle">Shuffle</h4>
<ul>
<li>Add Push Based Shuffle client side read metrics (<a href="https://issues.apache.org/jira/browse/SPARK-36620">SPARK-36620</a>)</li>
<li>Shuffle server side metrics for Push-based shuffle (<a href="https://issues.apache.org/jira/browse/SPARK-33573">SPARK-33573</a>)</li>
<li>Ensure mergedShuffleCleaner has been shut down before db close (<a href="https://issues.apache.org/jira/browse/SPARK-40186">SPARK-40186</a>)</li>
<li>Add RocksDB support for shuffle service state store (<a href="https://issues.apache.org/jira/browse/SPARK-38888">SPARK-38888</a>)</li>
<li>Encapsulate LevelDB used to store remote/external shuffle state as DB (<a href="https://issues.apache.org/jira/browse/SPARK-38909">SPARK-38909</a>)</li>
<li>Enable spark.dynamicAllocation.shuffleTracking.enabled by default (<a href="https://issues.apache.org/jira/browse/SPARK-3984">SPARK-3984</a>)</li>
<li>Enable Push-based shuffle service to store state in NM level DB for work preserving restart (<a href="https://issues.apache.org/jira/browse/SPARK-33236">SPARK-33236</a>)</li>
<li>Remove shuffle blocks using the shuffle service for released executors (<a href="https://issues.apache.org/jira/browse/SPARK-37618">SPARK-37618</a>)</li>
</ul>
<h4 id="other-notable-changes-1">Other Notable Changes</h4>
<ul>
<li>Support IPv6-only environment (<a href="https://issues.apache.org/jira/browse/SPARK-39457">SPARK-39457</a>)</li>
<li>Enable spark.kryo.unsafe by default (<a href="https://issues.apache.org/jira/browse/SPARK-42137">SPARK-42137</a>)</li>
<li>Disallow arbitrary custom classpath with proxy user in cluster mode (<a href="https://issues.apache.org/jira/browse/SPARK-41958">SPARK-41958</a>)</li>
<li>Avoid BlockManager re-registration if the executor has been lost (<a href="https://issues.apache.org/jira/browse/SPARK-41360">SPARK-41360</a>)</li>
<li>Remove the limitation that single task result must fit in 2GB (<a href="https://issues.apache.org/jira/browse/SPARK-40622">SPARK-40622</a>)</li>
<li>Remove the support of deprecated spark.akka.* configs (<a href="https://issues.apache.org/jira/browse/SPARK-40401">SPARK-40401</a>)</li>
<li>Change default logging to stderr to be consistent with the behavior of log4j (<a href="https://issues.apache.org/jira/browse/SPARK-40406">SPARK-40406</a>)</li>
<li>Exclude DirectTaskResult metadata when calculating result size (<a href="https://issues.apache.org/jira/browse/SPARK-40261">SPARK-40261</a>)</li>
<li>Allow customizing the initial number of partitions in take() behavior (<a href="https://issues.apache.org/jira/browse/SPARK-40211">SPARK-40211</a>)</li>
<li>Use interruptible lock instead of synchronized in Executor.updateDependencies() (<a href="https://issues.apache.org/jira/browse/SPARK-40235">SPARK-40235</a>)</li>
<li>Task failure should always trigger task failure listeners (<a href="https://issues.apache.org/jira/browse/SPARK-40106">SPARK-40106</a>)</li>
<li>Add the ability to selectively disable watching or polling (<a href="https://issues.apache.org/jira/browse/SPARK-36462">SPARK-36462</a>)</li>
<li>Do not cache unserialized broadcast relations on the driver (<a href="https://issues.apache.org/jira/browse/SPARK-39983">SPARK-39983</a>)</li>
<li>Fix deadlock between TaskMemoryManager and UnsafeExternalSorter.SpillableIterator (<a href="https://issues.apache.org/jira/browse/SPARK-39283">SPARK-39283</a>)</li>
<li>Expose the number of partitions in a stage to TaskContext (<a href="https://issues.apache.org/jira/browse/SPARK-38679">SPARK-38679</a>)</li>
<li>Make memory overhead factor configurable (<a href="https://issues.apache.org/jira/browse/SPARK-38194">SPARK-38194</a>)</li>
<li>Avoid using bash -c in ShellBasedGroupsMappingProvider (<a href="https://issues.apache.org/jira/browse/SPARK-38992">SPARK-38992</a>)</li>
</ul>
<h3 id="structured-streaming">Structured Streaming</h3>
<h4 id="major-features">Major Features</h4>
<ul>
<li>Async Progress Tracking (<a href="https://issues.apache.org/jira/browse/SPARK-39591">SPARK-39591</a>)</li>
<li>Python Arbitrary Stateful Processing in Structured Streaming (<a href="https://issues.apache.org/jira/browse/SPARK-40434">SPARK-40434</a>)</li>
<li>Protobuf Support in Structured Streaming (<a href="https://issues.apache.org/jira/browse/SPARK-40653">SPARK-40653</a>)</li>
<li>Fix late record filtering to support chaining of stateful operators (<a href="https://issues.apache.org/jira/browse/SPARK-40925">SPARK-40925</a>)</li>
</ul>
<h4 id="other-notable-changes-2">Other Notable Changes</h4>
<ul>
<li>Introducing a streaming checkpoint file manager based on Hadoop&#8217;s Abortable interface (<a href="https://issues.apache.org/jira/browse/SPARK-40039">SPARK-40039</a>)</li>
<li>Deprecate Trigger.Once and Promote Trigger.AvailableNow (<a href="https://issues.apache.org/jira/browse/SPARK-39805">SPARK-39805</a>)</li>
<li>Expose the information of catalog table to the logical plan in streaming query (<a href="https://issues.apache.org/jira/browse/SPARK-39564">SPARK-39564</a>)</li>
<li>Support collecting metrics from streaming sinks (<a href="https://issues.apache.org/jira/browse/SPARK-38564">SPARK-38564</a>)</li>
<li>Deprecate DStream API (<a href="https://issues.apache.org/jira/browse/SPARK-42075">SPARK-42075</a>)</li>
<li>Flip the default value of Kafka offset fetching config (<a href="https://issues.apache.org/jira/browse/SPARK-40844">SPARK-40844</a>)</li>
<li>Provide cloned spark session in DataFrame in user function for foreachBatch sink in PySpark (<a href="https://issues.apache.org/jira/browse/SPARK-41379">SPARK-41379</a>)</li>
</ul>
<h3 id="spark-connect">Spark Connect</h3>
<h4 id="python-client">Python Client</h4>
<ul>
<li>Implement DataFrame API (<a href="https://issues.apache.org/jira/browse/SPARK-41279">SPARK-41279</a>)</li>
<li>Implement Column API (<a href="https://issues.apache.org/jira/browse/SPARK-41282">SPARK-41282</a>)</li>
<li>Implement Functions API (<a href="https://issues.apache.org/jira/browse/SPARK-41283">SPARK-41283</a>)</li>
<li>Implement SparkSession API (<a href="https://issues.apache.org/jira/browse/SPARK-41281">SPARK-41281</a>)</li>
<li>Implement I/O API (<a href="https://issues.apache.org/jira/browse/SPARK-41284">SPARK-41284</a>)</li>
<li>Implement Catalog API (<a href="https://issues.apache.org/jira/browse/SPARK-41289">SPARK-41289</a>)</li>
<li>Support for User-defined Functions in Python (<a href="https://issues.apache.org/jira/browse/SPARK-41661">SPARK-41661</a>)</li>
<li>Support for Pandas/Arrow Function API (<a href="https://issues.apache.org/jira/browse/SPARK-42393">SPARK-42393</a>)</li>
<li>Support for Runtime SQL configuration (<a href="https://issues.apache.org/jira/browse/SPARK-42499">SPARK-42499</a>)</li>
<li>Build, package and infrastructure for Spark Connect (<a href="https://issues.apache.org/jira/browse/SPARK-41286">SPARK-41286</a>)</li>
<li>Type annotations for Spark Connect Python Client (<a href="https://issues.apache.org/jira/browse/SPARK-40451">SPARK-40451</a>)</li>
</ul>
<h4 id="scala-client">Scala Client</h4>
<ul>
<li>Implement basic Scala Client (<a href="https://issues.apache.org/jira/browse/SPARK-41534">SPARK-41534</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42133">SPARK-42133</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42043">SPARK-42043</a>, <a href="https://issues.apache.org/jira/browse/SPARK-41822">SPARK-41822</a>)</li>
<li>Implement SparkSession API (<a href="https://issues.apache.org/jira/browse/SPARK-42639">SPARK-42639</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42581">SPARK-42581</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42564">SPARK-42564</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42544">SPARK-42544</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42631">SPARK-42631</a>)</li>
<li>Implement DataFrame API (<a href="https://issues.apache.org/jira/browse/SPARK-42440">SPARK-42440</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42559">SPARK-42559</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42558">SPARK-42558</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42556">SPARK-42556</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42468">SPARK-42468</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42529">SPARK-42529</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42561">SPARK-42561</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42894">SPARK-42894</a>, <a href="https://issues.apache.org/jira/browse/SPARK-41874">SPARK-41874</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42691">SPARK-42691</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42692">SPARK-42692</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42481">SPARK-42481</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42541">SPARK-42541</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42542">SPARK-42542</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42520">SPARK-42520</a>, <a href="https://issues.apache.org/jira/browse/SPARK-41823">SPARK-41823</a>)</li>
<li>Implement Column API (<a href="https://issues.apache.org/jira/browse/SPARK-42441">SPARK-42441</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42560">SPARK-42560</a>)</li>
<li>Implement Functions API (<a href="https://issues.apache.org/jira/browse/SPARK-42461">SPARK-42461</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42579">SPARK-42579</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42527">SPARK-42527</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42531">SPARK-42531</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42495">SPARK-42495</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42557">SPARK-42557</a>)</li>
<li>Implement I/O API (<a href="https://issues.apache.org/jira/browse/SPARK-42457">SPARK-42457</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42555">SPARK-42555</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42690">SPARK-42690</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42878">SPARK-42878</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42757">SPARK-42757</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42482">SPARK-42482</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42733">SPARK-42733</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42518">SPARK-42518</a>)</li>
<li>Implement Runtime SQL configuration (<a href="https://issues.apache.org/jira/browse/SPARK-42586">SPARK-42586</a>)</li>
<li>Basic User Defined Function support (<a href="https://issues.apache.org/jira/browse/SPARK-42283">SPARK-42283</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42653">SPARK-42653</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42543">SPARK-42543</a>)</li>
<li>Basic Typed API support (<a href="https://issues.apache.org/jira/browse/SPARK-42580">SPARK-42580</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42605">SPARK-42605</a>)</li>
<li>Test infrastructure for Spark Connect (<a href="https://issues.apache.org/jira/browse/SPARK-42172">SPARK-42172</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42377">SPARK-42377</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42599">SPARK-42599</a>)</li>
<li>Implement REPL Support (<a href="https://issues.apache.org/jira/browse/SPARK-42656">SPARK-42656</a>, <a href="https://issues.apache.org/jira/browse/SPARK-42884">SPARK-42884</a>)</li>
</ul>
<h3 id="pyspark">PySpark</h3>
<h4 id="pandas-api-on-spark">Pandas API on Spark</h4>
<ul>
<li>Major improvements
<ul>
<li>Python Arbitrary Stateful Processing in Structured Streaming (<a href="https://issues.apache.org/jira/browse/SPARK-40434">SPARK-40434</a>)</li>
<li>Implement pandas API missing parameters (<a href="https://issues.apache.org/jira/browse/SPARK-42883">SPARK-42883</a>)</li>
<li>Pandas 1.5 support (<a href="https://issues.apache.org/jira/browse/SPARK-40576">SPARK-40576</a>)</li>
</ul>
</li>
<li>Major features
<ul>
<li>Implement Series.searchsorted (<a href="https://issues.apache.org/jira/browse/SPARK-40330">SPARK-40330</a>)</li>
<li>Implement Series.autocorr (<a href="https://issues.apache.org/jira/browse/SPARK-38774">SPARK-38774</a>)</li>
<li>Implement DataFrame.mode (<a href="https://issues.apache.org/jira/browse/SPARK-40138">SPARK-40138</a>)</li>
<li>Implement DataFrame.boxplot and DataFrame.plot.box (<a href="https://issues.apache.org/jira/browse/SPARK-38993">SPARK-38993</a>)</li>
<li>Implement DataFrame.corrwith (<a href="https://issues.apache.org/jira/browse/SPARK-38907">SPARK-38907</a>)</li>
<li>Implement DataFrame.resample and Series.resample (<a href="https://issues.apache.org/jira/browse/SPARK-39081">SPARK-39081</a>)</li>
<li>Implement DataFrame.interpolate and Series.interpolate (<a href="https://issues.apache.org/jira/browse/SPARK-38844">SPARK-38844</a>)</li>
<li>Implement DataFrame.ewm and Series.ewm (<a href="https://issues.apache.org/jira/browse/SPARK-38785">SPARK-38785</a>)</li>
<li>Implement GroupBy.prod (<a href="https://issues.apache.org/jira/browse/SPARK-40334">SPARK-40334</a>)</li>
<li>Implement GroupBy.nth (<a href="https://issues.apache.org/jira/browse/SPARK-40333">SPARK-40333</a>)</li>
<li>Implement GroupBy.quantile (<a href="https://issues.apache.org/jira/browse/SPARK-40332">SPARK-40332</a>)</li>
<li>Implement GroupBy.sem (<a href="https://issues.apache.org/jira/browse/SPARK-40305">SPARK-40305</a>)</li>
<li>Implement GroupBy.mad (<a href="https://issues.apache.org/jira/browse/SPARK-39284">SPARK-39284</a>)</li>
<li>Implement GroupBy.skew (<a href="https://issues.apache.org/jira/browse/SPARK-39246">SPARK-39246</a>)</li>
<li>Implement GroupBy.ewm (<a href="https://issues.apache.org/jira/browse/SPARK-39129">SPARK-39129</a>)</li>
<li>Support GroupBy positional indexing (<a href="https://issues.apache.org/jira/browse/SPARK-38947">SPARK-38947</a>)</li>
</ul>
</li>
</ul>
<h4 id="other-notable-changes-3">Other Notable Changes</h4>
<ul>
<li>Major improvements
<ul>
<li>Provide a memory profiler for PySpark user-defined functions (<a href="https://issues.apache.org/jira/browse/SPARK-40281">SPARK-40281</a>)</li>
<li>Make Catalog API be compatible with 3-layer-namespace (<a href="https://issues.apache.org/jira/browse/SPARK-39235">SPARK-39235</a>)</li>
<li>NumPy input support in PySpark (<a href="https://issues.apache.org/jira/browse/SPARK-39405">SPARK-39405</a>)</li>
<li>PySpark error improvements (<a href="https://issues.apache.org/jira/browse/SPARK-41597">SPARK-41597</a>)</li>
</ul>
</li>
<li>Major features
<ul>
<li>Support parameterized SQL in PySpark (<a href="https://issues.apache.org/jira/browse/SPARK-41666">SPARK-41666</a>)</li>
<li>Implement ‘median’ function (<a href="https://issues.apache.org/jira/browse/SPARK-40003">SPARK-40003</a>)</li>
<li>Implement ‘mode’ function (<a href="https://issues.apache.org/jira/browse/SPARK-40007">SPARK-40007</a>)</li>
<li>Implement ‘unpivot/melt’ function (<a href="https://issues.apache.org/jira/browse/SPARK-39877">SPARK-39877</a>)</li>
<li>Support Varchar in PySpark (<a href="https://issues.apache.org/jira/browse/SPARK-39760">SPARK-39760</a>)</li>
<li>Support CharType in PySpark (<a href="https://issues.apache.org/jira/browse/SPARK-39809">SPARK-39809</a>)</li>
</ul>
</li>
</ul>
<h3 id="mllib">MLlib</h3>
<ul>
<li>Implement PyTorch Distributor (<a href="https://issues.apache.org/jira/browse/SPARK-41589">SPARK-41589</a>)</li>
<li>Unify the data validation (<a href="https://issues.apache.org/jira/browse/SPARK-38584">SPARK-38584</a>)</li>
<li>Reduce the shuffle size of ALS (<a href="https://issues.apache.org/jira/browse/SPARK-40476">SPARK-40476</a>, <a href="https://issues.apache.org/jira/browse/SPARK-40745">SPARK-40745</a>)</li>
<li>Dedup isotonic regression duplicate features (<a href="https://issues.apache.org/jira/browse/SPARK-41008">SPARK-41008</a>)</li>
<li>KMeans blockify input vectors (<a href="https://issues.apache.org/jira/browse/SPARK-30661">SPARK-30661</a>)</li>
<li>Add relevance score for nDCG evaluation (<a href="https://issues.apache.org/jira/browse/SPARK-39446">SPARK-39446</a>)</li>
</ul>
<h3 id="sparkr">SparkR</h3>
<ul>
<li>Add unpivot / melt (<a href="https://issues.apache.org/jira/browse/SPARK-41267">SPARK-41267</a>)</li>
<li>Add array_sort(column, comparator) (<a href="https://issues.apache.org/jira/browse/SPARK-40167">SPARK-40167</a>)</li>
<li>Support multiple &#8220;Column&#8221; drop in R (<a href="https://issues.apache.org/jira/browse/SPARK-40087">SPARK-40087</a>)</li>
<li>Arrow 9.0.0 support with SparkR (<a href="https://issues.apache.org/jira/browse/SPARK-40114">SPARK-40114</a>)</li>
<li>Make Catalog API be compatible with 3-layer-namespace (<a href="https://issues.apache.org/jira/browse/SPARK-39579">SPARK-39579</a>, <a href="https://issues.apache.org/jira/browse/SPARK-39646">SPARK-39646</a>, <a href="https://issues.apache.org/jira/browse/SPARK-39645">SPARK-39645</a>, <a href="https://issues.apache.org/jira/browse/SPARK-39236">SPARK-39236</a>, <a href="https://issues.apache.org/jira/browse/SPARK-39716">SPARK-39716</a>, <a href="https://issues.apache.org/jira/browse/SPARK-39719">SPARK-39719</a>)</li>
<li>Support R 4.2.0 (<a href="https://issues.apache.org/jira/browse/SPARK-39372">SPARK-39372</a>)</li>
</ul>
<h3 id="live-ui-and-history-server-service">Live UI and History Server Service</h3>
<ul>
<li>Better Spark UI scalability and Driver stability for large applications (<a href="https://issues.apache.org/jira/browse/SPARK-41053">SPARK-41053</a>)</li>
<li>Use RocksDB for spark.history.store.hybridStore.diskBackend by default (<a href="https://issues.apache.org/jira/browse/SPARK-42277">SPARK-42277</a>)</li>
<li>Group nested executions under the root execution (<a href="https://issues.apache.org/jira/browse/SPARK-41752">SPARK-41752</a>)</li>
<li>Show metrics properties in the environment tab (<a href="https://issues.apache.org/jira/browse/SPARK-39110">SPARK-39110</a>)</li>
<li>Fix StagePage input size/records not shown when records are greater than zero (<a href="https://issues.apache.org/jira/browse/SPARK-34777">SPARK-34777</a>)</li>
<li>Improve event logging JsonProtocol performance by using Jackson instead of Json4s (<a href="https://issues.apache.org/jira/browse/SPARK-39489">SPARK-39489</a>)</li>
<li>Support spark.history.fs.update.batchSize (<a href="https://issues.apache.org/jira/browse/SPARK-39225">SPARK-39225</a>)</li>
</ul>
<h3 id="build">Build</h3>
<ul>
<li>Update cloudpickle to v2.2.0 (<a href="https://issues.apache.org/jira/browse/SPARK-40991">SPARK-40991</a>)</li>
<li>Deprecate Python 3.7 Support (<a href="https://issues.apache.org/jira/browse/SPARK-39861">SPARK-39861</a>)</li>
<li>Support Python 3.11 (<a href="https://issues.apache.org/jira/browse/SPARK-41454">SPARK-41454</a>)</li>
<li>Update dev.ludovic.netlib to 3.0.2 (<a href="https://issues.apache.org/jira/browse/SPARK-40251">SPARK-40251</a>)</li>
<li>Update breeze to 2.0 (<a href="https://issues.apache.org/jira/browse/SPARK-39616">SPARK-39616</a>)</li>
<li>Update slf4j version to 2.0.6 (<a href="https://issues.apache.org/jira/browse/SPARK-41561">SPARK-41561</a>)</li>
<li>Update kubernetes-client version to 6.4.1 (<a href="https://issues.apache.org/jira/browse/SPARK-42362">SPARK-42362</a>)</li>
<li>Update rocksdbjni to 7.9.2 (<a href="https://issues.apache.org/jira/browse/SPARK-42129">SPARK-42129</a>)</li>
<li>Update Apache Arrow to 11.0.0 (<a href="https://issues.apache.org/jira/browse/SPARK-42161">SPARK-42161</a>)</li>
<li>Upgrade Apache Kafka to 3.3.2 (<a href="https://issues.apache.org/jira/browse/SPARK-42109">SPARK-42109</a>)</li>
</ul>
<h3 id="credits">Credits</h3>
<p>Last but not least, this release would not have been possible without the following contributors: Abhishek Dixit, Abu Bakr Siddiq, Adam Binford, Ahmed Mahran, Aimilios Tsouvelekakis, Ait Zeouay Amrane, Aki Sukegawa, Ala Luszczak, Alex Balikov, Alkis Evlogimenos, Allan Folting, Allison Portis, Allison Wang, Andrew Ray, Andy Grove, Andy Lam, Anish Shrigondekar, Ankit Prakash Gupta, Anton Ippolitov, Anton Okolnychyi, Aravind Patnam, Artsiom Yudovin, Arvin Zheng, Attila Zsolt Piros, Austin Wang, Ben Zhang, Bjorn Jorgensen, Bjørn Jørgensen, Bo Zhang, Bobby Wang, Brandon Dahler, Brennan Stein, Brian Schaefer, Brian Yue, Bruce Robbins, Carmen Kwan, Chandni Singh, Chao Sun, Chaoqin Li, Cheng Pan, Cheng Su, Chenhao Li, Chris Nauroth, Daniel Davies, Daniel Fiterman, Daniel Ranchal Parrado, Daniel Tenedorio, David Lewis, Dch Nguyen, Deepyaman Datta, Dennis Huo, Deshan Xiao, Desmond Cheong, Dongjoon Hyun, Dustin William Smith, ELHoussineT, Emil Ejbyfeldt, Enrico Minack, Erik Krogen, Eugene-Mark, Frank Yin, Fred Liu, Fredrik Mile, Fu Chen, Furcy Pin, Gabor Roczei, Gautham Banasandra, Gengliang Wang, Gidon Gershinsky, Guangxin Wang, Haejoon Lee, Hai Tao, Herman Van Hovell, Hisoka-X, Holden Karau, Huanli Wang, Hui An, Hyukjin Kwon, Immanuel Buder, Ismaël Mejía, Ivan Sadikov, Jack Chen, Jatin Sharma, Jeffrey Chen, Jelmer Kuperus, Jerry Peng, Jiaan Geng, JiexingLi, Johan Lasperas, John Caveman, John Zhuge, Jonathan Cui, Josh Rosen, Jove Yuan, Juliusz Sompolski, Jungtaek Lim, Kai-Hsun Chen, Kapil Kumar Singh, Karen Feng, Karuppayya Rajendran, Kazuaki Ishizaki, Kazuyuki Tanimura, Kelvin Jiang, Kent Yao, Keunhyun Oh, Khaled Hammouda, Khalid Mammadov, Kian Eliasi, Kimahriman, Kris Mok, Kumar, Pralabh, Kun Wan, Lee Yang, Liang-Chi Hsieh, Lingyun Yuan, Linhong Liu, Lorenzo Martini, LorenzoMartini, Luca Canali, Maciej Szymkiewicz, Manu Zhang, Mark Khaitman, Martin Grund, Martin Tzvetanov Grigorov, Maryann Xue, Max Gekk, Maya Anderson, Minchu Yang, Mridul Muralidharan, NarekDW, Nicholas 
Chammas, Niranjan Jayakar, Ole Sasse, Oleksiy Dyagilev, PJ Fanning, Peter Toth, Prashant Singh, Raghu Angadi, Rakesh Raushan, Reynold Xin, Rithwik Ediga Lakhamsani, Robert (Bobby) Evans, Rui Wang, Ruifeng Zheng, Runyao Chen, Ryan Johnson, Sandeep Katta, Sandeep Singh, SandishKumarHN, Santosh Pingale, Sean Owen, Serge Rielau, Shaoyun Chen, Shardul Mahadik, Shiqi Sun, Shixiong Zhu, Shrikant Prasad, Shuyou Dong, Stefaan Lippens, Steve Loughran, Steven Aerts, Sumeet Gajjar, Supun Nakandala, Swaminathan Balachandran, Takuya Ueshin, Tanel Kiis, Ted Yu, Tengfei Huang, Thejdeep Gudivada, Tobias Stadler, Tom Van Bussel, Tristan Nixon, Venki Korukanti, Vinod KC, Vitalii Li, Vivek Atal, Warren Zhu, Wei Liu, Weichen Xu, Weiwei Yang, Wenchen Fan, Wenli Looi, Wilfred Spiegelenburg, William Hyun, William Zijie, WolverineJiang, Xiduo You, Xing Lin, Xingbo Jiang, Xingchao, Zhang, Xinrong Meng, Xinyi Yu, XiuLi Wei, Yan Wei, Yang Jie, Yaohua628, Ye Zhou, Yi Wu, Yi Zhu, Yikf, Yikun Jiang, Yubi Lee, Yuming Wang, Zach Schuermann, Zhen Li, Zhen Wang, Zhiming She, Ziqi Liu, awdavidson, beobest2, bjornjorgensen, bzhaoopenstack, carlfu-db, cashmand, chenzhx, constzhou, dcoliversun, dengziming, fanyilun, fred-db, ganeshchand, gaoyajun02, guanziyue, harupy, huangxiaopingRD, huaxingao, idealspark, jackylee-ch, jiang13021, jiaoqingbo, khalidmammadov, kuwii, leesf, lvshaokang, lw33, mattshma, mcdull-zhang, minyyy, moritzkoerber, morvenhuang, mridulm, nyingping, panbingkun, philwalk, pralabhkumar, qiuliang988, santosh-d3vpl3x, seunggabi, smallzhongfeng, sus, thyecust, toujours33, uchiiii, utkarsh39, vicennial, wangshengjie123, wankunde, wayneguow, wecharyu, weiyuyilia, wineternity, wzx140, xiaonanyang-db, xiuzhu9527, yabola, yikf, zekai-li, zhangbutao, zheniantoushipashi, zhixingheyi-tian, zhouyifan279, zwangsheng, zzzzming95</p>
<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>
</div>
<div class="col-12 col-md-3">
<div class="news" style="margin-bottom: 20px;">
<h5>Latest News</h5>
<ul class="list-unstyled">
<li><a href="/news/spark-3-4-3-released.html">Spark 3.4.3 released</a>
<span class="small">(Apr 18, 2024)</span></li>
<li><a href="/news/spark-3-5-1-released.html">Spark 3.5.1 released</a>
<span class="small">(Feb 23, 2024)</span></li>
<li><a href="/news/spark-3-3-4-released.html">Spark 3.3.4 released</a>
<span class="small">(Dec 16, 2023)</span></li>
<li><a href="/news/spark-3-4-2-released.html">Spark 3.4.2 released</a>
<span class="small">(Nov 30, 2023)</span></li>
</ul>
<p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
</div>
<div style="text-align:center; margin-bottom: 20px;">
<a href="https://www.apache.org/events/current-event.html">
<img src="https://www.apache.org/events/current-event-234x60.png" alt="Current Apache event" style="max-width: 100%;"/>
</a>
</div>
<div class="hidden-xs hidden-sm">
<a href="/downloads.html" class="btn btn-cta btn-lg d-grid" style="margin-bottom: 30px;">
Download Spark
</a>
<p style="font-size: 16px; font-weight: 500; color: #555;">
Built-in Libraries:
</p>
<ul class="list-none">
<li><a href="/sql/">SQL and DataFrames</a></li>
<li><a href="/streaming/">Spark Streaming</a></li>
<li><a href="/mllib/">MLlib (machine learning)</a></li>
<li><a href="/graphx/">GraphX (graph)</a></li>
</ul>
<a href="/third-party-projects.html">Third-Party Projects</a>
</div>
</div>
</div>
<footer class="small">
<hr>
Apache Spark, Spark, Apache, the Apache feather logo, and the Apache Spark project logo are either registered
trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
See guidance on use of Apache Spark <a href="/trademarks.html">trademarks</a>.
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
Copyright &copy; 2018 The Apache Software Foundation, Licensed under the
<a href="https://www.apache.org/licenses/">Apache License, Version 2.0</a>.
</footer>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"
integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
crossorigin="anonymous"></script>
<script src="https://code.jquery.com/jquery.js"></script>
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>
</body>
</html>