blob: cf3726ddb7a1f2249bef20ae138e8351659269bf [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>
Spark Release 3.5.0 | Apache Spark
</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&amp;family=Courier+Prime:wght@400;700&amp;display=swap" rel="stylesheet">
<link href="/css/custom.css" rel="stylesheet">
<!-- Code highlighter CSS -->
<link href="/css/pygments-default.css" rel="stylesheet">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Matomo -->
<script>
/* Matomo command queue: reuse window._paq when it already exists, otherwise start with an empty array. */
var _paq = window._paq = window._paq || [];
/* Tracker methods such as "setCustomDimension" have to be queued ahead of "trackPageView". */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
/* Queue the tracker endpoint and site id, then insert the async matomo.js loader before the first <script> on the page. */
(function () {
  var trackerBase = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '40']);
  var doc = document;
  var loaderTag = doc.createElement('script');
  var firstScript = doc.getElementsByTagName('script')[0];
  loaderTag.async = true;
  loaderTag.src = trackerBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loaderTag, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4" style="background: #1D6890;">
<a class="navbar-brand" href="/">
<img src="/images/spark-logo-rev.svg" alt="Apache Spark" width="141" height="72">
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarContent"
aria-controls="navbarContent" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse col-md-12 col-lg-auto pt-4" id="navbarContent">
<ul class="navbar-nav me-auto">
<li class="nav-item">
<a class="nav-link active" href="/downloads.html">Download</a>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="libraries" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Libraries
</a>
<ul class="dropdown-menu" aria-labelledby="libraries">
<li><a class="dropdown-item" href="/sql/">SQL and DataFrames</a></li>
<li><a class="dropdown-item" href="/spark-connect/">Spark Connect</a></li>
<li><a class="dropdown-item" href="/streaming/">Spark Streaming</a></li>
<li><a class="dropdown-item" href="/pandas-on-spark/">pandas on Spark</a></li>
<li><a class="dropdown-item" href="/mllib/">MLlib (machine learning)</a></li>
<li><a class="dropdown-item" href="/graphx/">GraphX (graph)</a></li>
<li>
<hr class="dropdown-divider">
</li>
<li><a class="dropdown-item" href="/third-party-projects.html">Third-Party Projects</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="documentation" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Documentation
</a>
<ul class="dropdown-menu" aria-labelledby="documentation">
<li><a class="dropdown-item" href="/docs/latest/">Latest Release</a></li>
<li><a class="dropdown-item" href="/documentation.html">Older Versions and Other Resources</a></li>
<li><a class="dropdown-item" href="/faq.html">Frequently Asked Questions</a></li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link active" href="/examples.html">Examples</a>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="community" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Community
</a>
<ul class="dropdown-menu" aria-labelledby="community">
<li><a class="dropdown-item" href="/community.html">Mailing Lists &amp; Resources</a></li>
<li><a class="dropdown-item" href="/contributing.html">Contributing to Spark</a></li>
<li><a class="dropdown-item" href="/improvement-proposals.html">Improvement Proposals (SPIP)</a>
</li>
<li><a class="dropdown-item" href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a>
</li>
<li><a class="dropdown-item" href="/powered-by.html">Powered By</a></li>
<li><a class="dropdown-item" href="/committers.html">Project Committers</a></li>
<li><a class="dropdown-item" href="/history.html">Project History</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="developers" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Developers
</a>
<ul class="dropdown-menu" aria-labelledby="developers">
<li><a class="dropdown-item" href="/developer-tools.html">Useful Developer Tools</a></li>
<li><a class="dropdown-item" href="/versioning-policy.html">Versioning Policy</a></li>
<li><a class="dropdown-item" href="/release-process.html">Release Process</a></li>
<li><a class="dropdown-item" href="/security.html">Security</a></li>
</ul>
</li>
</ul>
<ul class="navbar-nav ms-auto">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="apacheFoundation" role="button"
data-bs-toggle="dropdown" aria-expanded="false">
Apache Software Foundation
</a>
<ul class="dropdown-menu" aria-labelledby="apacheFoundation">
<li><a class="dropdown-item" href="https://www.apache.org/">Apache Homepage</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/licenses/">License</a></li>
<li><a class="dropdown-item"
href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/security/">Security</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/events/current-event">Event</a></li>
</ul>
</li>
</ul>
</div>
</nav>
<div class="container">
<div class="row mt-4">
<div class="col-12 col-md-9">
<h2>Spark Release 3.5.0</h2>
<p>Apache Spark 3.5.0 is the sixth release in the 3.x series. With significant contributions from the open-source community, this release addressed over 1,300 Jira tickets.</p>
<p>This release introduces more scenarios with general availability for Spark Connect, such as the Scala and Go clients, distributed training and inference support, and enhanced compatibility for Structured Streaming; introduces new PySpark and SQL functionality such as the SQL IDENTIFIER clause, named argument support for SQL function calls, SQL function support for HyperLogLog approximate aggregations, and Python user-defined table functions; simplifies distributed training with DeepSpeed; and introduces watermark propagation among operators and dropDuplicatesWithinWatermark operations in Structured Streaming.</p>
<p>To download Apache Spark 3.5.0, please visit the <a href="https://spark.apache.org/downloads.html">downloads</a> page. For <a href="https://s.apache.org/spark-3.5.0">detailed changes</a>, you can consult JIRA. We have also curated a list of high-level changes here, grouped by major modules.</p>
<ul id="markdown-toc">
<li><a href="#highlights" id="markdown-toc-highlights">Highlights</a></li>
<li><a href="#spark-connect" id="markdown-toc-spark-connect">Spark Connect</a></li>
<li><a href="#spark-sql" id="markdown-toc-spark-sql">Spark SQL</a> <ul>
<li><a href="#features" id="markdown-toc-features">Features</a></li>
<li><a href="#functions" id="markdown-toc-functions">Functions</a></li>
<li><a href="#data-sources" id="markdown-toc-data-sources">Data Sources</a></li>
<li><a href="#query-optimization" id="markdown-toc-query-optimization">Query Optimization</a></li>
<li><a href="#code-generation-and-query-execution" id="markdown-toc-code-generation-and-query-execution">Code Generation and Query Execution</a></li>
<li><a href="#other-notable-changes" id="markdown-toc-other-notable-changes">Other Notable Changes</a></li>
</ul>
</li>
<li><a href="#pyspark" id="markdown-toc-pyspark">PySpark</a> <ul>
<li><a href="#features-1" id="markdown-toc-features-1">Features</a></li>
<li><a href="#other-notable-changes-1" id="markdown-toc-other-notable-changes-1">Other Notable Changes</a></li>
</ul>
</li>
<li><a href="#core" id="markdown-toc-core">Core</a></li>
<li><a href="#structured-streaming" id="markdown-toc-structured-streaming">Structured Streaming</a></li>
<li><a href="#ml" id="markdown-toc-ml">ML</a></li>
<li><a href="#ui" id="markdown-toc-ui">UI</a></li>
<li><a href="#build-and-others" id="markdown-toc-build-and-others">Build and Others</a></li>
<li><a href="#removals-behavior-changes-and-deprecations" id="markdown-toc-removals-behavior-changes-and-deprecations">Removals, Behavior Changes and Deprecations</a> <ul>
<li><a href="#upcoming-removal" id="markdown-toc-upcoming-removal">Upcoming Removal</a></li>
<li><a href="#migration-guides" id="markdown-toc-migration-guides">Migration Guides</a></li>
</ul>
</li>
<li><a href="#credits" id="markdown-toc-credits">Credits</a></li>
</ul>
<h3 id="highlights">Highlights</h3>
<ul>
<li>Scala and Go client support in Spark Connect <a href="https://issues.apache.org/jira/browse/SPARK-42554">SPARK-42554</a> <a href="https://issues.apache.org/jira/browse/SPARK-43351">SPARK-43351</a></li>
<li>PyTorch-based distributed ML Support for Spark Connect <a href="https://issues.apache.org/jira/browse/SPARK-42471">SPARK-42471</a></li>
<li>Structured Streaming support for Spark Connect in Python and Scala <a href="https://issues.apache.org/jira/browse/SPARK-42938">SPARK-42938</a></li>
<li>Pandas API support for the Python Spark Connect Client <a href="https://issues.apache.org/jira/browse/SPARK-42497">SPARK-42497</a></li>
<li>Introduce Arrow Python UDFs <a href="https://issues.apache.org/jira/browse/SPARK-40307">SPARK-40307</a></li>
<li>Support Python user-defined table functions <a href="https://issues.apache.org/jira/browse/SPARK-43798">SPARK-43798</a></li>
<li>Migrate PySpark errors onto error classes <a href="https://issues.apache.org/jira/browse/SPARK-42986">SPARK-42986</a></li>
<li>PySpark Test Framework <a href="https://issues.apache.org/jira/browse/SPARK-44042">SPARK-44042</a></li>
<li>Add support for Datasketches HllSketch <a href="https://issues.apache.org/jira/browse/SPARK-16484">SPARK-16484</a></li>
<li>Built-in SQL Function Improvement <a href="https://issues.apache.org/jira/browse/SPARK-41231">SPARK-41231</a></li>
<li>IDENTIFIER clause <a href="https://issues.apache.org/jira/browse/SPARK-43205">SPARK-43205</a></li>
<li>Add SQL functions into Scala, Python and R API <a href="https://issues.apache.org/jira/browse/SPARK-43907">SPARK-43907</a></li>
<li>Add named argument support for SQL functions <a href="https://issues.apache.org/jira/browse/SPARK-43922">SPARK-43922</a></li>
<li>Avoid unnecessary task rerun on decommissioned executor lost if shuffle data migrated <a href="https://issues.apache.org/jira/browse/SPARK-41469">SPARK-41469</a></li>
<li>Distributed ML &lt;&gt; spark connect <a href="https://issues.apache.org/jira/browse/SPARK-42471">SPARK-42471</a></li>
<li>DeepSpeed Distributor <a href="https://issues.apache.org/jira/browse/SPARK-44264">SPARK-44264</a></li>
<li>Implement changelog checkpointing for RocksDB state store <a href="https://issues.apache.org/jira/browse/SPARK-43421">SPARK-43421</a></li>
<li>Introduce watermark propagation among operators <a href="https://issues.apache.org/jira/browse/SPARK-42376">SPARK-42376</a></li>
<li>Introduce dropDuplicatesWithinWatermark <a href="https://issues.apache.org/jira/browse/SPARK-42931">SPARK-42931</a></li>
<li>RocksDB state store provider memory management enhancements <a href="https://issues.apache.org/jira/browse/SPARK-43311">SPARK-43311</a></li>
</ul>
<h3 id="spark-connect">Spark Connect</h3>
<ul>
<li>Refactoring of the sql module into sql and sql-api to produce a minimum set of dependencies that can be shared between the Scala Spark Connect client and Spark and avoids pulling all of the Spark transitive dependencies. <a href="https://issues.apache.org/jira/browse/SPARK-44273">SPARK-44273</a></li>
<li>Introducing the Scala client for Spark Connect <a href="https://issues.apache.org/jira/browse/SPARK-42554">SPARK-42554</a></li>
<li>Pandas API support for the Python Spark Connect Client <a href="https://issues.apache.org/jira/browse/SPARK-42497">SPARK-42497</a></li>
<li>PyTorch-based distributed ML Support for Spark Connect <a href="https://issues.apache.org/jira/browse/SPARK-42471">SPARK-42471</a></li>
<li>Structured Streaming support for Spark Connect in Python and Scala <a href="https://issues.apache.org/jira/browse/SPARK-42938">SPARK-42938</a></li>
<li>Initial version of the Go client <a href="https://issues.apache.org/jira/browse/SPARK-43351">SPARK-43351</a></li>
<li>Lots of compatibility improvements between Spark native and the Spark Connect clients across Python and Scala</li>
<li>Improved debuggability and request handling for client applications (asynchronous processing, retries, long-lived queries)</li>
</ul>
<h3 id="spark-sql">Spark SQL</h3>
<h4 id="features">Features</h4>
<ul>
<li>Add metadata column file block start and length <a href="https://issues.apache.org/jira/browse/SPARK-42423">SPARK-42423</a></li>
<li>Support positional parameters in Scala/Java sql() <a href="https://issues.apache.org/jira/browse/SPARK-44066">SPARK-44066</a></li>
<li>Add named parameter support in parser for function calls <a href="https://issues.apache.org/jira/browse/SPARK-43922">SPARK-43922</a></li>
<li>Support SELECT DEFAULT with ORDER BY, LIMIT, OFFSET for INSERT source relation <a href="https://issues.apache.org/jira/browse/SPARK-43071">SPARK-43071</a></li>
<li>Add SQL grammar for PARTITION BY and ORDER BY clause after TABLE arguments for TVF calls <a href="https://issues.apache.org/jira/browse/SPARK-44503">SPARK-44503</a></li>
<li>Include column default values in DESCRIBE and SHOW CREATE TABLE output <a href="https://issues.apache.org/jira/browse/SPARK-42123">SPARK-42123</a></li>
<li>Add optional pattern for Catalog.listCatalogs <a href="https://issues.apache.org/jira/browse/SPARK-43792">SPARK-43792</a></li>
<li>Add optional pattern for Catalog.listDatabases <a href="https://issues.apache.org/jira/browse/SPARK-43881">SPARK-43881</a></li>
<li>Callback when ready for execution <a href="https://issues.apache.org/jira/browse/SPARK-44145">SPARK-44145</a></li>
<li>Support Insert By Name statement <a href="https://issues.apache.org/jira/browse/SPARK-42750">SPARK-42750</a></li>
<li>Add call_function for Scala API <a href="https://issues.apache.org/jira/browse/SPARK-44131">SPARK-44131</a></li>
<li>Stable derived column aliases <a href="https://issues.apache.org/jira/browse/SPARK-40822">SPARK-40822</a></li>
<li>Support general constant expressions as CREATE/REPLACE TABLE OPTIONS values <a href="https://issues.apache.org/jira/browse/SPARK-43529">SPARK-43529</a></li>
<li>Support subqueries with correlation through INTERSECT/EXCEPT <a href="https://issues.apache.org/jira/browse/SPARK-36124">SPARK-36124</a></li>
<li>IDENTIFIER clause <a href="https://issues.apache.org/jira/browse/SPARK-43205">SPARK-43205</a></li>
<li>ANSI MODE: Conv should return an error if the internal conversion overflows <a href="https://issues.apache.org/jira/browse/SPARK-42427">SPARK-42427</a></li>
</ul>
<h4 id="functions">Functions</h4>
<ul>
<li>Add support for Datasketches HllSketch <a href="https://issues.apache.org/jira/browse/SPARK-16484">SPARK-16484</a></li>
<li>Support the CBC mode by aes_encrypt()/aes_decrypt() <a href="https://issues.apache.org/jira/browse/SPARK-43038">SPARK-43038</a></li>
<li>Support TABLE argument parser rule for TableValuedFunction <a href="https://issues.apache.org/jira/browse/SPARK-44200">SPARK-44200</a></li>
<li>Implement bitmap functions <a href="https://issues.apache.org/jira/browse/SPARK-44154">SPARK-44154</a></li>
<li>Add the try_aes_decrypt() function <a href="https://issues.apache.org/jira/browse/SPARK-42701">SPARK-42701</a></li>
<li>array_insert should fail with 0 index <a href="https://issues.apache.org/jira/browse/SPARK-43011">SPARK-43011</a></li>
<li>Add to_varchar alias for to_char <a href="https://issues.apache.org/jira/browse/SPARK-43815">SPARK-43815</a></li>
<li>High-order function: array_compact implementation <a href="https://issues.apache.org/jira/browse/SPARK-41235">SPARK-41235</a></li>
<li>Add analyzer support of named arguments for built-in functions <a href="https://issues.apache.org/jira/browse/SPARK-44059">SPARK-44059</a></li>
<li>Add NULLs for INSERTs with user-specified lists of fewer columns than the target table <a href="https://issues.apache.org/jira/browse/SPARK-42521">SPARK-42521</a></li>
<li>Adds support for aes_encrypt IVs and AAD <a href="https://issues.apache.org/jira/browse/SPARK-43290">SPARK-43290</a></li>
<li>DECODE function returns wrong results when passed NULL <a href="https://issues.apache.org/jira/browse/SPARK-41668">SPARK-41668</a></li>
<li>Support udf &#8216;luhn_check&#8217; <a href="https://issues.apache.org/jira/browse/SPARK-42191">SPARK-42191</a></li>
<li>Support implicit lateral column alias resolution on Aggregate <a href="https://issues.apache.org/jira/browse/SPARK-41631">SPARK-41631</a></li>
<li>Support implicit lateral column alias in queries with Window <a href="https://issues.apache.org/jira/browse/SPARK-42217">SPARK-42217</a></li>
<li>Add 3-args function aliases DATE_ADD and DATE_DIFF <a href="https://issues.apache.org/jira/browse/SPARK-43492">SPARK-43492</a></li>
</ul>
<h4 id="data-sources">Data Sources</h4>
<ul>
<li>Char/Varchar Support for JDBC Catalog <a href="https://issues.apache.org/jira/browse/SPARK-42904">SPARK-42904</a></li>
<li>Support Get SQL Keywords Dynamically Thru JDBC API and TVF <a href="https://issues.apache.org/jira/browse/SPARK-43119">SPARK-43119</a></li>
<li>DataSource V2: Handle MERGE commands for delta-based sources <a href="https://issues.apache.org/jira/browse/SPARK-43885">SPARK-43885</a></li>
<li>DataSource V2: Handle MERGE commands for group-based sources <a href="https://issues.apache.org/jira/browse/SPARK-43963">SPARK-43963</a></li>
<li>DataSource V2: Handle UPDATE commands for group-based sources <a href="https://issues.apache.org/jira/browse/SPARK-43975">SPARK-43975</a></li>
<li>DataSource V2: Allow representing updates as deletes and inserts <a href="https://issues.apache.org/jira/browse/SPARK-43775">SPARK-43775</a></li>
<li>Allow jdbc dialects to override the query used to create a table <a href="https://issues.apache.org/jira/browse/SPARK-41516">SPARK-41516</a></li>
<li>SPJ: Support partially clustered distribution <a href="https://issues.apache.org/jira/browse/SPARK-42038">SPARK-42038</a></li>
<li>DSv2 allows CTAS/RTAS to reserve schema nullability <a href="https://issues.apache.org/jira/browse/SPARK-43390">SPARK-43390</a></li>
<li>Add spark.sql.files.maxPartitionNum <a href="https://issues.apache.org/jira/browse/SPARK-44021">SPARK-44021</a></li>
<li>Handle UPDATE commands for delta-based sources <a href="https://issues.apache.org/jira/browse/SPARK-43324">SPARK-43324</a></li>
<li>Allow V2 writes to indicate advisory shuffle partition size <a href="https://issues.apache.org/jira/browse/SPARK-42779">SPARK-42779</a></li>
<li>Support lz4raw compression codec for Parquet <a href="https://issues.apache.org/jira/browse/SPARK-43273">SPARK-43273</a></li>
<li>Avro: writing complex unions <a href="https://issues.apache.org/jira/browse/SPARK-25050">SPARK-25050</a></li>
<li>Speed up Timestamp type inference with user-provided format in JSON/CSV data source <a href="https://issues.apache.org/jira/browse/SPARK-39280">SPARK-39280</a></li>
<li>Avro to Support custom decimal type backed by Long <a href="https://issues.apache.org/jira/browse/SPARK-43901">SPARK-43901</a></li>
<li>Avoid shuffle in Storage-Partitioned Join when partition keys mismatch, but join expressions are compatible <a href="https://issues.apache.org/jira/browse/SPARK-41413">SPARK-41413</a></li>
<li>Change binary to unsupported dataType in CSV format <a href="https://issues.apache.org/jira/browse/SPARK-42237">SPARK-42237</a></li>
<li>Allow Avro to convert union type to SQL with field name stable with type <a href="https://issues.apache.org/jira/browse/SPARK-43333">SPARK-43333</a></li>
<li>Speed up Timestamp type inference with legacy format in JSON/CSV data source <a href="https://issues.apache.org/jira/browse/SPARK-39281">SPARK-39281</a></li>
</ul>
<h4 id="query-optimization">Query Optimization</h4>
<ul>
<li>Subexpression elimination support shortcut expression <a href="https://issues.apache.org/jira/browse/SPARK-42815">SPARK-42815</a></li>
<li>Improve join stats estimation if one side can keep uniqueness <a href="https://issues.apache.org/jira/browse/SPARK-39851">SPARK-39851</a></li>
<li>Introduce the group limit of Window for rank-based filter to optimize top-k computation <a href="https://issues.apache.org/jira/browse/SPARK-37099">SPARK-37099</a></li>
<li>Fix behavior of null IN (empty list) in optimization rules <a href="https://issues.apache.org/jira/browse/SPARK-44431">SPARK-44431</a></li>
<li>Infer and push down window limit through window if partitionSpec is empty <a href="https://issues.apache.org/jira/browse/SPARK-41171">SPARK-41171</a></li>
<li>Remove the outer join if they are all distinct aggregate functions <a href="https://issues.apache.org/jira/browse/SPARK-42583">SPARK-42583</a></li>
<li>Collapse two adjacent windows with the same partition/order in subquery <a href="https://issues.apache.org/jira/browse/SPARK-42525">SPARK-42525</a></li>
<li>Push down limit through Python UDFs <a href="https://issues.apache.org/jira/browse/SPARK-42115">SPARK-42115</a></li>
<li>Optimize the order of filtering predicates <a href="https://issues.apache.org/jira/browse/SPARK-40045">SPARK-40045</a></li>
</ul>
<h4 id="code-generation-and-query-execution">Code Generation and Query Execution</h4>
<ul>
<li>Runtime filter should support multi-level shuffle join side as filter creation side <a href="https://issues.apache.org/jira/browse/SPARK-41674">SPARK-41674</a></li>
<li>Codegen Support for HiveSimpleUDF <a href="https://issues.apache.org/jira/browse/SPARK-42052">SPARK-42052</a></li>
<li>Codegen Support for HiveGenericUDF <a href="https://issues.apache.org/jira/browse/SPARK-42051">SPARK-42051</a></li>
<li>Codegen Support for build side outer shuffled hash join <a href="https://issues.apache.org/jira/browse/SPARK-44060">SPARK-44060</a></li>
<li>Implement code generation for to_csv function (StructsToCsv) <a href="https://issues.apache.org/jira/browse/SPARK-42169">SPARK-42169</a></li>
<li>Make AQE support InMemoryTableScanExec <a href="https://issues.apache.org/jira/browse/SPARK-42101">SPARK-42101</a></li>
<li>Support left outer join build left or right outer join build right in shuffled hash join <a href="https://issues.apache.org/jira/browse/SPARK-36612">SPARK-36612</a></li>
<li>Respect RequiresDistributionAndOrdering in CTAS/RTAS <a href="https://issues.apache.org/jira/browse/SPARK-43088">SPARK-43088</a></li>
<li>Coalesce buckets in join applied on broadcast join stream side <a href="https://issues.apache.org/jira/browse/SPARK-43107">SPARK-43107</a></li>
<li>Set nullable correctly on coalesced join key in full outer USING join <a href="https://issues.apache.org/jira/browse/SPARK-44251">SPARK-44251</a></li>
<li>Fix IN subquery ListQuery nullability <a href="https://issues.apache.org/jira/browse/SPARK-43413">SPARK-43413</a></li>
</ul>
<h4 id="other-notable-changes">Other Notable Changes</h4>
<ul>
<li>Set nullable correctly for keys in USING joins <a href="https://issues.apache.org/jira/browse/SPARK-43718">SPARK-43718</a></li>
<li>Fix COUNT(*) is null bug in correlated scalar subquery <a href="https://issues.apache.org/jira/browse/SPARK-43156">SPARK-43156</a></li>
<li>Dataframe.joinWith outer-join should return a null value for unmatched row <a href="https://issues.apache.org/jira/browse/SPARK-37829">SPARK-37829</a></li>
<li>Automatically rename conflicting metadata columns <a href="https://issues.apache.org/jira/browse/SPARK-42683">SPARK-42683</a></li>
<li>Document the Spark SQL error classes in user-facing documentation <a href="https://issues.apache.org/jira/browse/SPARK-42706">SPARK-42706</a></li>
</ul>
<h3 id="pyspark">PySpark</h3>
<h4 id="features-1">Features</h4>
<ul>
<li>Support positional parameters in Python sql() <a href="https://issues.apache.org/jira/browse/SPARK-44140">SPARK-44140</a></li>
<li>Support parameterized SQL by sql() <a href="https://issues.apache.org/jira/browse/SPARK-41666">SPARK-41666</a></li>
<li>Support Python user-defined table functions <a href="https://issues.apache.org/jira/browse/SPARK-43797">SPARK-43797</a></li>
<li>Support to set Python executable for UDF and pandas function APIs in workers during runtime <a href="https://issues.apache.org/jira/browse/SPARK-43574">SPARK-43574</a></li>
<li>Add DataFrame.offset to PySpark <a href="https://issues.apache.org/jira/browse/SPARK-43213">SPARK-43213</a></li>
<li>Implement <code>__dir__()</code> in pyspark.sql.dataframe.DataFrame to include columns <a href="https://issues.apache.org/jira/browse/SPARK-43270">SPARK-43270</a></li>
<li>Add option to use large variable width vectors for arrow UDF operations <a href="https://issues.apache.org/jira/browse/SPARK-39979">SPARK-39979</a></li>
<li>Make mapInPandas / mapInArrow support barrier mode execution <a href="https://issues.apache.org/jira/browse/SPARK-42896">SPARK-42896</a></li>
<li>Add JobTag APIs to PySpark SparkContext <a href="https://issues.apache.org/jira/browse/SPARK-44194">SPARK-44194</a></li>
<li>Support for Python UDTF to analyze in Python <a href="https://issues.apache.org/jira/browse/SPARK-44380">SPARK-44380</a></li>
<li>Expose TimestampNTZType in pyspark.sql.types <a href="https://issues.apache.org/jira/browse/SPARK-43759">SPARK-43759</a></li>
<li>Support nested timestamp type <a href="https://issues.apache.org/jira/browse/SPARK-43545">SPARK-43545</a></li>
<li>Support UserDefinedType in createDataFrame from pandas DataFrame and toPandas <a href="https://issues.apache.org/jira/browse/SPARK-43817">SPARK-43817</a> <a href="https://issues.apache.org/jira/browse/SPARK-43702">SPARK-43702</a></li>
<li>Add descriptor binary option to Pyspark Protobuf API <a href="https://issues.apache.org/jira/browse/SPARK-43799">SPARK-43799</a></li>
<li>Accept generics tuple as typing hints of Pandas UDF <a href="https://issues.apache.org/jira/browse/SPARK-43886">SPARK-43886</a></li>
<li>Add array_prepend function <a href="https://issues.apache.org/jira/browse/SPARK-41233">SPARK-41233</a></li>
<li>Add assertDataFrameEqual util function <a href="https://issues.apache.org/jira/browse/SPARK-44061">SPARK-44061</a></li>
<li>Support arrow-optimized Python UDTFs <a href="https://issues.apache.org/jira/browse/SPARK-43964">SPARK-43964</a></li>
<li>Allow custom precision for fp approx equality <a href="https://issues.apache.org/jira/browse/SPARK-44217">SPARK-44217</a></li>
<li>Make assertSchemaEqual API public <a href="https://issues.apache.org/jira/browse/SPARK-44216">SPARK-44216</a></li>
<li>Support fill_value for ps.Series <a href="https://issues.apache.org/jira/browse/SPARK-42094">SPARK-42094</a></li>
<li>Support struct type in createDataFrame from pandas DataFrame <a href="https://issues.apache.org/jira/browse/SPARK-43473">SPARK-43473</a></li>
</ul>
<h4 id="other-notable-changes-1">Other Notable Changes</h4>
<ul>
<li>Add autocomplete support for <code>df[|]</code> in pyspark.sql.dataframe.DataFrame <a href="https://issues.apache.org/jira/browse/SPARK-43892">SPARK-43892</a></li>
<li>Deprecate &amp; remove the APIs that will be removed in pandas 2.0 <a href="https://issues.apache.org/jira/browse/SPARK-42593">SPARK-42593</a></li>
<li>Make Python the first tab for code examples - Spark SQL, DataFrames and Datasets Guide <a href="https://issues.apache.org/jira/browse/SPARK-42493">SPARK-42493</a></li>
<li>Updating remaining Spark documentation code examples to show Python by default <a href="https://issues.apache.org/jira/browse/SPARK-42642">SPARK-42642</a></li>
<li>Use deduplicated field names when creating Arrow RecordBatch <a href="https://issues.apache.org/jira/browse/SPARK-41971">SPARK-41971</a></li>
<li>Support duplicated field names in createDataFrame with pandas DataFrame <a href="https://issues.apache.org/jira/browse/SPARK-43528">SPARK-43528</a></li>
<li>Allow columns parameter when creating DataFrame with Series <a href="https://issues.apache.org/jira/browse/SPARK-42194">SPARK-42194</a></li>
</ul>
<h3 id="core">Core</h3>
<ul>
<li>Schedule mergeFinalize when push merge shuffleMapStage retry but no running tasks <a href="https://issues.apache.org/jira/browse/SPARK-40082">SPARK-40082</a></li>
<li>Introduce PartitionEvaluator for SQL operator execution <a href="https://issues.apache.org/jira/browse/SPARK-43061">SPARK-43061</a></li>
<li>Allow ShuffleDriverComponent to declare if shuffle data is reliably stored <a href="https://issues.apache.org/jira/browse/SPARK-42689">SPARK-42689</a></li>
<li>Add max attempts limitation for stages to avoid potential infinite retry <a href="https://issues.apache.org/jira/browse/SPARK-42577">SPARK-42577</a></li>
<li>Support log level configuration with static Spark conf <a href="https://issues.apache.org/jira/browse/SPARK-43782">SPARK-43782</a></li>
<li>Optimize PercentileHeap <a href="https://issues.apache.org/jira/browse/SPARK-42528">SPARK-42528</a></li>
<li>Add reason argument to TaskScheduler.cancelTasks <a href="https://issues.apache.org/jira/browse/SPARK-42602">SPARK-42602</a></li>
<li>Avoid unnecessary task rerun on decommissioned executor lost if shuffle data migrated <a href="https://issues.apache.org/jira/browse/SPARK-41469">SPARK-41469</a></li>
<li>Fixing accumulator undercount in the case of the retry task with rdd cache <a href="https://issues.apache.org/jira/browse/SPARK-41497">SPARK-41497</a></li>
<li>Use RocksDB for spark.history.store.hybridStore.diskBackend by default <a href="https://issues.apache.org/jira/browse/SPARK-42277">SPARK-42277</a></li>
<li>Support spark.kubernetes.setSubmitTimeInDriver <a href="https://issues.apache.org/jira/browse/SPARK-43014">SPARK-43014</a></li>
<li>NonFateSharingCache wrapper for Guava Cache <a href="https://issues.apache.org/jira/browse/SPARK-43300">SPARK-43300</a></li>
<li>Improve the performance of MapOutputTracker.updateMapOutput <a href="https://issues.apache.org/jira/browse/SPARK-43043">SPARK-43043</a></li>
<li>Allowing apps to control whether their metadata gets saved in the db by the External Shuffle Service <a href="https://issues.apache.org/jira/browse/SPARK-43179">SPARK-43179</a></li>
<li>Port executor failure tracker from Spark on YARN to K8s <a href="https://issues.apache.org/jira/browse/SPARK-41210">SPARK-41210</a></li>
<li>Parameterize the max number of attempts for driver props fetcher in KubernetesExecutorBackend <a href="https://issues.apache.org/jira/browse/SPARK-42764">SPARK-42764</a></li>
<li>Add SPARK_DRIVER_POD_IP env variable to executor pods <a href="https://issues.apache.org/jira/browse/SPARK-42769">SPARK-42769</a></li>
<li>Mounts the hadoop config map on the executor pod <a href="https://issues.apache.org/jira/browse/SPARK-43504">SPARK-43504</a></li>
</ul>
<h3 id="structured-streaming">Structured Streaming</h3>
<ul>
<li>Add support for tracking pinned blocks memory usage for RocksDB state store <a href="https://issues.apache.org/jira/browse/SPARK-43120">SPARK-43120</a></li>
<li>Add RocksDB state store provider memory management enhancements <a href="https://issues.apache.org/jira/browse/SPARK-43311">SPARK-43311</a></li>
<li>Introduce dropDuplicatesWithinWatermark <a href="https://issues.apache.org/jira/browse/SPARK-42931">SPARK-42931</a></li>
<li>Introduce a new callback onQueryIdle() to StreamingQueryListener <a href="https://issues.apache.org/jira/browse/SPARK-43183">SPARK-43183</a></li>
<li>Add option to skip commit coordinator as part of StreamingWrite API for DSv2 sources/sinks <a href="https://issues.apache.org/jira/browse/SPARK-42968">SPARK-42968</a></li>
<li>Introduce a new callback &#8220;onQueryIdle&#8221; to StreamingQueryListener <a href="https://issues.apache.org/jira/browse/SPARK-43183">SPARK-43183</a></li>
<li>Implement Changelog based Checkpointing for RocksDB State Store Provider <a href="https://issues.apache.org/jira/browse/SPARK-43421">SPARK-43421</a></li>
<li>Add support for WRITE_FLUSH_BYTES for RocksDB used in streaming stateful operators <a href="https://issues.apache.org/jira/browse/SPARK-42792">SPARK-42792</a></li>
<li>Add support for setting max_write_buffer_number and write_buffer_size for RocksDB used in streaming <a href="https://issues.apache.org/jira/browse/SPARK-42819">SPARK-42819</a></li>
<li>RocksDB StateStore lock acquisition should happen after getting input iterator from inputRDD <a href="https://issues.apache.org/jira/browse/SPARK-42566">SPARK-42566</a></li>
<li>Introduce watermark propagation among operators <a href="https://issues.apache.org/jira/browse/SPARK-42376">SPARK-42376</a></li>
<li>Cleanup orphan sst and log files in RocksDB checkpoint directory <a href="https://issues.apache.org/jira/browse/SPARK-42353">SPARK-42353</a></li>
<li>Expand QueryTerminatedEvent to contain error class if it exists in exception <a href="https://issues.apache.org/jira/browse/SPARK-43482">SPARK-43482</a></li>
</ul>
<h3 id="ml">ML</h3>
<ul>
<li>Support Distributed Training of Functions Using Deepspeed <a href="https://issues.apache.org/jira/browse/SPARK-44264">SPARK-44264</a></li>
<li>Base interfaces of sparkML for spark3.5: estimator/transformer/model/evaluator <a href="https://issues.apache.org/jira/browse/SPARK-43516">SPARK-43516</a></li>
<li>Make MLv2 (ML on spark connect) support pandas &gt;= 2.0 <a href="https://issues.apache.org/jira/browse/SPARK-43783">SPARK-43783</a></li>
<li>Update MLv2 Transformer interfaces <a href="https://issues.apache.org/jira/browse/SPARK-43516">SPARK-43516</a></li>
<li>New pyspark ML logistic regression estimator implemented on top of distributor <a href="https://issues.apache.org/jira/browse/SPARK-43097">SPARK-43097</a></li>
<li>Add Classifier.getNumClasses back <a href="https://issues.apache.org/jira/browse/SPARK-42526">SPARK-42526</a></li>
<li>Write a Deepspeed Distributed Learning Class DeepspeedTorchDistributor <a href="https://issues.apache.org/jira/browse/SPARK-44264">SPARK-44264</a></li>
<li>Basic saving / loading implementation for ML on spark connect <a href="https://issues.apache.org/jira/browse/SPARK-43981">SPARK-43981</a></li>
<li>Improve logistic regression model saving <a href="https://issues.apache.org/jira/browse/SPARK-43097">SPARK-43097</a></li>
<li>Implement pipeline estimator for ML on spark connect <a href="https://issues.apache.org/jira/browse/SPARK-43982">SPARK-43982</a></li>
<li>Implement cross validator estimator <a href="https://issues.apache.org/jira/browse/SPARK-43983">SPARK-43983</a></li>
<li>Implement classification evaluator <a href="https://issues.apache.org/jira/browse/SPARK-44250">SPARK-44250</a></li>
<li>Make PyTorch Distributor compatible with Spark Connect <a href="https://issues.apache.org/jira/browse/SPARK-42993">SPARK-42993</a></li>
</ul>
<h3 id="ui">UI</h3>
<ul>
<li>Add a Spark UI page for Spark Connect <a href="https://issues.apache.org/jira/browse/SPARK-44394">SPARK-44394</a></li>
<li>Support Heap Histogram column in Executors tab <a href="https://issues.apache.org/jira/browse/SPARK-44153">SPARK-44153</a></li>
<li>Show error message on UI for each failed query <a href="https://issues.apache.org/jira/browse/SPARK-44367">SPARK-44367</a></li>
<li>Display Add/Remove Time of Executors on Executors Tab <a href="https://issues.apache.org/jira/browse/SPARK-44309">SPARK-44309</a></li>
</ul>
<h3 id="build-and-others">Build and Others</h3>
<ul>
<li>Remove Python 3.7 Support <a href="https://issues.apache.org/jira/browse/SPARK-43347">SPARK-43347</a></li>
<li>Increase PyArrow minimum version to 4.0.0 <a href="https://issues.apache.org/jira/browse/SPARK-44183">SPARK-44183</a></li>
<li>Support R 4.3.1 <a href="https://issues.apache.org/jira/browse/SPARK-43447">SPARK-43447</a> <a href="https://issues.apache.org/jira/browse/SPARK-44192">SPARK-44192</a></li>
<li>Add JobTag APIs to SparkR SparkContext <a href="https://issues.apache.org/jira/browse/SPARK-44195">SPARK-44195</a></li>
<li>Add math functions to SparkR <a href="https://issues.apache.org/jira/browse/SPARK-44349">SPARK-44349</a></li>
<li>Upgrade Parquet to 1.13.1 <a href="https://issues.apache.org/jira/browse/SPARK-43519">SPARK-43519</a></li>
<li>Upgrade kubernetes-client to 6.7.2 <a href="https://issues.apache.org/jira/browse/SPARK-42362">SPARK-42362</a> <a href="https://issues.apache.org/jira/browse/SPARK-42761">SPARK-42761</a> <a href="https://issues.apache.org/jira/browse/SPARK-42885">SPARK-42885</a> <a href="https://issues.apache.org/jira/browse/SPARK-43355">SPARK-43355</a> <a href="https://issues.apache.org/jira/browse/SPARK-43581">SPARK-43581</a> <a href="https://issues.apache.org/jira/browse/SPARK-43950">SPARK-43950</a> <a href="https://issues.apache.org/jira/browse/SPARK-43990">SPARK-43990</a></li>
<li>Upgrade ASM to 9.5 <a href="https://issues.apache.org/jira/browse/SPARK-43537">SPARK-43537</a> <a href="https://issues.apache.org/jira/browse/SPARK-43588">SPARK-43588</a></li>
<li>Upgrade rocksdbjni to 8.3.2 <a href="https://issues.apache.org/jira/browse/SPARK-41569">SPARK-41569</a> <a href="https://issues.apache.org/jira/browse/SPARK-42718">SPARK-42718</a> <a href="https://issues.apache.org/jira/browse/SPARK-43007">SPARK-43007</a> <a href="https://issues.apache.org/jira/browse/SPARK-43436">SPARK-43436</a> <a href="https://issues.apache.org/jira/browse/SPARK-44256">SPARK-44256</a></li>
<li>Upgrade Netty to 4.1.93 <a href="https://issues.apache.org/jira/browse/SPARK-42218">SPARK-42218</a> <a href="https://issues.apache.org/jira/browse/SPARK-42417">SPARK-42417</a> <a href="https://issues.apache.org/jira/browse/SPARK-42487">SPARK-42487</a> <a href="https://issues.apache.org/jira/browse/SPARK-43609">SPARK-43609</a> <a href="https://issues.apache.org/jira/browse/SPARK-44128">SPARK-44128</a></li>
<li>Upgrade zstd-jni to 1.5.5-5 <a href="https://issues.apache.org/jira/browse/SPARK-42409">SPARK-42409</a> <a href="https://issues.apache.org/jira/browse/SPARK-42625">SPARK-42625</a> <a href="https://issues.apache.org/jira/browse/SPARK-43080">SPARK-43080</a> <a href="https://issues.apache.org/jira/browse/SPARK-43294">SPARK-43294</a> <a href="https://issues.apache.org/jira/browse/SPARK-43737">SPARK-43737</a> <a href="https://issues.apache.org/jira/browse/SPARK-43994">SPARK-43994</a> <a href="https://issues.apache.org/jira/browse/SPARK-44465">SPARK-44465</a></li>
<li>Upgrade dropwizard metrics to 4.2.19 <a href="https://issues.apache.org/jira/browse/SPARK-42654">SPARK-42654</a> <a href="https://issues.apache.org/jira/browse/SPARK-43738">SPARK-43738</a> <a href="https://issues.apache.org/jira/browse/SPARK-44296">SPARK-44296</a></li>
<li>Upgrade gcs-connector to 2.2.14 <a href="https://issues.apache.org/jira/browse/SPARK-42888">SPARK-42888</a> <a href="https://issues.apache.org/jira/browse/SPARK-43842">SPARK-43842</a></li>
<li>Upgrade commons-crypto to 1.2.0 <a href="https://issues.apache.org/jira/browse/SPARK-42488">SPARK-42488</a></li>
<li>Upgrade scala-parser-combinators from 2.1.1 to 2.2.0 <a href="https://issues.apache.org/jira/browse/SPARK-42489">SPARK-42489</a></li>
<li>Upgrade protobuf-java to 3.23.4 <a href="https://issues.apache.org/jira/browse/SPARK-41711">SPARK-41711</a> <a href="https://issues.apache.org/jira/browse/SPARK-42490">SPARK-42490</a> <a href="https://issues.apache.org/jira/browse/SPARK-42798">SPARK-42798</a> <a href="https://issues.apache.org/jira/browse/SPARK-43899">SPARK-43899</a> <a href="https://issues.apache.org/jira/browse/SPARK-44382">SPARK-44382</a></li>
<li>Upgrade commons-codec to 1.16.0 <a href="https://issues.apache.org/jira/browse/SPARK-44151">SPARK-44151</a></li>
<li>Upgrade Apache Kafka to 3.4.1 <a href="https://issues.apache.org/jira/browse/SPARK-42396">SPARK-42396</a> <a href="https://issues.apache.org/jira/browse/SPARK-44181">SPARK-44181</a></li>
<li>Upgrade RoaringBitmap to 0.9.45 <a href="https://issues.apache.org/jira/browse/SPARK-42385">SPARK-42385</a> <a href="https://issues.apache.org/jira/browse/SPARK-43495">SPARK-43495</a> <a href="https://issues.apache.org/jira/browse/SPARK-44221">SPARK-44221</a></li>
<li>Update ORC to 1.9.0 <a href="https://issues.apache.org/jira/browse/SPARK-42820">SPARK-42820</a> <a href="https://issues.apache.org/jira/browse/SPARK-44053">SPARK-44053</a> <a href="https://issues.apache.org/jira/browse/SPARK-44231">SPARK-44231</a></li>
<li>Upgrade Avro to 1.11.2 <a href="https://issues.apache.org/jira/browse/SPARK-44277">SPARK-44277</a></li>
<li>Upgrade commons-compress to 1.23.0 <a href="https://issues.apache.org/jira/browse/SPARK-43102">SPARK-43102</a></li>
<li>Upgrade joda-time from 2.12.2 to 2.12.5 <a href="https://issues.apache.org/jira/browse/SPARK-43008">SPARK-43008</a></li>
<li>Upgrade snappy-java to 1.1.10.3 <a href="https://issues.apache.org/jira/browse/SPARK-42242">SPARK-42242</a> <a href="https://issues.apache.org/jira/browse/SPARK-43758">SPARK-43758</a> <a href="https://issues.apache.org/jira/browse/SPARK-44070">SPARK-44070</a> <a href="https://issues.apache.org/jira/browse/SPARK-44415">SPARK-44415</a> <a href="https://issues.apache.org/jira/browse/SPARK-44513">SPARK-44513</a></li>
<li>Upgrade mysql-connector-java from 8.0.31 to 8.0.32 <a href="https://issues.apache.org/jira/browse/SPARK-42717">SPARK-42717</a></li>
<li>Upgrade Apache Arrow to 12.0.1 <a href="https://issues.apache.org/jira/browse/SPARK-42161">SPARK-42161</a> <a href="https://issues.apache.org/jira/browse/SPARK-43446">SPARK-43446</a> <a href="https://issues.apache.org/jira/browse/SPARK-44094">SPARK-44094</a></li>
<li>Upgrade commons-io to 2.12.0 <a href="https://issues.apache.org/jira/browse/SPARK-43739">SPARK-43739</a></li>
<li>Upgrade Apache commons-io to 2.13.0 <a href="https://issues.apache.org/jira/browse/SPARK-43739">SPARK-43739</a> <a href="https://issues.apache.org/jira/browse/SPARK-44028">SPARK-44028</a></li>
<li>Upgrade FasterXML jackson to 2.15.2 <a href="https://issues.apache.org/jira/browse/SPARK-42354">SPARK-42354</a> <a href="https://issues.apache.org/jira/browse/SPARK-43774">SPARK-43774</a> <a href="https://issues.apache.org/jira/browse/SPARK-43904">SPARK-43904</a></li>
<li>Upgrade log4j2 to 2.20.0 <a href="https://issues.apache.org/jira/browse/SPARK-42536">SPARK-42536</a></li>
<li>Upgrade slf4j to 2.0.7 <a href="https://issues.apache.org/jira/browse/SPARK-42871">SPARK-42871</a></li>
<li>Upgrade numpy and pandas in the release Dockerfile <a href="https://issues.apache.org/jira/browse/SPARK-42524">SPARK-42524</a></li>
<li>Upgrade Jersey to 2.40 <a href="https://issues.apache.org/jira/browse/SPARK-44316">SPARK-44316</a></li>
<li>Upgrade H2 from 2.1.214 to 2.2.220 <a href="https://issues.apache.org/jira/browse/SPARK-44393">SPARK-44393</a></li>
<li>Upgrade optionator to ^0.9.3 <a href="https://issues.apache.org/jira/browse/SPARK-44279">SPARK-44279</a></li>
<li>Upgrade bcprov-jdk15on and bcpkix-jdk15on to 1.70 <a href="https://issues.apache.org/jira/browse/SPARK-44441">SPARK-44441</a></li>
<li>Upgrade mlflow to 2.3.1 <a href="https://issues.apache.org/jira/browse/SPARK-43344">SPARK-43344</a></li>
<li>Upgrade Tink to 1.9.0 <a href="https://issues.apache.org/jira/browse/SPARK-42780">SPARK-42780</a></li>
<li>Upgrade silencer to 1.7.13 <a href="https://issues.apache.org/jira/browse/SPARK-41787">SPARK-41787</a> <a href="https://issues.apache.org/jira/browse/SPARK-44031">SPARK-44031</a></li>
<li>Upgrade Ammonite to 2.5.9 <a href="https://issues.apache.org/jira/browse/SPARK-44041">SPARK-44041</a></li>
<li>Upgrade Scala to 2.12.18 <a href="https://issues.apache.org/jira/browse/SPARK-43832">SPARK-43832</a></li>
<li>Upgrade org.scalatestplus:selenium-4-4 to org.scalatestplus:selenium-4-7 <a href="https://issues.apache.org/jira/browse/SPARK-41587">SPARK-41587</a></li>
<li>Upgrade minimatch to 3.1.2 <a href="https://issues.apache.org/jira/browse/SPARK-41634">SPARK-41634</a></li>
<li>Upgrade sbt-assembly from 2.0.0 to 2.1.0 <a href="https://issues.apache.org/jira/browse/SPARK-41704">SPARK-41704</a></li>
<li>Update maven-checkstyle-plugin from 3.1.2 to 3.2.0 <a href="https://issues.apache.org/jira/browse/SPARK-41714">SPARK-41714</a></li>
<li>Upgrade dev.ludovic.netlib to 3.0.3 <a href="https://issues.apache.org/jira/browse/SPARK-41750">SPARK-41750</a></li>
<li>Upgrade hive-storage-api to 2.8.1 <a href="https://issues.apache.org/jira/browse/SPARK-41798">SPARK-41798</a></li>
<li>Upgrade Apache httpcore to 4.4.16 <a href="https://issues.apache.org/jira/browse/SPARK-41802">SPARK-41802</a></li>
<li>Upgrade jetty to 9.4.52.v20230823 <a href="https://issues.apache.org/jira/browse/SPARK-45052">SPARK-45052</a></li>
<li>Upgrade compress-lzf to 1.1.2 <a href="https://issues.apache.org/jira/browse/SPARK-42274">SPARK-42274</a></li>
</ul>
<h3 id="removals-behavior-changes-and-deprecations">Removals, Behavior Changes and Deprecations</h3>
<h4 id="upcoming-removal">Upcoming Removal</h4>
<p>The following features will be removed in the next Spark major release:</p>
<ul>
<li>Support for Java 8 and Java 11; the minimum supported Java version will be Java 17</li>
<li>Support for Scala 2.12; the minimum supported Scala version will be 2.13</li>
</ul>
<h4 id="migration-guides">Migration Guides</h4>
<ul>
<li><a href="https://spark.apache.org/docs/3.5.0/core-migration-guide.html">Spark Core</a></li>
<li><a href="https://spark.apache.org/docs/3.5.0/sql-migration-guide.html">SQL, Datasets, and DataFrame</a></li>
<li><a href="https://spark.apache.org/docs/3.5.0/ss-migration-guide.html">Structured Streaming</a></li>
<li><a href="https://spark.apache.org/docs/3.5.0/ml-migration-guide.html">MLlib (Machine Learning)</a></li>
<li><a href="https://spark.apache.org/docs/3.5.0/api/python/migration_guide/pyspark_upgrade.html">PySpark (Python on Spark)</a></li>
<li><a href="https://spark.apache.org/docs/3.5.0/sparkr-migration-guide.html">SparkR (R on Spark)</a></li>
</ul>
<h3 id="credits">Credits</h3>
<p>Last but not least, this release would not have been possible without the following contributors:
Adam Binford, Ahmed Hussein, Alex Jing, Alice Sayutina, Alkis Evlogimenos, Allan Folting, Allison Portis, Allison Wang, Amanda Liu, Andrey Gubichev, Andy Grove, Anish Shrigondekar, Anton Okolnychyi, Bartosz Konieczny, Beishao Cao, Bjørn Jørgensen, Bo Zhang, Bruce Robbins, Chandni Singh, Chao Sun, Chaoqin Li, Cheng Pan, Christopher Cooper, Christopher Watford, ConeyLiu, Daniel Sparing, Daniel Tenedorio, David Lewis, Dongjoon Hyun, Emil Ejbyfeldt, Enrico Minack, Eric Marnadi, Eric Ogren, Erik Krogen, Eugene Gusev, Fei Wang, Fokko Driesprong, Frank Yin, Fu Chen, Gene Pang, Gengliang Wang, Gera Shegalov, Giambattista Bloisi, Guilhem Vuillier, Gurpreet Singh, Haejoon Lee, Harsh Motwani, Henry Mai, Herman Van Hovell, Hisoka-X, Holden Karau, Huanli Wang, Hyukjin Kwon, Ismaël Mejía, Ivan Sadikov, Jack Chen, James Lamb, Jason Li, Jerry Peng, Jia Fan, Jiaan Geng, JinHelin404, Joe Wang, Johan Lasperas, John Zhuge, Josh Rosen, Juliusz Sompolski, Jungtaek Lim, Kazuyuki Tanimura, Kent Yao, Khalid Mammadov, Koray Beyaz, Kris Mok, Kun Wan, Kwafoor, Lee Yang, Leibnizhu, Liang Yan, Liang-Chi Hsieh, Lingkai Kong, Luca Canali, Ludovic Henry, Manu Zhang, Martin Grund, Mathew Jacob, Max Gekk, Menelaos Karavelas, Michael Chen, Michael Zhang, Mingkang Li, Mridul Muralidharan, NarekDW, Navin Viswanath, Niranjan Jayakar, Ole Sasse, Parth Upadhyay, Peter Toth, Qi Tan, Raghu Angadi, Richard Yu, Ritika Maheshwari, Rob Reeves, Robert Dillitz, Rui Wang, Ruifeng Zheng, Runyao Chen, Ryan Berti, Ryan Johnson, Serge Rielau, Serge Smertin, Sergii Druzkin, Shaoyun Chen, Shreyesh Shaju Arangath, Shrikant Prasad, Shu Wang, Siying Dong, Steve Vaughan Jr, Steve Weis, Steven Aerts, Steven Chen, Stove-hust, Szehon Ho, Takuya UESHIN, Tengfei Huang, Terry Kim, Thejdeep Gudivada, Thomas Graves, Tim Nieradzik, Venki Korukanti, Vihang Karajgaonkar, Vinod KC, Vitalii Li, Vsevolod Stepanov, Wanqiang Ji, Warren Zhu, Wei Liu, Weichen Xu, Wen Yuen Pang, Wenchen Fan, Willi Raschkowski, William Hyun, Xiduo You, Xieming 
LI, Xingbo Jiang, Xinrong Meng, Xinyi Yu, Yang Jie, Yi Zhu, Yihong He, Yikf, Yikun Jiang, Yiqun Zhang, Yohahaha, Yuanjian Li, Yuming Wang, Zhen Li, Ziqi Liu, advancedxy, alexanderwu-db, amousavigourabi, bjornjorgensen, bogao007, bowenliang123, caican00, chong0929, clownxc, frankliee, haoyanzhang, hdaly0, huangxiaopingRD, jdesjean, jerqi, jwang0306, khalidmammadov, kings129, kylerong-db, liangyu-1, lucaspompeun, mcdull-zhang, panbingkun, pegasas, pengzhon-db, ronandoolan2, smallzhongfeng, srielau, stijndehaes, sudoliyang, ted-jenks, tianhanhu, vicennial, wForget, wayne-kyungwonpark, wayneguow, yabola, zeruibao, zhouyifan279, zml1206, zzzzming95</p>
<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>
</div>
<div class="col-12 col-md-3">
<div class="news" style="margin-bottom: 20px;">
<h5>Latest News</h5>
<ul class="list-unstyled">
<li><a href="/news/spark-3-4-3-released.html">Spark 3.4.3 released</a>
<span class="small">(Apr 18, 2024)</span></li>
<li><a href="/news/spark-3-5-1-released.html">Spark 3.5.1 released</a>
<span class="small">(Feb 23, 2024)</span></li>
<li><a href="/news/spark-3-3-4-released.html">Spark 3.3.4 released</a>
<span class="small">(Dec 16, 2023)</span></li>
<li><a href="/news/spark-3-4-2-released.html">Spark 3.4.2 released</a>
<span class="small">(Nov 30, 2023)</span></li>
</ul>
<p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
</div>
<div style="text-align:center; margin-bottom: 20px;">
<a href="https://www.apache.org/events/current-event.html">
<img src="https://www.apache.org/events/current-event-234x60.png" alt="Current Apache Software Foundation event" style="max-width: 100%;"/>
</a>
</div>
<div class="hidden-xs hidden-sm">
<a href="/downloads.html" class="btn btn-cta btn-lg d-grid" style="margin-bottom: 30px;">
Download Spark
</a>
<p style="font-size: 16px; font-weight: 500; color: #555;">
Built-in Libraries:
</p>
<ul class="list-none">
<li><a href="/sql/">SQL and DataFrames</a></li>
<li><a href="/streaming/">Spark Streaming</a></li>
<li><a href="/mllib/">MLlib (machine learning)</a></li>
<li><a href="/graphx/">GraphX (graph)</a></li>
</ul>
<a href="/third-party-projects.html">Third-Party Projects</a>
</div>
</div>
</div>
<footer class="small">
<hr>
Apache Spark, Spark, Apache, the Apache feather logo, and the Apache Spark project logo are either registered
trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
See guidance on use of Apache Spark <a href="/trademarks.html">trademarks</a>.
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
Copyright &copy; 2018 The Apache Software Foundation, Licensed under the
<a href="https://www.apache.org/licenses/">Apache License, Version 2.0</a>.
</footer>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"
integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
crossorigin="anonymous"></script>
<script src="https://code.jquery.com/jquery.js"></script>
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>
</body>
</html>