blob: a94dfbcfe823a78d314c2fbabf75401a718badef [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>
Spark Release 3.2.3 | Apache Spark
</title>
<!-- Bootstrap 5.0.2 base styles (integrity-checked via SRI). -->
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
      integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<!-- Web fonts: open connections to the Google Fonts origins early, then request the font CSS. -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<!-- Each font in a css2 request needs its own "family=" parameter; without it the
     Courier Prime entry is not a valid family request and the font is never served. -->
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&amp;family=Courier+Prime:wght@400;700&amp;display=swap" rel="stylesheet">
<!-- Site-specific overrides; loaded after Bootstrap so they take precedence at equal specificity. -->
<link href="/css/custom.css" rel="stylesheet">
<!-- Code highlighter CSS -->
<link href="/css/pygments-default.css" rel="stylesheet">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '40']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<!-- Primary site navigation: brand logo, collapsible menu, and dropdowns driven by Bootstrap's data-bs-* attributes. -->
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4" style="background: #1D6890;">
  <!-- The logo is the only content of the home link, so its alt text is the link's accessible name. -->
  <a class="navbar-brand" href="/">
    <img src="/images/spark-logo-rev.svg" alt="Apache Spark" width="141" height="72">
  </a>
  <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarContent"
          aria-controls="navbarContent" aria-expanded="false" aria-label="Toggle navigation">
    <span class="navbar-toggler-icon"></span>
  </button>
  <div class="collapse navbar-collapse col-md-12 col-lg-auto pt-4" id="navbarContent">
    <ul class="navbar-nav me-auto">
      <!-- "active" is kept for styling only; aria-current="page" is omitted because
           this release-notes page is not the Download page. -->
      <li class="nav-item">
        <a class="nav-link active" href="/downloads.html">Download</a>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="libraries" role="button" data-bs-toggle="dropdown"
           aria-expanded="false">
          Libraries
        </a>
        <ul class="dropdown-menu" aria-labelledby="libraries">
          <li><a class="dropdown-item" href="/sql/">SQL and DataFrames</a></li>
          <li><a class="dropdown-item" href="/spark-connect/">Spark Connect</a></li>
          <li><a class="dropdown-item" href="/streaming/">Spark Streaming</a></li>
          <li><a class="dropdown-item" href="/pandas-on-spark/">pandas on Spark</a></li>
          <li><a class="dropdown-item" href="/mllib/">MLlib (machine learning)</a></li>
          <li><a class="dropdown-item" href="/graphx/">GraphX (graph)</a></li>
          <li>
            <hr class="dropdown-divider">
          </li>
          <li><a class="dropdown-item" href="/third-party-projects.html">Third-Party Projects</a></li>
        </ul>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="documentation" role="button" data-bs-toggle="dropdown"
           aria-expanded="false">
          Documentation
        </a>
        <ul class="dropdown-menu" aria-labelledby="documentation">
          <li><a class="dropdown-item" href="/docs/latest/">Latest Release</a></li>
          <li><a class="dropdown-item" href="/documentation.html">Older Versions and Other Resources</a></li>
          <li><a class="dropdown-item" href="/faq.html">Frequently Asked Questions</a></li>
        </ul>
      </li>
      <!-- As with Download: styled as active, but not announced as the current page. -->
      <li class="nav-item">
        <a class="nav-link active" href="/examples.html">Examples</a>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="community" role="button" data-bs-toggle="dropdown"
           aria-expanded="false">
          Community
        </a>
        <ul class="dropdown-menu" aria-labelledby="community">
          <li><a class="dropdown-item" href="/community.html">Mailing Lists &amp; Resources</a></li>
          <li><a class="dropdown-item" href="/contributing.html">Contributing to Spark</a></li>
          <li><a class="dropdown-item" href="/improvement-proposals.html">Improvement Proposals (SPIP)</a>
          </li>
          <li><a class="dropdown-item" href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a>
          </li>
          <li><a class="dropdown-item" href="/powered-by.html">Powered By</a></li>
          <li><a class="dropdown-item" href="/committers.html">Project Committers</a></li>
          <li><a class="dropdown-item" href="/history.html">Project History</a></li>
        </ul>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="developers" role="button" data-bs-toggle="dropdown"
           aria-expanded="false">
          Developers
        </a>
        <ul class="dropdown-menu" aria-labelledby="developers">
          <li><a class="dropdown-item" href="/developer-tools.html">Useful Developer Tools</a></li>
          <li><a class="dropdown-item" href="/versioning-policy.html">Versioning Policy</a></li>
          <li><a class="dropdown-item" href="/release-process.html">Release Process</a></li>
          <li><a class="dropdown-item" href="/security.html">Security</a></li>
        </ul>
      </li>
    </ul>
    <ul class="navbar-nav ml-auto">
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="apacheFoundation" role="button"
           data-bs-toggle="dropdown" aria-expanded="false">
          Apache Software Foundation
        </a>
        <ul class="dropdown-menu" aria-labelledby="apacheFoundation">
          <li><a class="dropdown-item" href="https://www.apache.org/">Apache Homepage</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/licenses/">License</a></li>
          <li><a class="dropdown-item"
                 href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/security/">Security</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/events/current-event">Event</a></li>
        </ul>
      </li>
    </ul>
  </div>
</nav>
<div class="container">
<div class="row mt-4">
<div class="col-12 col-md-9">
<h2>Spark Release 3.2.3</h2>
<p>Spark 3.2.3 is a maintenance release containing stability fixes. This release is based on the branch-3.2 maintenance branch of Spark. We strongly recommend all 3.2 users to upgrade to this stable release.</p>
<h3 id="notable-changes">Notable changes</h3>
<ul>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38697">[SPARK-38697]</a>: Extend SparkSessionExtensions to inject rules into AQE Optimizer</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39200">[SPARK-39200]</a>: Stream is corrupted Exception while fetching the blocks from fallback storage system</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-8731">[SPARK-8731]</a>: Beeline doesn&#8217;t work with -e option when started in background</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32380">[SPARK-32380]</a>: sparksql cannot access hive table while data in hbase</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-35542">[SPARK-35542]</a>: Bucketizer created for multiple columns with parameters splitsArray, inputCols and outputCols can not be loaded after saving it.</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39184">[SPARK-39184]</a>: ArrayIndexOutOfBoundsException for some date/time sequences in some time-zones</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39647">[SPARK-39647]</a>: Block push fails with java.lang.IllegalArgumentException: Active local dirs list has not been updated by any executor registration even when the NodeManager hasn&#8217;t been restarted</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39775">[SPARK-39775]</a>: Regression due to AVRO-2035</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39833">[SPARK-39833]</a>: Filtered parquet data frame count() and show() produce inconsistent results when spark.sql.parquet.filterPushdown is true</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39835">[SPARK-39835]</a>: Fix EliminateSorts remove global sort below the local sort</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39839">[SPARK-39839]</a>: Handle special case of null variable-length Decimal with non-zero offsetAndSize in UnsafeRow structural integrity check</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39847">[SPARK-39847]</a>: Race condition related to interruption of task threads while they are in RocksDBLoader.loadLibrary()</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39867">[SPARK-39867]</a>: Global limit should not inherit OrderPreservingUnaryNode</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39887">[SPARK-39887]</a>: Expression transform error</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39900">[SPARK-39900]</a>: Issue with querying dataframe produced by &#8216;binaryFile&#8217; format using &#8216;not&#8217; operator</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39932">[SPARK-39932]</a>: WindowExec should clear the final partition buffer</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39952">[SPARK-39952]</a>: SaveIntoDataSourceCommand should recache result relation</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39962">[SPARK-39962]</a>: Global aggregation against pandas aggregate UDF does not take the column order into account</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39965">[SPARK-39965]</a>: Skip PVC cleanup when driver doesn&#8217;t own PVCs</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39972">[SPARK-39972]</a>: Revert the test case of SPARK-39962 in branch-3.2 and branch-3.1</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40002">[SPARK-40002]</a>: Limit improperly pushed down through window using ntile function</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40065">[SPARK-40065]</a>: Executor ConfigMap is not mounted if profile is not default</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40079">[SPARK-40079]</a>: Add Imputer inputCols validation for empty input case</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40089">[SPARK-40089]</a>: Sorting of at least Decimal(20, 2) fails for some values near the max.</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40117">[SPARK-40117]</a>: Convert condition to java in DataFrameWriterV2.overwrite</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40121">[SPARK-40121]</a>: Initialize projection used for Python UDF</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40124">[SPARK-40124]</a>: Update TPCDS v1.4 q32 for Plan Stability tests</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40149">[SPARK-40149]</a>: Star expansion after outer join asymmetrically includes joining key</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40169">[SPARK-40169]</a>: Fix the issue with Parquet column index and predicate pushdown in Data source V1</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40212">[SPARK-40212]</a>: SparkSQL castPartValue does not properly handle byte &amp; short</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40218">[SPARK-40218]</a>: GROUPING SETS should preserve the grouping columns</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40270">[SPARK-40270]</a>: Make compute.max_rows as None working in DataFrame.style</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40280">[SPARK-40280]</a>: Failure to create parquet predicate push down for ints and longs on some valid files</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40315">[SPARK-40315]</a>: Non-deterministic hashCode() calculations for ArrayBasedMapData on equal objects</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40407">[SPARK-40407]</a>: Repartition of DataFrame can result in severe data skew in some special case</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40459">[SPARK-40459]</a>: recoverDiskStore should not stop by existing recomputed files</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40470">[SPARK-40470]</a>: arrays_zip output unexpected alias column names when using GetMapValue and GetArrayStructFields</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40493">[SPARK-40493]</a>: Revert &#8220;[SPARK-33861][SQL] Simplify conditional in predicate&#8221;</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40562">[SPARK-40562]</a>: Add spark.sql.legacy.groupingIdWithAppendedUserGroupBy</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40583">[SPARK-40583]</a>: Documentation error in &#8220;Integration with Cloud Infrastructures&#8221;</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40588">[SPARK-40588]</a>: Sorting issue with partitioned-writing and AQE turned on</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40612">[SPARK-40612]</a>: On Kubernetes for long running app Spark using an invalid principal to renew the delegation token</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40636">[SPARK-40636]</a>: Fix wrong remained shuffles log in BlockManagerDecommissioner</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40660">[SPARK-40660]</a>: Switch to XORShiftRandom to distribute elements</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40829">[SPARK-40829]</a>: STORED AS serde in CREATE TABLE LIKE view does not work</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40851">[SPARK-40851]</a>: TimestampFormatter behavior changed when using the latest Java 8/11/17</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40869">[SPARK-40869]</a>: KubernetesConf.getResourceNamePrefix creates invalid name prefixes</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40874">[SPARK-40874]</a>: Fix broadcasts in Python UDFs when encryption is enabled</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40902">[SPARK-40902]</a>: Quick submission of drivers in tests to mesos scheduler results in dropping drivers</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40963">[SPARK-40963]</a>: ExtractGenerator sets incorrect nullability in new Project</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41035">[SPARK-41035]</a>: Incorrect results or NPE when a literal is reused across distinct aggregations</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41091">[SPARK-41091]</a>: Fix Docker release tool for branch-3.2</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41188">[SPARK-41188]</a>: Set executorEnv OMP_NUM_THREADS to be spark.task.cpus by default for spark executor JVM processes</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38034">[SPARK-38034]</a>: Optimize time complexity and extend applicable cases for TransposeWindow</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39831">[SPARK-39831]</a>: R dependencies installation start to fail after devtools_2.4.4 was released</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39879">[SPARK-39879]</a>: Reduce local-cluster memory configuration in BroadcastJoinSuite* and HiveSparkSubmitSuite</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40022">[SPARK-40022]</a>: YarnClusterSuite should not ABORTED when there is no Python3 environment</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40241">[SPARK-40241]</a>: Correct the link of GenericUDTF</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40490">[SPARK-40490]</a>: <code class="language-plaintext highlighter-rouge">YarnShuffleIntegrationSuite</code> no longer verifies <code class="language-plaintext highlighter-rouge">registeredExecFile</code> reload after SPARK-17321</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40574">[SPARK-40574]</a>: Add PURGE to DROP TABLE doc</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40172">[SPARK-40172]</a>: Temporarily disable flaky test cases in ImageFileFormatSuite</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40461">[SPARK-40461]</a>: Set upperbound for pyzmq 24.0.0 for Python linter</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40213">[SPARK-40213]</a>: Incorrect ASCII value for Latin-1 Supplement characters</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40292">[SPARK-40292]</a>: arrays_zip output unexpected alias column names</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40043">[SPARK-40043]</a>: Document DataStreamWriter.toTable and DataStreamReader.table</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40983">[SPARK-40983]</a>: Remove Hadoop requirements for zstd mention in Parquet compression codec</li>
</ul>
<h3 id="dependency-changes">Dependency Changes</h3>
<p>Although this is a maintenance release, we did still upgrade some dependencies in this release. They are:</p>
<ul>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40801">[SPARK-40801]</a>: Upgrade Apache Commons Text to 1.10</li>
</ul>
<p>You can consult JIRA for the <a href="https://s.apache.org/spark-3.2.3">detailed changes</a>.</p>
<p>We would like to acknowledge all community members for contributing patches to this release.</p>
<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>
</div>
<!-- Sidebar: latest news, current Apache event banner, and download / library shortcuts. -->
<div class="col-12 col-md-3">
  <div class="news" style="margin-bottom: 20px;">
    <h5>Latest News</h5>
    <ul class="list-unstyled">
      <li><a href="/news/spark-3-4-3-released.html">Spark 3.4.3 released</a>
        <span class="small">(Apr 18, 2024)</span></li>
      <li><a href="/news/spark-3-5-1-released.html">Spark 3.5.1 released</a>
        <span class="small">(Feb 23, 2024)</span></li>
      <li><a href="/news/spark-3-3-4-released.html">Spark 3.3.4 released</a>
        <span class="small">(Dec 16, 2023)</span></li>
      <li><a href="/news/spark-3-4-2-released.html">Spark 3.4.2 released</a>
        <span class="small">(Nov 30, 2023)</span></li>
    </ul>
    <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
  </div>
  <!-- The banner image is the only content of this link, so its alt text names the link destination. -->
  <div style="text-align:center; margin-bottom: 20px;">
    <a href="https://www.apache.org/events/current-event.html">
      <img src="https://www.apache.org/events/current-event-234x60.png" alt="Current Apache event" style="max-width: 100%;">
    </a>
  </div>
  <!-- NOTE(review): hidden-xs / hidden-sm look like Bootstrap 3 utility names; the page loads
       Bootstrap 5.0.2, so they may have no effect unless /css/custom.css defines them. Confirm
       the intended small-screen behavior before changing them. -->
  <div class="hidden-xs hidden-sm">
    <a href="/downloads.html" class="btn btn-cta btn-lg d-grid" style="margin-bottom: 30px;">
      Download Spark
    </a>
    <p style="font-size: 16px; font-weight: 500; color: #555;">
      Built-in Libraries:
    </p>
    <ul class="list-none">
      <li><a href="/sql/">SQL and DataFrames</a></li>
      <li><a href="/streaming/">Spark Streaming</a></li>
      <li><a href="/mllib/">MLlib (machine learning)</a></li>
      <li><a href="/graphx/">GraphX (graph)</a></li>
    </ul>
    <a href="/third-party-projects.html">Third-Party Projects</a>
  </div>
</div>
</div>
<footer class="small">
<hr>
Apache Spark, Spark, Apache, the Apache feather logo, and the Apache Spark project logo are either registered
trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
See guidance on use of Apache Spark <a href="/trademarks.html">trademarks</a>.
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
Copyright &copy; 2018 The Apache Software Foundation, Licensed under the
<a href="https://www.apache.org/licenses/">Apache License, Version 2.0</a>.
</footer>
</div>
<!-- Page scripts, loaded at the end of <body> in this order. -->
<!-- Bootstrap 5.0.2 JS bundle from jsDelivr, integrity-checked via SRI; presumably what
     drives the data-bs-toggle collapse/dropdown behavior in the navbar. -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"
integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
crossorigin="anonymous"></script>
<!-- NOTE(review): unlike the Bootstrap bundle above, this jQuery URL is unversioned and has no
     integrity attribute, so the served file can change without notice. Consider pinning a
     version with SRI once the version required by the site scripts below is confirmed. -->
<script src="https://code.jquery.com/jquery.js"></script>
<!-- Site scripts; loaded after jQuery, so they presumably depend on it (TODO confirm). -->
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>
</body>
</html>