<!-- blob: bba609d933620234ee6c6ba83e9dcbb3d226916b [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>
Spark Release 3.3.1 | Apache Spark
</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<!-- Web fonts (DM Sans and Courier Prime). The css2 endpoint needs a separate
     family= parameter for every requested family. -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&amp;family=Courier+Prime:wght@400;700&amp;display=swap" rel="stylesheet">
<link href="/css/custom.css" rel="stylesheet">
<!-- Code highlighter CSS -->
<link href="/css/pygments-default.css" rel="stylesheet">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Matomo -->
<script>
// Matomo bootstrap: reuse (or create) the global `_paq` command queue, enqueue
// the tracker commands, then insert the matomo.js script element ahead of the
// first <script> element found in the document.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function () {
  var baseUrl = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', baseUrl + 'matomo.php']);
  _paq.push(['setSiteId', '40']);

  var loader = document.createElement('script');
  var firstScript = document.getElementsByTagName('script')[0];
  loader.async = true;
  loader.src = baseUrl + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<!-- Primary site navigation: brand logo, collapsible section menus, and the ASF menu. -->
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4" style="background: #1D6890;">
  <a class="navbar-brand" href="/">
    <!-- The logo is this link's only content, so its alt text is the link's accessible name. -->
    <img src="/images/spark-logo-rev.svg" alt="Apache Spark" width="141" height="72">
  </a>
  <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarContent"
    aria-controls="navbarContent" aria-expanded="false" aria-label="Toggle navigation">
    <span class="navbar-toggler-icon"></span>
  </button>
  <div class="collapse navbar-collapse col-md-12 col-lg-auto pt-4" id="navbarContent">
    <ul class="navbar-nav me-auto">
      <!-- The "active" class is kept on Download/Examples; aria-current="page" is
           not set because this release-notes page is not either link's target. -->
      <li class="nav-item">
        <a class="nav-link active" href="/downloads.html">Download</a>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="libraries" role="button" data-bs-toggle="dropdown"
          aria-expanded="false">
          Libraries
        </a>
        <ul class="dropdown-menu" aria-labelledby="libraries">
          <li><a class="dropdown-item" href="/sql/">SQL and DataFrames</a></li>
          <li><a class="dropdown-item" href="/spark-connect/">Spark Connect</a></li>
          <li><a class="dropdown-item" href="/streaming/">Spark Streaming</a></li>
          <li><a class="dropdown-item" href="/pandas-on-spark/">pandas on Spark</a></li>
          <li><a class="dropdown-item" href="/mllib/">MLlib (machine learning)</a></li>
          <li><a class="dropdown-item" href="/graphx/">GraphX (graph)</a></li>
          <li>
            <hr class="dropdown-divider">
          </li>
          <li><a class="dropdown-item" href="/third-party-projects.html">Third-Party Projects</a></li>
        </ul>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="documentation" role="button" data-bs-toggle="dropdown"
          aria-expanded="false">
          Documentation
        </a>
        <ul class="dropdown-menu" aria-labelledby="documentation">
          <li><a class="dropdown-item" href="/docs/latest/">Latest Release</a></li>
          <li><a class="dropdown-item" href="/documentation.html">Older Versions and Other Resources</a></li>
          <li><a class="dropdown-item" href="/faq.html">Frequently Asked Questions</a></li>
        </ul>
      </li>
      <li class="nav-item">
        <a class="nav-link active" href="/examples.html">Examples</a>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="community" role="button" data-bs-toggle="dropdown"
          aria-expanded="false">
          Community
        </a>
        <ul class="dropdown-menu" aria-labelledby="community">
          <li><a class="dropdown-item" href="/community.html">Mailing Lists &amp; Resources</a></li>
          <li><a class="dropdown-item" href="/contributing.html">Contributing to Spark</a></li>
          <li><a class="dropdown-item" href="/improvement-proposals.html">Improvement Proposals (SPIP)</a></li>
          <li><a class="dropdown-item" href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a></li>
          <li><a class="dropdown-item" href="/powered-by.html">Powered By</a></li>
          <li><a class="dropdown-item" href="/committers.html">Project Committers</a></li>
          <li><a class="dropdown-item" href="/history.html">Project History</a></li>
        </ul>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="developers" role="button" data-bs-toggle="dropdown"
          aria-expanded="false">
          Developers
        </a>
        <ul class="dropdown-menu" aria-labelledby="developers">
          <li><a class="dropdown-item" href="/developer-tools.html">Useful Developer Tools</a></li>
          <li><a class="dropdown-item" href="/versioning-policy.html">Versioning Policy</a></li>
          <li><a class="dropdown-item" href="/release-process.html">Release Process</a></li>
          <li><a class="dropdown-item" href="/security.html">Security</a></li>
        </ul>
      </li>
    </ul>
    <!-- ms-auto is the Bootstrap 5 name for the former ml-auto utility. -->
    <ul class="navbar-nav ms-auto">
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="apacheFoundation" role="button"
          data-bs-toggle="dropdown" aria-expanded="false">
          Apache Software Foundation
        </a>
        <ul class="dropdown-menu" aria-labelledby="apacheFoundation">
          <li><a class="dropdown-item" href="https://www.apache.org/">Apache Homepage</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/licenses/">License</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/security/">Security</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/events/current-event">Event</a></li>
        </ul>
      </li>
    </ul>
  </div>
</nav>
<div class="container">
<div class="row mt-4">
<div class="col-12 col-md-9">
<h2>Spark Release 3.3.1</h2>
<p>Spark 3.3.1 is a maintenance release containing stability fixes. This release is based on the branch-3.3 maintenance branch of Spark. We strongly recommend that all 3.3 users upgrade to this stable release.</p>
<h3 id="notable-changes">Notable changes</h3>
<ul>
<li><a href="https://issues.apache.org/jira/browse/SPARK-35542">[SPARK-35542]</a>: Fix: Bucketizer created for multiple columns with parameters splitsArray, inputCols and outputCols can not be loaded after saving it</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-36057">[SPARK-36057]</a>: SPIP: Support Customized Kubernetes Schedulers</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38034">[SPARK-38034]</a>: Optimize TransposeWindow rule</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38404">[SPARK-38404]</a>: Improve CTE resolution when a nested CTE references an outer CTE</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38614">[SPARK-38614]</a>: Don&#8217;t push down limit through window that&#8217;s using percent_rank</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38717">[SPARK-38717]</a>: Handle Hive&#8217;s bucket spec case preserving behaviour</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38796">[SPARK-38796]</a>: Update to_number and try_to_number functions to allow PR with positive numbers</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39184">[SPARK-39184]</a>: Handle undersized result array in date and timestamp sequences</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39200">[SPARK-39200]</a>: Make Fallback Storage readFully on content</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39340">[SPARK-39340]</a>: DS v2 agg pushdown should allow dots in the name of top-level columns</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39355">[SPARK-39355]</a>: Single column uses quoted to construct UnresolvedAttribute</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39419">[SPARK-39419]</a>: Fix ArraySort to throw an exception when the comparator returns null</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39447">[SPARK-39447]</a>: Avoid AssertionError in AdaptiveSparkPlanExec.doExecuteBroadcast</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39476">[SPARK-39476]</a>: Disable Unwrap cast optimize when casting from Long to Float/ Double or from Integer to Float</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39548">[SPARK-39548]</a>: CreateView Command with a window clause query hit a wrong window definition not found issue</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39570">[SPARK-39570]</a>: Inline table should allow expressions with alias</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39614">[SPARK-39614]</a>: K8s pod name follows DNS Subdomain Names rule</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39633">[SPARK-39633]</a>: Support timestamp in seconds for TimeTravel using Dataframe options</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39647">[SPARK-39647]</a>: Register the executor with ESS before registering the BlockManager</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39650">[SPARK-39650]</a>: Fix incorrect value schema in streaming deduplication with backward compatibility</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39656">[SPARK-39656]</a>: Fix wrong namespace in DescribeNamespaceExec</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39657">[SPARK-39657]</a>: YARN AM client should call the non-static setTokensConf method</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39672">[SPARK-39672]</a>: Fix removing project before filter with correlated subquery</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39758">[SPARK-39758]</a>: Fix NPE from the regexp functions on invalid patterns</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39775">[SPARK-39775]</a>: Disable validate default values when parsing Avro schemas</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39806">[SPARK-39806]</a>: Accessing _metadata on partitioned table can crash a query</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39833">[SPARK-39833]</a>: Disable Parquet column index in DSv1 to fix a correctness issue in the case of overlapping partition and data columns</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39835">[SPARK-39835]</a>: Fix EliminateSorts remove global sort below the local sort</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39839">[SPARK-39839]</a>: Handle special case of null variable-length Decimal with non-zero offsetAndSize in UnsafeRow structural integrity check</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39847">[SPARK-39847]</a>: Fix race condition in RocksDBLoader.loadLibrary() if caller thread is interrupted</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39857">[SPARK-39857]</a>: V2ExpressionBuilder uses the wrong LiteralValue data type for In predicate</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39867">[SPARK-39867]</a>: Global limit should not inherit OrderPreservingUnaryNode</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39887">[SPARK-39887]</a>: RemoveRedundantAliases should keep aliases that make the output of projection nodes unique</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39896">[SPARK-39896]</a>: UnwrapCastInBinaryComparison should work when the literal of In/InSet downcast failed</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39900">[SPARK-39900]</a>: Address partial or negated condition in binary format&#8217;s predicate pushdown</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39911">[SPARK-39911]</a>: Optimize global Sort to RepartitionByExpression</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39915">[SPARK-39915]</a>: Dataset.repartition(N) may not create N partitions Non-AQE part</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39915">[SPARK-39915]</a>: Ensure the output partitioning is user-specified in AQE</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39932">[SPARK-39932]</a>: WindowExec should clear the final partition buffer</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39951">[SPARK-39951]</a>: Update Parquet V2 columnar check for nested fields</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39952">[SPARK-39952]</a>: SaveIntoDataSourceCommand should recache result relation</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39962">[SPARK-39962]</a>: Apply projection when group attributes are empty</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39976">[SPARK-39976]</a>: ArrayIntersect should handle null in left expression correctly</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40002">[SPARK-40002]</a>: Don&#8217;t push down limit through window using ntile</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40065">[SPARK-40065]</a>: Mount ConfigMap on executors with non-default profile as well</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40079">[SPARK-40079]</a>: Add Imputer inputCols validation for empty input case</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40089">[SPARK-40089]</a>: Fix sorting for some Decimal types</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40117">[SPARK-40117]</a>: Convert condition to java in DataFrameWriterV2.overwrite</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40121">[SPARK-40121]</a>: Initialize projection used for Python UDF</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40132">[SPARK-40132]</a>: Restore rawPredictionCol to MultilayerPerceptronClassifier.setParams</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40149">[SPARK-40149]</a>: Propagate metadata columns through Project</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40152">[SPARK-40152]</a>: Fix split_part codegen compilation issue</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40169">[SPARK-40169]</a>: Don&#8217;t pushdown Parquet filters with no reference to data schema</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40212">[SPARK-40212]</a>: SparkSQL castPartValue does not properly handle byte, short, or float</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40213">[SPARK-40213]</a>: Support ASCII value conversion for Latin-1 characters</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40218">[SPARK-40218]</a>: GROUPING SETS should preserve the grouping columns</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40228">[SPARK-40228]</a>: Do not simplify multiLike if child is not a cheap expression</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40247">[SPARK-40247]</a>: Fix BitSet equality check</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40280">[SPARK-40280]</a>: Add support for parquet push down for annotated int and long</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40297">[SPARK-40297]</a>: CTE outer reference nested in CTE main body cannot be resolved</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40362">[SPARK-40362]</a>: Fix BinaryComparison canonicalization</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40380">[SPARK-40380]</a>: Fix constant-folding of InvokeLike to avoid non-serializable literal embedded in the plan</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40385">[SPARK-40385]</a>: Fix interpreted path for companion object constructor</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40389">[SPARK-40389]</a>: Decimals can&#8217;t upcast as integral types if the cast can overflow</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40468">[SPARK-40468]</a>: Fix column pruning in CSV when _corrupt_record is selected</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40508">[SPARK-40508]</a>: Treat unknown partitioning as UnknownPartitioning</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40535">[SPARK-40535]</a>: Fix bug the buffer of AggregatingAccumulator will not be created if the input rows is empty</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40562">[SPARK-40562]</a>: Add <code class="language-plaintext highlighter-rouge">spark.sql.legacy.groupingIdWithAppendedUserGroupBy</code></li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40612">[SPARK-40612]</a>: Fixing the principal used for delegation token renewal on non-YARN resource managers</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40660">[SPARK-40660]</a>: Switch to XORShiftRandom to distribute elements</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40703">[SPARK-40703]</a>: Introduce shuffle on SinglePartition to improve parallelism</li>
</ul>
<h3 id="dependency-changes">Dependency Changes</h3>
<p>Although this is a maintenance release, we still upgraded some dependencies:</p>
<ul>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39947">[SPARK-39947]</a>: Upgrade Jersey to 2.36</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40134">[SPARK-40134]</a>: Update ORC to 1.7.6</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40326">[SPARK-40326]</a>: Upgrade fasterxml.jackson.version to 2.13.4</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39725">[SPARK-39725]</a>: Upgrade jetty to 9.4.48.v20220622</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40782">[SPARK-40782]</a>: Upgrade jackson-databind to 2.13.4.1</li>
</ul>
<p>You can consult JIRA for the <a href="https://s.apache.org/spark-3.3.1">detailed changes</a>.</p>
<p>We would like to acknowledge all community members for contributing patches to this release.</p>
<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>
</div>
<!-- Sidebar column: latest news, ASF event banner, download button, and library links. -->
<div class="col-12 col-md-3">
  <div class="news" style="margin-bottom: 20px;">
    <h5>Latest News</h5>
    <ul class="list-unstyled">
      <li><a href="/news/spark-3-4-3-released.html">Spark 3.4.3 released</a>
        <span class="small">(Apr 18, 2024)</span></li>
      <li><a href="/news/spark-3-5-1-released.html">Spark 3.5.1 released</a>
        <span class="small">(Feb 23, 2024)</span></li>
      <li><a href="/news/spark-3-3-4-released.html">Spark 3.3.4 released</a>
        <span class="small">(Dec 16, 2023)</span></li>
      <li><a href="/news/spark-3-4-2-released.html">Spark 3.4.2 released</a>
        <span class="small">(Nov 30, 2023)</span></li>
    </ul>
    <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
  </div>
  <div style="text-align:center; margin-bottom: 20px;">
    <!-- The banner image is this link's only content, so its alt text describes the link destination. -->
    <a href="https://www.apache.org/events/current-event.html">
      <img src="https://www.apache.org/events/current-event-234x60.png" alt="Current Apache Software Foundation event" style="max-width: 100%;">
    </a>
  </div>
  <!-- NOTE(review): hidden-xs / hidden-sm are not utilities shipped with the Bootstrap 5
       stylesheet loaded by this page; confirm whether /css/custom.css defines them. -->
  <div class="hidden-xs hidden-sm">
    <a href="/downloads.html" class="btn btn-cta btn-lg d-grid" style="margin-bottom: 30px;">
      Download Spark
    </a>
    <p style="font-size: 16px; font-weight: 500; color: #555;">
      Built-in Libraries:
    </p>
    <ul class="list-none">
      <li><a href="/sql/">SQL and DataFrames</a></li>
      <li><a href="/streaming/">Spark Streaming</a></li>
      <li><a href="/mllib/">MLlib (machine learning)</a></li>
      <li><a href="/graphx/">GraphX (graph)</a></li>
    </ul>
    <a href="/third-party-projects.html">Third-Party Projects</a>
  </div>
</div>
</div>
<footer class="small">
<hr>
Apache Spark, Spark, Apache, the Apache feather logo, and the Apache Spark project logo are either registered
trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
See guidance on use of Apache Spark <a href="/trademarks.html">trademarks</a>.
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
Copyright &copy; 2018 The Apache Software Foundation, Licensed under the
<a href="https://www.apache.org/licenses/">Apache License, Version 2.0</a>.
</footer>
</div>
<!-- Page scripts, loaded at the end of the body: the Bootstrap bundle, jQuery,
     and two site-local scripts. -->
<!-- Bootstrap 5.0.2 bundle from jsDelivr, pinned by version and checked via integrity. -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"
integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
crossorigin="anonymous"></script>
<!-- NOTE(review): unlike the Bootstrap bundle above, this jQuery URL carries no version
     and no integrity/crossorigin attributes; consider pinning a version and adding an
     integrity hash. Presumably required by the site scripts below - confirm. -->
<script src="https://code.jquery.com/jquery.js"></script>
<!-- Site-local scripts. -->
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>
</body>
</html>