blob: df3d99adfd7319ecee17d8143766b6b1e5d6de5c [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>
Spark Release 2.4.8 | Apache Spark
</title>
<!-- Bootstrap 5.0.2 stylesheet, pinned with a Subresource Integrity hash. -->
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<!-- Web fonts: open connections to the Google Fonts hosts early, then request both families. -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<!-- The css2 API needs a separate family= parameter per font; without it the Courier Prime
     request is ignored. Ampersands are written as &amp; so the attribute value is valid HTML. -->
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&amp;family=Courier+Prime:wght@400;700&amp;display=swap" rel="stylesheet">
<!-- Site stylesheet; listed after Bootstrap. -->
<link href="/css/custom.css" rel="stylesheet">
<!-- Code highlighter CSS -->
<link href="/css/pygments-default.css" rel="stylesheet">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Matomo -->
<script>
  // Reuse the page-wide Matomo command queue when it already exists; otherwise start a new one.
  var _paq = window._paq = window._paq || [];
  /* Queue order matters: configuration calls such as "setCustomDimension" go before "trackPageView". */
  _paq.push(["disableCookies"]);
  _paq.push(['trackPageView']);
  _paq.push(['enableLinkTracking']);
  (function() {
    var trackerBase = "https://analytics.apache.org/";
    _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
    _paq.push(['setSiteId', '40']);
    // Build an async <script> for matomo.js and insert it before the first script element in the document.
    var loaderTag = document.createElement('script');
    var firstScriptTag = document.getElementsByTagName('script')[0];
    loaderTag.async = true;
    loaderTag.src = trackerBase + 'matomo.js';
    firstScriptTag.parentNode.insertBefore(loaderTag, firstScriptTag);
  })();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<!-- Primary site navigation: brand logo, collapse toggle, and the dropdown menus. -->
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4" style="background: #1D6890;">
  <!-- The logo is the only content of this link, so its alt text is the link's accessible name. -->
  <a class="navbar-brand" href="/">
    <img src="/images/spark-logo-rev.svg" alt="Apache Spark" width="141" height="72">
  </a>
  <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarContent"
          aria-controls="navbarContent" aria-expanded="false" aria-label="Toggle navigation">
    <span class="navbar-toggler-icon"></span>
  </button>
  <div class="collapse navbar-collapse col-md-12 col-lg-auto pt-4" id="navbarContent">
    <ul class="navbar-nav me-auto">
      <!-- This page is a release note, not the Downloads page, so the link carries no aria-current. -->
      <li class="nav-item">
        <a class="nav-link active" href="/downloads.html">Download</a>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="libraries" role="button" data-bs-toggle="dropdown"
           aria-expanded="false">
          Libraries
        </a>
        <ul class="dropdown-menu" aria-labelledby="libraries">
          <li><a class="dropdown-item" href="/sql/">SQL and DataFrames</a></li>
          <li><a class="dropdown-item" href="/spark-connect/">Spark Connect</a></li>
          <li><a class="dropdown-item" href="/streaming/">Spark Streaming</a></li>
          <li><a class="dropdown-item" href="/pandas-on-spark/">pandas on Spark</a></li>
          <li><a class="dropdown-item" href="/mllib/">MLlib (machine learning)</a></li>
          <li><a class="dropdown-item" href="/graphx/">GraphX (graph)</a></li>
          <li>
            <hr class="dropdown-divider">
          </li>
          <li><a class="dropdown-item" href="/third-party-projects.html">Third-Party Projects</a></li>
        </ul>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="documentation" role="button" data-bs-toggle="dropdown"
           aria-expanded="false">
          Documentation
        </a>
        <ul class="dropdown-menu" aria-labelledby="documentation">
          <li><a class="dropdown-item" href="/docs/latest/">Latest Release</a></li>
          <li><a class="dropdown-item" href="/documentation.html">Older Versions and Other Resources</a></li>
          <li><a class="dropdown-item" href="/faq.html">Frequently Asked Questions</a></li>
        </ul>
      </li>
      <!-- Same as "Download" above: not the current page, so no aria-current. -->
      <li class="nav-item">
        <a class="nav-link active" href="/examples.html">Examples</a>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="community" role="button" data-bs-toggle="dropdown"
           aria-expanded="false">
          Community
        </a>
        <ul class="dropdown-menu" aria-labelledby="community">
          <li><a class="dropdown-item" href="/community.html">Mailing Lists &amp; Resources</a></li>
          <li><a class="dropdown-item" href="/contributing.html">Contributing to Spark</a></li>
          <li><a class="dropdown-item" href="/improvement-proposals.html">Improvement Proposals (SPIP)</a>
          </li>
          <li><a class="dropdown-item" href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a>
          </li>
          <li><a class="dropdown-item" href="/powered-by.html">Powered By</a></li>
          <li><a class="dropdown-item" href="/committers.html">Project Committers</a></li>
          <li><a class="dropdown-item" href="/history.html">Project History</a></li>
        </ul>
      </li>
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="developers" role="button" data-bs-toggle="dropdown"
           aria-expanded="false">
          Developers
        </a>
        <ul class="dropdown-menu" aria-labelledby="developers">
          <li><a class="dropdown-item" href="/developer-tools.html">Useful Developer Tools</a></li>
          <li><a class="dropdown-item" href="/versioning-policy.html">Versioning Policy</a></li>
          <li><a class="dropdown-item" href="/release-process.html">Release Process</a></li>
          <li><a class="dropdown-item" href="/security.html">Security</a></li>
        </ul>
      </li>
    </ul>
    <!-- ms-auto is the Bootstrap 5 name for the start-side auto margin (ml-auto in Bootstrap 4). -->
    <ul class="navbar-nav ms-auto">
      <li class="nav-item dropdown">
        <a class="nav-link dropdown-toggle" href="#" id="apacheFoundation" role="button"
           data-bs-toggle="dropdown" aria-expanded="false">
          Apache Software Foundation
        </a>
        <ul class="dropdown-menu" aria-labelledby="apacheFoundation">
          <li><a class="dropdown-item" href="https://www.apache.org/">Apache Homepage</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/licenses/">License</a></li>
          <li><a class="dropdown-item"
                 href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/security/">Security</a></li>
          <li><a class="dropdown-item" href="https://www.apache.org/events/current-event">Event</a></li>
        </ul>
      </li>
    </ul>
  </div>
</nav>
<div class="container">
<div class="row mt-4">
<div class="col-12 col-md-9">
<h2>Spark Release 2.4.8</h2>
<p>Spark 2.4.8 is a maintenance release containing stability, correctness, and security fixes. This release is based on the branch-2.4 maintenance branch of Spark. We strongly recommend that all 2.4 users upgrade to this stable release.</p>
<h3 id="notable-changes">Notable changes</h3>
<ul>
<li><a href="https://issues.apache.org/jira/browse/SPARK-21492">[SPARK-21492]</a>: Fix memory leak in SortMergeJoin</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-25271">[SPARK-25271]</a>: Creating parquet table with all the column null throws exception</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-26625">[SPARK-26625]</a>: spark.redaction.regex should include oauthToken</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-26645">[SPARK-26645]</a>: CSV infer schema bug infers decimal(9,-1)</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-27575">[SPARK-27575]</a>: Spark overwrites existing value of spark.yarn.dist.* instead of merging value</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-27872">[SPARK-27872]</a>: Driver and executors use a different service account breaking pull secrets</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-29574">[SPARK-29574]</a>: spark with user provided hadoop doesn&#8217;t work on kubernetes</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-30201">[SPARK-30201]</a>: HiveOutputWriter standardOI should use ObjectInspectorCopyOption.DEFAULT</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32635">[SPARK-32635]</a>: When pyspark.sql.functions.lit() function is used with dataframe cache, it returns wrong result</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32708">[SPARK-32708]</a>: Query optimization fails to reuse exchange with DataSourceV2</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32715">[SPARK-32715]</a>: Broadcast block pieces may memory leak</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32738">[SPARK-32738]</a>: thread safe endpoints may hang due to fatal error</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32794">[SPARK-32794]</a>: Rare corner case error in micro-batch engine with some stateful queries + no-data-batches + V1 streaming sources</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32815">[SPARK-32815]</a>: Fix LibSVM data source loading error on file paths with glob metacharacters</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32836">[SPARK-32836]</a>: Fix DataStreamReaderWriterSuite to check writer options correctly</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32872">[SPARK-32872]</a>: BytesToBytesMap at MAX_CAPACITY exceeds growth threshold</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32900">[SPARK-32900]</a>: UnsafeExternalSorter.SpillableIterator cannot spill when there are NULLs in the input and radix sorting is used.</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32901">[SPARK-32901]</a>: UnsafeExternalSorter may cause a SparkOutOfMemoryError to be thrown while spilling</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32908">[SPARK-32908]</a>: percentile_approx() returns incorrect results</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32999">[SPARK-32999]</a>: TreeNode.nodeName should not throw malformed class name error</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33094">[SPARK-33094]</a>: ORC format does not propagate Hadoop config from DS options to underlying HDFS file system</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33101">[SPARK-33101]</a>: LibSVM format does not propagate Hadoop config from DS options to underlying HDFS file system</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33131">[SPARK-33131]</a>: Fix grouping sets with having clause can not resolve qualified col name</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33136">[SPARK-33136]</a>: Handling nullability for complex types is broken during resolution of V2 write command</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33183">[SPARK-33183]</a>: Bug in optimizer rule EliminateSorts</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33230">[SPARK-33230]</a>: FileOutputWriter jobs have duplicate JobIDs if launched in same second</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33268">[SPARK-33268]</a>: Fix bugs for casting data from/to PythonUserDefinedType</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33277">[SPARK-33277]</a>: Python/Pandas UDF right after off-heap vectorized reader could cause executor crash.</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33292">[SPARK-33292]</a>: Make Literal ArrayBasedMapData string representation disambiguous</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33338">[SPARK-33338]</a>: GROUP BY using literal map should not fail</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33339">[SPARK-33339]</a>: Pyspark application will hang due to non Exception</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33372">[SPARK-33372]</a>: Fix InSet bucket pruning</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33472">[SPARK-33472]</a>: IllegalArgumentException when applying RemoveRedundantSorts before EnsureRequirements</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33588">[SPARK-33588]</a>: Partition spec in SHOW TABLE EXTENDED doesn&#8217;t respect <code class="language-plaintext highlighter-rouge">spark.sql.caseSensitive</code></li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33593">[SPARK-33593]</a>: Vector reader got incorrect data with binary partition value</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33726">[SPARK-33726]</a>: Duplicate field names causes wrong answers during aggregation</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33733">[SPARK-33733]</a>: PullOutNondeterministic should check and collect deterministic field</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33756">[SPARK-33756]</a>: BytesToBytesMap&#8217;s iterator hasNext method should be idempotent.</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34125">[SPARK-34125]</a>: Make EventLoggingListener.codecMap thread-safe</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34187">[SPARK-34187]</a>: Use available offset range obtained during polling when checking offset validation</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34212">[SPARK-34212]</a>: For parquet table, after changing the precision and scale of decimal type in hive, spark reads incorrect value</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34229">[SPARK-34229]</a>: Avro should read decimal values with the file schema</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34260">[SPARK-34260]</a>: UnresolvedException when creating temp view twice</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34273">[SPARK-34273]</a>: Do not reregister BlockManager when SparkContext is stopped</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34318">[SPARK-34318]</a>: Dataset.colRegex should work with column names and qualifiers which contain newlines</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34327">[SPARK-34327]</a>: Omit inlining passwords during build process.</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34596">[SPARK-34596]</a>: NewInstance.doGenCode should not throw malformed class name error</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34607">[SPARK-34607]</a>: NewInstance.resolved should not throw malformed class name error</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34724">[SPARK-34724]</a>: Fix Interpreted evaluation by using getClass.getMethod instead of getDeclaredMethod</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34726">[SPARK-34726]</a>: Fix collectToPython timeouts</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34776">[SPARK-34776]</a>: Catalyst error on certain struct operation (Couldn&#8217;t find <code class="language-plaintext highlighter-rouge">_gen_alias_</code>)</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34811">[SPARK-34811]</a>: Redact fs.s3a.access.key like secret and token</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34855">[SPARK-34855]</a>: SparkContext - avoid using local lazy val</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34876">[SPARK-34876]</a>: Non-nullable aggregates can return NULL in a correlated subquery</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34909">[SPARK-34909]</a>: conv() does not convert negative inputs to unsigned correctly</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34939">[SPARK-34939]</a>: Throw fetch failure exception when unable to deserialize broadcasted map statuses</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34963">[SPARK-34963]</a>: Nested column pruning fails to extract case-insensitive struct field from array</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-35080">[SPARK-35080]</a>: Correlated subqueries with equality predicates can return wrong results</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-35278">[SPARK-35278]</a>: Invoke should find the method with correct number of parameters</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-35288">[SPARK-35288]</a>: StaticInvoke should find the method without exact argument classes match</li>
</ul>
<h3 id="dependency-changes">Dependency Changes</h3>
<ul>
<li><a href="https://issues.apache.org/jira/browse/SPARK-30228">[SPARK-30228]</a>: Update zstd-jni to 1.4.4-3</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33831">[SPARK-33831]</a>: Update Jetty to 9.4.34</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33333">[SPARK-33333]</a>: Upgrade Jetty to 9.4.28.v20200408</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33405">[SPARK-33405]</a>: Upgrade commons-compress to 1.20</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-33725">[SPARK-33725]</a>: Upgrade snappy-java to 1.1.8.2</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34449">[SPARK-34449]</a>: Upgrade Jetty to fix CVE-2020-27218</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-34988">[SPARK-34988]</a>: Upgrade Jetty for CVE-2021-28165</li>
</ul>
<h3 id="known-issues">Known issues</h3>
<p>You can consult JIRA for the <a href="https://s.apache.org/spark-2.4.8">detailed changes</a>.</p>
<p>We would like to acknowledge all community members for contributing patches to this release.</p>
<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>
</div>
<!-- Sidebar column: latest news, the current ASF event banner, and download/library shortcuts. -->
<div class="col-12 col-md-3">
  <div class="news" style="margin-bottom: 20px;">
    <h5>Latest News</h5>
    <ul class="list-unstyled">
      <li><a href="/news/spark-3-4-3-released.html">Spark 3.4.3 released</a>
        <span class="small">(Apr 18, 2024)</span></li>
      <li><a href="/news/spark-3-5-1-released.html">Spark 3.5.1 released</a>
        <span class="small">(Feb 23, 2024)</span></li>
      <li><a href="/news/spark-3-3-4-released.html">Spark 3.3.4 released</a>
        <span class="small">(Dec 16, 2023)</span></li>
      <li><a href="/news/spark-3-4-2-released.html">Spark 3.4.2 released</a>
        <span class="small">(Nov 30, 2023)</span></li>
    </ul>
    <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
  </div>
  <div style="text-align:center; margin-bottom: 20px;">
    <!-- The banner image is the link's only content, so its alt text names the link for assistive technology. -->
    <a href="https://www.apache.org/events/current-event.html">
      <img src="https://www.apache.org/events/current-event-234x60.png" alt="Current Apache Software Foundation event" style="max-width: 100%;">
    </a>
  </div>
  <!-- NOTE(review): hidden-xs / hidden-sm are Bootstrap 3 class names. Unless /css/custom.css
       defines them, they have no effect under the Bootstrap 5 stylesheet this page loads.
       Confirm whether hiding this block on small screens is still intended. -->
  <div class="hidden-xs hidden-sm">
    <a href="/downloads.html" class="btn btn-cta btn-lg d-grid" style="margin-bottom: 30px;">
      Download Spark
    </a>
    <p style="font-size: 16px; font-weight: 500; color: #555;">
      Built-in Libraries:
    </p>
    <ul class="list-none">
      <li><a href="/sql/">SQL and DataFrames</a></li>
      <li><a href="/streaming/">Spark Streaming</a></li>
      <li><a href="/mllib/">MLlib (machine learning)</a></li>
      <li><a href="/graphx/">GraphX (graph)</a></li>
    </ul>
    <a href="/third-party-projects.html">Third-Party Projects</a>
  </div>
</div>
</div>
<footer class="small">
<hr>
Apache Spark, Spark, Apache, the Apache feather logo, and the Apache Spark project logo are either registered
trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
See guidance on use of Apache Spark <a href="/trademarks.html">trademarks</a>.
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
Copyright &copy; 2018 The Apache Software Foundation, Licensed under the
<a href="https://www.apache.org/licenses/">Apache License, Version 2.0</a>.
</footer>
</div>
<!-- Bootstrap 5.0.2 JavaScript bundle, pinned with a Subresource Integrity hash.
     Presumably drives the data-bs-toggle collapse and dropdown controls in the navbar. -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"
integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
crossorigin="anonymous"></script>
<!-- NOTE(review): unlike the Bootstrap include above, this jQuery URL names no version and has no
     integrity/crossorigin attributes. Consider pinning a specific release with an SRI hash after
     confirming which version the site scripts below expect. -->
<script src="https://code.jquery.com/jquery.js"></script>
<!-- Site-local scripts; loaded after jQuery, so they presumably depend on it. TODO confirm. -->
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>
</body>
</html>