blob: c757eeb3964740b8220b201b3e8a4d1a40177da0 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>
Spark Release 0.9.0 | Apache Spark
</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&family=Courier+Prime:wght@400;700&display=swap" rel="stylesheet">
<link href="/css/custom.css" rel="stylesheet">
<!-- Code highlighter CSS -->
<link href="/css/pygments-default.css" rel="stylesheet">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Matomo -->
<script>
// Matomo analytics bootstrap: commands queued on window._paq are replayed
// by matomo.js once it loads, so the queue must exist before the loader runs.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function () {
    var trackerBase = "https://analytics.apache.org/";
    _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
    _paq.push(['setSiteId', '40']);
    // Inject the Matomo loader script asynchronously, ahead of the first
    // <script> already in the document so it never blocks page rendering.
    var doc = document;
    var loader = doc.createElement('script');
    var firstScript = doc.getElementsByTagName('script')[0];
    loader.async = true;
    loader.src = trackerBase + 'matomo.js';
    firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4" style="background: #1D6890;">
<a class="navbar-brand" href="/">
<img src="/images/spark-logo-rev.svg" alt="Apache Spark" width="141" height="72">
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarContent"
aria-controls="navbarContent" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse col-md-12 col-lg-auto pt-4" id="navbarContent">
<ul class="navbar-nav me-auto">
<li class="nav-item">
<a class="nav-link" href="/downloads.html">Download</a>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="libraries" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Libraries
</a>
<ul class="dropdown-menu" aria-labelledby="libraries">
<li><a class="dropdown-item" href="/sql/">SQL and DataFrames</a></li>
<li><a class="dropdown-item" href="/spark-connect/">Spark Connect</a></li>
<li><a class="dropdown-item" href="/streaming/">Spark Streaming</a></li>
<li><a class="dropdown-item" href="/pandas-on-spark/">pandas on Spark</a></li>
<li><a class="dropdown-item" href="/mllib/">MLlib (machine learning)</a></li>
<li><a class="dropdown-item" href="/graphx/">GraphX (graph)</a></li>
<li>
<hr class="dropdown-divider">
</li>
<li><a class="dropdown-item" href="/third-party-projects.html">Third-Party Projects</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="documentation" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Documentation
</a>
<ul class="dropdown-menu" aria-labelledby="documentation">
<li><a class="dropdown-item" href="/docs/latest/">Latest Release</a></li>
<li><a class="dropdown-item" href="/documentation.html">Older Versions and Other Resources</a></li>
<li><a class="dropdown-item" href="/faq.html">Frequently Asked Questions</a></li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link" href="/examples.html">Examples</a>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="community" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Community
</a>
<ul class="dropdown-menu" aria-labelledby="community">
<li><a class="dropdown-item" href="/community.html">Mailing Lists &amp; Resources</a></li>
<li><a class="dropdown-item" href="/contributing.html">Contributing to Spark</a></li>
<li><a class="dropdown-item" href="/improvement-proposals.html">Improvement Proposals (SPIP)</a>
</li>
<li><a class="dropdown-item" href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a>
</li>
<li><a class="dropdown-item" href="/powered-by.html">Powered By</a></li>
<li><a class="dropdown-item" href="/committers.html">Project Committers</a></li>
<li><a class="dropdown-item" href="/history.html">Project History</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="developers" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Developers
</a>
<ul class="dropdown-menu" aria-labelledby="developers">
<li><a class="dropdown-item" href="/developer-tools.html">Useful Developer Tools</a></li>
<li><a class="dropdown-item" href="/versioning-policy.html">Versioning Policy</a></li>
<li><a class="dropdown-item" href="/release-process.html">Release Process</a></li>
<li><a class="dropdown-item" href="/security.html">Security</a></li>
</ul>
</li>
</ul>
<ul class="navbar-nav ml-auto">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="apacheFoundation" role="button"
data-bs-toggle="dropdown" aria-expanded="false">
Apache Software Foundation
</a>
<ul class="dropdown-menu" aria-labelledby="apacheFoundation">
<li><a class="dropdown-item" href="https://www.apache.org/">Apache Homepage</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/licenses/">License</a></li>
<li><a class="dropdown-item"
href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/security/">Security</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/events/current-event">Event</a></li>
</ul>
</li>
</ul>
</div>
</nav>
<div class="container">
<div class="row mt-4">
<div class="col-12 col-md-9">
<h2>Spark Release 0.9.0</h2>
<p>Spark 0.9.0 is a major release that adds significant new features. It updates Spark to Scala 2.10, simplifies high availability, and updates numerous components of the project. This release includes a first version of <a href="/graphx/">GraphX</a>, a powerful new framework for graph processing that comes with a library of standard algorithms. In addition, <a href="/streaming/">Spark Streaming</a> is now out of alpha, and includes significant optimizations and simplified high availability deployment.</p>
<p>You can download Spark 0.9.0 as either a
<a href="http://d3kbcqa49mib13.cloudfront.net/spark-0.9.0-incubating.tgz" onclick="trackOutboundLink(this, 'Release Download Links', 'cloudfront_spark-0.9.0-incubating.tgz'); return false;">source package</a>
(5 MB tgz) or a prebuilt package for
<a href="http://d3kbcqa49mib13.cloudfront.net/spark-0.9.0-incubating-bin-hadoop1.tgz" onclick="trackOutboundLink(this, 'Release Download Links', 'cloudfront_spark-0.9.0-incubating-bin-hadoop1.tgz'); return false;">Hadoop 1 / CDH3</a>,
<a href="http://d3kbcqa49mib13.cloudfront.net/spark-0.9.0-incubating-bin-cdh4.tgz" onclick="trackOutboundLink(this, 'Release Download Links', 'cloudfront_spark-0.9.0-incubating-bin-cdh4.tgz'); return false;">CDH4</a>, or
<a href="http://d3kbcqa49mib13.cloudfront.net/spark-0.9.0-incubating-bin-hadoop2.tgz" onclick="trackOutboundLink(this, 'Release Download Links', 'cloudfront_spark-0.9.0-incubating-bin-hadoop2.tgz'); return false;">Hadoop 2 / CDH5 / HDP2</a>
(160 MB tgz). Release signatures and checksums are available at the official <a href="http://www.apache.org/dist/incubator/spark/spark-0.9.0-incubating/">Apache download site</a>.</p>
<h3 id="scala-210-support">Scala 2.10 Support</h3>
<p>Spark now runs on Scala 2.10, letting users benefit from the language and library improvements in this version.</p>
<h3 id="configuration-system">Configuration System</h3>
<p>The new <a href="/docs/latest/api/core/index.html#org.apache.spark.SparkConf">SparkConf</a> class is now the preferred way to configure advanced settings on your SparkContext, though the previous Java system property method still works. SparkConf is especially useful in tests to make sure properties don’t stay set across tests.</p>
<h3 id="spark-streaming-improvements">Spark Streaming Improvements</h3>
<p>Spark Streaming is now out of alpha, and comes with simplified high availability and several optimizations.</p>
<ul>
<li>When running on a Spark standalone cluster with the <a href="/docs/0.9.0/spark-standalone.html#high-availability">standalone cluster high availability mode</a>, you can submit a Spark Streaming driver application to the cluster and have it automatically recovered if either the driver or the cluster master crashes.</li>
<li>Windowed operators have been sped up by 30-50%.</li>
<li>Spark Streaming’s input source plugins (e.g. for Twitter, Kafka and Flume) are now separate Maven modules, making it easier to pull in only the dependencies you need.</li>
<li>A new <a href="/docs/0.9.0/api/streaming/index.html#org.apache.spark.streaming.scheduler.StreamingListener">StreamingListener</a> interface has been added for monitoring statistics about the streaming computation.</li>
<li>A few aspects of the API have been improved:
<ul>
<li><code class="language-plaintext highlighter-rouge">DStream</code> and <code class="language-plaintext highlighter-rouge">PairDStream</code> classes have been moved from <code class="language-plaintext highlighter-rouge">org.apache.spark.streaming</code> to <code class="language-plaintext highlighter-rouge">org.apache.spark.streaming.dstream</code> to keep it consistent with <code class="language-plaintext highlighter-rouge">org.apache.spark.rdd.RDD</code>.</li>
<li><code class="language-plaintext highlighter-rouge">DStream.foreach</code> has been renamed to <code class="language-plaintext highlighter-rouge">foreachRDD</code> to make it explicit that it works for every RDD, not every element</li>
<li><code class="language-plaintext highlighter-rouge">StreamingContext.awaitTermination()</code> allows you to wait for context shutdown and catch any exception that occurs in the streaming computation.</li>
<li><code class="language-plaintext highlighter-rouge">StreamingContext.stop()</code> now allows stopping of StreamingContext without stopping the underlying SparkContext.</li>
</ul>
</li>
</ul>
<h3 id="graphx-alpha">GraphX Alpha</h3>
<p><a href="/graphx/">GraphX</a> is a new framework for graph processing that uses recent advances in graph-parallel computation. It lets you build a graph within a Spark program using the standard Spark operators, then process it with new graph operators that are optimized for distributed computation. It includes <a href="/docs/0.9.0/api/graphx/index.html#org.apache.spark.graphx.Graph">basic transformations</a>, a <a href="/docs/0.9.0/api/graphx/index.html#org.apache.spark.graphx.Pregel$">Pregel API</a> for iterative computation, and a standard library of <a href="/docs/0.9.0/api/graphx/index.html#org.apache.spark.graphx.util.GraphGenerators$">graph loaders</a> and <a href="/docs/0.9.0/api/graphx/index.html#org.apache.spark.graphx.lib.package">analytics algorithms</a>. By offering these features <em>within</em> the Spark engine, GraphX can significantly speed up processing pipelines compared to workflows that use different engines.</p>
<p>GraphX features in this release include:</p>
<ul>
<li>Building graphs from arbitrary Spark RDDs</li>
<li>Basic operations to transform graphs or extract subgraphs</li>
<li>An optimized Pregel API that takes advantage of graph partitioning and indexing</li>
<li>Standard algorithms including <a href="/docs/0.9.0/api/graphx/index.html#org.apache.spark.graphx.lib.PageRank$">PageRank</a>, <a href="/docs/0.9.0/api/graphx/index.html#org.apache.spark.graphx.lib.ConnectedComponents$">connected components</a>, <a href="/docs/0.9.0/api/graphx/index.html#org.apache.spark.graphx.lib.StronglyConnectedComponents$">strongly connected components</a>, <a href="/docs/0.9.0/api/graphx/index.html#org.apache.spark.graphx.lib.SVDPlusPlus$">SVD++</a>, and <a href="/docs/0.9.0/api/graphx/index.html#org.apache.spark.graphx.lib.TriangleCount$">triangle counting</a></li>
<li>Interactive use from the Spark shell</li>
</ul>
<p>GraphX is still marked as alpha in this first release, but we recommend that new users use it instead of the more limited Bagel API.</p>
<h3 id="mllib-improvements">MLlib Improvements</h3>
<ul>
<li>Spark’s machine learning library (MLlib) is now <a href="/docs/0.9.0/mllib-guide.html#using-mllib-in-python">available in Python</a>, where it operates on NumPy data (currently requires Python 2.7 and NumPy 1.7)</li>
<li>A new algorithm has been added for <a href="/docs/0.9.0/api/mllib/index.html#org.apache.spark.mllib.classification.NaiveBayes">Naive Bayes classification</a></li>
<li>Alternating Least Squares models can now be used to predict ratings for multiple items in parallel</li>
<li>MLlib’s documentation was expanded to include more examples in Scala, Java and Python</li>
</ul>
<h3 id="python-changes">Python Changes</h3>
<ul>
<li>Python users can now use MLlib (requires Python 2.7 and NumPy 1.7)</li>
<li>PySpark now shows the call sites of running jobs in the Spark application UI (http://&lt;driver&gt;:4040), making it easy to see which part of your code is running</li>
<li>IPython integration has been updated to work with newer versions</li>
</ul>
<h3 id="packaging">Packaging</h3>
<ul>
<li>Spark’s scripts have been organized into “bin” and “sbin” directories to make it easier to separate admin scripts from user ones and install Spark on standard Linux paths.</li>
<li>Log configuration has been improved so that Spark finds a default log4j.properties file if you don’t specify one.</li>
</ul>
<h3 id="core-engine">Core Engine</h3>
<ul>
<li>Spark’s standalone mode now supports submitting a driver program to run on the cluster instead of on the external machine submitting it. You can access this functionality through the <a href="/docs/0.9.0/spark-standalone.html#launching-applications-inside-the-cluster">org.apache.spark.deploy.Client</a> class.</li>
<li>Large reduce operations now automatically spill data to disk if it does not fit in memory.</li>
<li>Users of standalone mode can now limit how many cores an application will use by default if the application writer didn&#8217;t configure a limit. Previously, such applications took all available cores on the cluster.</li>
<li><code class="language-plaintext highlighter-rouge">spark-shell</code> now supports the <code class="language-plaintext highlighter-rouge">-i</code> option to run a script on startup.</li>
<li>New <code class="language-plaintext highlighter-rouge">histogram</code> and <code class="language-plaintext highlighter-rouge">countDistinctApprox</code> operators have been added for working with numerical data.</li>
<li>YARN mode now supports distributing extra files with the application, and several bugs have been fixed.</li>
</ul>
<h3 id="compatibility">Compatibility</h3>
<p>This release is compatible with the previous APIs in stable components, but several language versions and script locations have changed.</p>
<ul>
<li>Scala programs now need to use Scala 2.10 instead of 2.9.</li>
<li>Scripts such as <code class="language-plaintext highlighter-rouge">spark-shell</code> and <code class="language-plaintext highlighter-rouge">pyspark</code> have been moved into the <code class="language-plaintext highlighter-rouge">bin</code> folder, while administrative scripts to start and stop standalone clusters have been moved into <code class="language-plaintext highlighter-rouge">sbin</code>.</li>
<li>Spark Streaming’s API has been changed to move external input sources into separate modules, <code class="language-plaintext highlighter-rouge">DStream</code> and <code class="language-plaintext highlighter-rouge">PairDStream</code> has been moved to package <code class="language-plaintext highlighter-rouge">org.apache.spark.streaming.dstream</code> and <code class="language-plaintext highlighter-rouge">DStream.foreach</code> has been renamed to <code class="language-plaintext highlighter-rouge">foreachRDD</code>. We expect the current API to be stable now that Spark Streaming is out of alpha.</li>
<li>While the old method of configuring Spark through Java system properties still works, we recommend that users update to the new <code class="language-plaintext highlighter-rouge">SparkConf</code> class, which is easier to inspect and use.</li>
</ul>
<p>We expect all of the current APIs and script locations in Spark 0.9 to remain stable when we release Spark 1.0. We wanted to make these updates early to give users a chance to switch to the new API.</p>
<h3 id="contributors">Contributors</h3>
<p>The following developers contributed to this release:</p>
<ul>
<li>Andrew Ash &#8211; documentation improvements</li>
<li>Pierre Borckmans &#8211; documentation fix</li>
<li>Russell Cardullo &#8211; graphite sink for metrics</li>
<li>Evan Chan &#8211; local:// URI feature</li>
<li>Vadim Chekan &#8211; bug fix</li>
<li>Lian Cheng &#8211; refactoring and code clean-up in several locations, bug fixes</li>
<li>Ewen Cheslack-Postava &#8211; Spark EC2 and PySpark improvements</li>
<li>Mosharaf Chowdhury &#8211; optimized broadcast</li>
<li>Dan Crankshaw &#8211; GraphX contributions</li>
<li>Haider Haidi &#8211; documentation fix</li>
<li>Frank Dai &#8211; Naive Bayes classifier in MLlib, documentation improvements</li>
<li>Tathagata Das &#8211; new operators, fixes, and improvements to Spark Streaming (lead)</li>
<li>Ankur Dave &#8211; GraphX contributions</li>
<li>Henry Davidge &#8211; warning for large tasks</li>
<li>Aaron Davidson &#8211; shuffle file consolidation, H/A mode for standalone scheduler, various improvements and fixes</li>
<li>Kyle Ellrott &#8211; GraphX contributions</li>
<li>Hossein Falaki &#8211; new statistical operators, Scala and Python examples in MLlib</li>
<li>Harvey Feng &#8211; hadoop file optimizations and YARN integration</li>
<li>Ali Ghodsi &#8211; support for SIMR</li>
<li>Joseph E. Gonzalez &#8211; GraphX contributions</li>
<li>Thomas Graves &#8211; fixes and improvements for YARN support (lead)</li>
<li>Rong Gu &#8211; documentation fix</li>
<li>Stephen Haberman &#8211; bug fixes</li>
<li>Walker Hamilton &#8211; bug fix</li>
<li>Mark Hamstra &#8211; scheduler improvements and fixes, build fixes</li>
<li>Damien Hardy &#8211; Debian build fix</li>
<li>Nathan Howell &#8211; sbt upgrade</li>
<li>Grace Huang &#8211; improvements to metrics code</li>
<li>Shane Huang &#8211; separation of admin and user scripts</li>
<li>Prabeesh K &#8211; MQTT integration for Spark Streaming and code fix</li>
<li>Holden Karau &#8211; sbt build improvements and Java API extensions</li>
<li>KarthikTunga &#8211; bug fix</li>
<li>Grega Kespret &#8211; bug fix</li>
<li>Marek Kolodziej &#8211; optimized random number generator</li>
<li>Jey Kottalam &#8211; EC2 script improvements</li>
<li>Du Li &#8211; bug fixes</li>
<li>Haoyuan Li &#8211; tachyon support in EC2</li>
<li>LiGuoqiang &#8211; fixes to build and YARN integration</li>
<li>Raymond Liu &#8211; build improvement and various fixes for YARN support</li>
<li>George Loentiev &#8211; Maven build fixes</li>
<li>Akihiro Matsukawa &#8211; GraphX contributions</li>
<li>David McCauley &#8211; improvements to json endpoint</li>
<li>Mike &#8211; bug fixes</li>
<li>Fabrizio (Misto) Milo &#8211; bug fix</li>
<li>Mridul Muralidharan &#8211; speculation improvements, several bug fixes</li>
<li>Tor Myklebust &#8211; Python mllib bindings, instrumentation for task serialization</li>
<li>Sundeep Narravula &#8211; bug fix</li>
<li>Binh Nguyen &#8211; Java API improvements and version upgrades</li>
<li>Adam Novak &#8211; bug fix</li>
<li>Andrew Or &#8211; external sorting</li>
<li>Kay Ousterhout &#8211; several bug fixes and improvements to Spark scheduler</li>
<li>Sean Owen &#8211; style fixes</li>
<li>Nick Pentreath &#8211; ALS implicit feedback algorithm</li>
<li>Pillis &#8211; <code class="language-plaintext highlighter-rouge">Vector.random()</code> method</li>
<li>Imran Rashid &#8211; bug fix</li>
<li>Ahir Reddy &#8211; support for SIMR</li>
<li>Luca Rosellini &#8211; script loading for Scala shell</li>
<li>Josh Rosen &#8211; fixes, clean-up, and extensions to scala and Java API’s</li>
<li>Henry Saputra &#8211; style improvements and clean-up</li>
<li>Andre Schumacher &#8211; Python improvements and bug fixes</li>
<li>Jerry Shao &#8211; multi-user support, various fixes and improvements</li>
<li>Prashant Sharma &#8211; Scala 2.10 support, configuration system, several smaller fixes</li>
<li>Shiyun &#8211; style fix</li>
<li>Wangda Tan &#8211; UI improvement and bug fixes</li>
<li>Matthew Taylor &#8211; bug fix</li>
<li>Jyun-Fan Tsai &#8211; documentation fix</li>
<li>Takuya Ueshin &#8211; bug fix</li>
<li>Shivaram Venkataraman &#8211; sbt build optimization, EC2 improvements, Java and Python API</li>
<li>Jianping J Wang &#8211; GraphX contributions</li>
<li>Martin Weindel &#8211; build fix</li>
<li>Patrick Wendell &#8211; standalone driver submission, various fixes, release manager</li>
<li>Neal Wiggins &#8211; bug fix</li>
<li>Andrew Xia &#8211; bug fixes and code cleanup</li>
<li>Reynold Xin &#8211; GraphX contributions, task killing, various fixes, improvements and optimizations</li>
<li>Dong Yan &#8211; bug fix</li>
<li>Haitao Yao &#8211; bug fix</li>
<li>Xusen Yin &#8211; bug fix</li>
<li>Fengdong Yu &#8211; documentation fixes</li>
<li>Matei Zaharia &#8211; new configuration system, Python MLlib bindings, scheduler improvements, various fixes and optimizations</li>
<li>Wu Zeming &#8211; bug fix</li>
<li>Nan Zhu &#8211; documentation improvements</li>
</ul>
<p><em>Thanks to everyone who contributed!</em></p>
<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>
</div>
<div class="col-12 col-md-3">
<div class="news" style="margin-bottom: 20px;">
<h5>Latest News</h5>
<ul class="list-unstyled">
<li><a href="/news/spark-3-4-3-released.html">Spark 3.4.3 released</a>
<span class="small">(Apr 18, 2024)</span></li>
<li><a href="/news/spark-3-5-1-released.html">Spark 3.5.1 released</a>
<span class="small">(Feb 23, 2024)</span></li>
<li><a href="/news/spark-3-3-4-released.html">Spark 3.3.4 released</a>
<span class="small">(Dec 16, 2023)</span></li>
<li><a href="/news/spark-3-4-2-released.html">Spark 3.4.2 released</a>
<span class="small">(Nov 30, 2023)</span></li>
</ul>
<p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
</div>
<div style="text-align:center; margin-bottom: 20px;">
<a href="https://www.apache.org/events/current-event.html">
<img src="https://www.apache.org/events/current-event-234x60.png" alt="Current Apache Software Foundation event" style="max-width: 100%;">
</a>
</div>
<div class="hidden-xs hidden-sm">
<a href="/downloads.html" class="btn btn-cta btn-lg d-grid" style="margin-bottom: 30px;">
Download Spark
</a>
<p style="font-size: 16px; font-weight: 500; color: #555;">
Built-in Libraries:
</p>
<ul class="list-none">
<li><a href="/sql/">SQL and DataFrames</a></li>
<li><a href="/streaming/">Spark Streaming</a></li>
<li><a href="/mllib/">MLlib (machine learning)</a></li>
<li><a href="/graphx/">GraphX (graph)</a></li>
</ul>
<a href="/third-party-projects.html">Third-Party Projects</a>
</div>
</div>
</div>
<footer class="small">
<hr>
Apache Spark, Spark, Apache, the Apache feather logo, and the Apache Spark project logo are either registered
trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
See guidance on use of Apache Spark <a href="/trademarks.html">trademarks</a>.
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
Copyright &copy; 2018 The Apache Software Foundation, Licensed under the
<a href="https://www.apache.org/licenses/">Apache License, Version 2.0</a>.
</footer>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"
integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
crossorigin="anonymous"></script>
<script src="https://code.jquery.com/jquery.js"></script>
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>
</body>
</html>