<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Spark Core &#8212; PySpark 3.2.3 documentation</title>
<link rel="stylesheet" href="../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
<link rel="stylesheet"
href="../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet"
href="../_static/vendor/open-sans_all/1.44.1/index.css">
<link rel="stylesheet"
href="../_static/vendor/lato_latin-ext/1.44.1/index.css">
<link rel="stylesheet" href="../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../_static/js/index.3da636dd464baa7582d2.js">
<script id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/language_data.js"></script>
<script src="../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/reference/pyspark.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="pyspark.SparkContext" href="api/pyspark.SparkContext.html" />
<link rel="prev" title="Saveable" href="api/pyspark.mllib.util.Saveable.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en" />
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main">
<div class="container-xl">
<a class="navbar-brand" href="../index.html">
<img src="../_static/spark-logo-reverse.png" class="logo" alt="logo" />
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
<ul id="navbar-main-elements" class="navbar-nav mr-auto">
<li class="nav-item ">
<a class="nav-link" href="../getting_started/index.html">Getting Started</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../user_guide/index.html">User Guide</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="index.html">API Reference</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../development/index.html">Development</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../migration_guide/index.html">Migration Guide</a>
</li>
</ul>
<ul class="navbar-nav">
</ul>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="">
<a href="pyspark.sql.html">Spark SQL</a>
</li>
<li class="">
<a href="pyspark.pandas/index.html">Pandas API on Spark</a>
</li>
<li class="">
<a href="pyspark.ss.html">Structured Streaming</a>
</li>
<li class="">
<a href="pyspark.ml.html">MLlib (DataFrame-based)</a>
</li>
<li class="">
<a href="pyspark.streaming.html">Spark Streaming</a>
</li>
<li class="">
<a href="pyspark.mllib.html">MLlib (RDD-based)</a>
</li>
<li class="active">
<a href="">Spark Core</a>
</li>
<li class="">
<a href="pyspark.resource.html">Resource Management</a>
</li>
</ul>
</nav>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="tocsection onthispage pt-5 pb-3">
<i class="fas fa-list"></i> On this page
</div>
<nav id="bd-toc-nav">
<ul class="nav section-nav flex-column">
<li class="nav-item toc-entry toc-h2">
<a href="#public-classes" class="nav-link">Public Classes</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#spark-context-apis" class="nav-link">Spark Context APIs</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#rdd-apis" class="nav-link">RDD APIs</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#broadcast-and-accumulator" class="nav-link">Broadcast and Accumulator</a>
</li>
<li class="nav-item toc-entry toc-h2">
<a href="#management" class="nav-link">Management</a>
</li>
</ul>
</nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="spark-core">
<h1>Spark Core<a class="headerlink" href="#spark-core" title="Permalink to this headline"></a></h1>
<div class="section" id="public-classes">
<h2>Public Classes<a class="headerlink" href="#public-classes" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.html#pyspark.SparkContext" title="pyspark.SparkContext"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext</span></code></a>([master, appName, sparkHome, …])</p></td>
<td><p>Main entry point for Spark functionality.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.html#pyspark.RDD" title="pyspark.RDD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD</span></code></a>(jrdd, ctx[, jrdd_deserializer])</p></td>
<td><p>A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.Broadcast.html#pyspark.Broadcast" title="pyspark.Broadcast"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Broadcast</span></code></a>([sc, value, pickle_registry, …])</p></td>
<td><p>A broadcast variable created with <a class="reference internal" href="api/pyspark.SparkContext.broadcast.html#pyspark.SparkContext.broadcast" title="pyspark.SparkContext.broadcast"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SparkContext.broadcast()</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.Accumulator.html#pyspark.Accumulator" title="pyspark.Accumulator"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Accumulator</span></code></a>(aid, value, accum_param)</p></td>
<td><p>A shared variable that can be accumulated, i.e., has a commutative and associative “add” operation.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.AccumulatorParam.html#pyspark.AccumulatorParam" title="pyspark.AccumulatorParam"><code class="xref py py-obj docutils literal notranslate"><span class="pre">AccumulatorParam</span></code></a></p></td>
<td><p>Helper object that defines how to accumulate values of a given type.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkConf.html#pyspark.SparkConf" title="pyspark.SparkConf"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf</span></code></a>([loadDefaults, _jvm, _jconf])</p></td>
<td><p>Configuration for a Spark application.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkFiles.html#pyspark.SparkFiles" title="pyspark.SparkFiles"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkFiles</span></code></a>()</p></td>
<td><p>Resolves paths to files added through <a class="reference internal" href="api/pyspark.SparkContext.addFile.html#pyspark.SparkContext.addFile" title="pyspark.SparkContext.addFile"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SparkContext.addFile()</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.StorageLevel.html#pyspark.StorageLevel" title="pyspark.StorageLevel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageLevel</span></code></a>(useDisk, useMemory, useOffHeap, …)</p></td>
<td><p>Flags for controlling the storage of an RDD.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.TaskContext.html#pyspark.TaskContext" title="pyspark.TaskContext"><code class="xref py py-obj docutils literal notranslate"><span class="pre">TaskContext</span></code></a></p></td>
<td><p>Contextual information about a task which can be read or mutated during execution.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDDBarrier.html#pyspark.RDDBarrier" title="pyspark.RDDBarrier"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDDBarrier</span></code></a>(rdd)</p></td>
<td><p>Wraps an RDD in a barrier stage, which forces Spark to launch tasks of this stage together.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.html#pyspark.BarrierTaskContext" title="pyspark.BarrierTaskContext"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext</span></code></a></p></td>
<td><p>A <a class="reference internal" href="api/pyspark.TaskContext.html#pyspark.TaskContext" title="pyspark.TaskContext"><code class="xref py py-class docutils literal notranslate"><span class="pre">TaskContext</span></code></a> with extra contextual info and tooling for tasks in a barrier stage.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskInfo.html#pyspark.BarrierTaskInfo" title="pyspark.BarrierTaskInfo"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskInfo</span></code></a>(address)</p></td>
<td><p>Carries all task information of a barrier task.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.InheritableThread.html#pyspark.InheritableThread" title="pyspark.InheritableThread"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InheritableThread</span></code></a>(target, *args, **kwargs)</p></td>
<td><p>Thread that is recommended to be used in PySpark instead of <code class="xref py py-class docutils literal notranslate"><span class="pre">threading.Thread</span></code> when the pinned thread mode is enabled.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.util.VersionUtils.html#pyspark.util.VersionUtils" title="pyspark.util.VersionUtils"><code class="xref py py-obj docutils literal notranslate"><span class="pre">util.VersionUtils</span></code></a></p></td>
<td><p>Provides a utility method to determine Spark versions from a given input string.</p></td>
</tr>
</tbody>
</table>
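<p>A minimal, illustrative sketch of how several of the classes above fit together (not part of the generated reference; the application name, master URL, and variable names are arbitrary examples):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
from pyspark import SparkConf, SparkContext, StorageLevel

# Configure and obtain the singleton SparkContext (local mode, for illustration only).
conf = SparkConf().setAppName("core-example").setMaster("local[2]")
sc = SparkContext.getOrCreate(conf)

rdd = sc.parallelize(range(10))        # RDD: the basic distributed collection
factor = sc.broadcast(3)               # Broadcast: read-only value shared with executors
counter = sc.accumulator(0)            # Accumulator: add-only shared variable

def scale(x):
    counter.add(1)                     # update the accumulator from a task
    return x * factor.value            # read the broadcast value on executors

scaled = rdd.map(scale).persist(StorageLevel.MEMORY_ONLY)
print(scaled.collect())                # [0, 3, 6, ..., 27]
print(counter.value)                   # 10, visible on the driver after the action
sc.stop()
</pre></div></div>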
</div>
<div class="section" id="spark-context-apis">
<h2>Spark Context APIs<a class="headerlink" href="#spark-context-apis" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.PACKAGE_EXTENSIONS.html#pyspark.SparkContext.PACKAGE_EXTENSIONS" title="pyspark.SparkContext.PACKAGE_EXTENSIONS"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.PACKAGE_EXTENSIONS</span></code></a></p></td>
<td><p>File extensions (.zip, .egg, .jar) recognized as package dependencies.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.accumulator.html#pyspark.SparkContext.accumulator" title="pyspark.SparkContext.accumulator"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.accumulator</span></code></a>(value[, accum_param])</p></td>
<td><p>Create an <a class="reference internal" href="api/pyspark.Accumulator.html#pyspark.Accumulator" title="pyspark.Accumulator"><code class="xref py py-class docutils literal notranslate"><span class="pre">Accumulator</span></code></a> with the given initial value, using a given <a class="reference internal" href="api/pyspark.AccumulatorParam.html#pyspark.AccumulatorParam" title="pyspark.AccumulatorParam"><code class="xref py py-class docutils literal notranslate"><span class="pre">AccumulatorParam</span></code></a> helper object to define how to add values of the data type if provided.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.addFile.html#pyspark.SparkContext.addFile" title="pyspark.SparkContext.addFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.addFile</span></code></a>(path[, recursive])</p></td>
<td><p>Add a file to be downloaded with this Spark job on every node.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.addPyFile.html#pyspark.SparkContext.addPyFile" title="pyspark.SparkContext.addPyFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.addPyFile</span></code></a>(path)</p></td>
<td><p>Add a .py or .zip dependency for all tasks to be executed on this SparkContext in the future.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.applicationId.html#pyspark.SparkContext.applicationId" title="pyspark.SparkContext.applicationId"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.applicationId</span></code></a></p></td>
<td><p>A unique identifier for the Spark application.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.binaryFiles.html#pyspark.SparkContext.binaryFiles" title="pyspark.SparkContext.binaryFiles"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.binaryFiles</span></code></a>(path[, minPartitions])</p></td>
<td><p>Read a directory of binary files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI as a byte array.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.binaryRecords.html#pyspark.SparkContext.binaryRecords" title="pyspark.SparkContext.binaryRecords"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.binaryRecords</span></code></a>(path, recordLength)</p></td>
<td><p>Load data from a flat binary file, assuming each record is a set of numbers with the specified numerical format (see ByteBuffer), and the number of bytes per record is constant.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.broadcast.html#pyspark.SparkContext.broadcast" title="pyspark.SparkContext.broadcast"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.broadcast</span></code></a>(value)</p></td>
<td><p>Broadcast a read-only variable to the cluster, returning a <a class="reference internal" href="api/pyspark.Broadcast.html#pyspark.Broadcast" title="pyspark.Broadcast"><code class="xref py py-class docutils literal notranslate"><span class="pre">Broadcast</span></code></a> object for reading it in distributed functions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.cancelAllJobs.html#pyspark.SparkContext.cancelAllJobs" title="pyspark.SparkContext.cancelAllJobs"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.cancelAllJobs</span></code></a>()</p></td>
<td><p>Cancel all jobs that have been scheduled or are running.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.cancelJobGroup.html#pyspark.SparkContext.cancelJobGroup" title="pyspark.SparkContext.cancelJobGroup"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.cancelJobGroup</span></code></a>(groupId)</p></td>
<td><p>Cancel active jobs for the specified group.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.defaultMinPartitions.html#pyspark.SparkContext.defaultMinPartitions" title="pyspark.SparkContext.defaultMinPartitions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.defaultMinPartitions</span></code></a></p></td>
<td><p>Default minimum number of partitions for Hadoop RDDs when not given by the user.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.defaultParallelism.html#pyspark.SparkContext.defaultParallelism" title="pyspark.SparkContext.defaultParallelism"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.defaultParallelism</span></code></a></p></td>
<td><p>Default level of parallelism to use when not given by the user (e.g. for reduce tasks).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.dump_profiles.html#pyspark.SparkContext.dump_profiles" title="pyspark.SparkContext.dump_profiles"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.dump_profiles</span></code></a>(path)</p></td>
<td><p>Dump the profile stats into directory <cite>path</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.emptyRDD.html#pyspark.SparkContext.emptyRDD" title="pyspark.SparkContext.emptyRDD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.emptyRDD</span></code></a>()</p></td>
<td><p>Create an RDD that has no partitions or elements.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.getCheckpointDir.html#pyspark.SparkContext.getCheckpointDir" title="pyspark.SparkContext.getCheckpointDir"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.getCheckpointDir</span></code></a>()</p></td>
<td><p>Return the directory where RDDs are checkpointed.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.getConf.html#pyspark.SparkContext.getConf" title="pyspark.SparkContext.getConf"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.getConf</span></code></a>()</p></td>
<td><p>Return a copy of this SparkContext’s configuration as a <cite>SparkConf</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.getLocalProperty.html#pyspark.SparkContext.getLocalProperty" title="pyspark.SparkContext.getLocalProperty"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.getLocalProperty</span></code></a>(key)</p></td>
<td><p>Get a local property set in this thread, or None if it is missing.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.getOrCreate.html#pyspark.SparkContext.getOrCreate" title="pyspark.SparkContext.getOrCreate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.getOrCreate</span></code></a>([conf])</p></td>
<td><p>Get or instantiate a SparkContext and register it as a singleton object.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.hadoopFile.html#pyspark.SparkContext.hadoopFile" title="pyspark.SparkContext.hadoopFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.hadoopFile</span></code></a>(path, …[, …])</p></td>
<td><p>Read an ‘old’ Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.hadoopRDD.html#pyspark.SparkContext.hadoopRDD" title="pyspark.SparkContext.hadoopRDD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.hadoopRDD</span></code></a>(inputFormatClass, …)</p></td>
<td><p>Read an ‘old’ Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.newAPIHadoopFile.html#pyspark.SparkContext.newAPIHadoopFile" title="pyspark.SparkContext.newAPIHadoopFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.newAPIHadoopFile</span></code></a>(path, …[, …])</p></td>
<td><p>Read a ‘new API’ Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.newAPIHadoopRDD.html#pyspark.SparkContext.newAPIHadoopRDD" title="pyspark.SparkContext.newAPIHadoopRDD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.newAPIHadoopRDD</span></code></a>(…[, …])</p></td>
<td><p>Read a ‘new API’ Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.parallelize.html#pyspark.SparkContext.parallelize" title="pyspark.SparkContext.parallelize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.parallelize</span></code></a>(c[, numSlices])</p></td>
<td><p>Distribute a local Python collection to form an RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.pickleFile.html#pyspark.SparkContext.pickleFile" title="pyspark.SparkContext.pickleFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.pickleFile</span></code></a>(name[, minPartitions])</p></td>
<td><p>Load an RDD previously saved using <a class="reference internal" href="api/pyspark.RDD.saveAsPickleFile.html#pyspark.RDD.saveAsPickleFile" title="pyspark.RDD.saveAsPickleFile"><code class="xref py py-meth docutils literal notranslate"><span class="pre">RDD.saveAsPickleFile()</span></code></a> method.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.range.html#pyspark.SparkContext.range" title="pyspark.SparkContext.range"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.range</span></code></a>(start[, end, step, numSlices])</p></td>
<td><p>Create a new RDD of int containing elements from <cite>start</cite> to <cite>end</cite> (exclusive), increased by <cite>step</cite> every element.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.resources.html#pyspark.SparkContext.resources" title="pyspark.SparkContext.resources"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.resources</span></code></a></p></td>
<td><p>Return the resources allocated to this SparkContext as a dictionary of resource name to <cite>ResourceInformation</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.runJob.html#pyspark.SparkContext.runJob" title="pyspark.SparkContext.runJob"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.runJob</span></code></a>(rdd, partitionFunc[, …])</p></td>
<td><p>Executes the given partitionFunc on the specified set of partitions, returning the result as an array of elements.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.sequenceFile.html#pyspark.SparkContext.sequenceFile" title="pyspark.SparkContext.sequenceFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.sequenceFile</span></code></a>(path[, keyClass, …])</p></td>
<td><p>Read a Hadoop SequenceFile with arbitrary key and value Writable class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.setCheckpointDir.html#pyspark.SparkContext.setCheckpointDir" title="pyspark.SparkContext.setCheckpointDir"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.setCheckpointDir</span></code></a>(dirName)</p></td>
<td><p>Set the directory under which RDDs are going to be checkpointed.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.setJobDescription.html#pyspark.SparkContext.setJobDescription" title="pyspark.SparkContext.setJobDescription"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.setJobDescription</span></code></a>(value)</p></td>
<td><p>Set a human readable description of the current job.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.setJobGroup.html#pyspark.SparkContext.setJobGroup" title="pyspark.SparkContext.setJobGroup"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.setJobGroup</span></code></a>(groupId, description)</p></td>
<td><p>Assigns a group ID to all the jobs started by this thread until the group ID is set to a different value or cleared.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.setLocalProperty.html#pyspark.SparkContext.setLocalProperty" title="pyspark.SparkContext.setLocalProperty"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.setLocalProperty</span></code></a>(key, value)</p></td>
<td><p>Set a local property that affects jobs submitted from this thread, such as the Spark fair scheduler pool.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.setLogLevel.html#pyspark.SparkContext.setLogLevel" title="pyspark.SparkContext.setLogLevel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.setLogLevel</span></code></a>(logLevel)</p></td>
<td><p>Control the log level; this overrides any user-defined log settings.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.setSystemProperty.html#pyspark.SparkContext.setSystemProperty" title="pyspark.SparkContext.setSystemProperty"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.setSystemProperty</span></code></a>(key, value)</p></td>
<td><p>Set a Java system property, such as spark.executor.memory.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.show_profiles.html#pyspark.SparkContext.show_profiles" title="pyspark.SparkContext.show_profiles"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.show_profiles</span></code></a>()</p></td>
<td><p>Print the profile stats to stdout.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.sparkUser.html#pyspark.SparkContext.sparkUser" title="pyspark.SparkContext.sparkUser"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.sparkUser</span></code></a>()</p></td>
<td><p>Get SPARK_USER for the user who is running this SparkContext.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.startTime.html#pyspark.SparkContext.startTime" title="pyspark.SparkContext.startTime"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.startTime</span></code></a></p></td>
<td><p>Return the epoch time when the Spark Context was started.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.statusTracker.html#pyspark.SparkContext.statusTracker" title="pyspark.SparkContext.statusTracker"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.statusTracker</span></code></a>()</p></td>
<td><p>Return a <code class="xref py py-class docutils literal notranslate"><span class="pre">StatusTracker</span></code> object.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.stop.html#pyspark.SparkContext.stop" title="pyspark.SparkContext.stop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.stop</span></code></a>()</p></td>
<td><p>Shut down the SparkContext.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.textFile.html#pyspark.SparkContext.textFile" title="pyspark.SparkContext.textFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.textFile</span></code></a>(name[, minPartitions, …])</p></td>
<td><p>Read a text file from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI, and return it as an RDD of Strings.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.uiWebUrl.html#pyspark.SparkContext.uiWebUrl" title="pyspark.SparkContext.uiWebUrl"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.uiWebUrl</span></code></a></p></td>
<td><p>Return the URL of the SparkUI instance started by this SparkContext.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.union.html#pyspark.SparkContext.union" title="pyspark.SparkContext.union"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.union</span></code></a>(rdds)</p></td>
<td><p>Build the union of a list of RDDs.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkContext.version.html#pyspark.SparkContext.version" title="pyspark.SparkContext.version"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.version</span></code></a></p></td>
<td><p>The version of Spark on which this application is running.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkContext.wholeTextFiles.html#pyspark.SparkContext.wholeTextFiles" title="pyspark.SparkContext.wholeTextFiles"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkContext.wholeTextFiles</span></code></a>(path[, …])</p></td>
<td><p>Read a directory of text files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI.</p></td>
</tr>
</tbody>
</table>
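<p>A short sketch exercising a few of the SparkContext methods above (illustrative only; it assumes an existing SparkContext and uses small in-memory data rather than real files):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
from pyspark import SparkContext

sc = SparkContext.getOrCreate()
sc.setLogLevel("WARN")                          # reduce console noise

nums = sc.range(0, 5, numSlices=2)              # RDD of ints: [0, 1, 2, 3, 4]
evens = sc.parallelize([0, 2, 4])               # distribute a local Python collection
combined = sc.union([nums, evens])              # union of a list of RDDs
print(sorted(combined.collect()))               # [0, 0, 1, 2, 2, 3, 4, 4]

# lines = sc.textFile("hdfs:///path/to/data.txt")   # hypothetical path: RDD of strings
print(sc.version, sc.applicationId, sc.defaultParallelism)
sc.stop()
</pre></div></div>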
</div>
<div class="section" id="rdd-apis">
<h2>RDD APIs<a class="headerlink" href="#rdd-apis" title="Permalink to this headline"></a></h2>
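<p>Before the full listing below, a brief illustrative sketch of common RDD transformations and actions (not part of the generated reference; the sample data and lambda functions are arbitrary examples):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
from pyspark import SparkContext

sc = SparkContext.getOrCreate()

pairs = sc.parallelize([("a", 1), ("b", 2), ("a", 3)])
sums = pairs.reduceByKey(lambda x, y: x + y)     # transformation: merge values per key
names = sc.parallelize([("a", "alpha"), ("b", "beta")])
joined = sums.join(names)                        # inner join on matching keys

print(sorted(joined.collect()))                  # [('a', (4, 'alpha')), ('b', (2, 'beta'))]
print(dict(pairs.countByKey()))                  # {'a': 2, 'b': 1}
sc.stop()
</pre></div></div>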
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.aggregate.html#pyspark.RDD.aggregate" title="pyspark.RDD.aggregate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.aggregate</span></code></a>(zeroValue, seqOp, combOp)</p></td>
<td><p>Aggregate the elements of each partition, and then the results for all the partitions, using given combine functions and a neutral “zero value.”</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.aggregateByKey.html#pyspark.RDD.aggregateByKey" title="pyspark.RDD.aggregateByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.aggregateByKey</span></code></a>(zeroValue, seqFunc, combFunc)</p></td>
<td><p>Aggregate the values of each key, using given combine functions and a neutral “zero value”.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.barrier.html#pyspark.RDD.barrier" title="pyspark.RDD.barrier"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.barrier</span></code></a>()</p></td>
<td><p>Marks the current stage as a barrier stage, where Spark must launch all tasks together.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.cache.html#pyspark.RDD.cache" title="pyspark.RDD.cache"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.cache</span></code></a>()</p></td>
<td><p>Persist this RDD with the default storage level (<cite>MEMORY_ONLY</cite>).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.cartesian.html#pyspark.RDD.cartesian" title="pyspark.RDD.cartesian"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.cartesian</span></code></a>(other)</p></td>
<td><p>Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of elements <code class="docutils literal notranslate"><span class="pre">(a,</span> <span class="pre">b)</span></code> where <code class="docutils literal notranslate"><span class="pre">a</span></code> is in <cite>self</cite> and <code class="docutils literal notranslate"><span class="pre">b</span></code> is in <cite>other</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.checkpoint.html#pyspark.RDD.checkpoint" title="pyspark.RDD.checkpoint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.checkpoint</span></code></a>()</p></td>
<td><p>Mark this RDD for checkpointing.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.coalesce.html#pyspark.RDD.coalesce" title="pyspark.RDD.coalesce"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.coalesce</span></code></a>(numPartitions[, shuffle])</p></td>
<td><p>Return a new RDD that is reduced into <cite>numPartitions</cite> partitions.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.cogroup.html#pyspark.RDD.cogroup" title="pyspark.RDD.cogroup"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.cogroup</span></code></a>(other[, numPartitions])</p></td>
<td><p>For each key k in <cite>self</cite> or <cite>other</cite>, return a resulting RDD that contains a tuple with the list of values for that key in <cite>self</cite> as well as <cite>other</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.collect.html#pyspark.RDD.collect" title="pyspark.RDD.collect"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.collect</span></code></a>()</p></td>
<td><p>Return a list that contains all of the elements in this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.collectAsMap.html#pyspark.RDD.collectAsMap" title="pyspark.RDD.collectAsMap"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.collectAsMap</span></code></a>()</p></td>
<td><p>Return the key-value pairs in this RDD to the master as a dictionary.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.collectWithJobGroup.html#pyspark.RDD.collectWithJobGroup" title="pyspark.RDD.collectWithJobGroup"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.collectWithJobGroup</span></code></a>(groupId, description)</p></td>
<td><p>When collecting an RDD, use this method to specify a job group.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.combineByKey.html#pyspark.RDD.combineByKey" title="pyspark.RDD.combineByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.combineByKey</span></code></a>(createCombiner, mergeValue, …)</p></td>
<td><p>Generic function to combine the elements for each key using a custom set of aggregation functions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.context.html#pyspark.RDD.context" title="pyspark.RDD.context"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.context</span></code></a></p></td>
<td><p>The <a class="reference internal" href="api/pyspark.SparkContext.html#pyspark.SparkContext" title="pyspark.SparkContext"><code class="xref py py-class docutils literal notranslate"><span class="pre">SparkContext</span></code></a> that this RDD was created on.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.count.html#pyspark.RDD.count" title="pyspark.RDD.count"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.count</span></code></a>()</p></td>
<td><p>Return the number of elements in this RDD.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.countApprox.html#pyspark.RDD.countApprox" title="pyspark.RDD.countApprox"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.countApprox</span></code></a>(timeout[, confidence])</p></td>
<td><p>Approximate version of count() that returns a potentially incomplete result within a timeout, even if not all tasks have finished.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.countApproxDistinct.html#pyspark.RDD.countApproxDistinct" title="pyspark.RDD.countApproxDistinct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.countApproxDistinct</span></code></a>([relativeSD])</p></td>
<td><p>Return approximate number of distinct elements in the RDD.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.countByKey.html#pyspark.RDD.countByKey" title="pyspark.RDD.countByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.countByKey</span></code></a>()</p></td>
<td><p>Count the number of elements for each key, and return the result to the master as a dictionary.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.countByValue.html#pyspark.RDD.countByValue" title="pyspark.RDD.countByValue"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.countByValue</span></code></a>()</p></td>
<td><p>Return the count of each unique value in this RDD as a dictionary of (value, count) pairs.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.distinct.html#pyspark.RDD.distinct" title="pyspark.RDD.distinct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.distinct</span></code></a>([numPartitions])</p></td>
<td><p>Return a new RDD containing the distinct elements in this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.filter.html#pyspark.RDD.filter" title="pyspark.RDD.filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.filter</span></code></a>(f)</p></td>
<td><p>Return a new RDD containing only the elements that satisfy a predicate.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.first.html#pyspark.RDD.first" title="pyspark.RDD.first"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.first</span></code></a>()</p></td>
<td><p>Return the first element in this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.flatMap.html#pyspark.RDD.flatMap" title="pyspark.RDD.flatMap"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.flatMap</span></code></a>(f[, preservesPartitioning])</p></td>
<td><p>Return a new RDD by first applying a function to all elements of this RDD, and then flattening the results.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.flatMapValues.html#pyspark.RDD.flatMapValues" title="pyspark.RDD.flatMapValues"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.flatMapValues</span></code></a>(f)</p></td>
<td><p>Pass each value in the key-value pair RDD through a flatMap function without changing the keys; this also retains the original RDD’s partitioning.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.fold.html#pyspark.RDD.fold" title="pyspark.RDD.fold"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.fold</span></code></a>(zeroValue, op)</p></td>
<td><p>Aggregate the elements of each partition, and then the results for all the partitions, using a given associative function and a neutral “zero value.”</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.foldByKey.html#pyspark.RDD.foldByKey" title="pyspark.RDD.foldByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.foldByKey</span></code></a>(zeroValue, func[, …])</p></td>
<td><p>Merge the values for each key using an associative function “func” and a neutral “zeroValue” which may be added to the result an arbitrary number of times, and must not change the result (e.g., 0 for addition, or 1 for multiplication.).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.foreach.html#pyspark.RDD.foreach" title="pyspark.RDD.foreach"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.foreach</span></code></a>(f)</p></td>
<td><p>Applies a function to all elements of this RDD.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.foreachPartition.html#pyspark.RDD.foreachPartition" title="pyspark.RDD.foreachPartition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.foreachPartition</span></code></a>(f)</p></td>
<td><p>Applies a function to each partition of this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.fullOuterJoin.html#pyspark.RDD.fullOuterJoin" title="pyspark.RDD.fullOuterJoin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.fullOuterJoin</span></code></a>(other[, numPartitions])</p></td>
<td><p>Perform a full outer join of <cite>self</cite> and <cite>other</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.getCheckpointFile.html#pyspark.RDD.getCheckpointFile" title="pyspark.RDD.getCheckpointFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.getCheckpointFile</span></code></a>()</p></td>
<td><p>Gets the name of the file to which this RDD was checkpointed.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.getNumPartitions.html#pyspark.RDD.getNumPartitions" title="pyspark.RDD.getNumPartitions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.getNumPartitions</span></code></a>()</p></td>
<td><p>Returns the number of partitions in this RDD.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.getResourceProfile.html#pyspark.RDD.getResourceProfile" title="pyspark.RDD.getResourceProfile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.getResourceProfile</span></code></a>()</p></td>
<td><p>Get the <a class="reference internal" href="api/pyspark.resource.ResourceProfile.html#pyspark.resource.ResourceProfile" title="pyspark.resource.ResourceProfile"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.resource.ResourceProfile</span></code></a> specified with this RDD or None if it wasn’t specified.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.getStorageLevel.html#pyspark.RDD.getStorageLevel" title="pyspark.RDD.getStorageLevel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.getStorageLevel</span></code></a>()</p></td>
<td><p>Get the RDD’s current storage level.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.glom.html#pyspark.RDD.glom" title="pyspark.RDD.glom"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.glom</span></code></a>()</p></td>
<td><p>Return an RDD created by coalescing all elements within each partition into a list.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.groupBy.html#pyspark.RDD.groupBy" title="pyspark.RDD.groupBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.groupBy</span></code></a>(f[, numPartitions, partitionFunc])</p></td>
<td><p>Return an RDD of grouped items.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.groupByKey.html#pyspark.RDD.groupByKey" title="pyspark.RDD.groupByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.groupByKey</span></code></a>([numPartitions, partitionFunc])</p></td>
<td><p>Group the values for each key in the RDD into a single sequence.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.groupWith.html#pyspark.RDD.groupWith" title="pyspark.RDD.groupWith"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.groupWith</span></code></a>(other, *others)</p></td>
<td><p>Alias for cogroup but with support for multiple RDDs.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.histogram.html#pyspark.RDD.histogram" title="pyspark.RDD.histogram"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.histogram</span></code></a>(buckets)</p></td>
<td><p>Compute a histogram using the provided buckets.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.id.html#pyspark.RDD.id" title="pyspark.RDD.id"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.id</span></code></a>()</p></td>
<td><p>A unique ID for this RDD (within its SparkContext).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.intersection.html#pyspark.RDD.intersection" title="pyspark.RDD.intersection"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.intersection</span></code></a>(other)</p></td>
<td><p>Return the intersection of this RDD and another one.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.isCheckpointed.html#pyspark.RDD.isCheckpointed" title="pyspark.RDD.isCheckpointed"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.isCheckpointed</span></code></a>()</p></td>
<td><p>Return whether this RDD is checkpointed and materialized, either reliably or locally.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.isEmpty.html#pyspark.RDD.isEmpty" title="pyspark.RDD.isEmpty"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.isEmpty</span></code></a>()</p></td>
<td><p>Returns true if and only if the RDD contains no elements at all.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.isLocallyCheckpointed.html#pyspark.RDD.isLocallyCheckpointed" title="pyspark.RDD.isLocallyCheckpointed"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.isLocallyCheckpointed</span></code></a>()</p></td>
<td><p>Return whether this RDD is marked for local checkpointing.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.join.html#pyspark.RDD.join" title="pyspark.RDD.join"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.join</span></code></a>(other[, numPartitions])</p></td>
<td><p>Return an RDD containing all pairs of elements with matching keys in <cite>self</cite> and <cite>other</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.keyBy.html#pyspark.RDD.keyBy" title="pyspark.RDD.keyBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.keyBy</span></code></a>(f)</p></td>
<td><p>Creates tuples of the elements in this RDD by applying <cite>f</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.keys.html#pyspark.RDD.keys" title="pyspark.RDD.keys"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.keys</span></code></a>()</p></td>
<td><p>Return an RDD with the keys of each tuple.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.leftOuterJoin.html#pyspark.RDD.leftOuterJoin" title="pyspark.RDD.leftOuterJoin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.leftOuterJoin</span></code></a>(other[, numPartitions])</p></td>
<td><p>Perform a left outer join of <cite>self</cite> and <cite>other</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.localCheckpoint.html#pyspark.RDD.localCheckpoint" title="pyspark.RDD.localCheckpoint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.localCheckpoint</span></code></a>()</p></td>
<td><p>Mark this RDD for local checkpointing using Spark’s existing caching layer.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.lookup.html#pyspark.RDD.lookup" title="pyspark.RDD.lookup"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.lookup</span></code></a>(key)</p></td>
<td><p>Return the list of values in the RDD for key <cite>key</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.map.html#pyspark.RDD.map" title="pyspark.RDD.map"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.map</span></code></a>(f[, preservesPartitioning])</p></td>
<td><p>Return a new RDD by applying a function to each element of this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.mapPartitions.html#pyspark.RDD.mapPartitions" title="pyspark.RDD.mapPartitions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.mapPartitions</span></code></a>(f[, preservesPartitioning])</p></td>
<td><p>Return a new RDD by applying a function to each partition of this RDD.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.mapPartitionsWithIndex.html#pyspark.RDD.mapPartitionsWithIndex" title="pyspark.RDD.mapPartitionsWithIndex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.mapPartitionsWithIndex</span></code></a>(f[, …])</p></td>
<td><p>Return a new RDD by applying a function to each partition of this RDD, while tracking the index of the original partition.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.mapPartitionsWithSplit.html#pyspark.RDD.mapPartitionsWithSplit" title="pyspark.RDD.mapPartitionsWithSplit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.mapPartitionsWithSplit</span></code></a>(f[, …])</p></td>
<td><p>Return a new RDD by applying a function to each partition of this RDD, while tracking the index of the original partition.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.mapValues.html#pyspark.RDD.mapValues" title="pyspark.RDD.mapValues"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.mapValues</span></code></a>(f)</p></td>
<td><p>Pass each value in the key-value pair RDD through a map function without changing the keys; this also retains the original RDD’s partitioning.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.max.html#pyspark.RDD.max" title="pyspark.RDD.max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.max</span></code></a>([key])</p></td>
<td><p>Find the maximum item in this RDD.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.mean.html#pyspark.RDD.mean" title="pyspark.RDD.mean"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.mean</span></code></a>()</p></td>
<td><p>Compute the mean of this RDD’s elements.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.meanApprox.html#pyspark.RDD.meanApprox" title="pyspark.RDD.meanApprox"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.meanApprox</span></code></a>(timeout[, confidence])</p></td>
<td><p>Approximate operation to return the mean within a timeout or meet the confidence.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.min.html#pyspark.RDD.min" title="pyspark.RDD.min"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.min</span></code></a>([key])</p></td>
<td><p>Find the minimum item in this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.name.html#pyspark.RDD.name" title="pyspark.RDD.name"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.name</span></code></a>()</p></td>
<td><p>Return the name of this RDD.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.partitionBy.html#pyspark.RDD.partitionBy" title="pyspark.RDD.partitionBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.partitionBy</span></code></a>(numPartitions[, partitionFunc])</p></td>
<td><p>Return a copy of the RDD partitioned using the specified partitioner.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.persist.html#pyspark.RDD.persist" title="pyspark.RDD.persist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.persist</span></code></a>([storageLevel])</p></td>
<td><p>Set this RDD’s storage level to persist its values across operations after the first time it is computed.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.pipe.html#pyspark.RDD.pipe" title="pyspark.RDD.pipe"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.pipe</span></code></a>(command[, env, checkCode])</p></td>
<td><p>Return an RDD created by piping elements to a forked external process.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.randomSplit.html#pyspark.RDD.randomSplit" title="pyspark.RDD.randomSplit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.randomSplit</span></code></a>(weights[, seed])</p></td>
<td><p>Randomly splits this RDD with the provided weights.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.reduce.html#pyspark.RDD.reduce" title="pyspark.RDD.reduce"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.reduce</span></code></a>(f)</p></td>
<td><p>Reduces the elements of this RDD using the specified commutative and associative binary operator.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.reduceByKey.html#pyspark.RDD.reduceByKey" title="pyspark.RDD.reduceByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.reduceByKey</span></code></a>(func[, numPartitions, …])</p></td>
<td><p>Merge the values for each key using an associative and commutative reduce function.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.reduceByKeyLocally.html#pyspark.RDD.reduceByKeyLocally" title="pyspark.RDD.reduceByKeyLocally"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.reduceByKeyLocally</span></code></a>(func)</p></td>
<td><p>Merge the values for each key using an associative and commutative reduce function, but return the results immediately to the master as a dictionary.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.repartition.html#pyspark.RDD.repartition" title="pyspark.RDD.repartition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.repartition</span></code></a>(numPartitions)</p></td>
<td><p>Return a new RDD that has exactly numPartitions partitions.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.repartitionAndSortWithinPartitions.html#pyspark.RDD.repartitionAndSortWithinPartitions" title="pyspark.RDD.repartitionAndSortWithinPartitions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.repartitionAndSortWithinPartitions</span></code></a>([…])</p></td>
<td><p>Repartition the RDD according to the given partitioner and, within each resulting partition, sort records by their keys.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.rightOuterJoin.html#pyspark.RDD.rightOuterJoin" title="pyspark.RDD.rightOuterJoin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.rightOuterJoin</span></code></a>(other[, numPartitions])</p></td>
<td><p>Perform a right outer join of <cite>self</cite> and <cite>other</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.sample.html#pyspark.RDD.sample" title="pyspark.RDD.sample"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.sample</span></code></a>(withReplacement, fraction[, seed])</p></td>
<td><p>Return a sampled subset of this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.sampleByKey.html#pyspark.RDD.sampleByKey" title="pyspark.RDD.sampleByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.sampleByKey</span></code></a>(withReplacement, fractions)</p></td>
<td><p>Return a subset of this RDD sampled by key (via stratified sampling).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.sampleStdev.html#pyspark.RDD.sampleStdev" title="pyspark.RDD.sampleStdev"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.sampleStdev</span></code></a>()</p></td>
<td><p>Compute the sample standard deviation of this RDD’s elements (which corrects for bias in estimating the standard deviation by dividing by N-1 instead of N).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.sampleVariance.html#pyspark.RDD.sampleVariance" title="pyspark.RDD.sampleVariance"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.sampleVariance</span></code></a>()</p></td>
<td><p>Compute the sample variance of this RDD’s elements (which corrects for bias in estimating the variance by dividing by N-1 instead of N).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.saveAsHadoopDataset.html#pyspark.RDD.saveAsHadoopDataset" title="pyspark.RDD.saveAsHadoopDataset"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.saveAsHadoopDataset</span></code></a>(conf[, …])</p></td>
<td><p>Output a Python RDD of key-value pairs (of form <code class="docutils literal notranslate"><span class="pre">RDD[(K,</span> <span class="pre">V)]</span></code>) to any Hadoop file system, using the old Hadoop OutputFormat API (mapred package).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.saveAsHadoopFile.html#pyspark.RDD.saveAsHadoopFile" title="pyspark.RDD.saveAsHadoopFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.saveAsHadoopFile</span></code></a>(path, outputFormatClass)</p></td>
<td><p>Output a Python RDD of key-value pairs (of form <code class="docutils literal notranslate"><span class="pre">RDD[(K,</span> <span class="pre">V)]</span></code>) to any Hadoop file system, using the old Hadoop OutputFormat API (mapred package).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.saveAsNewAPIHadoopDataset.html#pyspark.RDD.saveAsNewAPIHadoopDataset" title="pyspark.RDD.saveAsNewAPIHadoopDataset"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.saveAsNewAPIHadoopDataset</span></code></a>(conf[, …])</p></td>
<td><p>Output a Python RDD of key-value pairs (of form <code class="docutils literal notranslate"><span class="pre">RDD[(K,</span> <span class="pre">V)]</span></code>) to any Hadoop file system, using the new Hadoop OutputFormat API (mapreduce package).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.saveAsNewAPIHadoopFile.html#pyspark.RDD.saveAsNewAPIHadoopFile" title="pyspark.RDD.saveAsNewAPIHadoopFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.saveAsNewAPIHadoopFile</span></code></a>(path, …[, …])</p></td>
<td><p>Output a Python RDD of key-value pairs (of form <code class="docutils literal notranslate"><span class="pre">RDD[(K,</span> <span class="pre">V)]</span></code>) to any Hadoop file system, using the new Hadoop OutputFormat API (mapreduce package).</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.saveAsPickleFile.html#pyspark.RDD.saveAsPickleFile" title="pyspark.RDD.saveAsPickleFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.saveAsPickleFile</span></code></a>(path[, batchSize])</p></td>
<td><p>Save this RDD as a SequenceFile of serialized objects.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.saveAsSequenceFile.html#pyspark.RDD.saveAsSequenceFile" title="pyspark.RDD.saveAsSequenceFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.saveAsSequenceFile</span></code></a>(path[, …])</p></td>
<td><p>Output a Python RDD of key-value pairs (of form <code class="docutils literal notranslate"><span class="pre">RDD[(K,</span> <span class="pre">V)]</span></code>) to any Hadoop file system, using the “org.apache.hadoop.io.Writable” types that we convert from the RDD’s key and value types.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.saveAsTextFile.html#pyspark.RDD.saveAsTextFile" title="pyspark.RDD.saveAsTextFile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.saveAsTextFile</span></code></a>(path[, compressionCodecClass])</p></td>
<td><p>Save this RDD as a text file, using string representations of elements.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.setName.html#pyspark.RDD.setName" title="pyspark.RDD.setName"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.setName</span></code></a>(name)</p></td>
<td><p>Assign a name to this RDD.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.sortBy.html#pyspark.RDD.sortBy" title="pyspark.RDD.sortBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.sortBy</span></code></a>(keyfunc[, ascending, numPartitions])</p></td>
<td><p>Sorts this RDD by the given keyfunc.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.sortByKey.html#pyspark.RDD.sortByKey" title="pyspark.RDD.sortByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.sortByKey</span></code></a>([ascending, numPartitions, …])</p></td>
<td><p>Sorts this RDD, which is assumed to consist of (key, value) pairs.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.stats.html#pyspark.RDD.stats" title="pyspark.RDD.stats"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.stats</span></code></a>()</p></td>
<td><p>Return a <code class="xref py py-class docutils literal notranslate"><span class="pre">StatCounter</span></code> object that captures the mean, variance and count of the RDD’s elements in one operation.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.stdev.html#pyspark.RDD.stdev" title="pyspark.RDD.stdev"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.stdev</span></code></a>()</p></td>
<td><p>Compute the standard deviation of this RDD’s elements.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.subtract.html#pyspark.RDD.subtract" title="pyspark.RDD.subtract"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.subtract</span></code></a>(other[, numPartitions])</p></td>
<td><p>Return each value in <cite>self</cite> that is not contained in <cite>other</cite>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.subtractByKey.html#pyspark.RDD.subtractByKey" title="pyspark.RDD.subtractByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.subtractByKey</span></code></a>(other[, numPartitions])</p></td>
<td><p>Return each (key, value) pair in <cite>self</cite> that has no pair with matching key in <cite>other</cite>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.sum.html#pyspark.RDD.sum" title="pyspark.RDD.sum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.sum</span></code></a>()</p></td>
<td><p>Add up the elements in this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.sumApprox.html#pyspark.RDD.sumApprox" title="pyspark.RDD.sumApprox"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.sumApprox</span></code></a>(timeout[, confidence])</p></td>
<td><p>Approximate operation to return the sum within a timeout or meet the confidence.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.take.html#pyspark.RDD.take" title="pyspark.RDD.take"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.take</span></code></a>(num)</p></td>
<td><p>Take the first num elements of the RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.takeOrdered.html#pyspark.RDD.takeOrdered" title="pyspark.RDD.takeOrdered"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.takeOrdered</span></code></a>(num[, key])</p></td>
<td><p>Get the N elements from an RDD ordered in ascending order or as specified by the optional key function.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.takeSample.html#pyspark.RDD.takeSample" title="pyspark.RDD.takeSample"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.takeSample</span></code></a>(withReplacement, num[, seed])</p></td>
<td><p>Return a fixed-size sampled subset of this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.toDebugString.html#pyspark.RDD.toDebugString" title="pyspark.RDD.toDebugString"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.toDebugString</span></code></a>()</p></td>
<td><p>A description of this RDD and its recursive dependencies for debugging.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.toLocalIterator.html#pyspark.RDD.toLocalIterator" title="pyspark.RDD.toLocalIterator"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.toLocalIterator</span></code></a>([prefetchPartitions])</p></td>
<td><p>Return an iterator that contains all of the elements in this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.top.html#pyspark.RDD.top" title="pyspark.RDD.top"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.top</span></code></a>(num[, key])</p></td>
<td><p>Get the top N elements from an RDD.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.treeAggregate.html#pyspark.RDD.treeAggregate" title="pyspark.RDD.treeAggregate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.treeAggregate</span></code></a>(zeroValue, seqOp, combOp)</p></td>
<td><p>Aggregates the elements of this RDD in a multi-level tree pattern.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.treeReduce.html#pyspark.RDD.treeReduce" title="pyspark.RDD.treeReduce"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.treeReduce</span></code></a>(f[, depth])</p></td>
<td><p>Reduces the elements of this RDD in a multi-level tree pattern.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.union.html#pyspark.RDD.union" title="pyspark.RDD.union"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.union</span></code></a>(other)</p></td>
<td><p>Return the union of this RDD and another one.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.unpersist.html#pyspark.RDD.unpersist" title="pyspark.RDD.unpersist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.unpersist</span></code></a>([blocking])</p></td>
<td><p>Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.values.html#pyspark.RDD.values" title="pyspark.RDD.values"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.values</span></code></a>()</p></td>
<td><p>Return an RDD with the values of each tuple.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.variance.html#pyspark.RDD.variance" title="pyspark.RDD.variance"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.variance</span></code></a>()</p></td>
<td><p>Compute the variance of this RDD’s elements.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.withResources.html#pyspark.RDD.withResources" title="pyspark.RDD.withResources"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.withResources</span></code></a>(profile)</p></td>
<td><p>Specify a <a class="reference internal" href="api/pyspark.resource.ResourceProfile.html#pyspark.resource.ResourceProfile" title="pyspark.resource.ResourceProfile"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.resource.ResourceProfile</span></code></a> to use when calculating this RDD.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.zip.html#pyspark.RDD.zip" title="pyspark.RDD.zip"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.zip</span></code></a>(other)</p></td>
<td><p>Zips this RDD with another one, returning key-value pairs with the first element in each RDD, second element in each RDD, etc.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDD.zipWithIndex.html#pyspark.RDD.zipWithIndex" title="pyspark.RDD.zipWithIndex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.zipWithIndex</span></code></a>()</p></td>
<td><p>Zips this RDD with its element indices.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDD.zipWithUniqueId.html#pyspark.RDD.zipWithUniqueId" title="pyspark.RDD.zipWithUniqueId"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDD.zipWithUniqueId</span></code></a>()</p></td>
<td><p>Zips this RDD with generated unique Long ids.</p></td>
</tr>
</tbody>
</table>
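<p>A minimal sketch (not part of the generated reference) of a few of the RDD operations listed above, assuming a local <code class="docutils literal notranslate"><span class="pre">SparkContext</span></code>; all names are illustrative:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
from pyspark import SparkContext

# Assumed local context; in an existing application, reuse the driver's SparkContext.
sc = SparkContext("local[2]", "rdd-sketch")

rdd = sc.parallelize([1, 2, 3, 4, 5])
print(rdd.sum())                          # 15       (RDD.sum)
print(rdd.top(2))                         # [5, 4]   (RDD.top)
print(rdd.reduce(lambda a, b: a + b))     # 15       (RDD.reduce)

pairs = sc.parallelize([("a", 1), ("b", 2), ("a", 3)])
print(pairs.reduceByKey(lambda a, b: a + b).collect())   # e.g. [('a', 4), ('b', 2)]  (RDD.reduceByKey)

sc.stop()
</pre></div></div>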
</div>
<div class="section" id="broadcast-and-accumulator">
<h2>Broadcast and Accumulator<a class="headerlink" href="#broadcast-and-accumulator" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.Broadcast.destroy.html#pyspark.Broadcast.destroy" title="pyspark.Broadcast.destroy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Broadcast.destroy</span></code></a>([blocking])</p></td>
<td><p>Destroy all data and metadata related to this broadcast variable.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.Broadcast.dump.html#pyspark.Broadcast.dump" title="pyspark.Broadcast.dump"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Broadcast.dump</span></code></a>(value, f)</p></td>
<td><p></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.Broadcast.load.html#pyspark.Broadcast.load" title="pyspark.Broadcast.load"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Broadcast.load</span></code></a>(file)</p></td>
<td><p></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.Broadcast.load_from_path.html#pyspark.Broadcast.load_from_path" title="pyspark.Broadcast.load_from_path"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Broadcast.load_from_path</span></code></a>(path)</p></td>
<td><p></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.Broadcast.unpersist.html#pyspark.Broadcast.unpersist" title="pyspark.Broadcast.unpersist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Broadcast.unpersist</span></code></a>([blocking])</p></td>
<td><p>Delete cached copies of this broadcast on the executors.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.Broadcast.value.html#pyspark.Broadcast.value" title="pyspark.Broadcast.value"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Broadcast.value</span></code></a></p></td>
<td><p>Return the broadcasted value.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.Accumulator.add.html#pyspark.Accumulator.add" title="pyspark.Accumulator.add"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Accumulator.add</span></code></a>(term)</p></td>
<td><p>Adds a term to this accumulator’s value.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.Accumulator.value.html#pyspark.Accumulator.value" title="pyspark.Accumulator.value"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Accumulator.value</span></code></a></p></td>
<td><p>Get the accumulator’s value; only usable in the driver program.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.AccumulatorParam.addInPlace.html#pyspark.AccumulatorParam.addInPlace" title="pyspark.AccumulatorParam.addInPlace"><code class="xref py py-obj docutils literal notranslate"><span class="pre">AccumulatorParam.addInPlace</span></code></a>(value1, value2)</p></td>
<td><p>Add two values of the accumulator’s data type, returning a new value; for efficiency, can also update <cite>value1</cite> in place and return it.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.AccumulatorParam.zero.html#pyspark.AccumulatorParam.zero" title="pyspark.AccumulatorParam.zero"><code class="xref py py-obj docutils literal notranslate"><span class="pre">AccumulatorParam.zero</span></code></a>(value)</p></td>
<td><p>Provide a “zero value” for the type, compatible in dimensions with the provided <cite>value</cite> (e.g., a zero vector).</p></td>
</tr>
</tbody>
</table>
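<p>A minimal sketch (not part of the generated reference) showing a broadcast variable and an accumulator together, assuming a local <code class="docutils literal notranslate"><span class="pre">SparkContext</span></code>; all names are illustrative:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
from pyspark import SparkContext

sc = SparkContext("local[2]", "broadcast-accumulator-sketch")

lookup = sc.broadcast({"a": 1, "b": 2})   # read-only value shipped once to executors
counter = sc.accumulator(0)               # written from tasks, read on the driver

def translate(key):
    counter.add(1)                        # Accumulator.add
    return lookup.value.get(key, 0)       # Broadcast.value

print(sc.parallelize(["a", "b", "a"]).map(translate).collect())   # [1, 2, 1]
print(counter.value)                                              # 3 (Accumulator.value)

lookup.unpersist()                        # Broadcast.unpersist
sc.stop()
</pre></div></div>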
</div>
<div class="section" id="management">
<h2>Management<a class="headerlink" href="#management" title="Permalink to this headline"></a></h2>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.inheritable_thread_target.html#pyspark.inheritable_thread_target" title="pyspark.inheritable_thread_target"><code class="xref py py-obj docutils literal notranslate"><span class="pre">inheritable_thread_target</span></code></a>(f)</p></td>
<td><p>Return a thread target wrapper recommended for use in PySpark when pinned thread mode is enabled.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkConf.contains.html#pyspark.SparkConf.contains" title="pyspark.SparkConf.contains"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.contains</span></code></a>(key)</p></td>
<td><p>Does this configuration contain a given key?</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkConf.get.html#pyspark.SparkConf.get" title="pyspark.SparkConf.get"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.get</span></code></a>(key[, defaultValue])</p></td>
<td><p>Get the configured value for some key, or return a default otherwise.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkConf.getAll.html#pyspark.SparkConf.getAll" title="pyspark.SparkConf.getAll"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.getAll</span></code></a>()</p></td>
<td><p>Get all values as a list of key-value pairs.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkConf.set.html#pyspark.SparkConf.set" title="pyspark.SparkConf.set"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.set</span></code></a>(key, value)</p></td>
<td><p>Set a configuration property.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkConf.setAll.html#pyspark.SparkConf.setAll" title="pyspark.SparkConf.setAll"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.setAll</span></code></a>(pairs)</p></td>
<td><p>Set multiple parameters, passed as a list of key-value pairs.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkConf.setAppName.html#pyspark.SparkConf.setAppName" title="pyspark.SparkConf.setAppName"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.setAppName</span></code></a>(value)</p></td>
<td><p>Set application name.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkConf.setExecutorEnv.html#pyspark.SparkConf.setExecutorEnv" title="pyspark.SparkConf.setExecutorEnv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.setExecutorEnv</span></code></a>([key, value, pairs])</p></td>
<td><p>Set an environment variable to be passed to executors.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkConf.setIfMissing.html#pyspark.SparkConf.setIfMissing" title="pyspark.SparkConf.setIfMissing"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.setIfMissing</span></code></a>(key, value)</p></td>
<td><p>Set a configuration property, if not already set.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkConf.setMaster.html#pyspark.SparkConf.setMaster" title="pyspark.SparkConf.setMaster"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.setMaster</span></code></a>(value)</p></td>
<td><p>Set master URL to connect to.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkConf.setSparkHome.html#pyspark.SparkConf.setSparkHome" title="pyspark.SparkConf.setSparkHome"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.setSparkHome</span></code></a>(value)</p></td>
<td><p>Set path where Spark is installed on worker nodes.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkConf.toDebugString.html#pyspark.SparkConf.toDebugString" title="pyspark.SparkConf.toDebugString"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkConf.toDebugString</span></code></a>()</p></td>
<td><p>Returns a printable version of the configuration, as a list of key=value pairs, one per line.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.SparkFiles.get.html#pyspark.SparkFiles.get" title="pyspark.SparkFiles.get"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkFiles.get</span></code></a>(filename)</p></td>
<td><p>Get the absolute path of a file added through <a class="reference internal" href="api/pyspark.SparkContext.addFile.html#pyspark.SparkContext.addFile" title="pyspark.SparkContext.addFile"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SparkContext.addFile()</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.SparkFiles.getRootDirectory.html#pyspark.SparkFiles.getRootDirectory" title="pyspark.SparkFiles.getRootDirectory"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparkFiles.getRootDirectory</span></code></a>()</p></td>
<td><p>Get the root directory that contains files added through <a class="reference internal" href="api/pyspark.SparkContext.addFile.html#pyspark.SparkContext.addFile" title="pyspark.SparkContext.addFile"><code class="xref py py-meth docutils literal notranslate"><span class="pre">SparkContext.addFile()</span></code></a>.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.StorageLevel.DISK_ONLY.html#pyspark.StorageLevel.DISK_ONLY" title="pyspark.StorageLevel.DISK_ONLY"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageLevel.DISK_ONLY</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.StorageLevel.DISK_ONLY_2.html#pyspark.StorageLevel.DISK_ONLY_2" title="pyspark.StorageLevel.DISK_ONLY_2"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageLevel.DISK_ONLY_2</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.StorageLevel.DISK_ONLY_3.html#pyspark.StorageLevel.DISK_ONLY_3" title="pyspark.StorageLevel.DISK_ONLY_3"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageLevel.DISK_ONLY_3</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.StorageLevel.MEMORY_AND_DISK.html#pyspark.StorageLevel.MEMORY_AND_DISK" title="pyspark.StorageLevel.MEMORY_AND_DISK"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageLevel.MEMORY_AND_DISK</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.StorageLevel.MEMORY_AND_DISK_2.html#pyspark.StorageLevel.MEMORY_AND_DISK_2" title="pyspark.StorageLevel.MEMORY_AND_DISK_2"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageLevel.MEMORY_AND_DISK_2</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.StorageLevel.MEMORY_ONLY.html#pyspark.StorageLevel.MEMORY_ONLY" title="pyspark.StorageLevel.MEMORY_ONLY"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageLevel.MEMORY_ONLY</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.StorageLevel.MEMORY_ONLY_2.html#pyspark.StorageLevel.MEMORY_ONLY_2" title="pyspark.StorageLevel.MEMORY_ONLY_2"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageLevel.MEMORY_ONLY_2</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.StorageLevel.OFF_HEAP.html#pyspark.StorageLevel.OFF_HEAP" title="pyspark.StorageLevel.OFF_HEAP"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageLevel.OFF_HEAP</span></code></a></p></td>
<td><p></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.TaskContext.attemptNumber.html#pyspark.TaskContext.attemptNumber" title="pyspark.TaskContext.attemptNumber"><code class="xref py py-obj docutils literal notranslate"><span class="pre">TaskContext.attemptNumber</span></code></a>()</p></td>
<td><p></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.TaskContext.get.html#pyspark.TaskContext.get" title="pyspark.TaskContext.get"><code class="xref py py-obj docutils literal notranslate"><span class="pre">TaskContext.get</span></code></a>()</p></td>
<td><p>Return the currently active TaskContext.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.TaskContext.getLocalProperty.html#pyspark.TaskContext.getLocalProperty" title="pyspark.TaskContext.getLocalProperty"><code class="xref py py-obj docutils literal notranslate"><span class="pre">TaskContext.getLocalProperty</span></code></a>(key)</p></td>
<td><p>Get a local property set upstream in the driver, or None if it is missing.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.TaskContext.partitionId.html#pyspark.TaskContext.partitionId" title="pyspark.TaskContext.partitionId"><code class="xref py py-obj docutils literal notranslate"><span class="pre">TaskContext.partitionId</span></code></a>()</p></td>
<td><p>The ID of the RDD partition that is computed by this task.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.TaskContext.resources.html#pyspark.TaskContext.resources" title="pyspark.TaskContext.resources"><code class="xref py py-obj docutils literal notranslate"><span class="pre">TaskContext.resources</span></code></a>()</p></td>
<td><p>Resources allocated to the task.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.TaskContext.stageId.html#pyspark.TaskContext.stageId" title="pyspark.TaskContext.stageId"><code class="xref py py-obj docutils literal notranslate"><span class="pre">TaskContext.stageId</span></code></a>()</p></td>
<td><p>The ID of the stage that this task belongs to.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.TaskContext.taskAttemptId.html#pyspark.TaskContext.taskAttemptId" title="pyspark.TaskContext.taskAttemptId"><code class="xref py py-obj docutils literal notranslate"><span class="pre">TaskContext.taskAttemptId</span></code></a>()</p></td>
<td><p>An ID that is unique to this task attempt (within the same SparkContext, no two task attempts will share the same attempt ID).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.RDDBarrier.mapPartitions.html#pyspark.RDDBarrier.mapPartitions" title="pyspark.RDDBarrier.mapPartitions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDDBarrier.mapPartitions</span></code></a>(f[, …])</p></td>
<td><p>Returns a new RDD by applying a function to each partition of the wrapped RDD, where tasks are launched together in a barrier stage.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.RDDBarrier.mapPartitionsWithIndex.html#pyspark.RDDBarrier.mapPartitionsWithIndex" title="pyspark.RDDBarrier.mapPartitionsWithIndex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RDDBarrier.mapPartitionsWithIndex</span></code></a>(f[, …])</p></td>
<td><p>Returns a new RDD by applying a function to each partition of the wrapped RDD, while tracking the index of the original partition.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.allGather.html#pyspark.BarrierTaskContext.allGather" title="pyspark.BarrierTaskContext.allGather"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext.allGather</span></code></a>([message])</p></td>
<td><p>This function blocks until all tasks in the same stage have reached this routine.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.attemptNumber.html#pyspark.BarrierTaskContext.attemptNumber" title="pyspark.BarrierTaskContext.attemptNumber"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext.attemptNumber</span></code></a>()</p></td>
<td><p></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.barrier.html#pyspark.BarrierTaskContext.barrier" title="pyspark.BarrierTaskContext.barrier"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext.barrier</span></code></a>()</p></td>
<td><p>Sets a global barrier and waits until all tasks in this stage hit this barrier.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.get.html#pyspark.BarrierTaskContext.get" title="pyspark.BarrierTaskContext.get"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext.get</span></code></a>()</p></td>
<td><p>Return the currently active <a class="reference internal" href="api/pyspark.BarrierTaskContext.html#pyspark.BarrierTaskContext" title="pyspark.BarrierTaskContext"><code class="xref py py-class docutils literal notranslate"><span class="pre">BarrierTaskContext</span></code></a>.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.getLocalProperty.html#pyspark.BarrierTaskContext.getLocalProperty" title="pyspark.BarrierTaskContext.getLocalProperty"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext.getLocalProperty</span></code></a>(key)</p></td>
<td><p>Get a local property set upstream in the driver, or None if it is missing.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.getTaskInfos.html#pyspark.BarrierTaskContext.getTaskInfos" title="pyspark.BarrierTaskContext.getTaskInfos"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext.getTaskInfos</span></code></a>()</p></td>
<td><p>Returns <a class="reference internal" href="api/pyspark.BarrierTaskInfo.html#pyspark.BarrierTaskInfo" title="pyspark.BarrierTaskInfo"><code class="xref py py-class docutils literal notranslate"><span class="pre">BarrierTaskInfo</span></code></a> for all tasks in this barrier stage, ordered by partition ID.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.partitionId.html#pyspark.BarrierTaskContext.partitionId" title="pyspark.BarrierTaskContext.partitionId"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext.partitionId</span></code></a>()</p></td>
<td><p>The ID of the RDD partition that is computed by this task.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.resources.html#pyspark.BarrierTaskContext.resources" title="pyspark.BarrierTaskContext.resources"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext.resources</span></code></a>()</p></td>
<td><p>Resources allocated to the task.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.stageId.html#pyspark.BarrierTaskContext.stageId" title="pyspark.BarrierTaskContext.stageId"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext.stageId</span></code></a>()</p></td>
<td><p>The ID of the stage that this task belongs to.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.BarrierTaskContext.taskAttemptId.html#pyspark.BarrierTaskContext.taskAttemptId" title="pyspark.BarrierTaskContext.taskAttemptId"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BarrierTaskContext.taskAttemptId</span></code></a>()</p></td>
<td><p>An ID that is unique to this task attempt (within the same SparkContext, no two task attempts will share the same attempt ID).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.util.VersionUtils.majorMinorVersion.html#pyspark.util.VersionUtils.majorMinorVersion" title="pyspark.util.VersionUtils.majorMinorVersion"><code class="xref py py-obj docutils literal notranslate"><span class="pre">util.VersionUtils.majorMinorVersion</span></code></a>(sparkVersion)</p></td>
<td><p>Given a Spark version string, return the (major version number, minor version number).</p></td>
</tr>
</tbody>
</table>
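<p>A minimal sketch (not part of the generated reference) of the configuration and task-context helpers listed above, assuming local mode; all names are illustrative:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>
from pyspark import SparkConf, SparkContext, TaskContext

conf = (SparkConf()
        .setAppName("management-sketch")             # SparkConf.setAppName
        .setMaster("local[2]")                       # SparkConf.setMaster
        .setIfMissing("spark.ui.enabled", "false"))  # SparkConf.setIfMissing
print(conf.toDebugString())                          # one key=value pair per line

sc = SparkContext(conf=conf)

def tag_with_partition(x):
    tc = TaskContext.get()                # current task's context (None on the driver)
    return (tc.partitionId(), x)

print(sc.parallelize(range(4), 2).map(tag_with_partition).collect())
sc.stop()
</pre></div></div>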
</div>
</div>
</div>
<div class='prev-next-bottom'>
<a class='left-prev' id="prev-link" href="api/pyspark.mllib.util.Saveable.html" title="previous page">Saveable</a>
<a class='right-next' id="next-link" href="api/pyspark.SparkContext.html" title="next page">pyspark.SparkContext</a>
</div>
</main>
</div>
</div>
<script src="../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
&copy; Copyright.<br/>
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>