<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_312) on Mon Jul 11 16:23:57 UTC 2022 -->
<title>SparkContext (Spark 3.2.2 JavaDoc)</title>
<meta name="date" content="2022-07-11">
<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="SparkContext (Spark 3.2.2 JavaDoc)";
}
}
catch(err) {
}
//-->
var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10,"i8":10,"i9":10,"i10":10,"i11":10,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":10,"i19":10,"i20":10,"i21":10,"i22":10,"i23":10,"i24":10,"i25":10,"i26":10,"i27":10,"i28":10,"i29":10,"i30":10,"i31":10,"i32":10,"i33":10,"i34":9,"i35":9,"i36":10,"i37":10,"i38":10,"i39":10,"i40":10,"i41":10,"i42":10,"i43":10,"i44":10,"i45":10,"i46":10,"i47":9,"i48":9,"i49":10,"i50":10,"i51":10,"i52":10,"i53":10,"i54":10,"i55":10,"i56":10,"i57":10,"i58":10,"i59":10,"i60":10,"i61":10,"i62":10,"i63":10,"i64":10,"i65":9,"i66":9,"i67":10,"i68":10,"i69":10,"i70":10,"i71":10,"i72":10,"i73":10,"i74":10,"i75":10,"i76":10,"i77":10,"i78":10,"i79":10,"i80":10,"i81":10,"i82":10,"i83":10,"i84":10,"i85":10,"i86":10,"i87":10,"i88":10,"i89":10,"i90":10,"i91":10,"i92":10,"i93":10,"i94":10,"i95":10,"i96":10,"i97":10,"i98":10,"i99":10,"i100":10,"i101":10,"i102":10};
var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
var altColor = "altColor";
var rowColor = "rowColor";
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../index-all.html">Index</a></li>
<li><a href="../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../org/apache/spark/SparkEnv.html" title="class in org.apache.spark"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../index.html?org/apache/spark/SparkContext.html" target="_top">Frames</a></li>
<li><a href="SparkContext.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_top");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.spark</div>
<h2 title="Class SparkContext" class="title">Class SparkContext</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>Object</li>
<li>
<ul class="inheritance">
<li>org.apache.spark.SparkContext</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt>All Implemented Interfaces:</dt>
<dd>org.apache.spark.internal.Logging</dd>
</dl>
<hr>
<br>
<pre>public class <span class="typeNameLabel">SparkContext</span>
extends Object
implements org.apache.spark.internal.Logging</pre>
<div class="block">Main entry point for Spark functionality. A SparkContext represents the connection to a Spark
cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster.
<p></div>
<dl>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Only one <code>SparkContext</code> should be active per JVM. You must <code>stop()</code> the
active <code>SparkContext</code> before creating a new one.</dd>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>config</code> - a Spark Config object describing the application configuration. Any settings in
this config override the default configs as well as system properties.</dd>
</dl>
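<div class="block">A minimal usage sketch of the lifecycle described above, assuming an illustrative
local master URL and application name: configure a <code>SparkConf</code>, create the single active
<code>SparkContext</code> for this JVM, and <code>stop()</code> it before creating another one.</div>
<pre>
import org.apache.spark.{SparkConf, SparkContext}

// Settings placed in the SparkConf override the defaults and system properties.
val conf = new SparkConf()
  .setMaster("local[2]")        // illustrative master URL
  .setAppName("ExampleApp")     // illustrative application name

val sc = new SparkContext(conf)
try {
  // use sc to create RDDs, accumulators and broadcast variables
} finally {
  sc.stop()                     // required before another SparkContext may be created
}
</pre>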
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#SparkContext--">SparkContext</a></span>()</code>
<div class="block">Create a SparkContext that loads settings from system properties (for instance, when
launching with ./bin/spark-submit).</div>
</td>
</tr>
<tr class="rowColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#SparkContext-org.apache.spark.SparkConf-">SparkContext</a></span>(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a>&nbsp;config)</code>&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#SparkContext-java.lang.String-java.lang.String-org.apache.spark.SparkConf-">SparkContext</a></span>(String&nbsp;master,
String&nbsp;appName,
<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a>&nbsp;conf)</code>
<div class="block">Alternative constructor that allows setting common Spark properties directly</div>
</td>
</tr>
<tr class="rowColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#SparkContext-java.lang.String-java.lang.String-java.lang.String-scala.collection.Seq-scala.collection.Map-">SparkContext</a></span>(String&nbsp;master,
String&nbsp;appName,
String&nbsp;sparkHome,
scala.collection.Seq&lt;String&gt;&nbsp;jars,
scala.collection.Map&lt;String,String&gt;&nbsp;environment)</code>
<div class="block">Alternative constructor that allows setting common Spark properties directly</div>
</td>
</tr>
</table>
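<div class="block">A brief sketch of the alternative constructor listed above, which sets the master URL
and application name directly; the values shown are illustrative assumptions.</div>
<pre>
import org.apache.spark.{SparkConf, SparkContext}

// Equivalent to setting the same master URL and application name on a SparkConf
// and passing it to the single-argument constructor.
val sc = new SparkContext("local[4]", "ConstructorExample", new SparkConf())

// ... build RDDs with sc ...

sc.stop()
</pre>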
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#addArchive-java.lang.String-">addArchive</a></span>(String&nbsp;path)</code>
<div class="block">:: Experimental ::
Add an archive to be downloaded and unpacked with this Spark job on every node.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#addFile-java.lang.String-">addFile</a></span>(String&nbsp;path)</code>
<div class="block">Add a file to be downloaded with this Spark job on every node.</div>
</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#addFile-java.lang.String-boolean-">addFile</a></span>(String&nbsp;path,
boolean&nbsp;recursive)</code>
<div class="block">Add a file to be downloaded with this Spark job on every node.</div>
</td>
</tr>
<tr id="i3" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#addJar-java.lang.String-">addJar</a></span>(String&nbsp;path)</code>
<div class="block">Adds a JAR dependency for all tasks to be executed on this <code>SparkContext</code> in the future.</div>
</td>
</tr>
<tr id="i4" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#addSparkListener-org.apache.spark.scheduler.SparkListenerInterface-">addSparkListener</a></span>(<a href="../../../org/apache/spark/scheduler/SparkListenerInterface.html" title="interface in org.apache.spark.scheduler">SparkListenerInterface</a>&nbsp;listener)</code>
<div class="block">:: DeveloperApi ::
Register a listener to receive up-calls from events that happen during execution.</div>
</td>
</tr>
<tr id="i5" class="rowColor">
<td class="colFirst"><code>scala.Option&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#applicationAttemptId--">applicationAttemptId</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i6" class="altColor">
<td class="colFirst"><code>String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#applicationId--">applicationId</a></span>()</code>
<div class="block">A unique identifier for the Spark application.</div>
</td>
</tr>
<tr id="i7" class="rowColor">
<td class="colFirst"><code>String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#appName--">appName</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i8" class="altColor">
<td class="colFirst"><code>scala.collection.Seq&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#archives--">archives</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i9" class="rowColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;String,<a href="../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#binaryFiles-java.lang.String-int-">binaryFiles</a></span>(String&nbsp;path,
int&nbsp;minPartitions)</code>
<div class="block">Get an RDD for a Hadoop-readable dataset as PortableDataStream for each file
(useful for binary data)</div>
</td>
</tr>
<tr id="i10" class="altColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;byte[]&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#binaryRecords-java.lang.String-int-org.apache.hadoop.conf.Configuration-">binaryRecords</a></span>(String&nbsp;path,
int&nbsp;recordLength,
org.apache.hadoop.conf.Configuration&nbsp;conf)</code>
<div class="block">Load data from a flat binary file, assuming the length of each record is constant.</div>
</td>
</tr>
<tr id="i11" class="rowColor">
<td class="colFirst"><code>&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast">Broadcast</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#broadcast-T-scala.reflect.ClassTag-">broadcast</a></span>(T&nbsp;value,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$9)</code>
<div class="block">Broadcast a read-only variable to the cluster, returning a
<a href="../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast"><code>Broadcast</code></a> object for reading it in distributed functions.</div>
</td>
</tr>
<tr id="i12" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelAllJobs--">cancelAllJobs</a></span>()</code>
<div class="block">Cancel all jobs that have been scheduled or are running.</div>
</td>
</tr>
<tr id="i13" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelJob-int-">cancelJob</a></span>(int&nbsp;jobId)</code>
<div class="block">Cancel a given job if it's scheduled or running.</div>
</td>
</tr>
<tr id="i14" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelJob-int-java.lang.String-">cancelJob</a></span>(int&nbsp;jobId,
String&nbsp;reason)</code>
<div class="block">Cancel a given job if it's scheduled or running.</div>
</td>
</tr>
<tr id="i15" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelJobGroup-java.lang.String-">cancelJobGroup</a></span>(String&nbsp;groupId)</code>
<div class="block">Cancel active jobs for the specified group.</div>
</td>
</tr>
<tr id="i16" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelStage-int-">cancelStage</a></span>(int&nbsp;stageId)</code>
<div class="block">Cancel a given stage and all jobs associated with it.</div>
</td>
</tr>
<tr id="i17" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelStage-int-java.lang.String-">cancelStage</a></span>(int&nbsp;stageId,
String&nbsp;reason)</code>
<div class="block">Cancel a given stage and all jobs associated with it.</div>
</td>
</tr>
<tr id="i18" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#clearCallSite--">clearCallSite</a></span>()</code>
<div class="block">Clear the thread-local property for overriding the call sites
of actions and RDDs.</div>
</td>
</tr>
<tr id="i19" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#clearJobGroup--">clearJobGroup</a></span>()</code>
<div class="block">Clear the current thread's job group ID and its description.</div>
</td>
</tr>
<tr id="i20" class="altColor">
<td class="colFirst"><code>&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/util/CollectionAccumulator.html" title="class in org.apache.spark.util">CollectionAccumulator</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#collectionAccumulator--">collectionAccumulator</a></span>()</code>
<div class="block">Create and register a <code>CollectionAccumulator</code>, which starts with empty list and accumulates
inputs by adding them into the list.</div>
</td>
</tr>
<tr id="i21" class="rowColor">
<td class="colFirst"><code>&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/util/CollectionAccumulator.html" title="class in org.apache.spark.util">CollectionAccumulator</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#collectionAccumulator-java.lang.String-">collectionAccumulator</a></span>(String&nbsp;name)</code>
<div class="block">Create and register a <code>CollectionAccumulator</code>, which starts with empty list and accumulates
inputs by adding them into the list.</div>
</td>
</tr>
<tr id="i22" class="altColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#defaultMinPartitions--">defaultMinPartitions</a></span>()</code>
<div class="block">Default min number of partitions for Hadoop RDDs when not given by user
Notice that we use math.min so the "defaultMinPartitions" cannot be higher than 2.</div>
</td>
</tr>
<tr id="i23" class="rowColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#defaultParallelism--">defaultParallelism</a></span>()</code>
<div class="block">Default level of parallelism to use when not given by user (e.g.</div>
</td>
</tr>
<tr id="i24" class="altColor">
<td class="colFirst"><code>String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#deployMode--">deployMode</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i25" class="rowColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/util/DoubleAccumulator.html" title="class in org.apache.spark.util">DoubleAccumulator</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#doubleAccumulator--">doubleAccumulator</a></span>()</code>
<div class="block">Create and register a double accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div>
</td>
</tr>
<tr id="i26" class="altColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/util/DoubleAccumulator.html" title="class in org.apache.spark.util">DoubleAccumulator</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#doubleAccumulator-java.lang.String-">doubleAccumulator</a></span>(String&nbsp;name)</code>
<div class="block">Create and register a double accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div>
</td>
</tr>
<tr id="i27" class="rowColor">
<td class="colFirst"><code>&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#emptyRDD-scala.reflect.ClassTag-">emptyRDD</a></span>(scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$8)</code>
<div class="block">Get an RDD that has no partitions or elements.</div>
</td>
</tr>
<tr id="i28" class="altColor">
<td class="colFirst"><code>scala.collection.Seq&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#files--">files</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i29" class="rowColor">
<td class="colFirst"><code>scala.collection.Seq&lt;<a href="../../../org/apache/spark/scheduler/Schedulable.html" title="interface in org.apache.spark.scheduler">Schedulable</a>&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getAllPools--">getAllPools</a></span>()</code>
<div class="block">:: DeveloperApi ::
Return pools for the fair scheduler.</div>
</td>
</tr>
<tr id="i30" class="altColor">
<td class="colFirst"><code>scala.Option&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getCheckpointDir--">getCheckpointDir</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i31" class="rowColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getConf--">getConf</a></span>()</code>
<div class="block">Return a copy of this SparkContext's configuration.</div>
</td>
</tr>
<tr id="i32" class="altColor">
<td class="colFirst"><code>scala.collection.Map&lt;String,scala.Tuple2&lt;Object,Object&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getExecutorMemoryStatus--">getExecutorMemoryStatus</a></span>()</code>
<div class="block">Return a map from the block manager to the max memory available for caching and the remaining
memory available for caching.</div>
</td>
</tr>
<tr id="i33" class="rowColor">
<td class="colFirst"><code>String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getLocalProperty-java.lang.String-">getLocalProperty</a></span>(String&nbsp;key)</code>
<div class="block">Get a local property set in this thread, or null if it is missing.</div>
</td>
</tr>
<tr id="i34" class="altColor">
<td class="colFirst"><code>static <a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getOrCreate--">getOrCreate</a></span>()</code>
<div class="block">This function may be used to get or instantiate a SparkContext and register it as a
singleton object.</div>
</td>
</tr>
<tr id="i35" class="rowColor">
<td class="colFirst"><code>static <a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getOrCreate-org.apache.spark.SparkConf-">getOrCreate</a></span>(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a>&nbsp;config)</code>
<div class="block">This function may be used to get or instantiate a SparkContext and register it as a
singleton object.</div>
</td>
</tr>
<tr id="i36" class="altColor">
<td class="colFirst"><code>scala.collection.Map&lt;Object,<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;?&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getPersistentRDDs--">getPersistentRDDs</a></span>()</code>
<div class="block">Returns an immutable map of RDDs that have marked themselves as persistent via cache() call.</div>
</td>
</tr>
<tr id="i37" class="rowColor">
<td class="colFirst"><code>scala.Option&lt;<a href="../../../org/apache/spark/scheduler/Schedulable.html" title="interface in org.apache.spark.scheduler">Schedulable</a>&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getPoolForName-java.lang.String-">getPoolForName</a></span>(String&nbsp;pool)</code>
<div class="block">:: DeveloperApi ::
Return the pool associated with the given name, if one exists.</div>
</td>
</tr>
<tr id="i38" class="altColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/storage/RDDInfo.html" title="class in org.apache.spark.storage">RDDInfo</a>[]</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getRDDStorageInfo--">getRDDStorageInfo</a></span>()</code>
<div class="block">:: DeveloperApi ::
Return information about what RDDs are cached, if they are in mem or on disk, how much space
they take, etc.</div>
</td>
</tr>
<tr id="i39" class="rowColor">
<td class="colFirst"><code>scala.Enumeration.Value</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getSchedulingMode--">getSchedulingMode</a></span>()</code>
<div class="block">Return current scheduling mode</div>
</td>
</tr>
<tr id="i40" class="altColor">
<td class="colFirst"><code>org.apache.hadoop.conf.Configuration</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#hadoopConfiguration--">hadoopConfiguration</a></span>()</code>
<div class="block">A default Hadoop Configuration for the Hadoop code (e.g.</div>
</td>
</tr>
<tr id="i41" class="rowColor">
<td class="colFirst"><code>&lt;K,V&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#hadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-int-">hadoopFile</a></span>(String&nbsp;path,
Class&lt;? extends org.apache.hadoop.mapred.InputFormat&lt;K,V&gt;&gt;&nbsp;inputFormatClass,
Class&lt;K&gt;&nbsp;keyClass,
Class&lt;V&gt;&nbsp;valueClass,
int&nbsp;minPartitions)</code>
<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat</div>
</td>
</tr>
<tr id="i42" class="altColor">
<td class="colFirst"><code>&lt;K,V,F extends org.apache.hadoop.mapred.InputFormat&lt;K,V&gt;&gt;<br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#hadoopFile-java.lang.String-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">hadoopFile</a></span>(String&nbsp;path,
scala.reflect.ClassTag&lt;K&gt;&nbsp;km,
scala.reflect.ClassTag&lt;V&gt;&nbsp;vm,
scala.reflect.ClassTag&lt;F&gt;&nbsp;fm)</code>
<div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys,
values and the InputFormat so that users don't need to pass them directly.</div>
</td>
</tr>
<tr id="i43" class="rowColor">
<td class="colFirst"><code>&lt;K,V,F extends org.apache.hadoop.mapred.InputFormat&lt;K,V&gt;&gt;<br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#hadoopFile-java.lang.String-int-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">hadoopFile</a></span>(String&nbsp;path,
int&nbsp;minPartitions,
scala.reflect.ClassTag&lt;K&gt;&nbsp;km,
scala.reflect.ClassTag&lt;V&gt;&nbsp;vm,
scala.reflect.ClassTag&lt;F&gt;&nbsp;fm)</code>
<div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys,
values and the InputFormat so that users don't need to pass them directly.</div>
</td>
</tr>
<tr id="i44" class="altColor">
<td class="colFirst"><code>&lt;K,V&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#hadoopRDD-org.apache.hadoop.mapred.JobConf-java.lang.Class-java.lang.Class-java.lang.Class-int-">hadoopRDD</a></span>(org.apache.hadoop.mapred.JobConf&nbsp;conf,
Class&lt;? extends org.apache.hadoop.mapred.InputFormat&lt;K,V&gt;&gt;&nbsp;inputFormatClass,
Class&lt;K&gt;&nbsp;keyClass,
Class&lt;V&gt;&nbsp;valueClass,
int&nbsp;minPartitions)</code>
<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf given its InputFormat and other
necessary info (e.g.</div>
</td>
</tr>
<tr id="i45" class="rowColor">
<td class="colFirst"><code>boolean</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#isLocal--">isLocal</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i46" class="altColor">
<td class="colFirst"><code>boolean</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#isStopped--">isStopped</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i47" class="rowColor">
<td class="colFirst"><code>static scala.Option&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#jarOfClass-java.lang.Class-">jarOfClass</a></span>(Class&lt;?&gt;&nbsp;cls)</code>
<div class="block">Find the JAR from which a given class was loaded, to make it easy for users to pass
their JARs to SparkContext.</div>
</td>
</tr>
<tr id="i48" class="altColor">
<td class="colFirst"><code>static scala.Option&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#jarOfObject-java.lang.Object-">jarOfObject</a></span>(Object&nbsp;obj)</code>
<div class="block">Find the JAR that contains the class of a particular object, to make it easy for users
to pass their JARs to SparkContext.</div>
</td>
</tr>
<tr id="i49" class="rowColor">
<td class="colFirst"><code>scala.collection.Seq&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#jars--">jars</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i50" class="altColor">
<td class="colFirst"><code>boolean</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#killExecutor-java.lang.String-">killExecutor</a></span>(String&nbsp;executorId)</code>
<div class="block">:: DeveloperApi ::
Request that the cluster manager kill the specified executor.</div>
</td>
</tr>
<tr id="i51" class="rowColor">
<td class="colFirst"><code>boolean</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#killExecutors-scala.collection.Seq-">killExecutors</a></span>(scala.collection.Seq&lt;String&gt;&nbsp;executorIds)</code>
<div class="block">:: DeveloperApi ::
Request that the cluster manager kill the specified executors.</div>
</td>
</tr>
<tr id="i52" class="altColor">
<td class="colFirst"><code>boolean</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#killTaskAttempt-long-boolean-java.lang.String-">killTaskAttempt</a></span>(long&nbsp;taskId,
boolean&nbsp;interruptThread,
String&nbsp;reason)</code>
<div class="block">Kill and reschedule the given task attempt.</div>
</td>
</tr>
<tr id="i53" class="rowColor">
<td class="colFirst"><code>scala.collection.Seq&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#listArchives--">listArchives</a></span>()</code>
<div class="block">:: Experimental ::
Returns a list of archive paths that are added to resources.</div>
</td>
</tr>
<tr id="i54" class="altColor">
<td class="colFirst"><code>scala.collection.Seq&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#listFiles--">listFiles</a></span>()</code>
<div class="block">Returns a list of file paths that are added to resources.</div>
</td>
</tr>
<tr id="i55" class="rowColor">
<td class="colFirst"><code>scala.collection.Seq&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#listJars--">listJars</a></span>()</code>
<div class="block">Returns a list of jar files that are added to resources.</div>
</td>
</tr>
<tr id="i56" class="altColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/util/LongAccumulator.html" title="class in org.apache.spark.util">LongAccumulator</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#longAccumulator--">longAccumulator</a></span>()</code>
<div class="block">Create and register a long accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div>
</td>
</tr>
<tr id="i57" class="rowColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/util/LongAccumulator.html" title="class in org.apache.spark.util">LongAccumulator</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#longAccumulator-java.lang.String-">longAccumulator</a></span>(String&nbsp;name)</code>
<div class="block">Create and register a long accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div>
</td>
</tr>
<tr id="i58" class="altColor">
<td class="colFirst"><code>&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#makeRDD-scala.collection.Seq-int-scala.reflect.ClassTag-">makeRDD</a></span>(scala.collection.Seq&lt;T&gt;&nbsp;seq,
int&nbsp;numSlices,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$2)</code>
<div class="block">Distribute a local Scala collection to form an RDD.</div>
</td>
</tr>
<tr id="i59" class="rowColor">
<td class="colFirst"><code>&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#makeRDD-scala.collection.Seq-scala.reflect.ClassTag-">makeRDD</a></span>(scala.collection.Seq&lt;scala.Tuple2&lt;T,scala.collection.Seq&lt;String&gt;&gt;&gt;&nbsp;seq,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$3)</code>
<div class="block">Distribute a local Scala collection to form an RDD, with one or more
location preferences (hostnames of Spark nodes) for each object.</div>
</td>
</tr>
<tr id="i60" class="altColor">
<td class="colFirst"><code>String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#master--">master</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i61" class="rowColor">
<td class="colFirst"><code>&lt;K,V,F extends org.apache.hadoop.mapreduce.InputFormat&lt;K,V&gt;&gt;<br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#newAPIHadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-org.apache.hadoop.conf.Configuration-">newAPIHadoopFile</a></span>(String&nbsp;path,
Class&lt;F&gt;&nbsp;fClass,
Class&lt;K&gt;&nbsp;kClass,
Class&lt;V&gt;&nbsp;vClass,
org.apache.hadoop.conf.Configuration&nbsp;conf)</code>
<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
and extra configuration options to pass to the input format.</div>
</td>
</tr>
<tr id="i62" class="altColor">
<td class="colFirst"><code>&lt;K,V,F extends org.apache.hadoop.mapreduce.InputFormat&lt;K,V&gt;&gt;<br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#newAPIHadoopFile-java.lang.String-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">newAPIHadoopFile</a></span>(String&nbsp;path,
scala.reflect.ClassTag&lt;K&gt;&nbsp;km,
scala.reflect.ClassTag&lt;V&gt;&nbsp;vm,
scala.reflect.ClassTag&lt;F&gt;&nbsp;fm)</code>
<div class="block">Smarter version of <code>newApiHadoopFile</code> that uses class tags to figure out the classes of keys,
values and the <code>org.apache.hadoop.mapreduce.InputFormat</code> (new MapReduce API) so that users
don't need to pass them directly.</div>
</td>
</tr>
<tr id="i63" class="rowColor">
<td class="colFirst"><code>&lt;K,V,F extends org.apache.hadoop.mapreduce.InputFormat&lt;K,V&gt;&gt;<br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#newAPIHadoopRDD-org.apache.hadoop.conf.Configuration-java.lang.Class-java.lang.Class-java.lang.Class-">newAPIHadoopRDD</a></span>(org.apache.hadoop.conf.Configuration&nbsp;conf,
Class&lt;F&gt;&nbsp;fClass,
Class&lt;K&gt;&nbsp;kClass,
Class&lt;V&gt;&nbsp;vClass)</code>
<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
and extra configuration options to pass to the input format.</div>
</td>
</tr>
<tr id="i64" class="altColor">
<td class="colFirst"><code>&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#objectFile-java.lang.String-int-scala.reflect.ClassTag-">objectFile</a></span>(String&nbsp;path,
int&nbsp;minPartitions,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$4)</code>
<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and
BytesWritable values that contain a serialized partition.</div>
</td>
</tr>
<tr id="i65" class="rowColor">
<td class="colFirst"><code>static void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#org:Dapache:Dspark:Dinternal:DLogging:D:Dlog__:Deq-org.slf4j.Logger-">org$apache$spark$internal$Logging$$log__$eq</a></span>(org.slf4j.Logger&nbsp;x$1)</code>&nbsp;</td>
</tr>
<tr id="i66" class="altColor">
<td class="colFirst"><code>static org.slf4j.Logger</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#org:Dapache:Dspark:Dinternal:DLogging:D:Dlog_--">org$apache$spark$internal$Logging$$log_</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i67" class="rowColor">
<td class="colFirst"><code>&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#parallelize-scala.collection.Seq-int-scala.reflect.ClassTag-">parallelize</a></span>(scala.collection.Seq&lt;T&gt;&nbsp;seq,
int&nbsp;numSlices,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$1)</code>
<div class="block">Distribute a local Scala collection to form an RDD.</div>
</td>
</tr>
<tr id="i68" class="altColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;Object&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#range-long-long-long-int-">range</a></span>(long&nbsp;start,
long&nbsp;end,
long&nbsp;step,
int&nbsp;numSlices)</code>
<div class="block">Creates a new RDD[Long] containing elements from <code>start</code> to <code>end</code>(exclusive), increased by
<code>step</code> every element.</div>
</td>
</tr>
<tr id="i69" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#register-org.apache.spark.util.AccumulatorV2-">register</a></span>(<a href="../../../org/apache/spark/util/AccumulatorV2.html" title="class in org.apache.spark.util">AccumulatorV2</a>&lt;?,?&gt;&nbsp;acc)</code>
<div class="block">Register the given accumulator.</div>
</td>
</tr>
<tr id="i70" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#register-org.apache.spark.util.AccumulatorV2-java.lang.String-">register</a></span>(<a href="../../../org/apache/spark/util/AccumulatorV2.html" title="class in org.apache.spark.util">AccumulatorV2</a>&lt;?,?&gt;&nbsp;acc,
String&nbsp;name)</code>
<div class="block">Register the given accumulator with given name.</div>
</td>
</tr>
<tr id="i71" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#removeSparkListener-org.apache.spark.scheduler.SparkListenerInterface-">removeSparkListener</a></span>(<a href="../../../org/apache/spark/scheduler/SparkListenerInterface.html" title="interface in org.apache.spark.scheduler">SparkListenerInterface</a>&nbsp;listener)</code>
<div class="block">:: DeveloperApi ::
Deregister the listener from Spark's listener bus.</div>
</td>
</tr>
<tr id="i72" class="altColor">
<td class="colFirst"><code>boolean</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#requestExecutors-int-">requestExecutors</a></span>(int&nbsp;numAdditionalExecutors)</code>
<div class="block">:: DeveloperApi ::
Request an additional number of executors from the cluster manager.</div>
</td>
</tr>
<tr id="i73" class="rowColor">
<td class="colFirst"><code>boolean</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#requestTotalExecutors-int-int-scala.collection.immutable.Map-">requestTotalExecutors</a></span>(int&nbsp;numExecutors,
int&nbsp;localityAwareTasks,
scala.collection.immutable.Map&lt;String,Object&gt;&nbsp;hostToLocalTaskCount)</code>
<div class="block">Update the cluster manager on our scheduling needs.</div>
</td>
</tr>
<tr id="i74" class="altColor">
<td class="colFirst"><code>scala.collection.Map&lt;String,<a href="../../../org/apache/spark/resource/ResourceInformation.html" title="class in org.apache.spark.resource">ResourceInformation</a>&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#resources--">resources</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i75" class="rowColor">
<td class="colFirst"><code>&lt;T,U,R&gt;&nbsp;<a href="../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a>&lt;R&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runApproximateJob-org.apache.spark.rdd.RDD-scala.Function2-org.apache.spark.partial.ApproximateEvaluator-long-">runApproximateJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function2&lt;<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
<a href="../../../org/apache/spark/partial/ApproximateEvaluator.html" title="interface in org.apache.spark.partial">ApproximateEvaluator</a>&lt;U,R&gt;&nbsp;evaluator,
long&nbsp;timeout)</code>
<div class="block">:: DeveloperApi ::
Run a job that can return approximate results.</div>
</td>
</tr>
<tr id="i76" class="altColor">
<td class="colFirst"><code>&lt;T,U&gt;&nbsp;Object</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function1&lt;scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$14)</code>
<div class="block">Run a job on all partitions in an RDD and return the results in an array.</div>
</td>
</tr>
<tr id="i77" class="rowColor">
<td class="colFirst"><code>&lt;T,U&gt;&nbsp;void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.Function2-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function1&lt;scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;processPartition,
scala.Function2&lt;Object,U,scala.runtime.BoxedUnit&gt;&nbsp;resultHandler,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$16)</code>
<div class="block">Run a job on all partitions in an RDD and pass the results to a handler function.</div>
</td>
</tr>
<tr id="i78" class="altColor">
<td class="colFirst"><code>&lt;T,U&gt;&nbsp;Object</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.collection.Seq-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function1&lt;scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
scala.collection.Seq&lt;Object&gt;&nbsp;partitions,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$12)</code>
<div class="block">Run a function on a given set of partitions in an RDD and return the results as an array.</div>
</td>
</tr>
<tr id="i79" class="rowColor">
<td class="colFirst"><code>&lt;T,U&gt;&nbsp;Object</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function2&lt;<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$13)</code>
<div class="block">Run a job on all partitions in an RDD and return the results in an array.</div>
</td>
</tr>
<tr id="i80" class="altColor">
<td class="colFirst"><code>&lt;T,U&gt;&nbsp;void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.Function2-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function2&lt;<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;processPartition,
scala.Function2&lt;Object,U,scala.runtime.BoxedUnit&gt;&nbsp;resultHandler,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$15)</code>
<div class="block">Run a job on all partitions in an RDD and pass the results to a handler function.</div>
</td>
</tr>
<tr id="i81" class="rowColor">
<td class="colFirst"><code>&lt;T,U&gt;&nbsp;Object</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.collection.Seq-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function2&lt;<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
scala.collection.Seq&lt;Object&gt;&nbsp;partitions,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$11)</code>
<div class="block">Run a function on a given set of partitions in an RDD and return the results as an array.</div>
</td>
</tr>
<tr id="i82" class="altColor">
<td class="colFirst"><code>&lt;T,U&gt;&nbsp;void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.collection.Seq-scala.Function2-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function2&lt;<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
scala.collection.Seq&lt;Object&gt;&nbsp;partitions,
scala.Function2&lt;Object,U,scala.runtime.BoxedUnit&gt;&nbsp;resultHandler,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$10)</code>
<div class="block">Run a function on a given set of partitions in an RDD and pass the results to the given
handler function.</div>
</td>
</tr>
<tr id="i83" class="rowColor">
<td class="colFirst"><code>&lt;K,V&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-">sequenceFile</a></span>(String&nbsp;path,
Class&lt;K&gt;&nbsp;keyClass,
Class&lt;V&gt;&nbsp;valueClass)</code>
<div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types.</div>
</td>
</tr>
<tr id="i84" class="altColor">
<td class="colFirst"><code>&lt;K,V&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-int-">sequenceFile</a></span>(String&nbsp;path,
Class&lt;K&gt;&nbsp;keyClass,
Class&lt;V&gt;&nbsp;valueClass,
int&nbsp;minPartitions)</code>
<div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types.</div>
</td>
</tr>
<tr id="i85" class="rowColor">
<td class="colFirst"><code>&lt;K,V&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#sequenceFile-java.lang.String-int-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.Function0-scala.Function0-">sequenceFile</a></span>(String&nbsp;path,
int&nbsp;minPartitions,
scala.reflect.ClassTag&lt;K&gt;&nbsp;km,
scala.reflect.ClassTag&lt;V&gt;&nbsp;vm,
scala.Function0&lt;org.apache.spark.WritableConverter&lt;K&gt;&gt;&nbsp;kcf,
scala.Function0&lt;org.apache.spark.WritableConverter&lt;V&gt;&gt;&nbsp;vcf)</code>
<div class="block">Version of sequenceFile() for types implicitly convertible to Writables through a
WritableConverter.</div>
</td>
</tr>
<tr id="i86" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setCallSite-java.lang.String-">setCallSite</a></span>(String&nbsp;shortCallSite)</code>
<div class="block">Set the thread-local property for overriding the call sites
of actions and RDDs.</div>
</td>
</tr>
<tr id="i87" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setCheckpointDir-java.lang.String-">setCheckpointDir</a></span>(String&nbsp;directory)</code>
<div class="block">Set the directory under which RDDs are going to be checkpointed.</div>
</td>
</tr>
<tr id="i88" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setJobDescription-java.lang.String-">setJobDescription</a></span>(String&nbsp;value)</code>
<div class="block">Set a human readable description of the current job.</div>
</td>
</tr>
<tr id="i89" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setJobGroup-java.lang.String-java.lang.String-boolean-">setJobGroup</a></span>(String&nbsp;groupId,
String&nbsp;description,
boolean&nbsp;interruptOnCancel)</code>
<div class="block">Assigns a group ID to all the jobs started by this thread until the group ID is set to a
different value or cleared.</div>
</td>
</tr>
<tr id="i90" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setLocalProperty-java.lang.String-java.lang.String-">setLocalProperty</a></span>(String&nbsp;key,
String&nbsp;value)</code>
<div class="block">Set a local property that affects jobs submitted from this thread, such as the Spark fair
scheduler pool.</div>
</td>
</tr>
<tr id="i91" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setLogLevel-java.lang.String-">setLogLevel</a></span>(String&nbsp;logLevel)</code>
<div class="block">Control our logLevel.</div>
</td>
</tr>
<tr id="i92" class="altColor">
<td class="colFirst"><code>String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#sparkUser--">sparkUser</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i93" class="rowColor">
<td class="colFirst"><code>long</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#startTime--">startTime</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i94" class="altColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/SparkStatusTracker.html" title="class in org.apache.spark">SparkStatusTracker</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#statusTracker--">statusTracker</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i95" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#stop--">stop</a></span>()</code>
<div class="block">Shut down the SparkContext.</div>
</td>
</tr>
<tr id="i96" class="altColor">
<td class="colFirst"><code>&lt;T,U,R&gt;&nbsp;<a href="../../../org/apache/spark/SimpleFutureAction.html" title="class in org.apache.spark">SimpleFutureAction</a>&lt;R&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#submitJob-org.apache.spark.rdd.RDD-scala.Function1-scala.collection.Seq-scala.Function2-scala.Function0-">submitJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function1&lt;scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;processPartition,
scala.collection.Seq&lt;Object&gt;&nbsp;partitions,
scala.Function2&lt;Object,U,scala.runtime.BoxedUnit&gt;&nbsp;resultHandler,
scala.Function0&lt;R&gt;&nbsp;resultFunc)</code>
<div class="block">Submit a job for execution and return a FutureJob holding the result.</div>
</td>
</tr>
<tr id="i97" class="rowColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#textFile-java.lang.String-int-">textFile</a></span>(String&nbsp;path,
int&nbsp;minPartitions)</code>
<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any
Hadoop-supported file system URI, and return it as an RDD of Strings.</div>
</td>
</tr>
<tr id="i98" class="altColor">
<td class="colFirst"><code>scala.Option&lt;String&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#uiWebUrl--">uiWebUrl</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i99" class="rowColor">
<td class="colFirst"><code>&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#union-org.apache.spark.rdd.RDD-scala.collection.Seq-scala.reflect.ClassTag-">union</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;first,
scala.collection.Seq&lt;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&gt;&nbsp;rest,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$7)</code>
<div class="block">Build the union of a list of RDDs passed as variable-length arguments.</div>
</td>
</tr>
<tr id="i100" class="altColor">
<td class="colFirst"><code>&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#union-scala.collection.Seq-scala.reflect.ClassTag-">union</a></span>(scala.collection.Seq&lt;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&gt;&nbsp;rdds,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$6)</code>
<div class="block">Build the union of a list of RDDs.</div>
</td>
</tr>
<tr id="i101" class="rowColor">
<td class="colFirst"><code>String</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#version--">version</a></span>()</code>
<div class="block">The version of Spark on which this application is running.</div>
</td>
</tr>
<tr id="i102" class="altColor">
<td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;String,String&gt;&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#wholeTextFiles-java.lang.String-int-">wholeTextFiles</a></span>(String&nbsp;path,
int&nbsp;minPartitions)</code>
<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any
Hadoop-supported file system URI.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;Object</h3>
<code>equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.org.apache.spark.internal.Logging">
<!-- -->
</a>
<h3>Methods inherited from interface&nbsp;org.apache.spark.internal.Logging</h3>
<code>$init$, initializeForcefully, initializeLogIfNecessary, initializeLogIfNecessary, initializeLogIfNecessary$default$2, initLock, isTraceEnabled, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning, org$apache$spark$internal$Logging$$log__$eq, org$apache$spark$internal$Logging$$log_, uninitialize</code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="SparkContext-org.apache.spark.SparkConf-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>SparkContext</h4>
<pre>public&nbsp;SparkContext(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a>&nbsp;config)</pre>
</li>
</ul>
<a name="SparkContext--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>SparkContext</h4>
<pre>public&nbsp;SparkContext()</pre>
<div class="block">Create a SparkContext that loads settings from system properties (for instance, when
launching with ./bin/spark-submit).</div>
</li>
</ul>
<a name="SparkContext-java.lang.String-java.lang.String-org.apache.spark.SparkConf-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>SparkContext</h4>
<pre>public&nbsp;SparkContext(String&nbsp;master,
String&nbsp;appName,
<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a>&nbsp;conf)</pre>
<div class="block">Alternative constructor that allows setting common Spark properties directly
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd>
<dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd>
<dd><code>conf</code> - a <a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark"><code>SparkConf</code></a> object specifying other Spark parameters</dd>
</dl>
</li>
</ul>
<a name="SparkContext-java.lang.String-java.lang.String-java.lang.String-scala.collection.Seq-scala.collection.Map-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>SparkContext</h4>
<pre>public&nbsp;SparkContext(String&nbsp;master,
String&nbsp;appName,
String&nbsp;sparkHome,
scala.collection.Seq&lt;String&gt;&nbsp;jars,
scala.collection.Map&lt;String,String&gt;&nbsp;environment)</pre>
<div class="block">Alternative constructor that allows setting common Spark properties directly
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd>
<dd><code>appName</code> - A name for your application, to display on the cluster web UI.</dd>
<dd><code>sparkHome</code> - Location where Spark is installed on cluster nodes.</dd>
<dd><code>jars</code> - Collection of JARs to send to the cluster. These can be paths on the local file
system or HDFS, HTTP, HTTPS, or FTP URLs.</dd>
<dd><code>environment</code> - Environment variables to set on worker nodes.</dd>
</dl>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="getOrCreate-org.apache.spark.SparkConf-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getOrCreate</h4>
<pre>public static&nbsp;<a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a>&nbsp;getOrCreate(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a>&nbsp;config)</pre>
<div class="block">This function may be used to get or instantiate a SparkContext and register it as a
singleton object. Because we can only have one active SparkContext per JVM,
this is useful when applications may wish to share a SparkContext.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>config</code> - <code>SparkConf</code> that will be used to initialize the <code>SparkContext</code></dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>current <code>SparkContext</code> (or a new one if it wasn't created before the function call)</dd>
</dl>
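<div class="block">For example, a minimal usage sketch (the application name and master URL are illustrative):
<pre><code>
import org.apache.spark.{SparkConf, SparkContext}

// Build a configuration and get (or create) the singleton SparkContext.
val conf = new SparkConf().setAppName("example-app").setMaster("local[2]")
val sc = SparkContext.getOrCreate(conf)
</code></pre>
</div>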
</li>
</ul>
<a name="getOrCreate--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getOrCreate</h4>
<pre>public static&nbsp;<a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a>&nbsp;getOrCreate()</pre>
<div class="block">This function may be used to get or instantiate a SparkContext and register it as a
singleton object. Because we can only have one active SparkContext per JVM,
this is useful when applications may wish to share a SparkContext.
<p>
This overload does not require a SparkConf (useful when only retrieving an existing context).
<p></div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>current <code>SparkContext</code> (or a new one if it wasn't created before the function call)</dd>
</dl>
</li>
</ul>
<a name="jarOfClass-java.lang.Class-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>jarOfClass</h4>
<pre>public static&nbsp;scala.Option&lt;String&gt;&nbsp;jarOfClass(Class&lt;?&gt;&nbsp;cls)</pre>
<div class="block">Find the JAR from which a given class was loaded, to make it easy for users to pass
their JARs to SparkContext.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>cls</code> - class that should be inside of the jar</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>jar that contains the Class, <code>None</code> if not found</dd>
</dl>
</li>
</ul>
<a name="jarOfObject-java.lang.Object-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>jarOfObject</h4>
<pre>public static&nbsp;scala.Option&lt;String&gt;&nbsp;jarOfObject(Object&nbsp;obj)</pre>
<div class="block">Find the JAR that contains the class of a particular object, to make it easy for users
to pass their JARs to SparkContext. In most cases you can call jarOfObject(this) in
your driver program.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>obj</code> - reference to an instance whose class should be inside the jar</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>jar that contains the class of the instance, <code>None</code> if not found</dd>
</dl>
</li>
</ul>
<a name="org:Dapache:Dspark:Dinternal:DLogging:D:Dlog_--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>org$apache$spark$internal$Logging$$log_</h4>
<pre>public static&nbsp;org.slf4j.Logger&nbsp;org$apache$spark$internal$Logging$$log_()</pre>
</li>
</ul>
<a name="org:Dapache:Dspark:Dinternal:DLogging:D:Dlog__:Deq-org.slf4j.Logger-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>org$apache$spark$internal$Logging$$log__$eq</h4>
<pre>public static&nbsp;void&nbsp;org$apache$spark$internal$Logging$$log__$eq(org.slf4j.Logger&nbsp;x$1)</pre>
</li>
</ul>
<a name="startTime--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>startTime</h4>
<pre>public&nbsp;long&nbsp;startTime()</pre>
</li>
</ul>
<a name="getConf--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getConf</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a>&nbsp;getConf()</pre>
<div class="block">Return a copy of this SparkContext's configuration. The configuration ''cannot'' be
changed at runtime.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="resources--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>resources</h4>
<pre>public&nbsp;scala.collection.Map&lt;String,<a href="../../../org/apache/spark/resource/ResourceInformation.html" title="class in org.apache.spark.resource">ResourceInformation</a>&gt;&nbsp;resources()</pre>
</li>
</ul>
<a name="jars--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>jars</h4>
<pre>public&nbsp;scala.collection.Seq&lt;String&gt;&nbsp;jars()</pre>
</li>
</ul>
<a name="files--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>files</h4>
<pre>public&nbsp;scala.collection.Seq&lt;String&gt;&nbsp;files()</pre>
</li>
</ul>
<a name="archives--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>archives</h4>
<pre>public&nbsp;scala.collection.Seq&lt;String&gt;&nbsp;archives()</pre>
</li>
</ul>
<a name="master--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>master</h4>
<pre>public&nbsp;String&nbsp;master()</pre>
</li>
</ul>
<a name="deployMode--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>deployMode</h4>
<pre>public&nbsp;String&nbsp;deployMode()</pre>
</li>
</ul>
<a name="appName--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>appName</h4>
<pre>public&nbsp;String&nbsp;appName()</pre>
</li>
</ul>
<a name="isLocal--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>isLocal</h4>
<pre>public&nbsp;boolean&nbsp;isLocal()</pre>
</li>
</ul>
<a name="isStopped--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>isStopped</h4>
<pre>public&nbsp;boolean&nbsp;isStopped()</pre>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>true if context is stopped or in the midst of stopping.</dd>
</dl>
</li>
</ul>
<a name="statusTracker--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>statusTracker</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/SparkStatusTracker.html" title="class in org.apache.spark">SparkStatusTracker</a>&nbsp;statusTracker()</pre>
</li>
</ul>
<a name="uiWebUrl--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>uiWebUrl</h4>
<pre>public&nbsp;scala.Option&lt;String&gt;&nbsp;uiWebUrl()</pre>
</li>
</ul>
<a name="hadoopConfiguration--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>hadoopConfiguration</h4>
<pre>public&nbsp;org.apache.hadoop.conf.Configuration&nbsp;hadoopConfiguration()</pre>
<div class="block">A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse.
<p></div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>As it will be reused in all Hadoop RDDs, it's better not to modify it unless you
plan to set some global configurations for all Hadoop RDDs.</dd>
</dl>
</li>
</ul>
<a name="sparkUser--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>sparkUser</h4>
<pre>public&nbsp;String&nbsp;sparkUser()</pre>
</li>
</ul>
<a name="applicationId--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>applicationId</h4>
<pre>public&nbsp;String&nbsp;applicationId()</pre>
<div class="block">A unique identifier for the Spark application.
Its format depends on the scheduler implementation.
(i.e.
in case of local spark app something like 'local-1433865536131'
in case of YARN something like 'application_1433865536131_34483'
in case of MESOS something like 'driver-20170926223339-0001'
)</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="applicationAttemptId--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>applicationAttemptId</h4>
<pre>public&nbsp;scala.Option&lt;String&gt;&nbsp;applicationAttemptId()</pre>
</li>
</ul>
<a name="setLogLevel-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setLogLevel</h4>
<pre>public&nbsp;void&nbsp;setLogLevel(String&nbsp;logLevel)</pre>
<div class="block">Control our logLevel. This overrides any user-defined log settings.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>logLevel</code> - The desired log level as a string.
Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN</dd>
</dl>
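<div class="block">For example, a minimal sketch assuming an existing SparkContext <code>sc</code>:
<pre><code>
// Suppress INFO and DEBUG output for the rest of the application.
sc.setLogLevel("WARN")
</code></pre>
</div>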
</li>
</ul>
<a name="setLocalProperty-java.lang.String-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setLocalProperty</h4>
<pre>public&nbsp;void&nbsp;setLocalProperty(String&nbsp;key,
String&nbsp;value)</pre>
<div class="block">Set a local property that affects jobs submitted from this thread, such as the Spark fair
scheduler pool. User-defined properties may also be set here. These properties are propagated
through to worker tasks and can be accessed there via
<a href="../../../org/apache/spark/TaskContext.html#getLocalProperty-java.lang.String-"><code>TaskContext.getLocalProperty(java.lang.String)</code></a>.
<p>
These properties are inherited by child threads spawned from this thread. This
may have unexpected consequences when working with thread pools. The standard Java
implementation of thread pools has worker threads spawn other worker threads.
As a result, local properties may propagate unpredictably.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>key</code> - (undocumented)</dd>
<dd><code>value</code> - (undocumented)</dd>
</dl>
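<div class="block">For example, a minimal sketch assuming an existing SparkContext <code>sc</code>; the pool name is illustrative:
<pre><code>
// Jobs submitted from this thread run in the "batch" fair scheduler pool.
sc.setLocalProperty("spark.scheduler.pool", "batch")
// ... submit jobs ...
// Setting the value to null clears the property for this thread.
sc.setLocalProperty("spark.scheduler.pool", null)
</code></pre>
</div>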
</li>
</ul>
<a name="getLocalProperty-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getLocalProperty</h4>
<pre>public&nbsp;String&nbsp;getLocalProperty(String&nbsp;key)</pre>
<div class="block">Get a local property set in this thread, or null if it is missing. See
<code>org.apache.spark.SparkContext.setLocalProperty</code>.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>key</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="setJobDescription-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setJobDescription</h4>
<pre>public&nbsp;void&nbsp;setJobDescription(String&nbsp;value)</pre>
<div class="block">Set a human readable description of the current job.</div>
</li>
</ul>
<a name="setJobGroup-java.lang.String-java.lang.String-boolean-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setJobGroup</h4>
<pre>public&nbsp;void&nbsp;setJobGroup(String&nbsp;groupId,
String&nbsp;description,
boolean&nbsp;interruptOnCancel)</pre>
<div class="block">Assigns a group ID to all the jobs started by this thread until the group ID is set to a
different value or cleared.
<p>
Often, a unit of execution in an application consists of multiple Spark actions or jobs.
Application programmers can use this method to group all those jobs together and give a
group description. Once set, the Spark web UI will associate such jobs with this group.
<p>
The application can also use <code>org.apache.spark.SparkContext.cancelJobGroup</code> to cancel all
running jobs in this group. For example,
<pre><code>
// In the main thread:
sc.setJobGroup("some_job_to_cancel", "some job description")
sc.parallelize(1 to 10000, 2).map { i =&gt; Thread.sleep(10); i }.count()
// In a separate thread:
sc.cancelJobGroup("some_job_to_cancel")
</code></pre>
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>interruptOnCancel</code> - If true, then job cancellation will result in <code>Thread.interrupt()</code>
being called on the job's executor threads. This is useful to help ensure that the tasks
are actually stopped in a timely manner, but is off by default due to HDFS-1208, where HDFS
may respond to Thread.interrupt() by marking nodes as dead.</dd>
<dd><code>groupId</code> - (undocumented)</dd>
<dd><code>description</code> - (undocumented)</dd>
</dl>
</li>
</ul>
<a name="clearJobGroup--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>clearJobGroup</h4>
<pre>public&nbsp;void&nbsp;clearJobGroup()</pre>
<div class="block">Clear the current thread's job group ID and its description.</div>
</li>
</ul>
<a name="parallelize-scala.collection.Seq-int-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>parallelize</h4>
<pre>public&nbsp;&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;parallelize(scala.collection.Seq&lt;T&gt;&nbsp;seq,
int&nbsp;numSlices,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$1)</pre>
<div class="block">Distribute a local Scala collection to form an RDD.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>seq</code> - Scala collection to distribute</dd>
<dd><code>numSlices</code> - number of partitions to divide the collection into</dd>
<dd><code>evidence$1</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD representing distributed collection</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Parallelize acts lazily. If <code>seq</code> is a mutable collection and is altered after the call
to parallelize and before the first action on the RDD, the resultant RDD will reflect the
modified collection. Pass a copy of the argument to avoid this. Also, avoid using <code>parallelize(Seq())</code> to create an empty <code>RDD</code>; consider <code>emptyRDD</code> for an
RDD with no partitions, or <code>parallelize(Seq[T]())</code> for an RDD of <code>T</code> with empty partitions.</dd>
</dl>
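<div class="block">For example, a minimal sketch assuming an existing SparkContext <code>sc</code>:
<pre><code>
// Distribute a local range over 4 partitions and sum it with an action.
val rdd = sc.parallelize(1 to 100, 4)
val total = rdd.reduce(_ + _)   // 5050
</code></pre>
</div>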
</li>
</ul>
<a name="range-long-long-long-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>range</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;Object&gt;&nbsp;range(long&nbsp;start,
long&nbsp;end,
long&nbsp;step,
int&nbsp;numSlices)</pre>
<div class="block">Creates a new RDD[Long] containing elements from <code>start</code> to <code>end</code>(exclusive), increased by
<code>step</code> every element.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>start</code> - the start value.</dd>
<dd><code>end</code> - the end value.</dd>
<dd><code>step</code> - the incremental step</dd>
<dd><code>numSlices</code> - number of partitions to divide the collection into</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD representing distributed range</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>if this RDD needs to be cached, make sure each partition does not exceed the allowed size limit.
<p></dd>
</dl>
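<div class="block">For example, a minimal sketch assuming an existing SparkContext <code>sc</code>:
<pre><code>
// Longs 0, 10, 20, ..., 990 spread over 4 partitions; 1000 is excluded.
val longs = sc.range(0L, 1000L, step = 10L, numSlices = 4)
longs.count()   // 100
</code></pre>
</div>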
</li>
</ul>
<a name="makeRDD-scala.collection.Seq-int-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>makeRDD</h4>
<pre>public&nbsp;&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;makeRDD(scala.collection.Seq&lt;T&gt;&nbsp;seq,
int&nbsp;numSlices,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$2)</pre>
<div class="block">Distribute a local Scala collection to form an RDD.
<p>
This method is identical to <code>parallelize</code>.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>seq</code> - Scala collection to distribute</dd>
<dd><code>numSlices</code> - number of partitions to divide the collection into</dd>
<dd><code>evidence$2</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD representing distributed collection</dd>
</dl>
</li>
</ul>
<a name="makeRDD-scala.collection.Seq-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>makeRDD</h4>
<pre>public&nbsp;&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;makeRDD(scala.collection.Seq&lt;scala.Tuple2&lt;T,scala.collection.Seq&lt;String&gt;&gt;&gt;&nbsp;seq,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$3)</pre>
<div class="block">Distribute a local Scala collection to form an RDD, with one or more
location preferences (hostnames of Spark nodes) for each object.
Create a new partition for each collection item.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>seq</code> - list of tuples of data and location preferences (hostnames of Spark nodes)</dd>
<dd><code>evidence$3</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD representing data partitioned according to location preferences</dd>
</dl>
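<div class="block">For example, a minimal sketch assuming an existing SparkContext <code>sc</code>; the hostnames are illustrative:
<pre><code>
// One partition per element; each partition prefers the listed hosts.
val data = Seq(
  ("record-a", Seq("host1.example.com")),
  ("record-b", Seq("host2.example.com", "host3.example.com")))
val rdd = sc.makeRDD(data)
</code></pre>
</div>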
</li>
</ul>
<a name="textFile-java.lang.String-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>textFile</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;String&gt;&nbsp;textFile(String&nbsp;path,
int&nbsp;minPartitions)</pre>
<div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any
Hadoop-supported file system URI, and return it as an RDD of Strings.
The text files must be encoded as UTF-8.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - path to the text file on a supported file system</dd>
<dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD of lines of the text file</dd>
</dl>
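<div class="block">For example, a minimal sketch assuming an existing SparkContext <code>sc</code>; the path is illustrative:
<pre><code>
// Read a text file with at least 8 partitions and count non-empty lines.
val lines = sc.textFile("hdfs://namenode/data/events.log", minPartitions = 8)
val nonEmpty = lines.filter(_.nonEmpty).count()
</code></pre>
</div>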
</li>
</ul>
<a name="wholeTextFiles-java.lang.String-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>wholeTextFiles</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;String,String&gt;&gt;&nbsp;wholeTextFiles(String&nbsp;path,
int&nbsp;minPartitions)</pre>
<div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any
Hadoop-supported file system URI. Each file is read as a single record and returned as a
key-value pair, where the key is the path of the file and the value is its content.
The text files must be encoded as UTF-8.
<p>
<p> For example, if you have the following files:
<pre><code>
hdfs://a-hdfs-path/part-00000
hdfs://a-hdfs-path/part-00001
...
hdfs://a-hdfs-path/part-nnnnn
</code></pre>
<p>
Do <code>val rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path")</code>,
<p>
<p> then <code>rdd</code> contains
<pre><code>
(a-hdfs-path/part-00000, its content)
(a-hdfs-path/part-00001, its content)
...
(a-hdfs-path/part-nnnnn, its content)
</code></pre>
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - Directory to the input data files, the path can be comma separated paths as the
list of inputs.</dd>
<dd><code>minPartitions</code> - A suggestion value of the minimal splitting number for input data.</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD representing tuples of file path and the corresponding file content</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Small files are preferred; large files are also allowable, but may cause bad performance. On some filesystems, <code>.../path/*</code> can be a more efficient way to read all files
in a directory than <code>.../path/</code> or <code>.../path</code>. Partitioning is determined by data locality, which may result in too few partitions
by default.
<p></dd>
</dl>
</li>
</ul>
<a name="binaryFiles-java.lang.String-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>binaryFiles</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;String,<a href="../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>&gt;&gt;&nbsp;binaryFiles(String&nbsp;path,
int&nbsp;minPartitions)</pre>
<div class="block">Get an RDD for a Hadoop-readable dataset as PortableDataStream for each file
(useful for binary data)
<p>
For example, if you have the following files:
<pre><code>
hdfs://a-hdfs-path/part-00000
hdfs://a-hdfs-path/part-00001
...
hdfs://a-hdfs-path/part-nnnnn
</code></pre>
<p>
Do
<code>val rdd = sparkContext.binaryFiles("hdfs://a-hdfs-path")</code>,
<p>
then <code>rdd</code> contains
<pre><code>
(a-hdfs-path/part-00000, its content)
(a-hdfs-path/part-00001, its content)
...
(a-hdfs-path/part-nnnnn, its content)
</code></pre>
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - Directory to the input data files, the path can be comma separated paths as the
list of inputs.</dd>
<dd><code>minPartitions</code> - A suggestion value of the minimal splitting number for input data.</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD representing tuples of file path and corresponding file content</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Small files are preferred; very large files may cause bad performance. On some filesystems, <code>.../path/*</code> can be a more efficient way to read all files
in a directory than <code>.../path/</code> or <code>.../path</code>. Partitioning is determined by data locality, which may result in too few partitions
by default.
<p></dd>
</dl>
</li>
</ul>
<a name="binaryRecords-java.lang.String-int-org.apache.hadoop.conf.Configuration-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>binaryRecords</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;byte[]&gt;&nbsp;binaryRecords(String&nbsp;path,
int&nbsp;recordLength,
org.apache.hadoop.conf.Configuration&nbsp;conf)</pre>
<div class="block">Load data from a flat binary file, assuming the length of each record is constant.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - Directory to the input data files, the path can be comma separated paths as the
list of inputs.</dd>
<dd><code>recordLength</code> - The length at which to split the records</dd>
<dd><code>conf</code> - Configuration for setting up the dataset.
<p></dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>An RDD of data with values, represented as byte arrays</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>We ensure that the byte array for each record in the resulting RDD
has the provided record length.
<p></dd>
</dl>
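<div class="block">For example, a minimal sketch assuming an existing SparkContext <code>sc</code>; the path and record length are illustrative, and the Hadoop configuration argument is left at its default:
<pre><code>
// Each element is one fixed-length record of 512 bytes.
val records = sc.binaryRecords("hdfs://namenode/data/fixed.bin", 512)
records.map(_.length).distinct().collect()   // Array(512)
</code></pre>
</div>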
</li>
</ul>
<a name="hadoopRDD-org.apache.hadoop.mapred.JobConf-java.lang.Class-java.lang.Class-java.lang.Class-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>hadoopRDD</h4>
<pre>public&nbsp;&lt;K,V&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;&nbsp;hadoopRDD(org.apache.hadoop.mapred.JobConf&nbsp;conf,
Class&lt;? extends org.apache.hadoop.mapred.InputFormat&lt;K,V&gt;&gt;&nbsp;inputFormatClass,
Class&lt;K&gt;&nbsp;keyClass,
Class&lt;V&gt;&nbsp;valueClass,
int&nbsp;minPartitions)</pre>
<div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf given its InputFormat and other
necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable),
using the older MapReduce API (<code>org.apache.hadoop.mapred</code>).
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>conf</code> - JobConf for setting up the dataset. Note: This will be put into a Broadcast.
Therefore if you plan to reuse this conf to create multiple RDDs, you need to make
sure you won't modify the conf. A safe approach is always creating a new conf for
a new RDD.</dd>
<dd><code>inputFormatClass</code> - storage format of the data to be read</dd>
<dd><code>keyClass</code> - <code>Class</code> of the key associated with the <code>inputFormatClass</code> parameter</dd>
<dd><code>valueClass</code> - <code>Class</code> of the value associated with the <code>inputFormatClass</code> parameter</dd>
<dd><code>minPartitions</code> - Minimum number of Hadoop Splits to generate.</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD of tuples of key and corresponding value
<p></dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each
record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
operation will create many references to the same object.
If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
copy them using a <code>map</code> function.</dd>
</dl>
</li>
</ul>
<a name="hadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>hadoopFile</h4>
<pre>public&nbsp;&lt;K,V&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;&nbsp;hadoopFile(String&nbsp;path,
Class&lt;? extends org.apache.hadoop.mapred.InputFormat&lt;K,V&gt;&gt;&nbsp;inputFormatClass,
Class&lt;K&gt;&nbsp;keyClass,
Class&lt;V&gt;&nbsp;valueClass,
int&nbsp;minPartitions)</pre>
<div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - directory to the input data files, the path can be comma separated paths
as a list of inputs</dd>
<dd><code>inputFormatClass</code> - storage format of the data to be read</dd>
<dd><code>keyClass</code> - <code>Class</code> of the key associated with the <code>inputFormatClass</code> parameter</dd>
<dd><code>valueClass</code> - <code>Class</code> of the value associated with the <code>inputFormatClass</code> parameter</dd>
<dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD of tuples of key and corresponding value</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each
record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
operation will create many references to the same object.
If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
copy them using a <code>map</code> function.</dd>
</dl>
</li>
</ul>
<a name="hadoopFile-java.lang.String-int-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>hadoopFile</h4>
<pre>public&nbsp;&lt;K,V,F extends org.apache.hadoop.mapred.InputFormat&lt;K,V&gt;&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;&nbsp;hadoopFile(String&nbsp;path,
int&nbsp;minPartitions,
scala.reflect.ClassTag&lt;K&gt;&nbsp;km,
scala.reflect.ClassTag&lt;V&gt;&nbsp;vm,
scala.reflect.ClassTag&lt;F&gt;&nbsp;fm)</pre>
<div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys,
values and the InputFormat so that users don't need to pass them directly. Instead, callers
can just write, for example,
<pre><code>
val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path, minPartitions)
</code></pre>
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - directory to the input data files, the path can be comma separated paths
as a list of inputs</dd>
<dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd>
<dd><code>km</code> - (undocumented)</dd>
<dd><code>vm</code> - (undocumented)</dd>
<dd><code>fm</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD of tuples of key and corresponding value</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each
record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
operation will create many references to the same object.
If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
copy them using a <code>map</code> function.</dd>
</dl>
</li>
</ul>
<a name="hadoopFile-java.lang.String-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>hadoopFile</h4>
<pre>public&nbsp;&lt;K,V,F extends org.apache.hadoop.mapred.InputFormat&lt;K,V&gt;&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;&nbsp;hadoopFile(String&nbsp;path,
scala.reflect.ClassTag&lt;K&gt;&nbsp;km,
scala.reflect.ClassTag&lt;V&gt;&nbsp;vm,
scala.reflect.ClassTag&lt;F&gt;&nbsp;fm)</pre>
<div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys,
values and the InputFormat so that users don't need to pass them directly. Instead, callers
can just write, for example,
<pre><code>
val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path)
</code></pre>
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - directory to the input data files, the path can be comma separated paths as
a list of inputs</dd>
<dd><code>km</code> - (undocumented)</dd>
<dd><code>vm</code> - (undocumented)</dd>
<dd><code>fm</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD of tuples of key and corresponding value</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each
record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
operation will create many references to the same object.
If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
copy them using a <code>map</code> function.</dd>
</dl>
</li>
</ul>
<a name="newAPIHadoopFile-java.lang.String-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>newAPIHadoopFile</h4>
<pre>public&nbsp;&lt;K,V,F extends org.apache.hadoop.mapreduce.InputFormat&lt;K,V&gt;&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;&nbsp;newAPIHadoopFile(String&nbsp;path,
scala.reflect.ClassTag&lt;K&gt;&nbsp;km,
scala.reflect.ClassTag&lt;V&gt;&nbsp;vm,
scala.reflect.ClassTag&lt;F&gt;&nbsp;fm)</pre>
<div class="block">Smarter version of <code>newApiHadoopFile</code> that uses class tags to figure out the classes of keys,
values and the <code>org.apache.hadoop.mapreduce.InputFormat</code> (new MapReduce API) so that user
don't need to pass them directly. Instead, callers can just write, for example:
<code></code><code>
val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path)
</code><code></code>
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - directory to the input data files, the path can be comma separated paths
as a list of inputs</dd>
<dd><code>km</code> - (undocumented)</dd>
<dd><code>vm</code> - (undocumented)</dd>
<dd><code>fm</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD of tuples of key and corresponding value</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each
record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
operation will create many references to the same object.
If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
copy them using a <code>map</code> function.</dd>
</dl>
</li>
</ul>
<a name="newAPIHadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-org.apache.hadoop.conf.Configuration-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>newAPIHadoopFile</h4>
<pre>public&nbsp;&lt;K,V,F extends org.apache.hadoop.mapreduce.InputFormat&lt;K,V&gt;&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;&nbsp;newAPIHadoopFile(String&nbsp;path,
Class&lt;F&gt;&nbsp;fClass,
Class&lt;K&gt;&nbsp;kClass,
Class&lt;V&gt;&nbsp;vClass,
org.apache.hadoop.conf.Configuration&nbsp;conf)</pre>
<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
and extra configuration options to pass to the input format.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - directory to the input data files, the path can be comma separated paths
as a list of inputs</dd>
<dd><code>fClass</code> - storage format of the data to be read</dd>
<dd><code>kClass</code> - <code>Class</code> of the key associated with the <code>fClass</code> parameter</dd>
<dd><code>vClass</code> - <code>Class</code> of the value associated with the <code>fClass</code> parameter</dd>
<dd><code>conf</code> - Hadoop configuration</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD of tuples of key and corresponding value</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each
record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
operation will create many references to the same object.
If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
copy them using a <code>map</code> function.</dd>
</dl>
</li>
</ul>
<a name="newAPIHadoopRDD-org.apache.hadoop.conf.Configuration-java.lang.Class-java.lang.Class-java.lang.Class-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>newAPIHadoopRDD</h4>
<pre>public&nbsp;&lt;K,V,F extends org.apache.hadoop.mapreduce.InputFormat&lt;K,V&gt;&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;&nbsp;newAPIHadoopRDD(org.apache.hadoop.conf.Configuration&nbsp;conf,
Class&lt;F&gt;&nbsp;fClass,
Class&lt;K&gt;&nbsp;kClass,
Class&lt;V&gt;&nbsp;vClass)</pre>
<div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
and extra configuration options to pass to the input format.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>conf</code> - Configuration for setting up the dataset. Note: This will be put into a Broadcast.
Therefore if you plan to reuse this conf to create multiple RDDs, you need to make
sure you won't modify the conf. A safe approach is always creating a new conf for
a new RDD.</dd>
<dd><code>fClass</code> - storage format of the data to be read</dd>
<dd><code>kClass</code> - <code>Class</code> of the key associated with the <code>fClass</code> parameter</dd>
<dd><code>vClass</code> - <code>Class</code> of the value associated with the <code>fClass</code> parameter
<p></dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each
record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
operation will create many references to the same object.
If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
copy them using a <code>map</code> function.</dd>
</dl>
</li>
</ul>
<a name="sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>sequenceFile</h4>
<pre>public&nbsp;&lt;K,V&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;&nbsp;sequenceFile(String&nbsp;path,
Class&lt;K&gt;&nbsp;keyClass,
Class&lt;V&gt;&nbsp;valueClass,
int&nbsp;minPartitions)</pre>
<div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - directory to the input data files, the path can be comma separated paths
as a list of inputs</dd>
<dd><code>keyClass</code> - <code>Class</code> of the key associated with <code>SequenceFileInputFormat</code></dd>
<dd><code>valueClass</code> - <code>Class</code> of the value associated with <code>SequenceFileInputFormat</code></dd>
<dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD of tuples of key and corresponding value</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each
record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
operation will create many references to the same object.
If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
copy them using a <code>map</code> function.</dd>
</dl>
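<div class="block">For example, a minimal sketch assuming an existing SparkContext <code>sc</code>; the path is illustrative:
<pre><code>
import org.apache.hadoop.io.{IntWritable, Text}

// Read a SequenceFile of (Text, IntWritable) pairs, then copy out of the
// reused Writable objects before caching or collecting.
val pairs = sc.sequenceFile("hdfs://namenode/data/counts.seq",
  classOf[Text], classOf[IntWritable], minPartitions = 4)
val counts = pairs.map { case (k, v) =&gt; (k.toString, v.get) }
</code></pre>
</div>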
</li>
</ul>
<a name="sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>sequenceFile</h4>
<pre>public&nbsp;&lt;K,V&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;&nbsp;sequenceFile(String&nbsp;path,
Class&lt;K&gt;&nbsp;keyClass,
Class&lt;V&gt;&nbsp;valueClass)</pre>
<div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - directory to the input data files, the path can be comma separated paths
as a list of inputs</dd>
<dd><code>keyClass</code> - <code>Class</code> of the key associated with <code>SequenceFileInputFormat</code></dd>
<dd><code>valueClass</code> - <code>Class</code> of the value associated with <code>SequenceFileInputFormat</code></dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD of tuples of key and corresponding value</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each
record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
operation will create many references to the same object.
If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
copy them using a <code>map</code> function.</dd>
</dl>
</li>
</ul>
<a name="sequenceFile-java.lang.String-int-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.Function0-scala.Function0-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>sequenceFile</h4>
<pre>public&nbsp;&lt;K,V&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,V&gt;&gt;&nbsp;sequenceFile(String&nbsp;path,
int&nbsp;minPartitions,
scala.reflect.ClassTag&lt;K&gt;&nbsp;km,
scala.reflect.ClassTag&lt;V&gt;&nbsp;vm,
scala.Function0&lt;org.apache.spark.WritableConverter&lt;K&gt;&gt;&nbsp;kcf,
scala.Function0&lt;org.apache.spark.WritableConverter&lt;V&gt;&gt;&nbsp;vcf)</pre>
<div class="block">Version of sequenceFile() for types implicitly convertible to Writables through a
WritableConverter. For example, to access a SequenceFile where the keys are Text and the
values are IntWritable, you could simply write
<pre><code>
sparkContext.sequenceFile[String, Int](path, ...)
</code></pre>
<p>
WritableConverters are provided in a somewhat strange way (by an implicit function) to support
both subclasses of Writable and types for which we define a converter (e.g. Int to
IntWritable). The most natural thing would've been to have implicit objects for the
converters, but then we couldn't have an object for every subclass of Writable (you can't
have a parameterized singleton object). We use functions instead to create a new converter
for the appropriate type. In addition, we pass the converter a ClassTag of its type to
allow it to figure out the Writable class to use in the subclass case.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - directory to the input data files, the path can be comma separated paths
as a list of inputs</dd>
<dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd>
<dd><code>km</code> - (undocumented)</dd>
<dd><code>vm</code> - (undocumented)</dd>
<dd><code>kcf</code> - (undocumented)</dd>
<dd><code>vcf</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD of tuples of key and corresponding value</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Because Hadoop's RecordReader class re-uses the same Writable object for each
record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
operation will create many references to the same object.
If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
copy them using a <code>map</code> function.</dd>
</dl>
</li>
</ul>
<a name="objectFile-java.lang.String-int-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>objectFile</h4>
<pre>public&nbsp;&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;objectFile(String&nbsp;path,
int&nbsp;minPartitions,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$4)</pre>
<div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and
BytesWritable values that contain a serialized partition. This is still an experimental
storage format and may not be supported exactly as is in future Spark releases. It will also
be pretty slow if you use the default serializer (Java serialization),
though the nice thing about it is that there's very little effort required to save arbitrary
objects.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - directory to the input data files, the path can be comma separated paths
as a list of inputs</dd>
<dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd>
<dd><code>evidence$4</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>RDD representing deserialized data from the file(s)</dd>
</dl>
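<div class="block">For example, a minimal round-trip sketch assuming an existing SparkContext <code>sc</code>; the path is illustrative:
<pre><code>
// Save an RDD as a SequenceFile of serialized objects, then read it back.
sc.parallelize(Seq(1, 2, 3)).saveAsObjectFile("hdfs://namenode/tmp/ints")
val restored = sc.objectFile[Int]("hdfs://namenode/tmp/ints", minPartitions = 2)
restored.collect()   // Array(1, 2, 3)
</code></pre>
</div>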
</li>
</ul>
<a name="union-scala.collection.Seq-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>union</h4>
<pre>public&nbsp;&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;union(scala.collection.Seq&lt;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&gt;&nbsp;rdds,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$6)</pre>
<div class="block">Build the union of a list of RDDs.</div>
</li>
</ul>
<a name="union-org.apache.spark.rdd.RDD-scala.collection.Seq-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>union</h4>
<pre>public&nbsp;&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;union(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;first,
scala.collection.Seq&lt;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&gt;&nbsp;rest,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$7)</pre>
<div class="block">Build the union of a list of RDDs passed as variable-length arguments.</div>
</li>
</ul>
<a name="emptyRDD-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>emptyRDD</h4>
<pre>public&nbsp;&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;emptyRDD(scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$8)</pre>
<div class="block">Get an RDD that has no partitions or elements.</div>
</li>
</ul>
<a name="register-org.apache.spark.util.AccumulatorV2-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>register</h4>
<pre>public&nbsp;void&nbsp;register(<a href="../../../org/apache/spark/util/AccumulatorV2.html" title="class in org.apache.spark.util">AccumulatorV2</a>&lt;?,?&gt;&nbsp;acc)</pre>
<div class="block">Register the given accumulator.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>acc</code> - (undocumented)</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Accumulators must be registered before use, or an exception will be thrown.</dd>
</dl>
</li>
</ul>
<a name="register-org.apache.spark.util.AccumulatorV2-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>register</h4>
<pre>public&nbsp;void&nbsp;register(<a href="../../../org/apache/spark/util/AccumulatorV2.html" title="class in org.apache.spark.util">AccumulatorV2</a>&lt;?,?&gt;&nbsp;acc,
String&nbsp;name)</pre>
<div class="block">Register the given accumulator with given name.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>acc</code> - (undocumented)</dd>
<dd><code>name</code> - (undocumented)</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Accumulators must be registered before use, or an exception will be thrown.</dd>
</dl>
</li>
</ul>
<a name="longAccumulator--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>longAccumulator</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/util/LongAccumulator.html" title="class in org.apache.spark.util">LongAccumulator</a>&nbsp;longAccumulator()</pre>
<div class="block">Create and register a long accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
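<div class="block">For example, a minimal sketch assuming an existing SparkContext <code>sc</code> (this uses the named variant defined below; the accumulator name is illustrative):
<pre><code>
// Count malformed entries on the executors while an action runs.
val badRecords = sc.longAccumulator("badRecords")
sc.parallelize(Seq("1", "x", "3")).foreach { s =&gt;
  if (!s.forall(_.isDigit)) badRecords.add(1)
}
badRecords.value   // 1
</code></pre>
</div>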
</li>
</ul>
<a name="longAccumulator-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>longAccumulator</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/util/LongAccumulator.html" title="class in org.apache.spark.util">LongAccumulator</a>&nbsp;longAccumulator(String&nbsp;name)</pre>
<div class="block">Create and register a long accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>name</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="doubleAccumulator--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>doubleAccumulator</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/util/DoubleAccumulator.html" title="class in org.apache.spark.util">DoubleAccumulator</a>&nbsp;doubleAccumulator()</pre>
<div class="block">Create and register a double accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="doubleAccumulator-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>doubleAccumulator</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/util/DoubleAccumulator.html" title="class in org.apache.spark.util">DoubleAccumulator</a>&nbsp;doubleAccumulator(String&nbsp;name)</pre>
<div class="block">Create and register a double accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>name</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="collectionAccumulator--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>collectionAccumulator</h4>
<pre>public&nbsp;&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/util/CollectionAccumulator.html" title="class in org.apache.spark.util">CollectionAccumulator</a>&lt;T&gt;&nbsp;collectionAccumulator()</pre>
<div class="block">Create and register a <code>CollectionAccumulator</code>, which starts with empty list and accumulates
inputs by adding them into the list.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="collectionAccumulator-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>collectionAccumulator</h4>
<pre>public&nbsp;&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/util/CollectionAccumulator.html" title="class in org.apache.spark.util">CollectionAccumulator</a>&lt;T&gt;&nbsp;collectionAccumulator(String&nbsp;name)</pre>
<div class="block">Create and register a <code>CollectionAccumulator</code>, which starts with empty list and accumulates
inputs by adding them into the list.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>name</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="broadcast-java.lang.Object-scala.reflect.ClassTag-">
<!-- -->
</a><a name="broadcast-T-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>broadcast</h4>
<pre>public&nbsp;&lt;T&gt;&nbsp;<a href="../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast">Broadcast</a>&lt;T&gt;&nbsp;broadcast(T&nbsp;value,
scala.reflect.ClassTag&lt;T&gt;&nbsp;evidence$9)</pre>
<div class="block">Broadcast a read-only variable to the cluster, returning a
<a href="../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast"><code>Broadcast</code></a> object for reading it in distributed functions.
The variable will be sent to each executor only once.
<p></div>
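<div class="block">A minimal usage sketch (assuming an active <code>SparkContext</code> named <code>sc</code>; the lookup table is illustrative):
<pre><code>val lookup = sc.broadcast(Map(1 -> "a", 2 -> "b"))    // shipped to executors once
val labels = sc.parallelize(Seq(1, 2, 3))
  .map(i => lookup.value.getOrElse(i, "unknown"))      // read the broadcast value inside a task
labels.collect()   // Array("a", "b", "unknown")
</code></pre></div>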
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>value</code> - value to broadcast to the Spark nodes</dd>
<dd><code>evidence$9</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd><code>Broadcast</code> object, a read-only variable cached on each machine</dd>
</dl>
</li>
</ul>
<a name="addFile-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>addFile</h4>
<pre>public&nbsp;void&nbsp;addFile(String&nbsp;path)</pre>
<div class="block">Add a file to be downloaded with this Spark job on every node.
<p>
If a file is added during execution, it will not be available until the next TaskSet starts.
<p></div>
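<div class="block">A minimal usage sketch (assuming an active <code>SparkContext</code> named <code>sc</code>; the HDFS path is hypothetical):
<pre><code>import org.apache.spark.SparkFiles

sc.addFile("hdfs:///data/lookup.txt")            // hypothetical path
sc.parallelize(1 to 4).foreach { _ =>
  val localPath = SparkFiles.get("lookup.txt")   // resolved to the download location on each node
  // ... read localPath with ordinary file APIs ...
}
</code></pre></div>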
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - can be either a local file, a file in HDFS (or other Hadoop-supported
filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs,
use <code>SparkFiles.get(fileName)</code> to find its download location.
<p></dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>A path can be added only once. Subsequent additions of the same path are ignored.</dd>
</dl>
</li>
</ul>
<a name="listFiles--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>listFiles</h4>
<pre>public&nbsp;scala.collection.Seq&lt;String&gt;&nbsp;listFiles()</pre>
<div class="block">Returns a list of file paths that are added to resources.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="addArchive-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>addArchive</h4>
<pre>public&nbsp;void&nbsp;addArchive(String&nbsp;path)</pre>
<div class="block">:: Experimental ::
Add an archive to be downloaded and unpacked with this Spark job on every node.
<p>
If an archive is added during execution, it will not be available until the next TaskSet
starts.
<p></div>
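<div class="block">A minimal usage sketch (assuming an active <code>SparkContext</code> named <code>sc</code>; the archive path is hypothetical):
<pre><code>sc.addArchive("hdfs:///models/model.zip")   // hypothetical .zip archive
sc.listArchives().foreach(println)          // verify what was registered
// In tasks, use SparkFiles.get(...) as described above to locate the unpacked contents.
</code></pre></div>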
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - can be either a local file, a file in HDFS (or other Hadoop-supported
filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs,
use <code>SparkFiles.get(paths-to-files)</code> to find its download/unpacked location.
The given path should be one of .zip, .tar, .tar.gz, .tgz and .jar.
<p></dd>
<dt><span class="simpleTagLabel">Since:</span></dt>
<dd>3.1.0</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>A path can be added only once. Subsequent additions of the same path are ignored.
<p></dd>
</dl>
</li>
</ul>
<a name="listArchives--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>listArchives</h4>
<pre>public&nbsp;scala.collection.Seq&lt;String&gt;&nbsp;listArchives()</pre>
<div class="block">:: Experimental ::
Returns a list of archive paths that are added to resources.
<p></div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Since:</span></dt>
<dd>3.1.0</dd>
</dl>
</li>
</ul>
<a name="addFile-java.lang.String-boolean-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>addFile</h4>
<pre>public&nbsp;void&nbsp;addFile(String&nbsp;path,
boolean&nbsp;recursive)</pre>
<div class="block">Add a file to be downloaded with this Spark job on every node.
<p>
If a file is added during execution, it will not be available until the next TaskSet starts.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - can be either a local file, a file in HDFS (or other Hadoop-supported
filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs,
use <code>SparkFiles.get(fileName)</code> to find its download location.</dd>
<dd><code>recursive</code> - if true, a directory can be given in <code>path</code>. Currently directories are
only supported for Hadoop-supported filesystems.
<p></dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>A path can be added only once. Subsequent additions of the same path are ignored.</dd>
</dl>
</li>
</ul>
<a name="addSparkListener-org.apache.spark.scheduler.SparkListenerInterface-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>addSparkListener</h4>
<pre>public&nbsp;void&nbsp;addSparkListener(<a href="../../../org/apache/spark/scheduler/SparkListenerInterface.html" title="interface in org.apache.spark.scheduler">SparkListenerInterface</a>&nbsp;listener)</pre>
<div class="block">:: DeveloperApi ::
Register a listener to receive up-calls from events that happen during execution.</div>
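<div class="block">A minimal sketch of a custom listener (assuming an active <code>SparkContext</code> named <code>sc</code>; the listener body is illustrative):
<pre><code>import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd}

sc.addSparkListener(new SparkListener {
  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    println(s"Job ${jobEnd.jobId} finished with result ${jobEnd.jobResult}")
  }
})
</code></pre></div>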
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>listener</code> - (undocumented)</dd>
</dl>
</li>
</ul>
<a name="removeSparkListener-org.apache.spark.scheduler.SparkListenerInterface-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>removeSparkListener</h4>
<pre>public&nbsp;void&nbsp;removeSparkListener(<a href="../../../org/apache/spark/scheduler/SparkListenerInterface.html" title="interface in org.apache.spark.scheduler">SparkListenerInterface</a>&nbsp;listener)</pre>
<div class="block">:: DeveloperApi ::
Deregister the listener from Spark's listener bus.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>listener</code> - (undocumented)</dd>
</dl>
</li>
</ul>
<a name="requestTotalExecutors-int-int-scala.collection.immutable.Map-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>requestTotalExecutors</h4>
<pre>public&nbsp;boolean&nbsp;requestTotalExecutors(int&nbsp;numExecutors,
int&nbsp;localityAwareTasks,
scala.collection.immutable.Map&lt;String,Object&gt;&nbsp;hostToLocalTaskCount)</pre>
<div class="block">Update the cluster manager on our scheduling needs. Three bits of information are included
to help it make decisions. This applies to the default ResourceProfile.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>numExecutors</code> - The total number of executors we'd like to have. The cluster manager
shouldn't kill any running executor to reach this number, but,
if all existing executors were to die, this is the number of executors
we'd want to be allocated.</dd>
<dd><code>localityAwareTasks</code> - The number of tasks in all active stages that have locality
                           preferences. This includes running, pending, and completed tasks.</dd>
<dd><code>hostToLocalTaskCount</code> - A map of hosts to the number of tasks from all active stages
that would like to run on that host.
This includes running, pending, and completed tasks.</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>whether the request is acknowledged by the cluster manager.</dd>
</dl>
</li>
</ul>
<a name="requestExecutors-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>requestExecutors</h4>
<pre>public&nbsp;boolean&nbsp;requestExecutors(int&nbsp;numAdditionalExecutors)</pre>
<div class="block">:: DeveloperApi ::
Request an additional number of executors from the cluster manager.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>numAdditionalExecutors</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>whether the request is received.</dd>
</dl>
</li>
</ul>
<a name="killExecutors-scala.collection.Seq-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>killExecutors</h4>
<pre>public&nbsp;boolean&nbsp;killExecutors(scala.collection.Seq&lt;String&gt;&nbsp;executorIds)</pre>
<div class="block">:: DeveloperApi ::
Request that the cluster manager kill the specified executors.
<p>
This is not supported when dynamic allocation is turned on.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>executorIds</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>whether the request is received.</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>This is an indication to the cluster manager that the application wishes to adjust
its resource usage downwards. If the application wishes to replace the executors it kills
through this method with new ones, it should follow up explicitly with a call to
<code>SparkContext#requestExecutors</code>.
<p></dd>
</dl>
</li>
</ul>
<a name="killExecutor-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>killExecutor</h4>
<pre>public&nbsp;boolean&nbsp;killExecutor(String&nbsp;executorId)</pre>
<div class="block">:: DeveloperApi ::
Request that the cluster manager kill the specified executor.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>executorId</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>whether the request is received.</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>This is an indication to the cluster manager that the application wishes to adjust
its resource usage downwards. If the application wishes to replace the executor it kills
through this method with a new one, it should follow up explicitly with a call to
<code>SparkContext#requestExecutors</code>.
<p></dd>
</dl>
</li>
</ul>
<a name="version--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>version</h4>
<pre>public&nbsp;String&nbsp;version()</pre>
<div class="block">The version of Spark on which this application is running.</div>
</li>
</ul>
<a name="getExecutorMemoryStatus--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getExecutorMemoryStatus</h4>
<pre>public&nbsp;scala.collection.Map&lt;String,scala.Tuple2&lt;Object,Object&gt;&gt;&nbsp;getExecutorMemoryStatus()</pre>
<div class="block">Return a map from the block manager to the max memory available for caching and the remaining
memory available for caching.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="getRDDStorageInfo--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getRDDStorageInfo</h4>
<pre>public&nbsp;<a href="../../../org/apache/spark/storage/RDDInfo.html" title="class in org.apache.spark.storage">RDDInfo</a>[]&nbsp;getRDDStorageInfo()</pre>
<div class="block">:: DeveloperApi ::
Return information about which RDDs are cached, whether they are in memory or on disk, how much space
they take, etc.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="getPersistentRDDs--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getPersistentRDDs</h4>
<pre>public&nbsp;scala.collection.Map&lt;Object,<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;?&gt;&gt;&nbsp;getPersistentRDDs()</pre>
<div class="block">Returns an immutable map of RDDs that have marked themselves as persistent via cache() call.
<p></div>
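<div class="block">A minimal usage sketch (assuming an active <code>SparkContext</code> named <code>sc</code>):
<pre><code>val cached = sc.parallelize(1 to 10).cache()
cached.count()   // materializes the RDD so it is actually stored

sc.getPersistentRDDs.foreach { case (id, rdd) =>
  println(s"RDD $id persisted at level ${rdd.getStorageLevel}")
}
</code></pre></div>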
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>This does not necessarily mean the caching or computation was successful.</dd>
</dl>
</li>
</ul>
<a name="getAllPools--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getAllPools</h4>
<pre>public&nbsp;scala.collection.Seq&lt;<a href="../../../org/apache/spark/scheduler/Schedulable.html" title="interface in org.apache.spark.scheduler">Schedulable</a>&gt;&nbsp;getAllPools()</pre>
<div class="block">:: DeveloperApi ::
Return pools for the fair scheduler.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="getPoolForName-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getPoolForName</h4>
<pre>public&nbsp;scala.Option&lt;<a href="../../../org/apache/spark/scheduler/Schedulable.html" title="interface in org.apache.spark.scheduler">Schedulable</a>&gt;&nbsp;getPoolForName(String&nbsp;pool)</pre>
<div class="block">:: DeveloperApi ::
Return the pool associated with the given name, if one exists.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>pool</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="getSchedulingMode--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getSchedulingMode</h4>
<pre>public&nbsp;scala.Enumeration.Value&nbsp;getSchedulingMode()</pre>
<div class="block">Return current scheduling mode</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="addJar-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>addJar</h4>
<pre>public&nbsp;void&nbsp;addJar(String&nbsp;path)</pre>
<div class="block">Adds a JAR dependency for all tasks to be executed on this <code>SparkContext</code> in the future.
<p>
If a jar is added during execution, it will not be available until the next TaskSet starts.
<p></div>
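<div class="block">A minimal usage sketch (assuming an active <code>SparkContext</code> named <code>sc</code>; the jar paths are hypothetical):
<pre><code>sc.addJar("/opt/jobs/my-udfs.jar")            // local file on the driver
sc.addJar("hdfs:///libs/shared-codecs.jar")   // file on a Hadoop-supported filesystem
sc.listJars().foreach(println)                // verify what was registered
</code></pre></div>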
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>path</code> - can be either a local file, a file in HDFS (or other Hadoop-supported filesystems),
an HTTP, HTTPS or FTP URI, or local:/path for a file on every worker node.
<p></dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>A path can be added only once. Subsequent additions of the same path are ignored.</dd>
</dl>
</li>
</ul>
<a name="listJars--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>listJars</h4>
<pre>public&nbsp;scala.collection.Seq&lt;String&gt;&nbsp;listJars()</pre>
<div class="block">Returns a list of jar files that are added to resources.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="stop--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>stop</h4>
<pre>public&nbsp;void&nbsp;stop()</pre>
<div class="block">Shut down the SparkContext.</div>
</li>
</ul>
<a name="setCallSite-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setCallSite</h4>
<pre>public&nbsp;void&nbsp;setCallSite(String&nbsp;shortCallSite)</pre>
<div class="block">Set the thread-local property for overriding the call sites
of actions and RDDs.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>shortCallSite</code> - (undocumented)</dd>
</dl>
</li>
</ul>
<a name="clearCallSite--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>clearCallSite</h4>
<pre>public&nbsp;void&nbsp;clearCallSite()</pre>
<div class="block">Clear the thread-local property for overriding the call sites
of actions and RDDs.</div>
</li>
</ul>
<a name="runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.collection.Seq-scala.Function2-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>runJob</h4>
<pre>public&nbsp;&lt;T,U&gt;&nbsp;void&nbsp;runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function2&lt;<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
scala.collection.Seq&lt;Object&gt;&nbsp;partitions,
scala.Function2&lt;Object,U,scala.runtime.BoxedUnit&gt;&nbsp;resultHandler,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$10)</pre>
<div class="block">Run a function on a given set of partitions in an RDD and pass the results to the given
handler function. This is the main entry point for all actions in Spark.
<p></div>
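<div class="block">A minimal usage sketch of this overload (assuming an active <code>SparkContext</code> named <code>sc</code>; the computation is illustrative):
<pre><code>import org.apache.spark.TaskContext

val rdd = sc.parallelize(1 to 100, 4)
val partialSums = new Array[Int](2)
sc.runJob(
  rdd,
  (ctx: TaskContext, it: Iterator[Int]) => it.sum,    // run on each requested partition
  Seq(0, 1),                                          // only the first two partitions
  (index: Int, sum: Int) => partialSums(index) = sum  // handle each result on the driver
)
println(partialSums.toSeq)
</code></pre></div>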
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>rdd</code> - target RDD to run tasks on</dd>
<dd><code>func</code> - a function to run on each partition of the RDD</dd>
<dd><code>partitions</code> - set of partitions to run on; some jobs may not want to compute on all
partitions of the target RDD, e.g. for operations like <code>first()</code></dd>
<dd><code>resultHandler</code> - callback to pass each result to</dd>
<dd><code>evidence$10</code> - (undocumented)</dd>
</dl>
</li>
</ul>
<a name="runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.collection.Seq-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>runJob</h4>
<pre>public&nbsp;&lt;T,U&gt;&nbsp;Object&nbsp;runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function2&lt;<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
scala.collection.Seq&lt;Object&gt;&nbsp;partitions,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$11)</pre>
<div class="block">Run a function on a given set of partitions in an RDD and return the results as an array.
The function that is run against each partition additionally takes <code>TaskContext</code> argument.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>rdd</code> - target RDD to run tasks on</dd>
<dd><code>func</code> - a function to run on each partition of the RDD</dd>
<dd><code>partitions</code> - set of partitions to run on; some jobs may not want to compute on all
partitions of the target RDD, e.g. for operations like <code>first()</code></dd>
<dd><code>evidence$11</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>in-memory collection with a result of the job (each collection element will contain
a result from one partition)</dd>
</dl>
</li>
</ul>
<a name="runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.collection.Seq-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>runJob</h4>
<pre>public&nbsp;&lt;T,U&gt;&nbsp;Object&nbsp;runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function1&lt;scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
scala.collection.Seq&lt;Object&gt;&nbsp;partitions,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$12)</pre>
<div class="block">Run a function on a given set of partitions in an RDD and return the results as an array.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>rdd</code> - target RDD to run tasks on</dd>
<dd><code>func</code> - a function to run on each partition of the RDD</dd>
<dd><code>partitions</code> - set of partitions to run on; some jobs may not want to compute on all
partitions of the target RDD, e.g. for operations like <code>first()</code></dd>
<dd><code>evidence$12</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>in-memory collection with a result of the job (each collection element will contain
a result from one partition)</dd>
</dl>
</li>
</ul>
<a name="runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>runJob</h4>
<pre>public&nbsp;&lt;T,U&gt;&nbsp;Object&nbsp;runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function2&lt;<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$13)</pre>
<div class="block">Run a job on all partitions in an RDD and return the results in an array. The function
that is run against each partition additionally takes <code>TaskContext</code> argument.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>rdd</code> - target RDD to run tasks on</dd>
<dd><code>func</code> - a function to run on each partition of the RDD</dd>
<dd><code>evidence$13</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>in-memory collection with a result of the job (each collection element will contain
a result from one partition)</dd>
</dl>
</li>
</ul>
<a name="runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>runJob</h4>
<pre>public&nbsp;&lt;T,U&gt;&nbsp;Object&nbsp;runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function1&lt;scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$14)</pre>
<div class="block">Run a job on all partitions in an RDD and return the results in an array.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>rdd</code> - target RDD to run tasks on</dd>
<dd><code>func</code> - a function to run on each partition of the RDD</dd>
<dd><code>evidence$14</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>in-memory collection with a result of the job (each collection element will contain
a result from one partition)</dd>
</dl>
</li>
</ul>
<a name="runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.Function2-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>runJob</h4>
<pre>public&nbsp;&lt;T,U&gt;&nbsp;void&nbsp;runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function2&lt;<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;processPartition,
scala.Function2&lt;Object,U,scala.runtime.BoxedUnit&gt;&nbsp;resultHandler,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$15)</pre>
<div class="block">Run a job on all partitions in an RDD and pass the results to a handler function. The function
that is run against each partition additionally takes <code>TaskContext</code> argument.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>rdd</code> - target RDD to run tasks on</dd>
<dd><code>processPartition</code> - a function to run on each partition of the RDD</dd>
<dd><code>resultHandler</code> - callback to pass each result to</dd>
<dd><code>evidence$15</code> - (undocumented)</dd>
</dl>
</li>
</ul>
<a name="runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.Function2-scala.reflect.ClassTag-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>runJob</h4>
<pre>public&nbsp;&lt;T,U&gt;&nbsp;void&nbsp;runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function1&lt;scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;processPartition,
scala.Function2&lt;Object,U,scala.runtime.BoxedUnit&gt;&nbsp;resultHandler,
scala.reflect.ClassTag&lt;U&gt;&nbsp;evidence$16)</pre>
<div class="block">Run a job on all partitions in an RDD and pass the results to a handler function.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>rdd</code> - target RDD to run tasks on</dd>
<dd><code>processPartition</code> - a function to run on each partition of the RDD</dd>
<dd><code>resultHandler</code> - callback to pass each result to</dd>
<dd><code>evidence$16</code> - (undocumented)</dd>
</dl>
</li>
</ul>
<a name="runApproximateJob-org.apache.spark.rdd.RDD-scala.Function2-org.apache.spark.partial.ApproximateEvaluator-long-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>runApproximateJob</h4>
<pre>public&nbsp;&lt;T,U,R&gt;&nbsp;<a href="../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a>&lt;R&gt;&nbsp;runApproximateJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function2&lt;<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;func,
<a href="../../../org/apache/spark/partial/ApproximateEvaluator.html" title="interface in org.apache.spark.partial">ApproximateEvaluator</a>&lt;U,R&gt;&nbsp;evaluator,
long&nbsp;timeout)</pre>
<div class="block">:: DeveloperApi ::
Run a job that can return approximate results.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>rdd</code> - target RDD to run tasks on</dd>
<dd><code>func</code> - a function to run on each partition of the RDD</dd>
<dd><code>evaluator</code> - <code>ApproximateEvaluator</code> to receive the partial results</dd>
<dd><code>timeout</code> - maximum time to wait for the job, in milliseconds</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>partial result (how partial depends on whether the job was finished before or
after timeout)</dd>
</dl>
</li>
</ul>
<a name="submitJob-org.apache.spark.rdd.RDD-scala.Function1-scala.collection.Seq-scala.Function2-scala.Function0-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>submitJob</h4>
<pre>public&nbsp;&lt;T,U,R&gt;&nbsp;<a href="../../../org/apache/spark/SimpleFutureAction.html" title="class in org.apache.spark">SimpleFutureAction</a>&lt;R&gt;&nbsp;submitJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;T&gt;&nbsp;rdd,
scala.Function1&lt;scala.collection.Iterator&lt;T&gt;,U&gt;&nbsp;processPartition,
scala.collection.Seq&lt;Object&gt;&nbsp;partitions,
scala.Function2&lt;Object,U,scala.runtime.BoxedUnit&gt;&nbsp;resultHandler,
scala.Function0&lt;R&gt;&nbsp;resultFunc)</pre>
<div class="block">Submit a job for execution and return a FutureJob holding the result.
<p></div>
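<div class="block">A minimal usage sketch (assuming an active <code>SparkContext</code> named <code>sc</code>; the computation is illustrative):
<pre><code>import scala.concurrent.Await
import scala.concurrent.duration._

val rdd = sc.parallelize(1 to 100, 4)
val partialSums = new Array[Int](rdd.getNumPartitions)
val future = sc.submitJob(
  rdd,
  (it: Iterator[Int]) => it.sum,                       // per-partition work
  0 until rdd.getNumPartitions,                        // all partitions
  (index: Int, sum: Int) => partialSums(index) = sum,  // collect each partition's result
  partialSums.sum                                      // resultFunc, evaluated once the job is done
)
val total = Await.result(future, 10.minutes)           // the returned action is a scala.concurrent.Future
</code></pre></div>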
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>rdd</code> - target RDD to run tasks on</dd>
<dd><code>processPartition</code> - a function to run on each partition of the RDD</dd>
<dd><code>partitions</code> - set of partitions to run on; some jobs may not want to compute on all
partitions of the target RDD, e.g. for operations like <code>first()</code></dd>
<dd><code>resultHandler</code> - callback to pass each result to</dd>
<dd><code>resultFunc</code> - function to be executed when the result is ready</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
<a name="cancelJobGroup-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cancelJobGroup</h4>
<pre>public&nbsp;void&nbsp;cancelJobGroup(String&nbsp;groupId)</pre>
<div class="block">Cancel active jobs for the specified group. See <code>org.apache.spark.SparkContext.setJobGroup</code>
for more information.</div>
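<div class="block">A minimal usage sketch (assuming an active <code>SparkContext</code> named <code>sc</code>; the group id and workload are illustrative):
<pre><code>// In the thread that launches the work:
sc.setJobGroup("nightly-etl", "Nightly ETL jobs", interruptOnCancel = true)
sc.parallelize(1 to 1000000).map(_ * 2).count()

// From another thread, cancel everything started under that group:
sc.cancelJobGroup("nightly-etl")
</code></pre></div>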
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>groupId</code> - (undocumented)</dd>
</dl>
</li>
</ul>
<a name="cancelAllJobs--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cancelAllJobs</h4>
<pre>public&nbsp;void&nbsp;cancelAllJobs()</pre>
<div class="block">Cancel all jobs that have been scheduled or are running.</div>
</li>
</ul>
<a name="cancelJob-int-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cancelJob</h4>
<pre>public&nbsp;void&nbsp;cancelJob(int&nbsp;jobId,
String&nbsp;reason)</pre>
<div class="block">Cancel a given job if it's scheduled or running.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>jobId</code> - the job ID to cancel</dd>
<dd><code>reason</code> - optional reason for cancellation</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Throws <code>InterruptedException</code> if the cancel message cannot be sent</dd>
</dl>
</li>
</ul>
<a name="cancelJob-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cancelJob</h4>
<pre>public&nbsp;void&nbsp;cancelJob(int&nbsp;jobId)</pre>
<div class="block">Cancel a given job if it's scheduled or running.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>jobId</code> - the job ID to cancel</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Throws <code>InterruptedException</code> if the cancel message cannot be sent</dd>
</dl>
</li>
</ul>
<a name="cancelStage-int-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cancelStage</h4>
<pre>public&nbsp;void&nbsp;cancelStage(int&nbsp;stageId,
String&nbsp;reason)</pre>
<div class="block">Cancel a given stage and all jobs associated with it.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>stageId</code> - the stage ID to cancel</dd>
<dd><code>reason</code> - reason for cancellation</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Throws <code>InterruptedException</code> if the cancel message cannot be sent</dd>
</dl>
</li>
</ul>
<a name="cancelStage-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cancelStage</h4>
<pre>public&nbsp;void&nbsp;cancelStage(int&nbsp;stageId)</pre>
<div class="block">Cancel a given stage and all jobs associated with it.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>stageId</code> - the stage ID to cancel</dd>
<dt><span class="simpleTagLabel">Note:</span></dt>
<dd>Throws <code>InterruptedException</code> if the cancel message cannot be sent</dd>
</dl>
</li>
</ul>
<a name="killTaskAttempt-long-boolean-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>killTaskAttempt</h4>
<pre>public&nbsp;boolean&nbsp;killTaskAttempt(long&nbsp;taskId,
boolean&nbsp;interruptThread,
String&nbsp;reason)</pre>
<div class="block">Kill and reschedule the given task attempt. Task ids can be obtained from the Spark UI
or through SparkListener.onTaskStart.
<p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>taskId</code> - the task ID to kill. This id uniquely identifies the task attempt.</dd>
<dd><code>interruptThread</code> - whether to interrupt the thread running the task.</dd>
<dd><code>reason</code> - the reason for killing the task, which should be a short string. If a task
is killed multiple times with different reasons, only one reason will be reported.
<p></dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>Whether the task was successfully killed.</dd>
</dl>
</li>
</ul>
<a name="setCheckpointDir-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setCheckpointDir</h4>
<pre>public&nbsp;void&nbsp;setCheckpointDir(String&nbsp;directory)</pre>
<div class="block">Set the directory under which RDDs are going to be checkpointed.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>directory</code> - path to the directory where checkpoint files will be stored
(must be an HDFS path if running on a cluster)</dd>
</dl>
</li>
</ul>
<a name="getCheckpointDir--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getCheckpointDir</h4>
<pre>public&nbsp;scala.Option&lt;String&gt;&nbsp;getCheckpointDir()</pre>
</li>
</ul>
<a name="defaultParallelism--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>defaultParallelism</h4>
<pre>public&nbsp;int&nbsp;defaultParallelism()</pre>
<div class="block">Default level of parallelism to use when not given by user (e.g. parallelize and makeRDD).</div>
</li>
</ul>
<a name="defaultMinPartitions--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>defaultMinPartitions</h4>
<pre>public&nbsp;int&nbsp;defaultMinPartitions()</pre>
<div class="block">Default min number of partitions for Hadoop RDDs when not given by user
Notice that we use math.min so the "defaultMinPartitions" cannot be higher than 2.
The reasons for this are discussed in https://github.com/mesos/spark/pull/718</div>
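<div class="block">For instance, <code>defaultMinPartitions</code> is the default for the <code>minPartitions</code> argument of methods such as <code>textFile</code> (a sketch; the paths are hypothetical):
<pre><code>// defaultMinPartitions == math.min(sc.defaultParallelism, 2)
val a = sc.textFile("hdfs:///data/input.txt")      // uses sc.defaultMinPartitions
val b = sc.textFile("hdfs:///data/input.txt", 8)   // explicit minimum number of partitions instead
</code></pre></div>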
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<script defer="defer" type="text/javascript" src="../../../lib/jquery.js"></script><script defer="defer" type="text/javascript" src="../../../lib/api-javadocs.js"></script></body>
</html>