| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_312) on Mon Jul 11 16:23:57 UTC 2022 --> |
| <title>SparkContext (Spark 3.2.2 JavaDoc)</title> |
| <meta name="date" content="2022-07-11"> |
| <link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="SparkContext (Spark 3.2.2 JavaDoc)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10,"i8":10,"i9":10,"i10":10,"i11":10,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":10,"i19":10,"i20":10,"i21":10,"i22":10,"i23":10,"i24":10,"i25":10,"i26":10,"i27":10,"i28":10,"i29":10,"i30":10,"i31":10,"i32":10,"i33":10,"i34":9,"i35":9,"i36":10,"i37":10,"i38":10,"i39":10,"i40":10,"i41":10,"i42":10,"i43":10,"i44":10,"i45":10,"i46":10,"i47":9,"i48":9,"i49":10,"i50":10,"i51":10,"i52":10,"i53":10,"i54":10,"i55":10,"i56":10,"i57":10,"i58":10,"i59":10,"i60":10,"i61":10,"i62":10,"i63":10,"i64":10,"i65":9,"i66":9,"i67":10,"i68":10,"i69":10,"i70":10,"i71":10,"i72":10,"i73":10,"i74":10,"i75":10,"i76":10,"i77":10,"i78":10,"i79":10,"i80":10,"i81":10,"i82":10,"i83":10,"i84":10,"i85":10,"i86":10,"i87":10,"i88":10,"i89":10,"i90":10,"i91":10,"i92":10,"i93":10,"i94":10,"i95":10,"i96":10,"i97":10,"i98":10,"i99":10,"i100":10,"i101":10,"i102":10}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../index-all.html">Index</a></li> |
| <li><a href="../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../org/apache/spark/SparkEnv.html" title="class in org.apache.spark"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../index.html?org/apache/spark/SparkContext.html" target="_top">Frames</a></li> |
| <li><a href="SparkContext.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li>Field | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.spark</div> |
| <h2 title="Class SparkContext" class="title">Class SparkContext</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li>Object</li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.spark.SparkContext</li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <dl> |
| <dt>All Implemented Interfaces:</dt> |
| <dd>org.apache.spark.internal.Logging</dd> |
| </dl> |
| <hr> |
| <br> |
| <pre>public class <span class="typeNameLabel">SparkContext</span> |
| extends Object |
| implements org.apache.spark.internal.Logging</pre> |
| <div class="block">Main entry point for Spark functionality. A SparkContext represents the connection to a Spark |
| cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster. |
| <p></div> |
| <dl> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Only one <code>SparkContext</code> should be active per JVM. You must <code>stop()</code> the |
| active <code>SparkContext</code> before creating a new one. |
param: config a Spark Config object describing the application configuration. Any settings in
this config override the default configs as well as system properties.</dd>
| </dl> |
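<div class="block">A minimal usage sketch in Scala (illustrative only; the master URL
<code>local[*]</code> and the application name <code>"example"</code> are assumptions, not defaults):
<pre>
import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf()
  .setMaster("local[*]")   // assumed: run locally, using all cores
  .setAppName("example")   // hypothetical application name
val sc = new SparkContext(conf)
val rdd = sc.parallelize(Seq(1, 2, 3))  // distribute a local collection as an RDD
println(rdd.count())                    // 3
sc.stop()                               // stop() before creating another SparkContext
</pre>
</div>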
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ======== CONSTRUCTOR SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.summary"> |
| <!-- --> |
| </a> |
| <h3>Constructor Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation"> |
| <caption><span>Constructors</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colOne" scope="col">Constructor and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#SparkContext--">SparkContext</a></span>()</code> |
| <div class="block">Create a SparkContext that loads settings from system properties (for instance, when |
| launching with ./bin/spark-submit).</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#SparkContext-org.apache.spark.SparkConf-">SparkContext</a></span>(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> config)</code> </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#SparkContext-java.lang.String-java.lang.String-org.apache.spark.SparkConf-">SparkContext</a></span>(String master, |
| String appName, |
| <a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</code> |
| <div class="block">Alternative constructor that allows setting common Spark properties directly</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#SparkContext-java.lang.String-java.lang.String-java.lang.String-scala.collection.Seq-scala.collection.Map-">SparkContext</a></span>(String master, |
| String appName, |
| String sparkHome, |
| scala.collection.Seq<String> jars, |
| scala.collection.Map<String,String> environment)</code> |
| <div class="block">Alternative constructor that allows setting common Spark properties directly</div> |
| </td> |
| </tr> |
| </table> |
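<div class="block">The constructors above are interchangeable ways of supplying the same settings;
a short sketch (the master URL <code>local[2]</code> and app name <code>"demo"</code> are illustrative):
<pre>
import org.apache.spark.{SparkConf, SparkContext}

// Pass a fully populated SparkConf ...
val sc1 = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("demo"))
sc1.stop()

// ... or set master and appName directly; other settings come from the given conf.
val sc2 = new SparkContext("local[2]", "demo", new SparkConf())
sc2.stop()
</pre>
</div>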
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#addArchive-java.lang.String-">addArchive</a></span>(String path)</code> |
| <div class="block">:: Experimental :: |
| Add an archive to be downloaded and unpacked with this Spark job on every node.</div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#addFile-java.lang.String-">addFile</a></span>(String path)</code> |
| <div class="block">Add a file to be downloaded with this Spark job on every node.</div> |
| </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#addFile-java.lang.String-boolean-">addFile</a></span>(String path, |
| boolean recursive)</code> |
| <div class="block">Add a file to be downloaded with this Spark job on every node.</div> |
| </td> |
| </tr> |
| <tr id="i3" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#addJar-java.lang.String-">addJar</a></span>(String path)</code> |
| <div class="block">Adds a JAR dependency for all tasks to be executed on this <code>SparkContext</code> in the future.</div> |
| </td> |
| </tr> |
| <tr id="i4" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#addSparkListener-org.apache.spark.scheduler.SparkListenerInterface-">addSparkListener</a></span>(<a href="../../../org/apache/spark/scheduler/SparkListenerInterface.html" title="interface in org.apache.spark.scheduler">SparkListenerInterface</a> listener)</code> |
| <div class="block">:: DeveloperApi :: |
| Register a listener to receive up-calls from events that happen during execution.</div> |
| </td> |
| </tr> |
| <tr id="i5" class="rowColor"> |
| <td class="colFirst"><code>scala.Option<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#applicationAttemptId--">applicationAttemptId</a></span>()</code> </td> |
| </tr> |
| <tr id="i6" class="altColor"> |
| <td class="colFirst"><code>String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#applicationId--">applicationId</a></span>()</code> |
| <div class="block">A unique identifier for the Spark application.</div> |
| </td> |
| </tr> |
| <tr id="i7" class="rowColor"> |
| <td class="colFirst"><code>String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#appName--">appName</a></span>()</code> </td> |
| </tr> |
| <tr id="i8" class="altColor"> |
| <td class="colFirst"><code>scala.collection.Seq<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#archives--">archives</a></span>()</code> </td> |
| </tr> |
| <tr id="i9" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<String,<a href="../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#binaryFiles-java.lang.String-int-">binaryFiles</a></span>(String path, |
| int minPartitions)</code> |
| <div class="block">Get an RDD for a Hadoop-readable dataset as PortableDataStream for each file |
| (useful for binary data)</div> |
| </td> |
| </tr> |
| <tr id="i10" class="altColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><byte[]></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#binaryRecords-java.lang.String-int-org.apache.hadoop.conf.Configuration-">binaryRecords</a></span>(String path, |
| int recordLength, |
| org.apache.hadoop.conf.Configuration conf)</code> |
| <div class="block">Load data from a flat binary file, assuming the length of each record is constant.</div> |
| </td> |
| </tr> |
| <tr id="i11" class="rowColor"> |
| <td class="colFirst"><code><T> <a href="../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast">Broadcast</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#broadcast-T-scala.reflect.ClassTag-">broadcast</a></span>(T value, |
| scala.reflect.ClassTag<T> evidence$9)</code> |
| <div class="block">Broadcast a read-only variable to the cluster, returning a |
| <a href="../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast"><code>Broadcast</code></a> object for reading it in distributed functions.</div> |
| </td> |
| </tr> |
| <tr id="i12" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelAllJobs--">cancelAllJobs</a></span>()</code> |
| <div class="block">Cancel all jobs that have been scheduled or are running.</div> |
| </td> |
| </tr> |
| <tr id="i13" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelJob-int-">cancelJob</a></span>(int jobId)</code> |
| <div class="block">Cancel a given job if it's scheduled or running.</div> |
| </td> |
| </tr> |
| <tr id="i14" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelJob-int-java.lang.String-">cancelJob</a></span>(int jobId, |
| String reason)</code> |
| <div class="block">Cancel a given job if it's scheduled or running.</div> |
| </td> |
| </tr> |
| <tr id="i15" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelJobGroup-java.lang.String-">cancelJobGroup</a></span>(String groupId)</code> |
| <div class="block">Cancel active jobs for the specified group.</div> |
| </td> |
| </tr> |
| <tr id="i16" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelStage-int-">cancelStage</a></span>(int stageId)</code> |
| <div class="block">Cancel a given stage and all jobs associated with it.</div> |
| </td> |
| </tr> |
| <tr id="i17" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#cancelStage-int-java.lang.String-">cancelStage</a></span>(int stageId, |
| String reason)</code> |
| <div class="block">Cancel a given stage and all jobs associated with it.</div> |
| </td> |
| </tr> |
| <tr id="i18" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#clearCallSite--">clearCallSite</a></span>()</code> |
| <div class="block">Clear the thread-local property for overriding the call sites |
| of actions and RDDs.</div> |
| </td> |
| </tr> |
| <tr id="i19" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#clearJobGroup--">clearJobGroup</a></span>()</code> |
| <div class="block">Clear the current thread's job group ID and its description.</div> |
| </td> |
| </tr> |
| <tr id="i20" class="altColor"> |
| <td class="colFirst"><code><T> <a href="../../../org/apache/spark/util/CollectionAccumulator.html" title="class in org.apache.spark.util">CollectionAccumulator</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#collectionAccumulator--">collectionAccumulator</a></span>()</code> |
| <div class="block">Create and register a <code>CollectionAccumulator</code>, which starts with empty list and accumulates |
| inputs by adding them into the list.</div> |
| </td> |
| </tr> |
| <tr id="i21" class="rowColor"> |
| <td class="colFirst"><code><T> <a href="../../../org/apache/spark/util/CollectionAccumulator.html" title="class in org.apache.spark.util">CollectionAccumulator</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#collectionAccumulator-java.lang.String-">collectionAccumulator</a></span>(String name)</code> |
| <div class="block">Create and register a <code>CollectionAccumulator</code>, which starts with empty list and accumulates |
| inputs by adding them into the list.</div> |
| </td> |
| </tr> |
| <tr id="i22" class="altColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#defaultMinPartitions--">defaultMinPartitions</a></span>()</code> |
| <div class="block">Default min number of partitions for Hadoop RDDs when not given by user |
| Notice that we use math.min so the "defaultMinPartitions" cannot be higher than 2.</div> |
| </td> |
| </tr> |
| <tr id="i23" class="rowColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#defaultParallelism--">defaultParallelism</a></span>()</code> |
| <div class="block">Default level of parallelism to use when not given by user (e.g.</div> |
| </td> |
| </tr> |
| <tr id="i24" class="altColor"> |
| <td class="colFirst"><code>String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#deployMode--">deployMode</a></span>()</code> </td> |
| </tr> |
| <tr id="i25" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/util/DoubleAccumulator.html" title="class in org.apache.spark.util">DoubleAccumulator</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#doubleAccumulator--">doubleAccumulator</a></span>()</code> |
| <div class="block">Create and register a double accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> |
| </td> |
| </tr> |
| <tr id="i26" class="altColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/util/DoubleAccumulator.html" title="class in org.apache.spark.util">DoubleAccumulator</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#doubleAccumulator-java.lang.String-">doubleAccumulator</a></span>(String name)</code> |
| <div class="block">Create and register a double accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> |
| </td> |
| </tr> |
| <tr id="i27" class="rowColor"> |
| <td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#emptyRDD-scala.reflect.ClassTag-">emptyRDD</a></span>(scala.reflect.ClassTag<T> evidence$8)</code> |
| <div class="block">Get an RDD that has no partitions or elements.</div> |
| </td> |
| </tr> |
| <tr id="i28" class="altColor"> |
| <td class="colFirst"><code>scala.collection.Seq<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#files--">files</a></span>()</code> </td> |
| </tr> |
| <tr id="i29" class="rowColor"> |
| <td class="colFirst"><code>scala.collection.Seq<<a href="../../../org/apache/spark/scheduler/Schedulable.html" title="interface in org.apache.spark.scheduler">Schedulable</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getAllPools--">getAllPools</a></span>()</code> |
| <div class="block">:: DeveloperApi :: |
| Return pools for fair scheduler</div> |
| </td> |
| </tr> |
| <tr id="i30" class="altColor"> |
| <td class="colFirst"><code>scala.Option<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getCheckpointDir--">getCheckpointDir</a></span>()</code> </td> |
| </tr> |
| <tr id="i31" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getConf--">getConf</a></span>()</code> |
| <div class="block">Return a copy of this SparkContext's configuration.</div> |
| </td> |
| </tr> |
| <tr id="i32" class="altColor"> |
| <td class="colFirst"><code>scala.collection.Map<String,scala.Tuple2<Object,Object>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getExecutorMemoryStatus--">getExecutorMemoryStatus</a></span>()</code> |
| <div class="block">Return a map from the block manager to the max memory available for caching and the remaining |
| memory available for caching.</div> |
| </td> |
| </tr> |
| <tr id="i33" class="rowColor"> |
| <td class="colFirst"><code>String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getLocalProperty-java.lang.String-">getLocalProperty</a></span>(String key)</code> |
| <div class="block">Get a local property set in this thread, or null if it is missing.</div> |
| </td> |
| </tr> |
| <tr id="i34" class="altColor"> |
| <td class="colFirst"><code>static <a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getOrCreate--">getOrCreate</a></span>()</code> |
| <div class="block">This function may be used to get or instantiate a SparkContext and register it as a |
| singleton object.</div> |
| </td> |
| </tr> |
| <tr id="i35" class="rowColor"> |
| <td class="colFirst"><code>static <a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getOrCreate-org.apache.spark.SparkConf-">getOrCreate</a></span>(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> config)</code> |
| <div class="block">This function may be used to get or instantiate a SparkContext and register it as a |
| singleton object.</div> |
| </td> |
| </tr> |
| <tr id="i36" class="altColor"> |
| <td class="colFirst"><code>scala.collection.Map<Object,<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><?>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getPersistentRDDs--">getPersistentRDDs</a></span>()</code> |
| <div class="block">Returns an immutable map of RDDs that have marked themselves as persistent via cache() call.</div> |
| </td> |
| </tr> |
| <tr id="i37" class="rowColor"> |
| <td class="colFirst"><code>scala.Option<<a href="../../../org/apache/spark/scheduler/Schedulable.html" title="interface in org.apache.spark.scheduler">Schedulable</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getPoolForName-java.lang.String-">getPoolForName</a></span>(String pool)</code> |
| <div class="block">:: DeveloperApi :: |
| Return the pool associated with the given name, if one exists</div> |
| </td> |
| </tr> |
| <tr id="i38" class="altColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/storage/RDDInfo.html" title="class in org.apache.spark.storage">RDDInfo</a>[]</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getRDDStorageInfo--">getRDDStorageInfo</a></span>()</code> |
| <div class="block">:: DeveloperApi :: |
| Return information about what RDDs are cached, if they are in mem or on disk, how much space |
| they take, etc.</div> |
| </td> |
| </tr> |
| <tr id="i39" class="rowColor"> |
| <td class="colFirst"><code>scala.Enumeration.Value</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#getSchedulingMode--">getSchedulingMode</a></span>()</code> |
| <div class="block">Return current scheduling mode</div> |
| </td> |
| </tr> |
| <tr id="i40" class="altColor"> |
| <td class="colFirst"><code>org.apache.hadoop.conf.Configuration</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#hadoopConfiguration--">hadoopConfiguration</a></span>()</code> |
| <div class="block">A default Hadoop Configuration for the Hadoop code (e.g.</div> |
| </td> |
| </tr> |
| <tr id="i41" class="rowColor"> |
| <td class="colFirst"><code><K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#hadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-int-">hadoopFile</a></span>(String path, |
| Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, |
| Class<K> keyClass, |
| Class<V> valueClass, |
| int minPartitions)</code> |
| <div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat</div> |
| </td> |
| </tr> |
| <tr id="i42" class="altColor"> |
| <td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>><br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#hadoopFile-java.lang.String-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">hadoopFile</a></span>(String path, |
| scala.reflect.ClassTag<K> km, |
| scala.reflect.ClassTag<V> vm, |
| scala.reflect.ClassTag<F> fm)</code> |
| <div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys, |
| values and the InputFormat so that users don't need to pass them directly.</div> |
| </td> |
| </tr> |
| <tr id="i43" class="rowColor"> |
| <td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>><br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#hadoopFile-java.lang.String-int-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">hadoopFile</a></span>(String path, |
| int minPartitions, |
| scala.reflect.ClassTag<K> km, |
| scala.reflect.ClassTag<V> vm, |
| scala.reflect.ClassTag<F> fm)</code> |
| <div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys, |
| values and the InputFormat so that users don't need to pass them directly.</div> |
| </td> |
| </tr> |
| <tr id="i44" class="altColor"> |
| <td class="colFirst"><code><K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#hadoopRDD-org.apache.hadoop.mapred.JobConf-java.lang.Class-java.lang.Class-java.lang.Class-int-">hadoopRDD</a></span>(org.apache.hadoop.mapred.JobConf conf, |
| Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, |
| Class<K> keyClass, |
| Class<V> valueClass, |
| int minPartitions)</code> |
| <div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf given its InputFormat and other |
| necessary info (e.g.</div> |
| </td> |
| </tr> |
| <tr id="i45" class="rowColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#isLocal--">isLocal</a></span>()</code> </td> |
| </tr> |
| <tr id="i46" class="altColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#isStopped--">isStopped</a></span>()</code> </td> |
| </tr> |
| <tr id="i47" class="rowColor"> |
| <td class="colFirst"><code>static scala.Option<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#jarOfClass-java.lang.Class-">jarOfClass</a></span>(Class<?> cls)</code> |
| <div class="block">Find the JAR from which a given class was loaded, to make it easy for users to pass |
| their JARs to SparkContext.</div> |
| </td> |
| </tr> |
| <tr id="i48" class="altColor"> |
| <td class="colFirst"><code>static scala.Option<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#jarOfObject-java.lang.Object-">jarOfObject</a></span>(Object obj)</code> |
| <div class="block">Find the JAR that contains the class of a particular object, to make it easy for users |
| to pass their JARs to SparkContext.</div> |
| </td> |
| </tr> |
| <tr id="i49" class="rowColor"> |
| <td class="colFirst"><code>scala.collection.Seq<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#jars--">jars</a></span>()</code> </td> |
| </tr> |
| <tr id="i50" class="altColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#killExecutor-java.lang.String-">killExecutor</a></span>(String executorId)</code> |
| <div class="block">:: DeveloperApi :: |
| Request that the cluster manager kill the specified executor.</div> |
| </td> |
| </tr> |
| <tr id="i51" class="rowColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#killExecutors-scala.collection.Seq-">killExecutors</a></span>(scala.collection.Seq<String> executorIds)</code> |
| <div class="block">:: DeveloperApi :: |
| Request that the cluster manager kill the specified executors.</div> |
| </td> |
| </tr> |
| <tr id="i52" class="altColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#killTaskAttempt-long-boolean-java.lang.String-">killTaskAttempt</a></span>(long taskId, |
| boolean interruptThread, |
| String reason)</code> |
| <div class="block">Kill and reschedule the given task attempt.</div> |
| </td> |
| </tr> |
| <tr id="i53" class="rowColor"> |
| <td class="colFirst"><code>scala.collection.Seq<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#listArchives--">listArchives</a></span>()</code> |
| <div class="block">:: Experimental :: |
| Returns a list of archive paths that are added to resources.</div> |
| </td> |
| </tr> |
| <tr id="i54" class="altColor"> |
| <td class="colFirst"><code>scala.collection.Seq<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#listFiles--">listFiles</a></span>()</code> |
| <div class="block">Returns a list of file paths that are added to resources.</div> |
| </td> |
| </tr> |
| <tr id="i55" class="rowColor"> |
| <td class="colFirst"><code>scala.collection.Seq<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#listJars--">listJars</a></span>()</code> |
| <div class="block">Returns a list of jar files that are added to resources.</div> |
| </td> |
| </tr> |
| <tr id="i56" class="altColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/util/LongAccumulator.html" title="class in org.apache.spark.util">LongAccumulator</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#longAccumulator--">longAccumulator</a></span>()</code> |
| <div class="block">Create and register a long accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> |
| </td> |
| </tr> |
| <tr id="i57" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/util/LongAccumulator.html" title="class in org.apache.spark.util">LongAccumulator</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#longAccumulator-java.lang.String-">longAccumulator</a></span>(String name)</code> |
| <div class="block">Create and register a long accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> |
| </td> |
| </tr> |
| <tr id="i58" class="altColor"> |
| <td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#makeRDD-scala.collection.Seq-int-scala.reflect.ClassTag-">makeRDD</a></span>(scala.collection.Seq<T> seq, |
| int numSlices, |
| scala.reflect.ClassTag<T> evidence$2)</code> |
| <div class="block">Distribute a local Scala collection to form an RDD.</div> |
| </td> |
| </tr> |
| <tr id="i59" class="rowColor"> |
| <td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#makeRDD-scala.collection.Seq-scala.reflect.ClassTag-">makeRDD</a></span>(scala.collection.Seq<scala.Tuple2<T,scala.collection.Seq<String>>> seq, |
| scala.reflect.ClassTag<T> evidence$3)</code> |
| <div class="block">Distribute a local Scala collection to form an RDD, with one or more |
| location preferences (hostnames of Spark nodes) for each object.</div> |
| </td> |
| </tr> |
| <tr id="i60" class="altColor"> |
| <td class="colFirst"><code>String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#master--">master</a></span>()</code> </td> |
| </tr> |
| <tr id="i61" class="rowColor"> |
| <td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>><br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#newAPIHadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-org.apache.hadoop.conf.Configuration-">newAPIHadoopFile</a></span>(String path, |
| Class<F> fClass, |
| Class<K> kClass, |
| Class<V> vClass, |
| org.apache.hadoop.conf.Configuration conf)</code> |
| <div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat |
| and extra configuration options to pass to the input format.</div> |
| </td> |
| </tr> |
| <tr id="i62" class="altColor"> |
| <td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>><br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#newAPIHadoopFile-java.lang.String-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">newAPIHadoopFile</a></span>(String path, |
| scala.reflect.ClassTag<K> km, |
| scala.reflect.ClassTag<V> vm, |
| scala.reflect.ClassTag<F> fm)</code> |
| <div class="block">Smarter version of <code>newApiHadoopFile</code> that uses class tags to figure out the classes of keys, |
| values and the <code>org.apache.hadoop.mapreduce.InputFormat</code> (new MapReduce API) so that user |
| don't need to pass them directly.</div> |
| </td> |
| </tr> |
| <tr id="i63" class="rowColor"> |
| <td class="colFirst"><code><K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>><br><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#newAPIHadoopRDD-org.apache.hadoop.conf.Configuration-java.lang.Class-java.lang.Class-java.lang.Class-">newAPIHadoopRDD</a></span>(org.apache.hadoop.conf.Configuration conf, |
| Class<F> fClass, |
| Class<K> kClass, |
| Class<V> vClass)</code> |
| <div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat |
| and extra configuration options to pass to the input format.</div> |
| </td> |
| </tr> |
| <tr id="i64" class="altColor"> |
| <td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#objectFile-java.lang.String-int-scala.reflect.ClassTag-">objectFile</a></span>(String path, |
| int minPartitions, |
| scala.reflect.ClassTag<T> evidence$4)</code> |
| <div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and |
| BytesWritable values that contain a serialized partition.</div> |
| </td> |
| </tr> |
| <tr id="i65" class="rowColor"> |
| <td class="colFirst"><code>static void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#org:Dapache:Dspark:Dinternal:DLogging:D:Dlog__:Deq-org.slf4j.Logger-">org$apache$spark$internal$Logging$$log__$eq</a></span>(org.slf4j.Logger x$1)</code> </td> |
| </tr> |
| <tr id="i66" class="altColor"> |
| <td class="colFirst"><code>static org.slf4j.Logger</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#org:Dapache:Dspark:Dinternal:DLogging:D:Dlog_--">org$apache$spark$internal$Logging$$log_</a></span>()</code> </td> |
| </tr> |
| <tr id="i67" class="rowColor"> |
| <td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#parallelize-scala.collection.Seq-int-scala.reflect.ClassTag-">parallelize</a></span>(scala.collection.Seq<T> seq, |
| int numSlices, |
| scala.reflect.ClassTag<T> evidence$1)</code> |
| <div class="block">Distribute a local Scala collection to form an RDD.</div> |
| </td> |
| </tr> |
| <tr id="i68" class="altColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><Object></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#range-long-long-long-int-">range</a></span>(long start, |
| long end, |
| long step, |
| int numSlices)</code> |
| <div class="block">Creates a new RDD[Long] containing elements from <code>start</code> to <code>end</code>(exclusive), increased by |
| <code>step</code> every element.</div> |
| </td> |
| </tr> |
| <tr id="i69" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#register-org.apache.spark.util.AccumulatorV2-">register</a></span>(<a href="../../../org/apache/spark/util/AccumulatorV2.html" title="class in org.apache.spark.util">AccumulatorV2</a><?,?> acc)</code> |
| <div class="block">Register the given accumulator.</div> |
| </td> |
| </tr> |
| <tr id="i70" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#register-org.apache.spark.util.AccumulatorV2-java.lang.String-">register</a></span>(<a href="../../../org/apache/spark/util/AccumulatorV2.html" title="class in org.apache.spark.util">AccumulatorV2</a><?,?> acc, |
| String name)</code> |
| <div class="block">Register the given accumulator with given name.</div> |
| </td> |
| </tr> |
| <tr id="i71" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#removeSparkListener-org.apache.spark.scheduler.SparkListenerInterface-">removeSparkListener</a></span>(<a href="../../../org/apache/spark/scheduler/SparkListenerInterface.html" title="interface in org.apache.spark.scheduler">SparkListenerInterface</a> listener)</code> |
| <div class="block">:: DeveloperApi :: |
| Deregister the listener from Spark's listener bus.</div> |
| </td> |
| </tr> |
| <tr id="i72" class="altColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#requestExecutors-int-">requestExecutors</a></span>(int numAdditionalExecutors)</code> |
| <div class="block">:: DeveloperApi :: |
| Request an additional number of executors from the cluster manager.</div> |
| </td> |
| </tr> |
| <tr id="i73" class="rowColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#requestTotalExecutors-int-int-scala.collection.immutable.Map-">requestTotalExecutors</a></span>(int numExecutors, |
| int localityAwareTasks, |
| scala.collection.immutable.Map<String,Object> hostToLocalTaskCount)</code> |
| <div class="block">Update the cluster manager on our scheduling needs.</div> |
| </td> |
| </tr> |
| <tr id="i74" class="altColor"> |
| <td class="colFirst"><code>scala.collection.Map<String,<a href="../../../org/apache/spark/resource/ResourceInformation.html" title="class in org.apache.spark.resource">ResourceInformation</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#resources--">resources</a></span>()</code> </td> |
| </tr> |
| <tr id="i75" class="rowColor"> |
| <td class="colFirst"><code><T,U,R> <a href="../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a><R></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runApproximateJob-org.apache.spark.rdd.RDD-scala.Function2-org.apache.spark.partial.ApproximateEvaluator-long-">runApproximateJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, |
| <a href="../../../org/apache/spark/partial/ApproximateEvaluator.html" title="interface in org.apache.spark.partial">ApproximateEvaluator</a><U,R> evaluator, |
| long timeout)</code> |
| <div class="block">:: DeveloperApi :: |
| Run a job that can return approximate results.</div> |
| </td> |
| </tr> |
| <tr id="i76" class="altColor"> |
| <td class="colFirst"><code><T,U> Object</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function1<scala.collection.Iterator<T>,U> func, |
| scala.reflect.ClassTag<U> evidence$14)</code> |
| <div class="block">Run a job on all partitions in an RDD and return the results in an array.</div> |
| </td> |
| </tr> |
| <tr id="i77" class="rowColor"> |
| <td class="colFirst"><code><T,U> void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.Function2-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function1<scala.collection.Iterator<T>,U> processPartition, |
| scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, |
| scala.reflect.ClassTag<U> evidence$16)</code> |
| <div class="block">Run a job on all partitions in an RDD and pass the results to a handler function.</div> |
| </td> |
| </tr> |
| <tr id="i78" class="altColor"> |
| <td class="colFirst"><code><T,U> Object</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.collection.Seq-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function1<scala.collection.Iterator<T>,U> func, |
| scala.collection.Seq<Object> partitions, |
| scala.reflect.ClassTag<U> evidence$12)</code> |
| <div class="block">Run a function on a given set of partitions in an RDD and return the results as an array.</div> |
| </td> |
| </tr> |
| <tr id="i79" class="rowColor"> |
| <td class="colFirst"><code><T,U> Object</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, |
| scala.reflect.ClassTag<U> evidence$13)</code> |
| <div class="block">Run a job on all partitions in an RDD and return the results in an array.</div> |
| </td> |
| </tr> |
| <tr id="i80" class="altColor"> |
| <td class="colFirst"><code><T,U> void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.Function2-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> processPartition, |
| scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, |
| scala.reflect.ClassTag<U> evidence$15)</code> |
| <div class="block">Run a job on all partitions in an RDD and pass the results to a handler function.</div> |
| </td> |
| </tr> |
| <tr id="i81" class="rowColor"> |
| <td class="colFirst"><code><T,U> Object</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.collection.Seq-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, |
| scala.collection.Seq<Object> partitions, |
| scala.reflect.ClassTag<U> evidence$11)</code> |
| <div class="block">Run a function on a given set of partitions in an RDD and return the results as an array.</div> |
| </td> |
| </tr> |
| <tr id="i82" class="altColor"> |
| <td class="colFirst"><code><T,U> void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.collection.Seq-scala.Function2-scala.reflect.ClassTag-">runJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, |
| scala.collection.Seq<Object> partitions, |
| scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, |
| scala.reflect.ClassTag<U> evidence$10)</code> |
| <div class="block">Run a function on a given set of partitions in an RDD and pass the results to the given |
| handler function.</div> |
| </td> |
| </tr> |
| <tr id="i83" class="rowColor"> |
| <td class="colFirst"><code><K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-">sequenceFile</a></span>(String path, |
| Class<K> keyClass, |
| Class<V> valueClass)</code> |
| <div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types.</div> |
| </td> |
| </tr> |
| <tr id="i84" class="altColor"> |
| <td class="colFirst"><code><K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-int-">sequenceFile</a></span>(String path, |
| Class<K> keyClass, |
| Class<V> valueClass, |
| int minPartitions)</code> |
| <div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types.</div> |
| </td> |
| </tr> |
| <tr id="i85" class="rowColor"> |
| <td class="colFirst"><code><K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#sequenceFile-java.lang.String-int-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.Function0-scala.Function0-">sequenceFile</a></span>(String path, |
| int minPartitions, |
| scala.reflect.ClassTag<K> km, |
| scala.reflect.ClassTag<V> vm, |
| scala.Function0<org.apache.spark.WritableConverter<K>> kcf, |
| scala.Function0<org.apache.spark.WritableConverter<V>> vcf)</code> |
| <div class="block">Version of sequenceFile() for types implicitly convertible to Writables through a |
| WritableConverter.</div> |
| </td> |
| </tr> |
| <tr id="i86" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setCallSite-java.lang.String-">setCallSite</a></span>(String shortCallSite)</code> |
| <div class="block">Set the thread-local property for overriding the call sites |
| of actions and RDDs.</div> |
| </td> |
| </tr> |
| <tr id="i87" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setCheckpointDir-java.lang.String-">setCheckpointDir</a></span>(String directory)</code> |
| <div class="block">Set the directory under which RDDs are going to be checkpointed.</div> |
| </td> |
| </tr> |
| <tr id="i88" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setJobDescription-java.lang.String-">setJobDescription</a></span>(String value)</code> |
| <div class="block">Set a human readable description of the current job.</div> |
| </td> |
| </tr> |
| <tr id="i89" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setJobGroup-java.lang.String-java.lang.String-boolean-">setJobGroup</a></span>(String groupId, |
| String description, |
| boolean interruptOnCancel)</code> |
| <div class="block">Assigns a group ID to all the jobs started by this thread until the group ID is set to a |
| different value or cleared.</div> |
| </td> |
| </tr> |
| <tr id="i90" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setLocalProperty-java.lang.String-java.lang.String-">setLocalProperty</a></span>(String key, |
| String value)</code> |
| <div class="block">Set a local property that affects jobs submitted from this thread, such as the Spark fair |
| scheduler pool.</div> |
| </td> |
| </tr> |
| <tr id="i91" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#setLogLevel-java.lang.String-">setLogLevel</a></span>(String logLevel)</code> |
| <div class="block">Control our logLevel.</div> |
| </td> |
| </tr> |
| <tr id="i92" class="altColor"> |
| <td class="colFirst"><code>String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#sparkUser--">sparkUser</a></span>()</code> </td> |
| </tr> |
| <tr id="i93" class="rowColor"> |
| <td class="colFirst"><code>long</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#startTime--">startTime</a></span>()</code> </td> |
| </tr> |
| <tr id="i94" class="altColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/SparkStatusTracker.html" title="class in org.apache.spark">SparkStatusTracker</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#statusTracker--">statusTracker</a></span>()</code> </td> |
| </tr> |
| <tr id="i95" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#stop--">stop</a></span>()</code> |
| <div class="block">Shut down the SparkContext.</div> |
| </td> |
| </tr> |
| <tr id="i96" class="altColor"> |
| <td class="colFirst"><code><T,U,R> <a href="../../../org/apache/spark/SimpleFutureAction.html" title="class in org.apache.spark">SimpleFutureAction</a><R></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#submitJob-org.apache.spark.rdd.RDD-scala.Function1-scala.collection.Seq-scala.Function2-scala.Function0-">submitJob</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function1<scala.collection.Iterator<T>,U> processPartition, |
| scala.collection.Seq<Object> partitions, |
| scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, |
| scala.Function0<R> resultFunc)</code> |
| <div class="block">Submit a job for execution and return a FutureJob holding the result.</div> |
| </td> |
| </tr> |
| <tr id="i97" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#textFile-java.lang.String-int-">textFile</a></span>(String path, |
| int minPartitions)</code> |
| <div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any |
| Hadoop-supported file system URI, and return it as an RDD of Strings.</div> |
| </td> |
| </tr> |
| <tr id="i98" class="altColor"> |
| <td class="colFirst"><code>scala.Option<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#uiWebUrl--">uiWebUrl</a></span>()</code> </td> |
| </tr> |
| <tr id="i99" class="rowColor"> |
| <td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#union-org.apache.spark.rdd.RDD-scala.collection.Seq-scala.reflect.ClassTag-">union</a></span>(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> first, |
| scala.collection.Seq<<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T>> rest, |
| scala.reflect.ClassTag<T> evidence$7)</code> |
| <div class="block">Build the union of a list of RDDs passed as variable-length arguments.</div> |
| </td> |
| </tr> |
| <tr id="i100" class="altColor"> |
| <td class="colFirst"><code><T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#union-scala.collection.Seq-scala.reflect.ClassTag-">union</a></span>(scala.collection.Seq<<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T>> rdds, |
| scala.reflect.ClassTag<T> evidence$6)</code> |
| <div class="block">Build the union of a list of RDDs.</div> |
| </td> |
| </tr> |
| <tr id="i101" class="rowColor"> |
| <td class="colFirst"><code>String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#version--">version</a></span>()</code> |
| <div class="block">The version of Spark on which this application is running.</div> |
| </td> |
| </tr> |
| <tr id="i102" class="altColor"> |
| <td class="colFirst"><code><a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<String,String>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../org/apache/spark/SparkContext.html#wholeTextFiles-java.lang.String-int-">wholeTextFiles</a></span>(String path, |
| int minPartitions)</code> |
| <div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any |
| Hadoop-supported file system URI.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class Object</h3> |
| <code>equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.org.apache.spark.internal.Logging"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from interface org.apache.spark.internal.Logging</h3> |
| <code>$init$, initializeForcefully, initializeLogIfNecessary, initializeLogIfNecessary, initializeLogIfNecessary$default$2, initLock, isTraceEnabled, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning, org$apache$spark$internal$Logging$$log__$eq, org$apache$spark$internal$Logging$$log_, uninitialize</code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ========= CONSTRUCTOR DETAIL ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.detail"> |
| <!-- --> |
| </a> |
| <h3>Constructor Detail</h3> |
| <a name="SparkContext-org.apache.spark.SparkConf-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>SparkContext</h4> |
| <pre>public SparkContext(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> config)</pre> |
| </li> |
| </ul> |
| <a name="SparkContext--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>SparkContext</h4> |
| <pre>public SparkContext()</pre> |
| <div class="block">Create a SparkContext that loads settings from system properties (for instance, when |
| launching with ./bin/spark-submit).</div> |
| </li> |
| </ul> |
| <a name="SparkContext-java.lang.String-java.lang.String-org.apache.spark.SparkConf-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>SparkContext</h4> |
| <pre>public SparkContext(String master, |
| String appName, |
| <a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> conf)</pre> |
| <div class="block">Alternative constructor that allows setting common Spark properties directly |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd> |
| <dd><code>appName</code> - A name for your application, to display on the cluster web UI</dd> |
| <dd><code>conf</code> - a <a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark"><code>SparkConf</code></a> object specifying other Spark parameters</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="SparkContext-java.lang.String-java.lang.String-java.lang.String-scala.collection.Seq-scala.collection.Map-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>SparkContext</h4> |
| <pre>public SparkContext(String master, |
| String appName, |
| String sparkHome, |
| scala.collection.Seq<String> jars, |
| scala.collection.Map<String,String> environment)</pre> |
| <div class="block">Alternative constructor that allows setting common Spark properties directly |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>master</code> - Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).</dd> |
| <dd><code>appName</code> - A name for your application, to display on the cluster web UI.</dd> |
| <dd><code>sparkHome</code> - Location where Spark is installed on cluster nodes.</dd> |
| <dd><code>jars</code> - Collection of JARs to send to the cluster. These can be paths on the local file |
| system or HDFS, HTTP, HTTPS, or FTP URLs.</dd> |
| <dd><code>environment</code> - Environment variables to set on worker nodes.</dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="getOrCreate-org.apache.spark.SparkConf-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getOrCreate</h4> |
| <pre>public static <a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> getOrCreate(<a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> config)</pre> |
| <div class="block">This function may be used to get or instantiate a SparkContext and register it as a |
| singleton object. Because we can only have one active SparkContext per JVM, |
| this is useful when applications may wish to share a SparkContext. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>config</code> - <code>SparkConf</code> that will be used for initialization of the <code>SparkContext</code></dd>
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>current <code>SparkContext</code> (or a new one if it wasn't created before the function call)</dd> |
| </dl> |
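| <div class="block">For example, a minimal sketch (the app name and master shown here are illustrative):
| <pre><code>
| // Build a configuration; if a SparkContext is already active in this
| // JVM, getOrCreate returns it and ignores the supplied conf.
| val conf = new SparkConf().setAppName("shared-app").setMaster("local[2]")
| val sc = SparkContext.getOrCreate(conf)
| 
| // A later call anywhere in the same JVM yields the same instance.
| assert(SparkContext.getOrCreate(conf) eq sc)
| </code></pre></div>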
| </li> |
| </ul> |
| <a name="getOrCreate--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getOrCreate</h4> |
| <pre>public static <a href="../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> getOrCreate()</pre> |
| <div class="block">This function may be used to get or instantiate a SparkContext and register it as a |
| singleton object. Because we can only have one active SparkContext per JVM, |
| this is useful when applications may wish to share a SparkContext. |
| <p> |
| This method can be called without a SparkConf (useful when just retrieving an existing context).
| <p></div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>current <code>SparkContext</code> (or a new one if it wasn't created before the function call)</dd>
| </dl> |
| </li> |
| </ul> |
| <a name="jarOfClass-java.lang.Class-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>jarOfClass</h4> |
| <pre>public static scala.Option<String> jarOfClass(Class<?> cls)</pre> |
| <div class="block">Find the JAR from which a given class was loaded, to make it easy for users to pass |
| their JARs to SparkContext. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>cls</code> - class that should be inside the jar</dd>
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>jar that contains the Class, <code>None</code> if not found</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="jarOfObject-java.lang.Object-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>jarOfObject</h4> |
| <pre>public static scala.Option<String> jarOfObject(Object obj)</pre> |
| <div class="block">Find the JAR that contains the class of a particular object, to make it easy for users |
| to pass their JARs to SparkContext. In most cases you can call jarOfObject(this) in |
| your driver program. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>obj</code> - reference to an instance whose class should be inside the jar</dd>
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>jar that contains the class of the instance, <code>None</code> if not found</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="org:Dapache:Dspark:Dinternal:DLogging:D:Dlog_--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>org$apache$spark$internal$Logging$$log_</h4> |
| <pre>public static org.slf4j.Logger org$apache$spark$internal$Logging$$log_()</pre> |
| </li> |
| </ul> |
| <a name="org:Dapache:Dspark:Dinternal:DLogging:D:Dlog__:Deq-org.slf4j.Logger-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>org$apache$spark$internal$Logging$$log__$eq</h4> |
| <pre>public static void org$apache$spark$internal$Logging$$log__$eq(org.slf4j.Logger x$1)</pre> |
| </li> |
| </ul> |
| <a name="startTime--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>startTime</h4> |
| <pre>public long startTime()</pre> |
| </li> |
| </ul> |
| <a name="getConf--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getConf</h4> |
| <pre>public <a href="../../../org/apache/spark/SparkConf.html" title="class in org.apache.spark">SparkConf</a> getConf()</pre> |
| <div class="block">Return a copy of this SparkContext's configuration. The configuration ''cannot'' be |
| changed at runtime.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="resources--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>resources</h4> |
| <pre>public scala.collection.Map<String,<a href="../../../org/apache/spark/resource/ResourceInformation.html" title="class in org.apache.spark.resource">ResourceInformation</a>> resources()</pre> |
| </li> |
| </ul> |
| <a name="jars--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>jars</h4> |
| <pre>public scala.collection.Seq<String> jars()</pre> |
| </li> |
| </ul> |
| <a name="files--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>files</h4> |
| <pre>public scala.collection.Seq<String> files()</pre> |
| </li> |
| </ul> |
| <a name="archives--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>archives</h4> |
| <pre>public scala.collection.Seq<String> archives()</pre> |
| </li> |
| </ul> |
| <a name="master--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>master</h4> |
| <pre>public String master()</pre> |
| </li> |
| </ul> |
| <a name="deployMode--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>deployMode</h4> |
| <pre>public String deployMode()</pre> |
| </li> |
| </ul> |
| <a name="appName--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>appName</h4> |
| <pre>public String appName()</pre> |
| </li> |
| </ul> |
| <a name="isLocal--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>isLocal</h4> |
| <pre>public boolean isLocal()</pre> |
| </li> |
| </ul> |
| <a name="isStopped--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>isStopped</h4> |
| <pre>public boolean isStopped()</pre> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>true if context is stopped or in the midst of stopping.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="statusTracker--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>statusTracker</h4> |
| <pre>public <a href="../../../org/apache/spark/SparkStatusTracker.html" title="class in org.apache.spark">SparkStatusTracker</a> statusTracker()</pre> |
| </li> |
| </ul> |
| <a name="uiWebUrl--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>uiWebUrl</h4> |
| <pre>public scala.Option<String> uiWebUrl()</pre> |
| </li> |
| </ul> |
| <a name="hadoopConfiguration--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>hadoopConfiguration</h4> |
| <pre>public org.apache.hadoop.conf.Configuration hadoopConfiguration()</pre> |
| <div class="block">A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. |
| <p></div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>As it will be reused in all Hadoop RDDs, it's better not to modify it unless you |
| plan to set some global configurations for all Hadoop RDDs.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="sparkUser--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>sparkUser</h4> |
| <pre>public String sparkUser()</pre> |
| </li> |
| </ul> |
| <a name="applicationId--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>applicationId</h4> |
| <pre>public String applicationId()</pre> |
| <div class="block">A unique identifier for the Spark application. |
| Its format depends on the scheduler implementation. |
| (i.e. |
| in case of local spark app something like 'local-1433865536131' |
| in case of YARN something like 'application_1433865536131_34483' |
| in case of MESOS something like 'driver-20170926223339-0001' |
| )</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="applicationAttemptId--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>applicationAttemptId</h4> |
| <pre>public scala.Option<String> applicationAttemptId()</pre> |
| </li> |
| </ul> |
| <a name="setLogLevel-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setLogLevel</h4> |
| <pre>public void setLogLevel(String logLevel)</pre> |
| <div class="block">Control our logLevel. This overrides any user-defined log settings.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>logLevel</code> - The desired log level as a string. |
| Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN</dd> |
| </dl> |
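| <div class="block">For example, to silence everything below WARN for the rest of the application
| (a sketch assuming an existing SparkContext <code>sc</code>):
| <pre><code>
| sc.setLogLevel("WARN")
| </code></pre></div>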
| </li> |
| </ul> |
| <a name="setLocalProperty-java.lang.String-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setLocalProperty</h4> |
| <pre>public void setLocalProperty(String key, |
| String value)</pre> |
| <div class="block">Set a local property that affects jobs submitted from this thread, such as the Spark fair |
| scheduler pool. User-defined properties may also be set here. These properties are propagated |
| through to worker tasks and can be accessed there via |
| <a href="../../../org/apache/spark/TaskContext.html#getLocalProperty-java.lang.String-"><code>TaskContext.getLocalProperty(java.lang.String)</code></a>. |
| <p> |
| These properties are inherited by child threads spawned from this thread. This
| may have unexpected consequences when working with thread pools. The standard Java
| implementation of thread pools has worker threads spawn other worker threads.
| As a result, local properties may propagate unpredictably.</div>
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>key</code> - (undocumented)</dd> |
| <dd><code>value</code> - (undocumented)</dd> |
| </dl> |
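| <div class="block">For example, a sketch that routes this thread's jobs to a fair scheduler pool
| (assuming the fair scheduler is enabled and a pool named "production" exists):
| <pre><code>
| // Jobs started by this thread go to the "production" pool.
| sc.setLocalProperty("spark.scheduler.pool", "production")
| sc.parallelize(1 to 100).count()
| 
| // Setting the value to null clears the property again.
| sc.setLocalProperty("spark.scheduler.pool", null)
| </code></pre></div>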
| </li> |
| </ul> |
| <a name="getLocalProperty-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getLocalProperty</h4> |
| <pre>public String getLocalProperty(String key)</pre> |
| <div class="block">Get a local property set in this thread, or null if it is missing. See |
| <code>org.apache.spark.SparkContext.setLocalProperty</code>.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>key</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="setJobDescription-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setJobDescription</h4> |
| <pre>public void setJobDescription(String value)</pre> |
| <div class="block">Set a human readable description of the current job.</div> |
| </li> |
| </ul> |
| <a name="setJobGroup-java.lang.String-java.lang.String-boolean-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setJobGroup</h4> |
| <pre>public void setJobGroup(String groupId, |
| String description, |
| boolean interruptOnCancel)</pre> |
| <div class="block">Assigns a group ID to all the jobs started by this thread until the group ID is set to a |
| different value or cleared. |
| <p> |
| Often, a unit of execution in an application consists of multiple Spark actions or jobs. |
| Application programmers can use this method to group all those jobs together and give a |
| group description. Once set, the Spark web UI will associate such jobs with this group. |
| <p> |
| The application can also use <code>org.apache.spark.SparkContext.cancelJobGroup</code> to cancel all |
| running jobs in this group. For example, |
| <pre><code> |
| // In the main thread: |
| sc.setJobGroup("some_job_to_cancel", "some job description") |
| sc.parallelize(1 to 10000, 2).map { i => Thread.sleep(10); i }.count() |
| |
| // In a separate thread: |
| sc.cancelJobGroup("some_job_to_cancel") |
| </code></pre> |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>interruptOnCancel</code> - If true, then job cancellation will result in <code>Thread.interrupt()</code> |
| being called on the job's executor threads. This is useful to help ensure that the tasks |
| are actually stopped in a timely manner, but is off by default due to HDFS-1208, where HDFS |
| may respond to Thread.interrupt() by marking nodes as dead.</dd> |
| <dd><code>groupId</code> - (undocumented)</dd> |
| <dd><code>description</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="clearJobGroup--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>clearJobGroup</h4> |
| <pre>public void clearJobGroup()</pre> |
| <div class="block">Clear the current thread's job group ID and its description.</div> |
| </li> |
| </ul> |
| <a name="parallelize-scala.collection.Seq-int-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>parallelize</h4> |
| <pre>public <T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> parallelize(scala.collection.Seq<T> seq, |
| int numSlices, |
| scala.reflect.ClassTag<T> evidence$1)</pre> |
| <div class="block">Distribute a local Scala collection to form an RDD. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>seq</code> - Scala collection to distribute</dd> |
| <dd><code>numSlices</code> - number of partitions to divide the collection into</dd> |
| <dd><code>evidence$1</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD representing distributed collection</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Parallelize acts lazily. If <code>seq</code> is a mutable collection and is altered after the call
| to parallelize and before the first action on the RDD, the resultant RDD will reflect the
| modified collection; pass a copy of the argument to avoid this. Also, avoid using
| <code>parallelize(Seq())</code> to create an empty <code>RDD</code>: consider <code>emptyRDD</code> for an
| RDD with no partitions, or <code>parallelize(Seq[T]())</code> for an RDD of <code>T</code> with empty partitions.</dd>
| </dl> |
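| <div class="block">For example, a minimal sketch (assuming an existing SparkContext <code>sc</code>):
| <pre><code>
| // Distribute a local sequence across 4 partitions.
| val data = Seq(1, 2, 3, 4, 5)
| val rdd = sc.parallelize(data, numSlices = 4)
| 
| // The RDD is computed lazily; this action triggers evaluation.
| println(rdd.reduce(_ + _))  // 15
| </code></pre></div>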
| </li> |
| </ul> |
| <a name="range-long-long-long-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>range</h4> |
| <pre>public <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><Object> range(long start, |
| long end, |
| long step, |
| int numSlices)</pre> |
| <div class="block">Creates a new RDD[Long] containing elements from <code>start</code> to <code>end</code>(exclusive), increased by |
| <code>step</code> every element. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>start</code> - the start value.</dd> |
| <dd><code>end</code> - the end value.</dd> |
| <dd><code>step</code> - the incremental step</dd> |
| <dd><code>numSlices</code> - number of partitions to divide the collection into</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD representing distributed range</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>If this RDD needs to be cached, make sure each partition does not exceed the size limit.
| <p></dd>
| </dl> |
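| <div class="block">For example, a minimal sketch:
| <pre><code>
| // The even numbers 0, 2, 4, 6, 8 in two partitions.
| val evens = sc.range(0L, 10L, step = 2L, numSlices = 2)
| println(evens.count())  // 5
| </code></pre></div>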
| </li> |
| </ul> |
| <a name="makeRDD-scala.collection.Seq-int-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>makeRDD</h4> |
| <pre>public <T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> makeRDD(scala.collection.Seq<T> seq, |
| int numSlices, |
| scala.reflect.ClassTag<T> evidence$2)</pre> |
| <div class="block">Distribute a local Scala collection to form an RDD. |
| <p> |
| This method is identical to <code>parallelize</code>.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>seq</code> - Scala collection to distribute</dd> |
| <dd><code>numSlices</code> - number of partitions to divide the collection into</dd> |
| <dd><code>evidence$2</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD representing distributed collection</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="makeRDD-scala.collection.Seq-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>makeRDD</h4> |
| <pre>public <T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> makeRDD(scala.collection.Seq<scala.Tuple2<T,scala.collection.Seq<String>>> seq, |
| scala.reflect.ClassTag<T> evidence$3)</pre> |
| <div class="block">Distribute a local Scala collection to form an RDD, with one or more |
| location preferences (hostnames of Spark nodes) for each object. |
| Create a new partition for each collection item.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>seq</code> - list of tuples of data and location preferences (hostnames of Spark nodes)</dd> |
| <dd><code>evidence$3</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD representing data partitioned according to location preferences</dd> |
| </dl> |
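| <div class="block">For example, a sketch with illustrative hostnames:
| <pre><code>
| // One partition per element; Spark prefers to schedule each
| // partition on the listed hosts.
| val withPrefs = sc.makeRDD(Seq(
|   (1, Seq("host-a")),
|   (2, Seq("host-b", "host-c"))
| ))
| </code></pre></div>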
| </li> |
| </ul> |
| <a name="textFile-java.lang.String-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>textFile</h4> |
| <pre>public <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><String> textFile(String path, |
| int minPartitions)</pre> |
| <div class="block">Read a text file from HDFS, a local file system (available on all nodes), or any |
| Hadoop-supported file system URI, and return it as an RDD of Strings. |
| The text files must be encoded as UTF-8. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - path to the text file on a supported file system</dd> |
| <dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of lines of the text file</dd> |
| </dl> |
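| <div class="block">For example, a sketch with an illustrative path:
| <pre><code>
| // Count the non-empty lines of a text file.
| val lines = sc.textFile("hdfs://namenode/data/input.txt", minPartitions = 4)
| println(lines.filter(_.nonEmpty).count())
| </code></pre></div>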
| </li> |
| </ul> |
| <a name="wholeTextFiles-java.lang.String-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>wholeTextFiles</h4> |
| <pre>public <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<String,String>> wholeTextFiles(String path, |
| int minPartitions)</pre> |
| <div class="block">Read a directory of text files from HDFS, a local file system (available on all nodes), or any |
| Hadoop-supported file system URI. Each file is read as a single record and returned in a |
| key-value pair, where the key is the path of each file, the value is the content of each file. |
| The text files must be encoded as UTF-8. |
| <p> |
| <p> For example, if you have the following files: |
| <pre><code> |
| hdfs://a-hdfs-path/part-00000 |
| hdfs://a-hdfs-path/part-00001 |
| ... |
| hdfs://a-hdfs-path/part-nnnnn |
| </code></pre> |
| <p> |
| Do <code>val rdd = sparkContext.wholeTextFile("hdfs://a-hdfs-path")</code>, |
| <p> |
| <p> then <code>rdd</code> contains |
| <pre><code> |
| (a-hdfs-path/part-00000, its content) |
| (a-hdfs-path/part-00001, its content) |
| ... |
| (a-hdfs-path/part-nnnnn, its content) |
| </code></pre> |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - Directory to the input data files, the path can be comma separated paths as the |
| list of inputs.</dd> |
| <dd><code>minPartitions</code> - A suggested minimum number of splits for the input data.</dd>
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD representing tuples of file path and the corresponding file content</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Small files are preferred; large files are also allowable, but may cause bad performance.
| On some filesystems, <code>.../path/&#42;</code> can be a more efficient way to read all files
| in a directory than <code>.../path/</code> or <code>.../path</code>. Partitioning is determined by data
| locality, which may result in too few partitions by default.
| <p></dd>
| </dl> |
| </li> |
| </ul> |
| <a name="binaryFiles-java.lang.String-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>binaryFiles</h4> |
| <pre>public <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<String,<a href="../../../org/apache/spark/input/PortableDataStream.html" title="class in org.apache.spark.input">PortableDataStream</a>>> binaryFiles(String path, |
| int minPartitions)</pre> |
| <div class="block">Get an RDD for a Hadoop-readable dataset as PortableDataStream for each file |
| (useful for binary data) |
| <p> |
| For example, if you have the following files: |
| <pre><code> |
| hdfs://a-hdfs-path/part-00000 |
| hdfs://a-hdfs-path/part-00001 |
| ... |
| hdfs://a-hdfs-path/part-nnnnn |
| </code></pre> |
| <p> |
| Do |
| <code>val rdd = sparkContext.binaryFiles("hdfs://a-hdfs-path")</code>, |
| <p> |
| then <code>rdd</code> contains |
| <pre><code> |
| (a-hdfs-path/part-00000, its content) |
| (a-hdfs-path/part-00001, its content) |
| ... |
| (a-hdfs-path/part-nnnnn, its content) |
| </code></pre> |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - Directory to the input data files, the path can be comma separated paths as the |
| list of inputs.</dd> |
| <dd><code>minPartitions</code> - A suggested minimum number of splits for the input data.</dd>
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD representing tuples of file path and corresponding file content</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Small files are preferred; very large files may cause bad performance.
| On some filesystems, <code>.../path/&#42;</code> can be a more efficient way to read all files
| in a directory than <code>.../path/</code> or <code>.../path</code>. Partitioning is determined by data
| locality, which may result in too few partitions by default.
| <p></dd>
| </dl> |
| </li> |
| </ul> |
| <a name="binaryRecords-java.lang.String-int-org.apache.hadoop.conf.Configuration-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>binaryRecords</h4> |
| <pre>public <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><byte[]> binaryRecords(String path, |
| int recordLength, |
| org.apache.hadoop.conf.Configuration conf)</pre> |
| <div class="block">Load data from a flat binary file, assuming the length of each record is constant. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - Directory to the input data files, the path can be comma separated paths as the |
| list of inputs.</dd> |
| <dd><code>recordLength</code> - The length at which to split the records</dd> |
| <dd><code>conf</code> - Configuration for setting up the dataset. |
| <p></dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>An RDD of data with values, represented as byte arrays</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>We ensure that the byte array for each record in the resulting RDD |
| has the provided record length. |
| <p></dd> |
| </dl> |
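| <div class="block">For example, a sketch with an illustrative path and record length, using the
| default Hadoop configuration:
| <pre><code>
| // Each element is a 512-byte Array[Byte] slice of the input file.
| val records = sc.binaryRecords("hdfs://namenode/data/fixed.bin", 512)
| println(records.count())
| </code></pre></div>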
| </li> |
| </ul> |
| <a name="hadoopRDD-org.apache.hadoop.mapred.JobConf-java.lang.Class-java.lang.Class-java.lang.Class-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>hadoopRDD</h4> |
| <pre>public <K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> hadoopRDD(org.apache.hadoop.mapred.JobConf conf, |
| Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, |
| Class<K> keyClass, |
| Class<V> valueClass, |
| int minPartitions)</pre> |
| <div class="block">Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf given its InputFormat and other |
| necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable), |
| using the older MapReduce API (<code>org.apache.hadoop.mapred</code>). |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>conf</code> - JobConf for setting up the dataset. Note: This will be put into a Broadcast. |
| Therefore if you plan to reuse this conf to create multiple RDDs, you need to make |
| sure you won't modify the conf. A safe approach is always creating a new conf for |
| a new RDD.</dd> |
| <dd><code>inputFormatClass</code> - storage format of the data to be read</dd> |
| <dd><code>keyClass</code> - <code>Class</code> of the key associated with the <code>inputFormatClass</code> parameter</dd> |
| <dd><code>valueClass</code> - <code>Class</code> of the value associated with the <code>inputFormatClass</code> parameter</dd> |
| <dd><code>minPartitions</code> - Minimum number of Hadoop Splits to generate.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of tuples of key and corresponding value |
| <p></dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each |
| record, directly caching the returned RDD or directly passing it to an aggregation or shuffle |
| operation will create many references to the same object. |
| If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first |
| copy them using a <code>map</code> function.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="hadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>hadoopFile</h4> |
| <pre>public <K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> hadoopFile(String path, |
| Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, |
| Class<K> keyClass, |
| Class<V> valueClass, |
| int minPartitions)</pre> |
| <div class="block">Get an RDD for a Hadoop file with an arbitrary InputFormat |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - directory to the input data files, the path can be comma separated paths |
| as a list of inputs</dd> |
| <dd><code>inputFormatClass</code> - storage format of the data to be read</dd> |
| <dd><code>keyClass</code> - <code>Class</code> of the key associated with the <code>inputFormatClass</code> parameter</dd> |
| <dd><code>valueClass</code> - <code>Class</code> of the value associated with the <code>inputFormatClass</code> parameter</dd> |
| <dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of tuples of key and corresponding value</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each |
| record, directly caching the returned RDD or directly passing it to an aggregation or shuffle |
| operation will create many references to the same object. |
| If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first |
| copy them using a <code>map</code> function.</dd> |
| </dl> |
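| <div class="block">For example, a sketch that reads a plain text file through the old MapReduce API
| (the path shown is illustrative):
| <pre><code>
| import org.apache.hadoop.io.{LongWritable, Text}
| import org.apache.hadoop.mapred.TextInputFormat
| 
| // Keys are byte offsets into the file, values are lines.
| val kv = sc.hadoopFile("hdfs://namenode/data/input.txt",
|   classOf[TextInputFormat], classOf[LongWritable], classOf[Text], 4)
| 
| // Copy out of the reused Writable before caching or shuffling.
| val lines = kv.map { case (_, text) => text.toString }
| </code></pre></div>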
| </li> |
| </ul> |
| <a name="hadoopFile-java.lang.String-int-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>hadoopFile</h4> |
| <pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> hadoopFile(String path, |
| int minPartitions, |
| scala.reflect.ClassTag<K> km, |
| scala.reflect.ClassTag<V> vm, |
| scala.reflect.ClassTag<F> fm)</pre> |
| <div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys, |
| values and the InputFormat so that users don't need to pass them directly. Instead, callers |
| can just write, for example, |
| <pre><code> |
| val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path, minPartitions) |
| </code></pre> |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - directory to the input data files, the path can be comma separated paths |
| as a list of inputs</dd> |
| <dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd> |
| <dd><code>km</code> - (undocumented)</dd> |
| <dd><code>vm</code> - (undocumented)</dd> |
| <dd><code>fm</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of tuples of key and corresponding value</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each |
| record, directly caching the returned RDD or directly passing it to an aggregation or shuffle |
| operation will create many references to the same object. |
| If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first |
| copy them using a <code>map</code> function.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="hadoopFile-java.lang.String-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>hadoopFile</h4> |
| <pre>public <K,V,F extends org.apache.hadoop.mapred.InputFormat<K,V>> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> hadoopFile(String path, |
| scala.reflect.ClassTag<K> km, |
| scala.reflect.ClassTag<V> vm, |
| scala.reflect.ClassTag<F> fm)</pre> |
| <div class="block">Smarter version of hadoopFile() that uses class tags to figure out the classes of keys, |
| values and the InputFormat so that users don't need to pass them directly. Instead, callers |
| can just write, for example, |
| <pre><code> |
| val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path) |
| </code></pre> |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - directory to the input data files, the path can be comma separated paths as |
| a list of inputs</dd> |
| <dd><code>km</code> - (undocumented)</dd> |
| <dd><code>vm</code> - (undocumented)</dd> |
| <dd><code>fm</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of tuples of key and corresponding value</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each |
| record, directly caching the returned RDD or directly passing it to an aggregation or shuffle |
| operation will create many references to the same object. |
| If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first |
| copy them using a <code>map</code> function.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="newAPIHadoopFile-java.lang.String-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>newAPIHadoopFile</h4> |
| <pre>public <K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> newAPIHadoopFile(String path, |
| scala.reflect.ClassTag<K> km, |
| scala.reflect.ClassTag<V> vm, |
| scala.reflect.ClassTag<F> fm)</pre> |
| <div class="block">Smarter version of <code>newApiHadoopFile</code> that uses class tags to figure out the classes of keys, |
| values and the <code>org.apache.hadoop.mapreduce.InputFormat</code> (new MapReduce API) so that user |
| don't need to pass them directly. Instead, callers can just write, for example: |
| <code></code><code> |
| val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path) |
| </code><code></code> |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - directory to the input data files, the path can be comma separated paths |
| as a list of inputs</dd> |
| <dd><code>km</code> - (undocumented)</dd> |
| <dd><code>vm</code> - (undocumented)</dd> |
| <dd><code>fm</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of tuples of key and corresponding value</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each |
| record, directly caching the returned RDD or directly passing it to an aggregation or shuffle |
| operation will create many references to the same object. |
| If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first |
| copy them using a <code>map</code> function.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="newAPIHadoopFile-java.lang.String-java.lang.Class-java.lang.Class-java.lang.Class-org.apache.hadoop.conf.Configuration-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>newAPIHadoopFile</h4> |
| <pre>public <K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> newAPIHadoopFile(String path, |
| Class<F> fClass, |
| Class<K> kClass, |
| Class<V> vClass, |
| org.apache.hadoop.conf.Configuration conf)</pre> |
| <div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat |
| and extra configuration options to pass to the input format. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - directory to the input data files, the path can be comma separated paths |
| as a list of inputs</dd> |
| <dd><code>fClass</code> - storage format of the data to be read</dd> |
| <dd><code>kClass</code> - <code>Class</code> of the key associated with the <code>fClass</code> parameter</dd> |
| <dd><code>vClass</code> - <code>Class</code> of the value associated with the <code>fClass</code> parameter</dd> |
| <dd><code>conf</code> - Hadoop configuration</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of tuples of key and corresponding value</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each |
| record, directly caching the returned RDD or directly passing it to an aggregation or shuffle |
| operation will create many references to the same object. |
| If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first |
| copy them using a <code>map</code> function.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="newAPIHadoopRDD-org.apache.hadoop.conf.Configuration-java.lang.Class-java.lang.Class-java.lang.Class-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>newAPIHadoopRDD</h4> |
| <pre>public <K,V,F extends org.apache.hadoop.mapreduce.InputFormat<K,V>> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> newAPIHadoopRDD(org.apache.hadoop.conf.Configuration conf, |
| Class<F> fClass, |
| Class<K> kClass, |
| Class<V> vClass)</pre> |
| <div class="block">Get an RDD for a given Hadoop file with an arbitrary new API InputFormat |
| and extra configuration options to pass to the input format. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>conf</code> - Configuration for setting up the dataset. Note: This will be put into a Broadcast. |
| Therefore if you plan to reuse this conf to create multiple RDDs, you need to make |
| sure you won't modify the conf. A safe approach is always creating a new conf for |
| a new RDD.</dd> |
| <dd><code>fClass</code> - storage format of the data to be read</dd> |
| <dd><code>kClass</code> - <code>Class</code> of the key associated with the <code>fClass</code> parameter</dd> |
| <dd><code>vClass</code> - <code>Class</code> of the value associated with the <code>fClass</code> parameter |
| <p></dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each |
| record, directly caching the returned RDD or directly passing it to an aggregation or shuffle |
| operation will create many references to the same object. |
| If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first |
| copy them using a <code>map</code> function.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>sequenceFile</h4> |
| <pre>public <K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> sequenceFile(String path, |
| Class<K> keyClass, |
| Class<V> valueClass, |
| int minPartitions)</pre> |
| <div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - directory to the input data files, the path can be comma separated paths |
| as a list of inputs</dd> |
| <dd><code>keyClass</code> - <code>Class</code> of the key associated with <code>SequenceFileInputFormat</code></dd> |
| <dd><code>valueClass</code> - <code>Class</code> of the value associated with <code>SequenceFileInputFormat</code></dd> |
| <dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of tuples of key and corresponding value</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each |
| record, directly caching the returned RDD or directly passing it to an aggregation or shuffle |
| operation will create many references to the same object. |
| If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first |
| copy them using a <code>map</code> function.</dd> |
| </dl> |
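| <div class="block">For example, a sketch reading a SequenceFile of (Text, IntWritable) pairs
| (the path shown is illustrative):
| <pre><code>
| import org.apache.hadoop.io.{IntWritable, Text}
| 
| val pairs = sc.sequenceFile("hdfs://namenode/data/counts.seq",
|   classOf[Text], classOf[IntWritable])
| 
| // Convert to plain Scala values before caching, since Hadoop reuses
| // the same Writable instances between records.
| val safe = pairs.map { case (k, v) => (k.toString, v.get) }
| </code></pre></div>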
| </li> |
| </ul> |
| <a name="sequenceFile-java.lang.String-java.lang.Class-java.lang.Class-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>sequenceFile</h4> |
| <pre>public <K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> sequenceFile(String path, |
| Class<K> keyClass, |
| Class<V> valueClass)</pre> |
| <div class="block">Get an RDD for a Hadoop SequenceFile with given key and value types. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - directory to the input data files, the path can be comma separated paths |
| as a list of inputs</dd> |
| <dd><code>keyClass</code> - <code>Class</code> of the key associated with <code>SequenceFileInputFormat</code></dd> |
| <dd><code>valueClass</code> - <code>Class</code> of the value associated with <code>SequenceFileInputFormat</code></dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of tuples of key and corresponding value</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each |
| record, directly caching the returned RDD or directly passing it to an aggregation or shuffle |
| operation will create many references to the same object. |
| If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first |
| copy them using a <code>map</code> function.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="sequenceFile-java.lang.String-int-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.Function0-scala.Function0-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>sequenceFile</h4> |
| <pre>public <K,V> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> sequenceFile(String path, |
| int minPartitions, |
| scala.reflect.ClassTag<K> km, |
| scala.reflect.ClassTag<V> vm, |
| scala.Function0<org.apache.spark.WritableConverter<K>> kcf, |
| scala.Function0<org.apache.spark.WritableConverter<V>> vcf)</pre> |
| <div class="block">Version of sequenceFile() for types implicitly convertible to Writables through a |
| WritableConverter. For example, to access a SequenceFile where the keys are Text and the |
| values are IntWritable, you could simply write |
| <pre><code> |
| sparkContext.sequenceFile[String, Int](path, ...) |
| </code></pre> |
| <p> |
| WritableConverters are provided in a somewhat strange way (by an implicit function) to support |
| both subclasses of Writable and types for which we define a converter (e.g. Int to |
| IntWritable). The most natural thing would've been to have implicit objects for the |
| converters, but then we couldn't have an object for every subclass of Writable (you can't |
| have a parameterized singleton object). We use functions instead to create a new converter |
| for the appropriate type. In addition, we pass the converter a ClassTag of its type to |
| allow it to figure out the Writable class to use in the subclass case. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - directory to the input data files, the path can be comma separated paths |
| as a list of inputs</dd> |
| <dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd> |
| <dd><code>km</code> - (undocumented)</dd> |
| <dd><code>vm</code> - (undocumented)</dd> |
| <dd><code>kcf</code> - (undocumented)</dd> |
| <dd><code>vcf</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of tuples of key and corresponding value</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Because Hadoop's RecordReader class re-uses the same Writable object for each |
| record, directly caching the returned RDD or directly passing it to an aggregation or shuffle |
| operation will create many references to the same object. |
| If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first |
| copy them using a <code>map</code> function.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="objectFile-java.lang.String-int-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>objectFile</h4> |
| <pre>public <T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> objectFile(String path, |
| int minPartitions, |
| scala.reflect.ClassTag<T> evidence$4)</pre> |
| <div class="block">Load an RDD saved as a SequenceFile containing serialized objects, with NullWritable keys and |
| BytesWritable values that contain a serialized partition. This is still an experimental |
| storage format and may not be supported exactly as is in future Spark releases. It will also |
| be pretty slow if you use the default serializer (Java serialization), |
| though the nice thing about it is that there's very little effort required to save arbitrary |
| objects. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - directory to the input data files, the path can be comma separated paths |
| as a list of inputs</dd> |
| <dd><code>minPartitions</code> - suggested minimum number of partitions for the resulting RDD</dd> |
| <dd><code>evidence$4</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD representing deserialized data from the file(s)</dd> |
| </dl> |
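| <div class="block">For example, a round-trip sketch using the companion <code>saveAsObjectFile</code> action
| (the path shown is illustrative):
| <pre><code>
| case class Point(x: Double, y: Double)
| 
| val points = sc.parallelize(Seq(Point(0, 0), Point(1, 1)))
| points.saveAsObjectFile("hdfs://namenode/tmp/points")
| 
| // Reload with the same element type and the default serializer.
| val reloaded = sc.objectFile[Point]("hdfs://namenode/tmp/points")
| </code></pre></div>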
| </li> |
| </ul> |
| <a name="union-scala.collection.Seq-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>union</h4> |
| <pre>public <T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> union(scala.collection.Seq<<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T>> rdds, |
| scala.reflect.ClassTag<T> evidence$6)</pre> |
| <div class="block">Build the union of a list of RDDs.</div> |
| </li> |
| </ul> |
| <a name="union-org.apache.spark.rdd.RDD-scala.collection.Seq-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>union</h4> |
| <pre>public <T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> union(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> first, |
| scala.collection.Seq<<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T>> rest, |
| scala.reflect.ClassTag<T> evidence$7)</pre> |
| <div class="block">Build the union of a list of RDDs passed as variable-length arguments.</div> |
| </li> |
| </ul> |
| <a name="emptyRDD-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>emptyRDD</h4> |
| <pre>public <T> <a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> emptyRDD(scala.reflect.ClassTag<T> evidence$8)</pre> |
| <div class="block">Get an RDD that has no partitions or elements.</div> |
| </li> |
| </ul> |
| <a name="register-org.apache.spark.util.AccumulatorV2-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>register</h4> |
| <pre>public void register(<a href="../../../org/apache/spark/util/AccumulatorV2.html" title="class in org.apache.spark.util">AccumulatorV2</a><?,?> acc)</pre> |
| <div class="block">Register the given accumulator. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>acc</code> - (undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Accumulators must be registered before use, or an exception will be thrown.</dd>
| </dl> |
| </li> |
| </ul> |
| <a name="register-org.apache.spark.util.AccumulatorV2-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>register</h4> |
| <pre>public void register(<a href="../../../org/apache/spark/util/AccumulatorV2.html" title="class in org.apache.spark.util">AccumulatorV2</a><?,?> acc, |
| String name)</pre> |
| <div class="block">Register the given accumulator with given name. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>acc</code> - (undocumented)</dd> |
| <dd><code>name</code> - (undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Accumulators must be registered before use, or an exception will be thrown.</dd> |
| </dl> |
| </li> |
| </ul> |
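| <div class="block">Example: registering a manually constructed accumulator before first use; a minimal |
| sketch assuming an active <code>SparkContext</code> named <code>sc</code>. |
| <pre>import org.apache.spark.util.LongAccumulator |
|  |
| val acc = new LongAccumulator |
| sc.register(acc, "myCounter")   // must precede any add/use |
| sc.parallelize(1 to 10).foreach(_ => acc.add(1L)) |
| acc.value                       // 10, read on the driver</pre> |
| </div> |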
| <a name="longAccumulator--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>longAccumulator</h4> |
| <pre>public <a href="../../../org/apache/spark/util/LongAccumulator.html" title="class in org.apache.spark.util">LongAccumulator</a> longAccumulator()</pre> |
| <div class="block">Create and register a long accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="longAccumulator-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>longAccumulator</h4> |
| <pre>public <a href="../../../org/apache/spark/util/LongAccumulator.html" title="class in org.apache.spark.util">LongAccumulator</a> longAccumulator(String name)</pre> |
| <div class="block">Create and register a long accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>name</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
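| <div class="block">Example: counting parse failures with a named long accumulator; a minimal sketch |
| assuming an active <code>SparkContext</code> named <code>sc</code>. |
| <pre>val errors = sc.longAccumulator("parseErrors") |
| sc.parallelize(Seq("1", "x", "2")).foreach { s => |
|   if (scala.util.Try(s.toInt).isFailure) errors.add(1L) |
| } |
| errors.value   // 1</pre> |
| </div> |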
| <a name="doubleAccumulator--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>doubleAccumulator</h4> |
| <pre>public <a href="../../../org/apache/spark/util/DoubleAccumulator.html" title="class in org.apache.spark.util">DoubleAccumulator</a> doubleAccumulator()</pre> |
| <div class="block">Create and register a double accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="doubleAccumulator-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>doubleAccumulator</h4> |
| <pre>public <a href="../../../org/apache/spark/util/DoubleAccumulator.html" title="class in org.apache.spark.util">DoubleAccumulator</a> doubleAccumulator(String name)</pre> |
| <div class="block">Create and register a double accumulator, which starts with 0 and accumulates inputs by <code>add</code>.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>name</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="collectionAccumulator--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>collectionAccumulator</h4> |
| <pre>public <T> <a href="../../../org/apache/spark/util/CollectionAccumulator.html" title="class in org.apache.spark.util">CollectionAccumulator</a><T> collectionAccumulator()</pre> |
| <div class="block">Create and register a <code>CollectionAccumulator</code>, which starts with empty list and accumulates |
| inputs by adding them into the list.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="collectionAccumulator-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>collectionAccumulator</h4> |
| <pre>public <T> <a href="../../../org/apache/spark/util/CollectionAccumulator.html" title="class in org.apache.spark.util">CollectionAccumulator</a><T> collectionAccumulator(String name)</pre> |
| <div class="block">Create and register a <code>CollectionAccumulator</code>, which starts with empty list and accumulates |
| inputs by adding them into the list.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>name</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
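| <div class="block">Example: collecting offending records with a <code>CollectionAccumulator</code>; a minimal |
| sketch assuming an active <code>SparkContext</code> named <code>sc</code>. |
| <pre>val bad = sc.collectionAccumulator[String]("badRecords") |
| sc.parallelize(Seq("ok", "", "ok")).foreach { s => |
|   if (s.isEmpty) bad.add(s) |
| } |
| bad.value   // java.util.List with one (empty) entry</pre> |
| </div> |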
| <a name="broadcast-java.lang.Object-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a><a name="broadcast-T-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>broadcast</h4> |
| <pre>public <T> <a href="../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast">Broadcast</a><T> broadcast(T value, |
| scala.reflect.ClassTag<T> evidence$9)</pre> |
| <div class="block">Broadcast a read-only variable to the cluster, returning a |
| <a href="../../../org/apache/spark/broadcast/Broadcast.html" title="class in org.apache.spark.broadcast"><code>Broadcast</code></a> object for reading it in distributed functions. |
| The variable will be sent to each executor only once. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>value</code> - value to broadcast to the Spark nodes</dd> |
| <dd><code>evidence$9</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd><code>Broadcast</code> object, a read-only variable cached on each machine</dd> |
| </dl> |
| </li> |
| </ul> |
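| <div class="block">Example: shipping a small lookup table once per executor instead of once per task; a |
| minimal sketch assuming an active <code>SparkContext</code> named <code>sc</code>. |
| <pre>val lookup = sc.broadcast(Map("a" -> 1, "b" -> 2)) |
| sc.parallelize(Seq("a", "b", "a")) |
|   .map(k => lookup.value.getOrElse(k, 0)) |
|   .collect()                                 // Array(1, 2, 1)</pre> |
| </div> |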
| <a name="addFile-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>addFile</h4> |
| <pre>public void addFile(String path)</pre> |
| <div class="block">Add a file to be downloaded with this Spark job on every node. |
| <p> |
| If a file is added during execution, it will not be available until the next TaskSet starts. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - can be either a local file, a file in HDFS (or other Hadoop-supported |
| filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, |
| use <code>SparkFiles.get(fileName)</code> to find its download location. |
| <p></dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>A path can be added only once. Subsequent additions of the same path are ignored.</dd> |
| </dl> |
| </li> |
| </ul> |
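| <div class="block">Example: distributing a side file and resolving it on executors; a minimal sketch |
| assuming an active <code>SparkContext</code> named <code>sc</code>; the file path is hypothetical. |
| <pre>import org.apache.spark.SparkFiles |
|  |
| sc.addFile("/data/lookup.txt")        // hypothetical local path |
| sc.parallelize(1 to 4).map { _ => |
|   SparkFiles.get("lookup.txt")        // per-executor download location |
| }.collect()</pre> |
| </div> |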
| <a name="listFiles--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>listFiles</h4> |
| <pre>public scala.collection.Seq<String> listFiles()</pre> |
| <div class="block">Returns a list of file paths that are added to resources.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="addArchive-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>addArchive</h4> |
| <pre>public void addArchive(String path)</pre> |
| <div class="block">:: Experimental :: |
| Add an archive to be downloaded and unpacked with this Spark job on every node. |
| <p> |
| If an archive is added during execution, it will not be available until the next TaskSet |
| starts. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - can be either a local file, a file in HDFS (or other Hadoop-supported |
| filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, |
| use <code>SparkFiles.get(paths-to-files)</code> to find its download/unpacked location. |
| The given path should end in .zip, .tar, .tar.gz, .tgz or .jar. |
| <p></dd> |
| <dt><span class="simpleTagLabel">Since:</span></dt> |
| <dd>3.1.0</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>A path can be added only once. Subsequent additions of the same path are ignored. |
| <p></dd> |
| </dl> |
| </li> |
| </ul> |
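| <div class="block">Example: distributing an archive and locating its unpacked directory on executors; a |
| minimal sketch assuming an active <code>SparkContext</code> named <code>sc</code>; the archive path is hypothetical. |
| <pre>sc.addArchive("/data/resources.zip")                  // hypothetical archive |
| sc.parallelize(1 to 2).map { _ => |
|   org.apache.spark.SparkFiles.get("resources.zip")   // unpacked directory on this executor |
| }.collect()</pre> |
| </div> |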
| <a name="listArchives--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>listArchives</h4> |
| <pre>public scala.collection.Seq<String> listArchives()</pre> |
| <div class="block">:: Experimental :: |
| Returns a list of archive paths that have been added to resources. |
| <p></div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Since:</span></dt> |
| <dd>3.1.0</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="addFile-java.lang.String-boolean-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>addFile</h4> |
| <pre>public void addFile(String path, |
| boolean recursive)</pre> |
| <div class="block">Add a file to be downloaded with this Spark job on every node. |
| <p> |
| If a file is added during execution, it will not be available until the next TaskSet starts. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - can be either a local file, a file in HDFS (or other Hadoop-supported |
| filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, |
| use <code>SparkFiles.get(fileName)</code> to find its download location.</dd> |
| <dd><code>recursive</code> - if true, a directory can be given in <code>path</code>. Currently directories are |
| only supported for Hadoop-supported filesystems. |
| <p></dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>A path can be added only once. Subsequent additions of the same path are ignored.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="addSparkListener-org.apache.spark.scheduler.SparkListenerInterface-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>addSparkListener</h4> |
| <pre>public void addSparkListener(<a href="../../../org/apache/spark/scheduler/SparkListenerInterface.html" title="interface in org.apache.spark.scheduler">SparkListenerInterface</a> listener)</pre> |
| <div class="block">:: DeveloperApi :: |
| Register a listener to receive callbacks for events that happen during execution.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>listener</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="removeSparkListener-org.apache.spark.scheduler.SparkListenerInterface-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>removeSparkListener</h4> |
| <pre>public void removeSparkListener(<a href="../../../org/apache/spark/scheduler/SparkListenerInterface.html" title="interface in org.apache.spark.scheduler">SparkListenerInterface</a> listener)</pre> |
| <div class="block">:: DeveloperApi :: |
| Deregister the listener from Spark's listener bus.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>listener</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
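| <div class="block">Example: attaching and later detaching a listener; a minimal sketch assuming an |
| active <code>SparkContext</code> named <code>sc</code>. |
| <pre>import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd} |
|  |
| val listener = new SparkListener { |
|   override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = |
|     println(s"Job ${jobEnd.jobId} ended: ${jobEnd.jobResult}") |
| } |
| sc.addSparkListener(listener) |
| sc.parallelize(1 to 10).count()   // triggers onJobEnd |
| sc.removeSparkListener(listener)</pre> |
| </div> |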
| <a name="requestTotalExecutors-int-int-scala.collection.immutable.Map-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>requestTotalExecutors</h4> |
| <pre>public boolean requestTotalExecutors(int numExecutors, |
| int localityAwareTasks, |
| scala.collection.immutable.Map<String,Object> hostToLocalTaskCount)</pre> |
| <div class="block">Update the cluster manager on our scheduling needs. Three bits of information are included |
| to help it make decisions. This applies to the default ResourceProfile.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>numExecutors</code> - The total number of executors we'd like to have. The cluster manager |
| shouldn't kill any running executor to reach this number, but, |
| if all existing executors were to die, this is the number of executors |
| we'd want to be allocated.</dd> |
| <dd><code>localityAwareTasks</code> - The number of tasks in all active stages that have |
| locality preferences. This includes running, pending, and completed tasks.</dd> |
| <dd><code>hostToLocalTaskCount</code> - A map of hosts to the number of tasks from all active stages |
| that would like to run on that host. |
| This includes running, pending, and completed tasks.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>whether the request is acknowledged by the cluster manager.</dd> |
| </dl> |
| </li> |
| </ul> |
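| <div class="block">Example: a sketch of a scheduling hint under the default ResourceProfile, assuming an |
| active <code>SparkContext</code> named <code>sc</code> on a cluster manager that supports this API (not local mode). |
| <pre>val acknowledged = sc.requestTotalExecutors( |
|   numExecutors = 8,                              // desired total |
|   localityAwareTasks = 0,                        // no locality hints |
|   hostToLocalTaskCount = Map.empty[String, Int])</pre> |
| </div> |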
| <a name="requestExecutors-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>requestExecutors</h4> |
| <pre>public boolean requestExecutors(int numAdditionalExecutors)</pre> |
| <div class="block">:: DeveloperApi :: |
| Request an additional number of executors from the cluster manager.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>numAdditionalExecutors</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>whether the request is received.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="killExecutors-scala.collection.Seq-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>killExecutors</h4> |
| <pre>public boolean killExecutors(scala.collection.Seq<String> executorIds)</pre> |
| <div class="block">:: DeveloperApi :: |
| Request that the cluster manager kill the specified executors. |
| <p> |
| This is not supported when dynamic allocation is turned on. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>executorIds</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>whether the request is received.</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This is an indication to the cluster manager that the application wishes to adjust |
| its resource usage downwards. If the application wishes to replace the executors it kills |
| through this method with new ones, it should follow up explicitly with a call to |
| <code>SparkContext#requestExecutors</code>. |
| <p></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="killExecutor-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>killExecutor</h4> |
| <pre>public boolean killExecutor(String executorId)</pre> |
| <div class="block">:: DeveloperApi :: |
| Request that the cluster manager kill the specified executor. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>executorId</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>whether the request is received.</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This is an indication to the cluster manager that the application wishes to adjust |
| its resource usage downwards. If the application wishes to replace the executor it kills |
| through this method with a new one, it should follow up explicitly with a call to |
| <code>SparkContext#requestExecutors</code>. |
| <p></dd> |
| </dl> |
| </li> |
| </ul> |
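| <div class="block">Example: scaling down two executors and explicitly requesting replacements; a sketch |
| assuming an active <code>SparkContext</code> named <code>sc</code>, hypothetical executor ids, and dynamic |
| allocation disabled. |
| <pre>if (sc.killExecutors(Seq("1", "2"))) {   // ids as shown in the UI |
|   sc.requestExecutors(2)                 // follow up with replacements |
| }</pre> |
| </div> |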
| <a name="version--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>version</h4> |
| <pre>public String version()</pre> |
| <div class="block">The version of Spark on which this application is running.</div> |
| </li> |
| </ul> |
| <a name="getExecutorMemoryStatus--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getExecutorMemoryStatus</h4> |
| <pre>public scala.collection.Map<String,scala.Tuple2<Object,Object>> getExecutorMemoryStatus()</pre> |
| <div class="block">Return a map from the block manager to the max memory available for caching and the remaining |
| memory available for caching.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
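| <div class="block">Example: printing per-block-manager memory figures; a minimal sketch assuming an |
| active <code>SparkContext</code> named <code>sc</code>. |
| <pre>sc.getExecutorMemoryStatus.foreach { case (bm, (maxMem, remaining)) => |
|   println(s"$bm: max=$maxMem bytes, remaining=$remaining bytes") |
| }</pre> |
| </div> |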
| <a name="getRDDStorageInfo--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getRDDStorageInfo</h4> |
| <pre>public <a href="../../../org/apache/spark/storage/RDDInfo.html" title="class in org.apache.spark.storage">RDDInfo</a>[] getRDDStorageInfo()</pre> |
| <div class="block">:: DeveloperApi :: |
| Return information about what RDDs are cached, whether they are in memory or on disk, |
| how much space they take, etc.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="getPersistentRDDs--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getPersistentRDDs</h4> |
| <pre>public scala.collection.Map<Object,<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><?>> getPersistentRDDs()</pre> |
| <div class="block">Returns an immutable map of RDDs that have marked themselves as persistent via cache() call. |
| <p></div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This does not necessarily mean the caching or computation was successful.</dd> |
| </dl> |
| </li> |
| </ul> |
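| <div class="block">Example: inspecting cached RDDs with both methods above; a minimal sketch assuming an |
| active <code>SparkContext</code> named <code>sc</code>. |
| <pre>val cached = sc.parallelize(1 to 1000).setName("numbers").cache() |
| cached.count()   // materialize the cache |
| sc.getPersistentRDDs.foreach { case (id, rdd) => println(s"RDD $id: ${rdd.name}") } |
| sc.getRDDStorageInfo.foreach(i => println(s"${i.name}: ${i.memSize} bytes in memory"))</pre> |
| </div> |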
| <a name="getAllPools--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getAllPools</h4> |
| <pre>public scala.collection.Seq<<a href="../../../org/apache/spark/scheduler/Schedulable.html" title="interface in org.apache.spark.scheduler">Schedulable</a>> getAllPools()</pre> |
| <div class="block">:: DeveloperApi :: |
| Return the pools for the fair scheduler.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="getPoolForName-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getPoolForName</h4> |
| <pre>public scala.Option<<a href="../../../org/apache/spark/scheduler/Schedulable.html" title="interface in org.apache.spark.scheduler">Schedulable</a>> getPoolForName(String pool)</pre> |
| <div class="block">:: DeveloperApi :: |
| Return the pool associated with the given name, if one exists.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>pool</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="getSchedulingMode--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getSchedulingMode</h4> |
| <pre>public scala.Enumeration.Value getSchedulingMode()</pre> |
| <div class="block">Return current scheduling mode</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="addJar-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>addJar</h4> |
| <pre>public void addJar(String path)</pre> |
| <div class="block">Adds a JAR dependency for all tasks to be executed on this <code>SparkContext</code> in the future. |
| <p> |
| If a jar is added during execution, it will not be available until the next TaskSet starts. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - can be either a local file, a file in HDFS (or other Hadoop-supported filesystems), |
| an HTTP, HTTPS or FTP URI, or local:/path for a file on every worker node. |
| <p></dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>A path can be added only once. Subsequent additions of the same path are ignored.</dd> |
| </dl> |
| </li> |
| </ul> |
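| <div class="block">Example: adding a jar dependency and listing registered jars; a minimal sketch assuming |
| an active <code>SparkContext</code> named <code>sc</code>; the jar path is hypothetical. |
| <pre>sc.addJar("/libs/udfs.jar")      // hypothetical path; fetched before the next TaskSet |
| sc.listJars().foreach(println)</pre> |
| </div> |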
| <a name="listJars--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>listJars</h4> |
| <pre>public scala.collection.Seq<String> listJars()</pre> |
| <div class="block">Returns a list of jar files that are added to resources.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="stop--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>stop</h4> |
| <pre>public void stop()</pre> |
| <div class="block">Shut down the SparkContext.</div> |
| </li> |
| </ul> |
| <a name="setCallSite-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setCallSite</h4> |
| <pre>public void setCallSite(String shortCallSite)</pre> |
| <div class="block">Set the thread-local property for overriding the call sites |
| of actions and RDDs.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>shortCallSite</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="clearCallSite--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>clearCallSite</h4> |
| <pre>public void clearCallSite()</pre> |
| <div class="block">Clear the thread-local property for overriding the call sites |
| of actions and RDDs.</div> |
| </li> |
| </ul> |
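| <div class="block">Example: labeling jobs in the UI with a custom call site; a minimal sketch assuming an |
| active <code>SparkContext</code> named <code>sc</code>. |
| <pre>sc.setCallSite("nightly-aggregation")   // label shown for subsequent jobs |
| sc.parallelize(1 to 100).count() |
| sc.clearCallSite()                      // restore the default call sites</pre> |
| </div> |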
| <a name="runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.collection.Seq-scala.Function2-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>runJob</h4> |
| <pre>public <T,U> void runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, |
| scala.collection.Seq<Object> partitions, |
| scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, |
| scala.reflect.ClassTag<U> evidence$10)</pre> |
| <div class="block">Run a function on a given set of partitions in an RDD and pass the results to the given |
| handler function. This is the main entry point for all actions in Spark. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rdd</code> - target RDD to run tasks on</dd> |
| <dd><code>func</code> - a function to run on each partition of the RDD</dd> |
| <dd><code>partitions</code> - set of partitions to run on; some jobs may not want to compute on all |
| partitions of the target RDD, e.g. for operations like <code>first()</code></dd> |
| <dd><code>resultHandler</code> - callback to pass each result to</dd> |
| <dd><code>evidence$10</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
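| <div class="block">Example: summing only the first two partitions and collecting results through a |
| handler; a minimal sketch assuming an active <code>SparkContext</code> named <code>sc</code>. |
| <pre>import org.apache.spark.TaskContext |
|  |
| val rdd = sc.parallelize(1 to 100, numSlices = 4) |
| val sums = new Array[Int](2) |
| sc.runJob[Int, Int]( |
|   rdd, |
|   (ctx: TaskContext, it: Iterator[Int]) => it.sum, |
|   Seq(0, 1),                                        // first two partitions only |
|   (index: Int, result: Int) => sums(index) = result)</pre> |
| </div> |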
| <a name="runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.collection.Seq-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>runJob</h4> |
| <pre>public <T,U> Object runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, |
| scala.collection.Seq<Object> partitions, |
| scala.reflect.ClassTag<U> evidence$11)</pre> |
| <div class="block">Run a function on a given set of partitions in an RDD and return the results as an array. |
| The function that is run against each partition additionally takes <code>TaskContext</code> argument. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rdd</code> - target RDD to run tasks on</dd> |
| <dd><code>func</code> - a function to run on each partition of the RDD</dd> |
| <dd><code>partitions</code> - set of partitions to run on; some jobs may not want to compute on all |
| partitions of the target RDD, e.g. for operations like <code>first()</code></dd> |
| <dd><code>evidence$11</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>in-memory collection with a result of the job (each collection element will contain |
| a result from one partition)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.collection.Seq-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>runJob</h4> |
| <pre>public <T,U> Object runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function1<scala.collection.Iterator<T>,U> func, |
| scala.collection.Seq<Object> partitions, |
| scala.reflect.ClassTag<U> evidence$12)</pre> |
| <div class="block">Run a function on a given set of partitions in an RDD and return the results as an array. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rdd</code> - target RDD to run tasks on</dd> |
| <dd><code>func</code> - a function to run on each partition of the RDD</dd> |
| <dd><code>partitions</code> - set of partitions to run on; some jobs may not want to compute on all |
| partitions of the target RDD, e.g. for operations like <code>first()</code></dd> |
| <dd><code>evidence$12</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>in-memory collection with a result of the job (each collection element will contain |
| a result from one partition)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>runJob</h4> |
| <pre>public <T,U> Object runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, |
| scala.reflect.ClassTag<U> evidence$13)</pre> |
| <div class="block">Run a job on all partitions in an RDD and return the results in an array. The function |
| that is run against each partition additionally takes <code>TaskContext</code> argument. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rdd</code> - target RDD to run tasks on</dd> |
| <dd><code>func</code> - a function to run on each partition of the RDD</dd> |
| <dd><code>evidence$13</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>in-memory collection with a result of the job (each collection element will contain |
| a result from one partition)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>runJob</h4> |
| <pre>public <T,U> Object runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function1<scala.collection.Iterator<T>,U> func, |
| scala.reflect.ClassTag<U> evidence$14)</pre> |
| <div class="block">Run a job on all partitions in an RDD and return the results in an array. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rdd</code> - target RDD to run tasks on</dd> |
| <dd><code>func</code> - a function to run on each partition of the RDD</dd> |
| <dd><code>evidence$14</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>in-memory collection with a result of the job (each collection element will contain |
| a result from one partition)</dd> |
| </dl> |
| </li> |
| </ul> |
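| <div class="block">Example: the simplest overload, run on every partition; a minimal sketch assuming an |
| active <code>SparkContext</code> named <code>sc</code>. |
| <pre>val rdd = sc.parallelize(1 to 100, numSlices = 4) |
| val sizes = sc.runJob(rdd, (it: Iterator[Int]) => it.size) |
| // sizes: one element per partition, e.g. Array(25, 25, 25, 25)</pre> |
| </div> |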
| <a name="runJob-org.apache.spark.rdd.RDD-scala.Function2-scala.Function2-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>runJob</h4> |
| <pre>public <T,U> void runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> processPartition, |
| scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, |
| scala.reflect.ClassTag<U> evidence$15)</pre> |
| <div class="block">Run a job on all partitions in an RDD and pass the results to a handler function. The function |
| that is run against each partition additionally takes <code>TaskContext</code> argument. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rdd</code> - target RDD to run tasks on</dd> |
| <dd><code>processPartition</code> - a function to run on each partition of the RDD</dd> |
| <dd><code>resultHandler</code> - callback to pass each result to</dd> |
| <dd><code>evidence$15</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="runJob-org.apache.spark.rdd.RDD-scala.Function1-scala.Function2-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>runJob</h4> |
| <pre>public <T,U> void runJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function1<scala.collection.Iterator<T>,U> processPartition, |
| scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, |
| scala.reflect.ClassTag<U> evidence$16)</pre> |
| <div class="block">Run a job on all partitions in an RDD and pass the results to a handler function. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rdd</code> - target RDD to run tasks on</dd> |
| <dd><code>processPartition</code> - a function to run on each partition of the RDD</dd> |
| <dd><code>resultHandler</code> - callback to pass each result to</dd> |
| <dd><code>evidence$16</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="runApproximateJob-org.apache.spark.rdd.RDD-scala.Function2-org.apache.spark.partial.ApproximateEvaluator-long-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>runApproximateJob</h4> |
| <pre>public <T,U,R> <a href="../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a><R> runApproximateJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function2<<a href="../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a>,scala.collection.Iterator<T>,U> func, |
| <a href="../../../org/apache/spark/partial/ApproximateEvaluator.html" title="interface in org.apache.spark.partial">ApproximateEvaluator</a><U,R> evaluator, |
| long timeout)</pre> |
| <div class="block">:: DeveloperApi :: |
| Run a job that can return approximate results. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rdd</code> - target RDD to run tasks on</dd> |
| <dd><code>func</code> - a function to run on each partition of the RDD</dd> |
| <dd><code>evaluator</code> - <code>ApproximateEvaluator</code> to receive the partial results</dd> |
| <dd><code>timeout</code> - maximum time to wait for the job, in milliseconds</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>partial result (how partial depends on whether the job was finished before or |
| after timeout)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="submitJob-org.apache.spark.rdd.RDD-scala.Function1-scala.collection.Seq-scala.Function2-scala.Function0-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>submitJob</h4> |
| <pre>public <T,U,R> <a href="../../../org/apache/spark/SimpleFutureAction.html" title="class in org.apache.spark">SimpleFutureAction</a><R> submitJob(<a href="../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.Function1<scala.collection.Iterator<T>,U> processPartition, |
| scala.collection.Seq<Object> partitions, |
| scala.Function2<Object,U,scala.runtime.BoxedUnit> resultHandler, |
| scala.Function0<R> resultFunc)</pre> |
| <div class="block">Submit a job for execution and return a FutureJob holding the result. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rdd</code> - target RDD to run tasks on</dd> |
| <dd><code>processPartition</code> - a function to run on each partition of the RDD</dd> |
| <dd><code>partitions</code> - set of partitions to run on; some jobs may not want to compute on all |
| partitions of the target RDD, e.g. for operations like <code>first()</code></dd> |
| <dd><code>resultHandler</code> - callback to pass each result to</dd> |
| <dd><code>resultFunc</code> - function to be executed when the result is ready</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
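| <div class="block">Example: submitting a job asynchronously and awaiting the future; a sketch assuming an |
| active <code>SparkContext</code> named <code>sc</code>. |
| <pre>import scala.concurrent.Await |
| import scala.concurrent.duration._ |
|  |
| val rdd = sc.parallelize(1 to 100, numSlices = 4) |
| val partialSums = new Array[Int](4) |
| val future = sc.submitJob[Int, Int, Array[Int]]( |
|   rdd, |
|   (it: Iterator[Int]) => it.sum, |
|   0 until 4,                                    // partitions to run |
|   (index: Int, sum: Int) => partialSums(index) = sum, |
|   partialSums)                                  // evaluated when the job completes |
| Await.result(future, 60.seconds).sum            // 5050</pre> |
| </div> |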
| <a name="cancelJobGroup-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>cancelJobGroup</h4> |
| <pre>public void cancelJobGroup(String groupId)</pre> |
| <div class="block">Cancel active jobs for the specified group. See <code>org.apache.spark.SparkContext.setJobGroup</code> |
| for more information.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>groupId</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
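| <div class="block">Example: grouping jobs from one thread and cancelling them from another; a minimal |
| sketch assuming an active <code>SparkContext</code> named <code>sc</code>; the group id is hypothetical. |
| <pre>sc.setJobGroup("nightly-etl", "nightly load", interruptOnCancel = true) |
| // ... actions started from this thread now belong to the group ... |
| sc.cancelJobGroup("nightly-etl")   // may be called from any thread</pre> |
| </div> |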
| <a name="cancelAllJobs--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>cancelAllJobs</h4> |
| <pre>public void cancelAllJobs()</pre> |
| <div class="block">Cancel all jobs that have been scheduled or are running.</div> |
| </li> |
| </ul> |
| <a name="cancelJob-int-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>cancelJob</h4> |
| <pre>public void cancelJob(int jobId, |
| String reason)</pre> |
| <div class="block">Cancel a given job if it's scheduled or running. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>jobId</code> - the job ID to cancel</dd> |
| <dd><code>reason</code> - optional reason for cancellation</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Throws <code>InterruptedException</code> if the cancel message cannot be sent</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="cancelJob-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>cancelJob</h4> |
| <pre>public void cancelJob(int jobId)</pre> |
| <div class="block">Cancel a given job if it's scheduled or running. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>jobId</code> - the job ID to cancel</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Throws <code>InterruptedException</code> if the cancel message cannot be sent</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="cancelStage-int-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>cancelStage</h4> |
| <pre>public void cancelStage(int stageId, |
| String reason)</pre> |
| <div class="block">Cancel a given stage and all jobs associated with it. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>stageId</code> - the stage ID to cancel</dd> |
| <dd><code>reason</code> - reason for cancellation</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Throws <code>InterruptedException</code> if the cancel message cannot be sent</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="cancelStage-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>cancelStage</h4> |
| <pre>public void cancelStage(int stageId)</pre> |
| <div class="block">Cancel a given stage and all jobs associated with it. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>stageId</code> - the stage ID to cancel</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Throws <code>InterruptedException</code> if the cancel message cannot be sent</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="killTaskAttempt-long-boolean-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>killTaskAttempt</h4> |
| <pre>public boolean killTaskAttempt(long taskId, |
| boolean interruptThread, |
| String reason)</pre> |
| <div class="block">Kill and reschedule the given task attempt. Task ids can be obtained from the Spark UI |
| or through SparkListener.onTaskStart. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>taskId</code> - the task ID to kill. This id uniquely identifies the task attempt.</dd> |
| <dd><code>interruptThread</code> - whether to interrupt the thread running the task.</dd> |
| <dd><code>reason</code> - the reason for killing the task, which should be a short string. If a task |
| is killed multiple times with different reasons, only one reason will be reported. |
| <p></dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>Whether the task was successfully killed.</dd> |
| </dl> |
| </li> |
| </ul> |
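| <div class="block">Example: killing a straggler task attempt; a sketch assuming an active |
| <code>SparkContext</code> named <code>sc</code> and a hypothetical task id taken from the UI or a listener. |
| <pre>val killed = sc.killTaskAttempt(taskId = 42L, interruptThread = false, |
|   reason = "straggler mitigation")</pre> |
| </div> |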
| <a name="setCheckpointDir-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setCheckpointDir</h4> |
| <pre>public void setCheckpointDir(String directory)</pre> |
| <div class="block">Set the directory under which RDDs are going to be checkpointed.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>directory</code> - path to the directory where checkpoint files will be stored |
| (must be an HDFS path if running on a cluster)</dd> |
| </dl> |
| </li> |
| </ul> |
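| <div class="block">Example: configuring checkpointing; a minimal sketch assuming an active |
| <code>SparkContext</code> named <code>sc</code>; the HDFS path is hypothetical. |
| <pre>sc.setCheckpointDir("hdfs:///checkpoints/app1")   // hypothetical cluster path |
| val r = sc.parallelize(1 to 10) |
| r.checkpoint()    // written under the checkpoint dir on the next action |
| r.count()</pre> |
| </div> |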
| <a name="getCheckpointDir--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getCheckpointDir</h4> |
| <pre>public scala.Option<String> getCheckpointDir()</pre> |
| </li> |
| </ul> |
| <a name="defaultParallelism--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>defaultParallelism</h4> |
| <pre>public int defaultParallelism()</pre> |
| <div class="block">Default level of parallelism to use when not given by user (e.g. parallelize and makeRDD).</div> |
| </li> |
| </ul> |
| <a name="defaultMinPartitions--"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>defaultMinPartitions</h4> |
| <pre>public int defaultMinPartitions()</pre> |
| <div class="block">Default min number of partitions for Hadoop RDDs when not given by user |
| Notice that we use math.min so the "defaultMinPartitions" cannot be higher than 2. |
| The reasons for this are discussed in https://github.com/mesos/spark/pull/718</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <script defer="defer" type="text/javascript" src="../../../lib/jquery.js"></script><script defer="defer" type="text/javascript" src="../../../lib/api-javadocs.js"></script></body> |
| </html> |