| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_402) on Mon Apr 15 02:02:04 UTC 2024 --> |
| <title>RDD (Spark 3.4.3 JavaDoc)</title> |
| <meta name="date" content="2024-04-15"> |
| <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="RDD (Spark 3.4.3 JavaDoc)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10,"i8":10,"i9":6,"i10":10,"i11":10,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":10,"i19":10,"i20":9,"i21":10,"i22":10,"i23":10,"i24":10,"i25":10,"i26":10,"i27":10,"i28":10,"i29":10,"i30":10,"i31":10,"i32":10,"i33":10,"i34":10,"i35":10,"i36":10,"i37":10,"i38":10,"i39":10,"i40":10,"i41":10,"i42":10,"i43":10,"i44":10,"i45":10,"i46":10,"i47":10,"i48":10,"i49":10,"i50":9,"i51":10,"i52":10,"i53":10,"i54":10,"i55":10,"i56":10,"i57":10,"i58":10,"i59":10,"i60":9,"i61":9,"i62":9,"i63":9,"i64":10,"i65":10,"i66":10,"i67":10,"i68":10,"i69":10,"i70":10,"i71":10,"i72":10,"i73":10,"i74":10,"i75":10,"i76":10,"i77":10,"i78":10,"i79":10,"i80":10,"i81":10,"i82":10,"i83":10,"i84":10,"i85":10,"i86":10,"i87":10,"i88":10,"i89":10,"i90":10,"i91":10,"i92":10,"i93":10,"i94":10,"i95":10,"i96":10,"i97":10,"i98":10}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../org/apache/spark/rdd/PartitionPruningRDD.html" title="class in org.apache.spark.rdd"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../org/apache/spark/rdd/RDDBarrier.html" title="class in org.apache.spark.rdd"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/spark/rdd/RDD.html" target="_top">Frames</a></li> |
| <li><a href="RDD.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li>Field | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.spark.rdd</div> |
| <h2 title="Class RDD" class="title">Class RDD<T></h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li>Object</li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.spark.rdd.RDD<T></li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <dl> |
| <dt>All Implemented Interfaces:</dt> |
| <dd>java.io.Serializable, org.apache.spark.internal.Logging</dd> |
| </dl> |
| <dl> |
| <dt>Direct Known Subclasses:</dt> |
| <dd><a href="../../../../org/apache/spark/api/r/BaseRRDD.html" title="class in org.apache.spark.api.r">BaseRRDD</a>, <a href="../../../../org/apache/spark/rdd/CoGroupedRDD.html" title="class in org.apache.spark.rdd">CoGroupedRDD</a>, <a href="../../../../org/apache/spark/graphx/EdgeRDD.html" title="class in org.apache.spark.graphx">EdgeRDD</a>, <a href="../../../../org/apache/spark/rdd/HadoopRDD.html" title="class in org.apache.spark.rdd">HadoopRDD</a>, <a href="../../../../org/apache/spark/rdd/JdbcRDD.html" title="class in org.apache.spark.rdd">JdbcRDD</a>, <a href="../../../../org/apache/spark/rdd/NewHadoopRDD.html" title="class in org.apache.spark.rdd">NewHadoopRDD</a>, <a href="../../../../org/apache/spark/rdd/PartitionPruningRDD.html" title="class in org.apache.spark.rdd">PartitionPruningRDD</a>, <a href="../../../../org/apache/spark/rdd/ShuffledRDD.html" title="class in org.apache.spark.rdd">ShuffledRDD</a>, <a href="../../../../org/apache/spark/rdd/UnionRDD.html" title="class in org.apache.spark.rdd">UnionRDD</a>, <a href="../../../../org/apache/spark/graphx/VertexRDD.html" title="class in org.apache.spark.graphx">VertexRDD</a></dd> |
| </dl> |
| <hr> |
| <br> |
| <pre>public abstract class <span class="typeNameLabel">RDD<T></span> |
| extends Object |
| implements scala.Serializable, org.apache.spark.internal.Logging</pre> |
| <div class="block">A Resilient Distributed Dataset (RDD), the basic abstraction in Spark. Represents an immutable, |
| partitioned collection of elements that can be operated on in parallel. This class contains the |
| basic operations available on all RDDs, such as <code>map</code>, <code>filter</code>, and <code>persist</code>. In addition, |
| <a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="class in org.apache.spark.rdd"><code>PairRDDFunctions</code></a> contains operations available only on RDDs of key-value |
| pairs, such as <code>groupByKey</code> and <code>join</code>; |
| <a href="../../../../org/apache/spark/rdd/DoubleRDDFunctions.html" title="class in org.apache.spark.rdd"><code>DoubleRDDFunctions</code></a> contains operations available only on RDDs of |
| Doubles; and |
| <a href="../../../../org/apache/spark/rdd/SequenceFileRDDFunctions.html" title="class in org.apache.spark.rdd"><code>SequenceFileRDDFunctions</code></a> contains operations available on RDDs that |
| can be saved as SequenceFiles. |
| All operations are automatically available on any RDD of the right type (e.g. RDD[(Int, Int)]) |
| through implicit conversions. |
| <p> |
| Internally, each RDD is characterized by five main properties: |
| <p> |
| - A list of partitions |
| - A function for computing each split |
| - A list of dependencies on other RDDs |
| - Optionally, a Partitioner for key-value RDDs (e.g. to say that the RDD is hash-partitioned) |
| - Optionally, a list of preferred locations to compute each split on (e.g. block locations for |
| an HDFS file) |
| <p> |
| All of the scheduling and execution in Spark is done based on these methods, allowing each RDD |
| to implement its own way of computing itself. Indeed, users can implement custom RDDs (e.g. for |
| reading data from a new storage system) by overriding these functions. Please refer to the |
| <a href="http://people.csail.mit.edu/matei/papers/2012/nsdi_spark.pdf">Spark paper</a> |
| for more details on RDD internals.</div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../serialized-form.html#org.apache.spark.rdd.RDD">Serialized Form</a></dd> |
| </dl> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ======== NESTED CLASS SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="nested.class.summary"> |
| <!-- --> |
| </a> |
| <h3>Nested Class Summary</h3> |
| <ul class="blockList"> |
| <li class="blockList"><a name="nested.classes.inherited.from.class.org.apache.spark.internal.Logging"> |
| <!-- --> |
| </a> |
| <h3>Nested classes/interfaces inherited from interface org.apache.spark.internal.Logging</h3> |
| <code>org.apache.spark.internal.Logging.SparkShellLoggingFilter</code></li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ======== CONSTRUCTOR SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.summary"> |
| <!-- --> |
| </a> |
| <h3>Constructor Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation"> |
| <caption><span>Constructors</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colOne" scope="col">Constructor and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#RDD-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-">RDD</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><?> oneParent, |
| scala.reflect.ClassTag<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> evidence$2)</code> |
| <div class="block">Construct an RDD with just a one-to-one dependency on one parent</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#RDD-org.apache.spark.SparkContext-scala.collection.Seq-scala.reflect.ClassTag-">RDD</a></span>(<a href="../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> _sc, |
| scala.collection.Seq<<a href="../../../../org/apache/spark/Dependency.html" title="class in org.apache.spark">Dependency</a><?>> deps, |
| scala.reflect.ClassTag<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> evidence$1)</code> </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t3" class="tableTab"><span><a href="javascript:show(4);">Abstract Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code><U> U</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#aggregate-U-scala.Function2-scala.Function2-scala.reflect.ClassTag-">aggregate</a></span>(U zeroValue, |
| scala.Function2<U,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U> seqOp, |
| scala.Function2<U,U,U> combOp, |
| scala.reflect.ClassTag<U> evidence$31)</code> |
| <div class="block">Aggregate the elements of each partition, and then the results for all the partitions, using |
| given combine functions and a neutral "zero value".</div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDDBarrier.html" title="class in org.apache.spark.rdd">RDDBarrier</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#barrier--">barrier</a></span>()</code> |
| <div class="block">:: Experimental :: |
| Marks the current stage as a barrier stage, where Spark must launch all tasks together.</div> |
| </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#cache--">cache</a></span>()</code> |
| <div class="block">Persist this RDD with the default storage level (<code>MEMORY_ONLY</code>).</div> |
| </td> |
| </tr> |
| <tr id="i3" class="rowColor"> |
| <td class="colFirst"><code><U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#cartesian-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-">cartesian</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U> other, |
| scala.reflect.ClassTag<U> evidence$5)</code> |
| <div class="block">Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of |
| elements (a, b) where a is in <code>this</code> and b is in <code>other</code>.</div> |
| </td> |
| </tr> |
| <tr id="i4" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#checkpoint--">checkpoint</a></span>()</code> |
| <div class="block">Mark this RDD for checkpointing.</div> |
| </td> |
| </tr> |
| <tr id="i5" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#cleanShuffleDependencies-boolean-">cleanShuffleDependencies</a></span>(boolean blocking)</code> |
| <div class="block">Removes an RDD's shuffles and it's non-persisted ancestors.</div> |
| </td> |
| </tr> |
| <tr id="i6" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#coalesce-int-boolean-scala.Option-scala.math.Ordering-">coalesce</a></span>(int numPartitions, |
| boolean shuffle, |
| scala.Option<<a href="../../../../org/apache/spark/rdd/PartitionCoalescer.html" title="interface in org.apache.spark.rdd">PartitionCoalescer</a>> partitionCoalescer, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Return a new RDD that is reduced into <code>numPartitions</code> partitions.</div> |
| </td> |
| </tr> |
| <tr id="i7" class="rowColor"> |
| <td class="colFirst"><code>Object</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#collect--">collect</a></span>()</code> |
| <div class="block">Return an array that contains all of the elements in this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i8" class="altColor"> |
| <td class="colFirst"><code><U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#collect-scala.PartialFunction-scala.reflect.ClassTag-">collect</a></span>(scala.PartialFunction<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U> f, |
| scala.reflect.ClassTag<U> evidence$30)</code> |
| <div class="block">Return an RDD that contains all matching values by applying <code>f</code>.</div> |
| </td> |
| </tr> |
| <tr id="i9" class="rowColor"> |
| <td class="colFirst"><code>abstract scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#compute-org.apache.spark.Partition-org.apache.spark.TaskContext-">compute</a></span>(<a href="../../../../org/apache/spark/Partition.html" title="interface in org.apache.spark">Partition</a> split, |
| <a href="../../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a> context)</code> |
| <div class="block">:: DeveloperApi :: |
| Implemented by subclasses to compute a given partition.</div> |
| </td> |
| </tr> |
| <tr id="i10" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#context--">context</a></span>()</code> |
| <div class="block">The <a href="../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark"><code>SparkContext</code></a> that this RDD was created on.</div> |
| </td> |
| </tr> |
| <tr id="i11" class="rowColor"> |
| <td class="colFirst"><code>long</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#count--">count</a></span>()</code> |
| <div class="block">Return the number of elements in the RDD.</div> |
| </td> |
| </tr> |
| <tr id="i12" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a><<a href="../../../../org/apache/spark/partial/BoundedDouble.html" title="class in org.apache.spark.partial">BoundedDouble</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#countApprox-long-double-">countApprox</a></span>(long timeout, |
| double confidence)</code> |
| <div class="block">Approximate version of count() that returns a potentially incomplete result |
| within a timeout, even if not all tasks have finished.</div> |
| </td> |
| </tr> |
| <tr id="i13" class="rowColor"> |
| <td class="colFirst"><code>long</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#countApproxDistinct-double-">countApproxDistinct</a></span>(double relativeSD)</code> |
| <div class="block">Return approximate number of distinct elements in the RDD.</div> |
| </td> |
| </tr> |
| <tr id="i14" class="altColor"> |
| <td class="colFirst"><code>long</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#countApproxDistinct-int-int-">countApproxDistinct</a></span>(int p, |
| int sp)</code> |
| <div class="block">Return approximate number of distinct elements in the RDD.</div> |
| </td> |
| </tr> |
| <tr id="i15" class="rowColor"> |
| <td class="colFirst"><code>scala.collection.Map<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,Object></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#countByValue-scala.math.Ordering-">countByValue</a></span>(scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Return the count of each unique value in this RDD as a local map of (value, count) pairs.</div> |
| </td> |
| </tr> |
| <tr id="i16" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a><scala.collection.Map<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/partial/BoundedDouble.html" title="class in org.apache.spark.partial">BoundedDouble</a>>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#countByValueApprox-long-double-scala.math.Ordering-">countByValueApprox</a></span>(long timeout, |
| double confidence, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Approximate version of countByValue().</div> |
| </td> |
| </tr> |
| <tr id="i17" class="rowColor"> |
| <td class="colFirst"><code>scala.collection.Seq<<a href="../../../../org/apache/spark/Dependency.html" title="class in org.apache.spark">Dependency</a><?>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#dependencies--">dependencies</a></span>()</code> |
| <div class="block">Get the list of dependencies of this RDD, taking into account whether the |
| RDD is checkpointed or not.</div> |
| </td> |
| </tr> |
| <tr id="i18" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#distinct--">distinct</a></span>()</code> |
| <div class="block">Return a new RDD containing the distinct elements in this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i19" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#distinct-int-scala.math.Ordering-">distinct</a></span>(int numPartitions, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Return a new RDD containing the distinct elements in this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i20" class="altColor"> |
| <td class="colFirst"><code>static <a href="../../../../org/apache/spark/rdd/DoubleRDDFunctions.html" title="class in org.apache.spark.rdd">DoubleRDDFunctions</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#doubleRDDToDoubleRDDFunctions-org.apache.spark.rdd.RDD-">doubleRDDToDoubleRDDFunctions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><Object> rdd)</code> </td> |
| </tr> |
| <tr id="i21" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#filter-scala.Function1-">filter</a></span>(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,Object> f)</code> |
| <div class="block">Return a new RDD containing only the elements that satisfy a predicate.</div> |
| </td> |
| </tr> |
| <tr id="i22" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#first--">first</a></span>()</code> |
| <div class="block">Return the first element in this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i23" class="rowColor"> |
| <td class="colFirst"><code><U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#flatMap-scala.Function1-scala.reflect.ClassTag-">flatMap</a></span>(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,scala.collection.TraversableOnce<U>> f, |
| scala.reflect.ClassTag<U> evidence$4)</code> |
| <div class="block">Return a new RDD by first applying a function to all elements of this |
| RDD, and then flattening the results.</div> |
| </td> |
| </tr> |
| <tr id="i24" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#fold-T-scala.Function2-">fold</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a> zeroValue, |
| scala.Function2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> op)</code> |
| <div class="block">Aggregate the elements of each partition, and then the results for all the partitions, using a |
| given associative function and a neutral "zero value".</div> |
| </td> |
| </tr> |
| <tr id="i25" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#foreach-scala.Function1-">foreach</a></span>(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,scala.runtime.BoxedUnit> f)</code> |
| <div class="block">Applies a function f to all elements of this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i26" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#foreachPartition-scala.Function1-">foreachPartition</a></span>(scala.Function1<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.runtime.BoxedUnit> f)</code> |
| <div class="block">Applies a function f to each partition of this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i27" class="rowColor"> |
| <td class="colFirst"><code>scala.Option<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#getCheckpointFile--">getCheckpointFile</a></span>()</code> |
| <div class="block">Gets the name of the directory to which this RDD was checkpointed.</div> |
| </td> |
| </tr> |
| <tr id="i28" class="altColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#getNumPartitions--">getNumPartitions</a></span>()</code> |
| <div class="block">Returns the number of partitions of this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i29" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/resource/ResourceProfile.html" title="class in org.apache.spark.resource">ResourceProfile</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#getResourceProfile--">getResourceProfile</a></span>()</code> |
| <div class="block">Get the ResourceProfile specified with this RDD or null if it wasn't specified.</div> |
| </td> |
| </tr> |
| <tr id="i30" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/storage/StorageLevel.html" title="class in org.apache.spark.storage">StorageLevel</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#getStorageLevel--">getStorageLevel</a></span>()</code> |
| <div class="block">Get the RDD's current storage level, or StorageLevel.NONE if none is set.</div> |
| </td> |
| </tr> |
| <tr id="i31" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><Object></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#glom--">glom</a></span>()</code> |
| <div class="block">Return an RDD created by coalescing all elements within each partition into an array.</div> |
| </td> |
| </tr> |
| <tr id="i32" class="altColor"> |
| <td class="colFirst"><code><K> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#groupBy-scala.Function1-scala.reflect.ClassTag-">groupBy</a></span>(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,K> f, |
| scala.reflect.ClassTag<K> kt)</code> |
| <div class="block">Return an RDD of grouped items.</div> |
| </td> |
| </tr> |
| <tr id="i33" class="rowColor"> |
| <td class="colFirst"><code><K> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#groupBy-scala.Function1-int-scala.reflect.ClassTag-">groupBy</a></span>(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,K> f, |
| int numPartitions, |
| scala.reflect.ClassTag<K> kt)</code> |
| <div class="block">Return an RDD of grouped elements.</div> |
| </td> |
| </tr> |
| <tr id="i34" class="altColor"> |
| <td class="colFirst"><code><K> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#groupBy-scala.Function1-org.apache.spark.Partitioner-scala.reflect.ClassTag-scala.math.Ordering-">groupBy</a></span>(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,K> f, |
| <a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> p, |
| scala.reflect.ClassTag<K> kt, |
| scala.math.Ordering<K> ord)</code> |
| <div class="block">Return an RDD of grouped items.</div> |
| </td> |
| </tr> |
| <tr id="i35" class="rowColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#id--">id</a></span>()</code> |
| <div class="block">A unique ID for this RDD (within its SparkContext).</div> |
| </td> |
| </tr> |
| <tr id="i36" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#intersection-org.apache.spark.rdd.RDD-">intersection</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other)</code> |
| <div class="block">Return the intersection of this RDD and another one.</div> |
| </td> |
| </tr> |
| <tr id="i37" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#intersection-org.apache.spark.rdd.RDD-int-">intersection</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other, |
| int numPartitions)</code> |
| <div class="block">Return the intersection of this RDD and another one.</div> |
| </td> |
| </tr> |
| <tr id="i38" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#intersection-org.apache.spark.rdd.RDD-org.apache.spark.Partitioner-scala.math.Ordering-">intersection</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other, |
| <a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Return the intersection of this RDD and another one.</div> |
| </td> |
| </tr> |
| <tr id="i39" class="rowColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#isCheckpointed--">isCheckpointed</a></span>()</code> |
| <div class="block">Return whether this RDD is checkpointed and materialized, either reliably or locally.</div> |
| </td> |
| </tr> |
| <tr id="i40" class="altColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#isEmpty--">isEmpty</a></span>()</code> </td> |
| </tr> |
| <tr id="i41" class="rowColor"> |
| <td class="colFirst"><code>scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#iterator-org.apache.spark.Partition-org.apache.spark.TaskContext-">iterator</a></span>(<a href="../../../../org/apache/spark/Partition.html" title="interface in org.apache.spark">Partition</a> split, |
| <a href="../../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a> context)</code> |
| <div class="block">Internal method to this RDD; will read from cache if applicable, or otherwise compute it.</div> |
| </td> |
| </tr> |
| <tr id="i42" class="altColor"> |
| <td class="colFirst"><code><K> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#keyBy-scala.Function1-">keyBy</a></span>(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,K> f)</code> |
| <div class="block">Creates tuples of the elements in this RDD by applying <code>f</code>.</div> |
| </td> |
| </tr> |
| <tr id="i43" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#localCheckpoint--">localCheckpoint</a></span>()</code> |
| <div class="block">Mark this RDD for local checkpointing using Spark's existing caching layer.</div> |
| </td> |
| </tr> |
| <tr id="i44" class="altColor"> |
| <td class="colFirst"><code><U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#map-scala.Function1-scala.reflect.ClassTag-">map</a></span>(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U> f, |
| scala.reflect.ClassTag<U> evidence$3)</code> |
| <div class="block">Return a new RDD by applying a function to all elements of this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i45" class="rowColor"> |
| <td class="colFirst"><code><U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#mapPartitions-scala.Function1-boolean-scala.reflect.ClassTag-">mapPartitions</a></span>(scala.Function1<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<U>> f, |
| boolean preservesPartitioning, |
| scala.reflect.ClassTag<U> evidence$6)</code> |
| <div class="block">Return a new RDD by applying a function to each partition of this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i46" class="altColor"> |
| <td class="colFirst"><code><U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#mapPartitionsWithIndex-scala.Function2-boolean-scala.reflect.ClassTag-">mapPartitionsWithIndex</a></span>(scala.Function2<Object,scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<U>> f, |
| boolean preservesPartitioning, |
| scala.reflect.ClassTag<U> evidence$9)</code> |
| <div class="block">Return a new RDD by applying a function to each partition of this RDD, while tracking the index |
| of the original partition.</div> |
| </td> |
| </tr> |
| <tr id="i47" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#max-scala.math.Ordering-">max</a></span>(scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Returns the max of this RDD as defined by the implicit Ordering[T].</div> |
| </td> |
| </tr> |
| <tr id="i48" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#min-scala.math.Ordering-">min</a></span>(scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Returns the min of this RDD as defined by the implicit Ordering[T].</div> |
| </td> |
| </tr> |
| <tr id="i49" class="rowColor"> |
| <td class="colFirst"><code>String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#name--">name</a></span>()</code> |
<div class="block">A friendly name for this RDD.</div>
| </td> |
| </tr> |
| <tr id="i50" class="altColor"> |
| <td class="colFirst"><code>static <T> <a href="../../../../org/apache/spark/rdd/DoubleRDDFunctions.html" title="class in org.apache.spark.rdd">DoubleRDDFunctions</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#numericRDDToDoubleRDDFunctions-org.apache.spark.rdd.RDD-scala.math.Numeric-">numericRDDToDoubleRDDFunctions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.math.Numeric<T> num)</code> </td> |
| </tr> |
| <tr id="i51" class="rowColor"> |
| <td class="colFirst"><code>scala.Option<<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#partitioner--">partitioner</a></span>()</code> |
| <div class="block">Optionally overridden by subclasses to specify how they are partitioned.</div> |
| </td> |
| </tr> |
| <tr id="i52" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/Partition.html" title="interface in org.apache.spark">Partition</a>[]</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#partitions--">partitions</a></span>()</code> |
| <div class="block">Get the array of partitions of this RDD, taking into account whether the |
| RDD is checkpointed or not.</div> |
| </td> |
| </tr> |
| <tr id="i53" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#persist--">persist</a></span>()</code> |
| <div class="block">Persist this RDD with the default storage level (<code>MEMORY_ONLY</code>).</div> |
| </td> |
| </tr> |
| <tr id="i54" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#persist-org.apache.spark.storage.StorageLevel-">persist</a></span>(<a href="../../../../org/apache/spark/storage/StorageLevel.html" title="class in org.apache.spark.storage">StorageLevel</a> newLevel)</code> |
| <div class="block">Set this RDD's storage level to persist its values across operations after the first time |
| it is computed.</div> |
| </td> |
| </tr> |
| <tr id="i55" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#pipe-scala.collection.Seq-scala.collection.Map-scala.Function1-scala.Function2-boolean-int-java.lang.String-">pipe</a></span>(scala.collection.Seq<String> command, |
| scala.collection.Map<String,String> env, |
| scala.Function1<scala.Function1<String,scala.runtime.BoxedUnit>,scala.runtime.BoxedUnit> printPipeContext, |
| scala.Function2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,scala.Function1<String,scala.runtime.BoxedUnit>,scala.runtime.BoxedUnit> printRDDElement, |
| boolean separateWorkingDir, |
| int bufferSize, |
| String encoding)</code> |
| <div class="block">Return an RDD created by piping elements to a forked external process.</div> |
| </td> |
| </tr> |
| <tr id="i56" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#pipe-java.lang.String-">pipe</a></span>(String command)</code> |
| <div class="block">Return an RDD created by piping elements to a forked external process.</div> |
| </td> |
| </tr> |
| <tr id="i57" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#pipe-java.lang.String-scala.collection.Map-">pipe</a></span>(String command, |
| scala.collection.Map<String,String> env)</code> |
| <div class="block">Return an RDD created by piping elements to a forked external process.</div> |
| </td> |
| </tr> |
| <tr id="i58" class="altColor"> |
| <td class="colFirst"><code>scala.collection.Seq<String></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#preferredLocations-org.apache.spark.Partition-">preferredLocations</a></span>(<a href="../../../../org/apache/spark/Partition.html" title="interface in org.apache.spark">Partition</a> split)</code> |
| <div class="block">Get the preferred locations of a partition, taking into account whether the |
| RDD is checkpointed.</div> |
| </td> |
| </tr> |
| <tr id="i59" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>[]</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#randomSplit-double:A-long-">randomSplit</a></span>(double[] weights, |
| long seed)</code> |
| <div class="block">Randomly splits this RDD with the provided weights.</div> |
| </td> |
| </tr> |
| <tr id="i60" class="altColor"> |
| <td class="colFirst"><code>static <T> <a href="../../../../org/apache/spark/rdd/AsyncRDDActions.html" title="class in org.apache.spark.rdd">AsyncRDDActions</a><T></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#rddToAsyncRDDActions-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-">rddToAsyncRDDActions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.reflect.ClassTag<T> evidence$36)</code> </td> |
| </tr> |
| <tr id="i61" class="rowColor"> |
| <td class="colFirst"><code>static <K,V> <a href="../../../../org/apache/spark/rdd/OrderedRDDFunctions.html" title="class in org.apache.spark.rdd">OrderedRDDFunctions</a><K,V,scala.Tuple2<K,V>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#rddToOrderedRDDFunctions-org.apache.spark.rdd.RDD-scala.math.Ordering-scala.reflect.ClassTag-scala.reflect.ClassTag-">rddToOrderedRDDFunctions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> rdd, |
| scala.math.Ordering<K> evidence$37, |
| scala.reflect.ClassTag<K> evidence$38, |
| scala.reflect.ClassTag<V> evidence$39)</code> </td> |
| </tr> |
| <tr id="i62" class="altColor"> |
| <td class="colFirst"><code>static <K,V> <a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="class in org.apache.spark.rdd">PairRDDFunctions</a><K,V></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#rddToPairRDDFunctions-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.math.Ordering-">rddToPairRDDFunctions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> rdd, |
| scala.reflect.ClassTag<K> kt, |
| scala.reflect.ClassTag<V> vt, |
| scala.math.Ordering<K> ord)</code> </td> |
| </tr> |
| <tr id="i63" class="rowColor"> |
| <td class="colFirst"><code>static <K,V> <a href="../../../../org/apache/spark/rdd/SequenceFileRDDFunctions.html" title="class in org.apache.spark.rdd">SequenceFileRDDFunctions</a><K,V></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#rddToSequenceFileRDDFunctions-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-scala.reflect.ClassTag---">rddToSequenceFileRDDFunctions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> rdd, |
| scala.reflect.ClassTag<K> kt, |
| scala.reflect.ClassTag<V> vt, |
| <any> keyWritableFactory, |
| <any> valueWritableFactory)</code> </td> |
| </tr> |
| <tr id="i64" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#reduce-scala.Function2-">reduce</a></span>(scala.Function2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> f)</code> |
| <div class="block">Reduces the elements of this RDD using the specified commutative and |
| associative binary operator.</div> |
| </td> |
| </tr> |
| <tr id="i65" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#repartition-int-scala.math.Ordering-">repartition</a></span>(int numPartitions, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Return a new RDD that has exactly numPartitions partitions.</div> |
| </td> |
| </tr> |
| <tr id="i66" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#sample-boolean-double-long-">sample</a></span>(boolean withReplacement, |
| double fraction, |
| long seed)</code> |
| <div class="block">Return a sampled subset of this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i67" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#saveAsObjectFile-java.lang.String-">saveAsObjectFile</a></span>(String path)</code> |
| <div class="block">Save this RDD as a SequenceFile of serialized objects.</div> |
| </td> |
| </tr> |
| <tr id="i68" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#saveAsTextFile-java.lang.String-">saveAsTextFile</a></span>(String path)</code> |
| <div class="block">Save this RDD as a text file, using string representations of elements.</div> |
| </td> |
| </tr> |
| <tr id="i69" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#saveAsTextFile-java.lang.String-java.lang.Class-">saveAsTextFile</a></span>(String path, |
| Class<? extends org.apache.hadoop.io.compress.CompressionCodec> codec)</code> |
| <div class="block">Save this RDD as a compressed text file, using string representations of elements.</div> |
| </td> |
| </tr> |
| <tr id="i70" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#setName-java.lang.String-">setName</a></span>(String _name)</code> |
<div class="block">Assign a name to this RDD.</div>
| </td> |
| </tr> |
| <tr id="i71" class="rowColor"> |
| <td class="colFirst"><code><K> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#sortBy-scala.Function1-boolean-int-scala.math.Ordering-scala.reflect.ClassTag-">sortBy</a></span>(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,K> f, |
| boolean ascending, |
| int numPartitions, |
| scala.math.Ordering<K> ord, |
| scala.reflect.ClassTag<K> ctag)</code> |
| <div class="block">Return this RDD sorted by the given key function.</div> |
| </td> |
| </tr> |
| <tr id="i72" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#sparkContext--">sparkContext</a></span>()</code> |
| <div class="block">The SparkContext that created this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i73" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#subtract-org.apache.spark.rdd.RDD-">subtract</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other)</code> |
| <div class="block">Return an RDD with the elements from <code>this</code> that are not in <code>other</code>.</div> |
| </td> |
| </tr> |
| <tr id="i74" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#subtract-org.apache.spark.rdd.RDD-int-">subtract</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other, |
| int numPartitions)</code> |
| <div class="block">Return an RDD with the elements from <code>this</code> that are not in <code>other</code>.</div> |
| </td> |
| </tr> |
| <tr id="i75" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#subtract-org.apache.spark.rdd.RDD-org.apache.spark.Partitioner-scala.math.Ordering-">subtract</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other, |
| <a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> p, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Return an RDD with the elements from <code>this</code> that are not in <code>other</code>.</div> |
| </td> |
| </tr> |
| <tr id="i76" class="altColor"> |
| <td class="colFirst"><code>Object</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#take-int-">take</a></span>(int num)</code> |
| <div class="block">Take the first num elements of the RDD.</div> |
| </td> |
| </tr> |
| <tr id="i77" class="rowColor"> |
| <td class="colFirst"><code>Object</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#takeOrdered-int-scala.math.Ordering-">takeOrdered</a></span>(int num, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Returns the first k (smallest) elements from this RDD as defined by the specified |
| implicit Ordering[T] and maintains the ordering.</div> |
| </td> |
| </tr> |
| <tr id="i78" class="altColor"> |
| <td class="colFirst"><code>Object</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#takeSample-boolean-int-long-">takeSample</a></span>(boolean withReplacement, |
| int num, |
| long seed)</code> |
<div class="block">Return a fixed-size sampled subset of this RDD in an array.</div>
| </td> |
| </tr> |
| <tr id="i79" class="rowColor"> |
| <td class="colFirst"><code>String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#toDebugString--">toDebugString</a></span>()</code> |
| <div class="block">A description of this RDD and its recursive dependencies for debugging.</div> |
| </td> |
| </tr> |
| <tr id="i80" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#toJavaRDD--">toJavaRDD</a></span>()</code> </td> |
| </tr> |
| <tr id="i81" class="rowColor"> |
| <td class="colFirst"><code>scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#toLocalIterator--">toLocalIterator</a></span>()</code> |
| <div class="block">Return an iterator that contains all of the elements in this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i82" class="altColor"> |
| <td class="colFirst"><code>Object</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#top-int-scala.math.Ordering-">top</a></span>(int num, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</code> |
| <div class="block">Returns the top k (largest) elements from this RDD as defined by the specified |
| implicit Ordering[T] and maintains the ordering.</div> |
| </td> |
| </tr> |
| <tr id="i83" class="rowColor"> |
| <td class="colFirst"><code>String</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#toString--">toString</a></span>()</code> </td> |
| </tr> |
| <tr id="i84" class="altColor"> |
| <td class="colFirst"><code><U> U</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#treeAggregate-U-scala.Function2-scala.Function2-int-boolean-scala.reflect.ClassTag-">treeAggregate</a></span>(U zeroValue, |
| scala.Function2<U,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U> seqOp, |
| scala.Function2<U,U,U> combOp, |
| int depth, |
| boolean finalAggregateOnExecutor, |
| scala.reflect.ClassTag<U> evidence$33)</code> |
| <div class="block"><a href="../../../../org/apache/spark/rdd/RDD.html#treeAggregate-U-scala.Function2-scala.Function2-int-scala.reflect.ClassTag-"><code>treeAggregate(U, scala.Function2<U, T, U>, scala.Function2<U, U, U>, int, scala.reflect.ClassTag<U>)</code></a> with a parameter to do the final |
| aggregation on the executor</div> |
| </td> |
| </tr> |
| <tr id="i85" class="rowColor"> |
| <td class="colFirst"><code><U> U</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#treeAggregate-U-scala.Function2-scala.Function2-int-scala.reflect.ClassTag-">treeAggregate</a></span>(U zeroValue, |
| scala.Function2<U,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U> seqOp, |
| scala.Function2<U,U,U> combOp, |
| int depth, |
| scala.reflect.ClassTag<U> evidence$32)</code> |
| <div class="block">Aggregates the elements of this RDD in a multi-level tree pattern.</div> |
| </td> |
| </tr> |
| <tr id="i86" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#treeReduce-scala.Function2-int-">treeReduce</a></span>(scala.Function2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> f, |
| int depth)</code> |
| <div class="block">Reduces the elements of this RDD in a multi-level tree pattern.</div> |
| </td> |
| </tr> |
| <tr id="i87" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#union-org.apache.spark.rdd.RDD-">union</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other)</code> |
| <div class="block">Return the union of this RDD and another one.</div> |
| </td> |
| </tr> |
| <tr id="i88" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#unpersist-boolean-">unpersist</a></span>(boolean blocking)</code> |
| <div class="block">Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.</div> |
| </td> |
| </tr> |
| <tr id="i89" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#withResources-org.apache.spark.resource.ResourceProfile-">withResources</a></span>(<a href="../../../../org/apache/spark/resource/ResourceProfile.html" title="class in org.apache.spark.resource">ResourceProfile</a> rp)</code> |
| <div class="block">Specify a ResourceProfile to use when calculating this RDD.</div> |
| </td> |
| </tr> |
| <tr id="i90" class="altColor"> |
| <td class="colFirst"><code><U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#zip-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-">zip</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U> other, |
| scala.reflect.ClassTag<U> evidence$11)</code> |
| <div class="block">Zips this RDD with another one, returning key-value pairs with the first element in each RDD, |
| second element in each RDD, etc.</div> |
| </td> |
| </tr> |
| <tr id="i91" class="rowColor"> |
| <td class="colFirst"><code><B,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#zipPartitions-org.apache.spark.rdd.RDD-boolean-scala.Function2-scala.reflect.ClassTag-scala.reflect.ClassTag-">zipPartitions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| boolean preservesPartitioning, |
| scala.Function2<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$12, |
| scala.reflect.ClassTag<V> evidence$13)</code> |
| <div class="block">Zip this RDD's partitions with one (or more) RDD(s) and return a new RDD by |
| applying a function to the zipped partitions.</div> |
| </td> |
| </tr> |
| <tr id="i92" class="altColor"> |
| <td class="colFirst"><code><B,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#zipPartitions-org.apache.spark.rdd.RDD-scala.Function2-scala.reflect.ClassTag-scala.reflect.ClassTag-">zipPartitions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| scala.Function2<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$14, |
| scala.reflect.ClassTag<V> evidence$15)</code> </td> |
| </tr> |
| <tr id="i93" class="rowColor"> |
| <td class="colFirst"><code><B,C,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#zipPartitions-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-boolean-scala.Function3-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">zipPartitions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><C> rdd3, |
| boolean preservesPartitioning, |
| scala.Function3<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<C>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$16, |
| scala.reflect.ClassTag<C> evidence$17, |
| scala.reflect.ClassTag<V> evidence$18)</code> </td> |
| </tr> |
| <tr id="i94" class="altColor"> |
| <td class="colFirst"><code><B,C,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#zipPartitions-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-scala.Function3-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">zipPartitions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><C> rdd3, |
| scala.Function3<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<C>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$19, |
| scala.reflect.ClassTag<C> evidence$20, |
| scala.reflect.ClassTag<V> evidence$21)</code> </td> |
| </tr> |
| <tr id="i95" class="rowColor"> |
| <td class="colFirst"><code><B,C,D,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#zipPartitions-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-boolean-scala.Function4-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">zipPartitions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><C> rdd3, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><D> rdd4, |
| boolean preservesPartitioning, |
| scala.Function4<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<C>,scala.collection.Iterator<D>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$22, |
| scala.reflect.ClassTag<C> evidence$23, |
| scala.reflect.ClassTag<D> evidence$24, |
| scala.reflect.ClassTag<V> evidence$25)</code> </td> |
| </tr> |
| <tr id="i96" class="altColor"> |
| <td class="colFirst"><code><B,C,D,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#zipPartitions-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-scala.Function4-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-">zipPartitions</a></span>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><C> rdd3, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><D> rdd4, |
| scala.Function4<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<C>,scala.collection.Iterator<D>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$26, |
| scala.reflect.ClassTag<C> evidence$27, |
| scala.reflect.ClassTag<D> evidence$28, |
| scala.reflect.ClassTag<V> evidence$29)</code> </td> |
| </tr> |
| <tr id="i97" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,Object>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#zipWithIndex--">zipWithIndex</a></span>()</code> |
| <div class="block">Zips this RDD with its element indices.</div> |
| </td> |
| </tr> |
| <tr id="i98" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,Object>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/spark/rdd/RDD.html#zipWithUniqueId--">zipWithUniqueId</a></span>()</code> |
| <div class="block">Zips this RDD with generated unique Long ids.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class Object</h3> |
| <code>equals, getClass, hashCode, notify, notifyAll, wait, wait, wait</code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.org.apache.spark.internal.Logging"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from interface org.apache.spark.internal.Logging</h3> |
| <code>$init$, initializeForcefully, initializeLogIfNecessary, initializeLogIfNecessary, initializeLogIfNecessary$default$2, initLock, isTraceEnabled, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning, org$apache$spark$internal$Logging$$log__$eq, org$apache$spark$internal$Logging$$log_, uninitialize</code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ========= CONSTRUCTOR DETAIL ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.detail"> |
| <!-- --> |
| </a> |
| <h3>Constructor Detail</h3> |
| <a name="RDD-org.apache.spark.SparkContext-scala.collection.Seq-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>RDD</h4> |
| <pre>public RDD(<a href="../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> _sc, |
| scala.collection.Seq<<a href="../../../../org/apache/spark/Dependency.html" title="class in org.apache.spark">Dependency</a><?>> deps, |
| scala.reflect.ClassTag<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> evidence$1)</pre> |
| </li> |
| </ul> |
| <a name="RDD-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>RDD</h4> |
| <pre>public RDD(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><?> oneParent, |
| scala.reflect.ClassTag<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> evidence$2)</pre> |
| <div class="block">Construct an RDD with just a one-to-one dependency on one parent</div> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="rddToPairRDDFunctions-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>rddToPairRDDFunctions</h4> |
| <pre>public static <K,V> <a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="class in org.apache.spark.rdd">PairRDDFunctions</a><K,V> rddToPairRDDFunctions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> rdd, |
| scala.reflect.ClassTag<K> kt, |
| scala.reflect.ClassTag<V> vt, |
| scala.math.Ordering<K> ord)</pre> |
| </li> |
| </ul> |
| <a name="rddToAsyncRDDActions-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>rddToAsyncRDDActions</h4> |
| <pre>public static <T> <a href="../../../../org/apache/spark/rdd/AsyncRDDActions.html" title="class in org.apache.spark.rdd">AsyncRDDActions</a><T> rddToAsyncRDDActions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.reflect.ClassTag<T> evidence$36)</pre> |
| </li> |
| </ul> |
| <a name="rddToSequenceFileRDDFunctions-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-scala.reflect.ClassTag---"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>rddToSequenceFileRDDFunctions</h4> |
| <pre>public static <K,V> <a href="../../../../org/apache/spark/rdd/SequenceFileRDDFunctions.html" title="class in org.apache.spark.rdd">SequenceFileRDDFunctions</a><K,V> rddToSequenceFileRDDFunctions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> rdd, |
| scala.reflect.ClassTag<K> kt, |
| scala.reflect.ClassTag<V> vt, |
| <any> keyWritableFactory, |
| <any> valueWritableFactory)</pre> |
| </li> |
| </ul> |
| <a name="rddToOrderedRDDFunctions-org.apache.spark.rdd.RDD-scala.math.Ordering-scala.reflect.ClassTag-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>rddToOrderedRDDFunctions</h4> |
| <pre>public static <K,V> <a href="../../../../org/apache/spark/rdd/OrderedRDDFunctions.html" title="class in org.apache.spark.rdd">OrderedRDDFunctions</a><K,V,scala.Tuple2<K,V>> rddToOrderedRDDFunctions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,V>> rdd, |
| scala.math.Ordering<K> evidence$37, |
| scala.reflect.ClassTag<K> evidence$38, |
| scala.reflect.ClassTag<V> evidence$39)</pre> |
| </li> |
| </ul> |
| <a name="doubleRDDToDoubleRDDFunctions-org.apache.spark.rdd.RDD-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>doubleRDDToDoubleRDDFunctions</h4> |
| <pre>public static <a href="../../../../org/apache/spark/rdd/DoubleRDDFunctions.html" title="class in org.apache.spark.rdd">DoubleRDDFunctions</a> doubleRDDToDoubleRDDFunctions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><Object> rdd)</pre> |
| </li> |
| </ul> |
| <a name="numericRDDToDoubleRDDFunctions-org.apache.spark.rdd.RDD-scala.math.Numeric-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>numericRDDToDoubleRDDFunctions</h4> |
| <pre>public static <T> <a href="../../../../org/apache/spark/rdd/DoubleRDDFunctions.html" title="class in org.apache.spark.rdd">DoubleRDDFunctions</a> numericRDDToDoubleRDDFunctions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><T> rdd, |
| scala.math.Numeric<T> num)</pre> |
| </li> |
| </ul> |
| <a name="compute-org.apache.spark.Partition-org.apache.spark.TaskContext-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>compute</h4> |
| <pre>public abstract scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> compute(<a href="../../../../org/apache/spark/Partition.html" title="interface in org.apache.spark">Partition</a> split, |
| <a href="../../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a> context)</pre> |
| <div class="block">:: DeveloperApi :: |
| Implemented by subclasses to compute a given partition.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>split</code> - (undocumented)</dd> |
| <dd><code>context</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="partitioner--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>partitioner</h4> |
| <pre>public scala.Option<<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a>> partitioner()</pre> |
| <div class="block">Optionally overridden by subclasses to specify how they are partitioned.</div> |
| </li> |
| </ul> |
| <a name="sparkContext--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>sparkContext</h4> |
| <pre>public <a href="../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> sparkContext()</pre> |
| <div class="block">The SparkContext that created this RDD.</div> |
| </li> |
| </ul> |
| <a name="id--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>id</h4> |
| <pre>public int id()</pre> |
| <div class="block">A unique ID for this RDD (within its SparkContext).</div> |
| </li> |
| </ul> |
| <a name="name--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>name</h4> |
| <pre>public String name()</pre> |
| <div class="block">A friendly name for this RDD</div> |
| </li> |
| </ul> |
| <a name="setName-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setName</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> setName(String _name)</pre> |
| <div class="block">Assign a name to this RDD</div> |
| </li> |
| </ul> |
| <a name="persist-org.apache.spark.storage.StorageLevel-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>persist</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> persist(<a href="../../../../org/apache/spark/storage/StorageLevel.html" title="class in org.apache.spark.storage">StorageLevel</a> newLevel)</pre> |
| <div class="block">Set this RDD's storage level to persist its values across operations after the first time |
| it is computed. This can only be used to assign a new storage level if the RDD does not |
| have a storage level set yet. Local checkpointing is an exception.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>newLevel</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="persist--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>persist</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> persist()</pre> |
| <div class="block">Persist this RDD with the default storage level (<code>MEMORY_ONLY</code>).</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="cache--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>cache</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> cache()</pre> |
| <div class="block">Persist this RDD with the default storage level (<code>MEMORY_ONLY</code>).</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="unpersist-boolean-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>unpersist</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> unpersist(boolean blocking)</pre> |
| <div class="block">Mark the RDD as non-persistent, and remove all blocks for it from memory and disk. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>blocking</code> - Whether to block until all blocks are deleted (default: false)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>This RDD.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="getStorageLevel--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getStorageLevel</h4> |
| <pre>public <a href="../../../../org/apache/spark/storage/StorageLevel.html" title="class in org.apache.spark.storage">StorageLevel</a> getStorageLevel()</pre> |
| <div class="block">Get the RDD's current storage level, or StorageLevel.NONE if none is set.</div> |
| </li> |
| </ul> |
| <a name="dependencies--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>dependencies</h4> |
| <pre>public final scala.collection.Seq<<a href="../../../../org/apache/spark/Dependency.html" title="class in org.apache.spark">Dependency</a><?>> dependencies()</pre> |
| <div class="block">Get the list of dependencies of this RDD, taking into account whether the |
| RDD is checkpointed or not.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="partitions--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>partitions</h4> |
| <pre>public final <a href="../../../../org/apache/spark/Partition.html" title="interface in org.apache.spark">Partition</a>[] partitions()</pre> |
| <div class="block">Get the array of partitions of this RDD, taking into account whether the |
| RDD is checkpointed or not.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="getNumPartitions--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getNumPartitions</h4> |
| <pre>public final int getNumPartitions()</pre> |
| <div class="block">Returns the number of partitions of this RDD.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="preferredLocations-org.apache.spark.Partition-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>preferredLocations</h4> |
| <pre>public final scala.collection.Seq<String> preferredLocations(<a href="../../../../org/apache/spark/Partition.html" title="interface in org.apache.spark">Partition</a> split)</pre> |
| <div class="block">Get the preferred locations of a partition, taking into account whether the |
| RDD is checkpointed.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>split</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="iterator-org.apache.spark.Partition-org.apache.spark.TaskContext-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>iterator</h4> |
| <pre>public final scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> iterator(<a href="../../../../org/apache/spark/Partition.html" title="interface in org.apache.spark">Partition</a> split, |
| <a href="../../../../org/apache/spark/TaskContext.html" title="class in org.apache.spark">TaskContext</a> context)</pre> |
<div class="block">Internal method to this RDD; will read from cache if applicable, or otherwise compute it.
This should <i>not</i> be called by users directly, but is available for implementers of custom
subclasses of RDD.</div>
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>split</code> - (undocumented)</dd> |
| <dd><code>context</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="map-scala.Function1-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>map</h4> |
| <pre>public <U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U> map(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U> f, |
| scala.reflect.ClassTag<U> evidence$3)</pre> |
| <div class="block">Return a new RDD by applying a function to all elements of this RDD.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dd><code>evidence$3</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="flatMap-scala.Function1-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>flatMap</h4> |
| <pre>public <U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U> flatMap(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,scala.collection.TraversableOnce<U>> f, |
| scala.reflect.ClassTag<U> evidence$4)</pre> |
| <div class="block">Return a new RDD by first applying a function to all elements of this |
| RDD, and then flattening the results.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dd><code>evidence$4</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="filter-scala.Function1-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>filter</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> filter(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,Object> f)</pre> |
| <div class="block">Return a new RDD containing only the elements that satisfy a predicate.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="distinct-int-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>distinct</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> distinct(int numPartitions, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Return a new RDD containing the distinct elements in this RDD.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>numPartitions</code> - (undocumented)</dd> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="distinct--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>distinct</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> distinct()</pre> |
| <div class="block">Return a new RDD containing the distinct elements in this RDD.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="repartition-int-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>repartition</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> repartition(int numPartitions, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Return a new RDD that has exactly numPartitions partitions. |
| <p> |
| Can increase or decrease the level of parallelism in this RDD. Internally, this uses |
| a shuffle to redistribute data. |
| <p> |
| If you are decreasing the number of partitions in this RDD, consider using <code>coalesce</code>, |
| which can avoid performing a shuffle.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>numPartitions</code> - (undocumented)</dd> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="coalesce-int-boolean-scala.Option-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>coalesce</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> coalesce(int numPartitions, |
| boolean shuffle, |
| scala.Option<<a href="../../../../org/apache/spark/rdd/PartitionCoalescer.html" title="interface in org.apache.spark.rdd">PartitionCoalescer</a>> partitionCoalescer, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Return a new RDD that is reduced into <code>numPartitions</code> partitions. |
| <p> |
| This results in a narrow dependency, e.g. if you go from 1000 partitions |
| to 100 partitions, there will not be a shuffle, instead each of the 100 |
| new partitions will claim 10 of the current partitions. If a larger number |
| of partitions is requested, it will stay at the current number of partitions. |
| <p> |
| However, if you're doing a drastic coalesce, e.g. to numPartitions = 1, |
| this may result in your computation taking place on fewer nodes than |
| you like (e.g. one node in the case of numPartitions = 1). To avoid this, |
| you can pass shuffle = true. This will add a shuffle step, but means the |
| current upstream partitions will be executed in parallel (per whatever |
| the current partitioning is). |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>numPartitions</code> - (undocumented)</dd> |
| <dd><code>shuffle</code> - (undocumented)</dd> |
| <dd><code>partitionCoalescer</code> - (undocumented)</dd> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>With shuffle = true, you can actually coalesce to a larger number |
| of partitions. This is useful if you have a small number of partitions, |
| say 100, potentially with a few partitions being abnormally large. Calling |
| coalesce(1000, shuffle = true) will result in 1000 partitions with the |
| data distributed using a hash partitioner. The optional partition coalescer |
| passed in must be serializable.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="sample-boolean-double-long-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>sample</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> sample(boolean withReplacement, |
| double fraction, |
| long seed)</pre> |
| <div class="block">Return a sampled subset of this RDD. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>withReplacement</code> - can elements be sampled multiple times (replaced when sampled out)</dd> |
<dd><code>fraction</code> - expected size of the sample as a fraction of this RDD's size
without replacement: probability that each element is chosen; fraction must be in [0, 1]
with replacement: expected number of times each element is chosen; fraction must be greater
than or equal to 0</dd>
| <dd><code>seed</code> - seed for the random number generator |
| <p></dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This is NOT guaranteed to provide exactly the fraction of the count |
| of the given <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd"><code>RDD</code></a>.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="randomSplit-double:A-long-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>randomSplit</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>[] randomSplit(double[] weights, |
| long seed)</pre> |
| <div class="block">Randomly splits this RDD with the provided weights. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>weights</code> - weights for splits, will be normalized if they don't sum to 1</dd> |
| <dd><code>seed</code> - random seed |
| <p></dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>split RDDs in an array</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="takeSample-boolean-int-long-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>takeSample</h4> |
| <pre>public Object takeSample(boolean withReplacement, |
| int num, |
| long seed)</pre> |
<div class="block">Return a fixed-size sampled subset of this RDD in an array.
<p></div>
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>withReplacement</code> - whether sampling is done with replacement</dd> |
| <dd><code>num</code> - size of the returned sample</dd> |
| <dd><code>seed</code> - seed for the random number generator</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>sample of specified size in an array |
| <p></dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>this method should only be used if the resulting array is expected to be small, as |
| all the data is loaded into the driver's memory.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="union-org.apache.spark.rdd.RDD-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>union</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> union(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other)</pre> |
| <div class="block">Return the union of this RDD and another one. Any identical elements will appear multiple |
| times (use <code>.distinct()</code> to eliminate them).</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>other</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="sortBy-scala.Function1-boolean-int-scala.math.Ordering-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>sortBy</h4> |
| <pre>public <K> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> sortBy(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,K> f, |
| boolean ascending, |
| int numPartitions, |
| scala.math.Ordering<K> ord, |
| scala.reflect.ClassTag<K> ctag)</pre> |
| <div class="block">Return this RDD sorted by the given key function.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dd><code>ascending</code> - (undocumented)</dd> |
| <dd><code>numPartitions</code> - (undocumented)</dd> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dd><code>ctag</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="intersection-org.apache.spark.rdd.RDD-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>intersection</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> intersection(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other)</pre> |
| <div class="block">Return the intersection of this RDD and another one. The output will not contain any duplicate |
| elements, even if the input RDDs did. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>other</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This method performs a shuffle internally.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="intersection-org.apache.spark.rdd.RDD-org.apache.spark.Partitioner-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>intersection</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> intersection(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other, |
| <a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Return the intersection of this RDD and another one. The output will not contain any duplicate |
| elements, even if the input RDDs did. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>partitioner</code> - Partitioner to use for the resulting RDD</dd> |
| <dd><code>other</code> - (undocumented)</dd> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This method performs a shuffle internally. |
| <p></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="intersection-org.apache.spark.rdd.RDD-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>intersection</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> intersection(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other, |
| int numPartitions)</pre> |
<div class="block">Return the intersection of this RDD and another one. The output will not contain any duplicate
elements, even if the input RDDs did. Performs a hash partition across the cluster.
<p></div>
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>numPartitions</code> - How many partitions to use in the resulting RDD</dd> |
| <dd><code>other</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This method performs a shuffle internally. |
| <p></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="glom--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>glom</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><Object> glom()</pre> |
| <div class="block">Return an RDD created by coalescing all elements within each partition into an array.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="cartesian-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>cartesian</h4> |
| <pre>public <U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U>> cartesian(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U> other, |
| scala.reflect.ClassTag<U> evidence$5)</pre> |
| <div class="block">Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of |
| elements (a, b) where a is in <code>this</code> and b is in <code>other</code>.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>other</code> - (undocumented)</dd> |
| <dd><code>evidence$5</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="groupBy-scala.Function1-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>groupBy</h4> |
| <pre>public <K> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>>> groupBy(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,K> f, |
| scala.reflect.ClassTag<K> kt)</pre> |
| <div class="block">Return an RDD of grouped items. Each group consists of a key and a sequence of elements |
| mapping to that key. The ordering of elements within each group is not guaranteed, and |
| may even differ each time the resulting RDD is evaluated. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dd><code>kt</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This operation may be very expensive. If you are grouping in order to perform an |
| aggregation (such as a sum or average) over each key, using <code>PairRDDFunctions.aggregateByKey</code> |
| or <code>PairRDDFunctions.reduceByKey</code> will provide much better performance.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="groupBy-scala.Function1-int-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>groupBy</h4> |
| <pre>public <K> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>>> groupBy(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,K> f, |
| int numPartitions, |
| scala.reflect.ClassTag<K> kt)</pre> |
| <div class="block">Return an RDD of grouped elements. Each group consists of a key and a sequence of elements |
| mapping to that key. The ordering of elements within each group is not guaranteed, and |
| may even differ each time the resulting RDD is evaluated. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dd><code>numPartitions</code> - (undocumented)</dd> |
| <dd><code>kt</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This operation may be very expensive. If you are grouping in order to perform an |
| aggregation (such as a sum or average) over each key, using <code>PairRDDFunctions.aggregateByKey</code> |
| or <code>PairRDDFunctions.reduceByKey</code> will provide much better performance.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="groupBy-scala.Function1-org.apache.spark.Partitioner-scala.reflect.ClassTag-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>groupBy</h4> |
| <pre>public <K> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<K,scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>>> groupBy(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,K> f, |
| <a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> p, |
| scala.reflect.ClassTag<K> kt, |
| scala.math.Ordering<K> ord)</pre> |
| <div class="block">Return an RDD of grouped items. Each group consists of a key and a sequence of elements |
| mapping to that key. The ordering of elements within each group is not guaranteed, and |
| may even differ each time the resulting RDD is evaluated. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dd><code>p</code> - (undocumented)</dd> |
| <dd><code>kt</code> - (undocumented)</dd> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This operation may be very expensive. If you are grouping in order to perform an |
| aggregation (such as a sum or average) over each key, using <code>PairRDDFunctions.aggregateByKey</code> |
| or <code>PairRDDFunctions.reduceByKey</code> will provide much better performance.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="pipe-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>pipe</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><String> pipe(String command)</pre> |
| <div class="block">Return an RDD created by piping elements to a forked external process.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>command</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="pipe-java.lang.String-scala.collection.Map-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>pipe</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><String> pipe(String command, |
| scala.collection.Map<String,String> env)</pre> |
| <div class="block">Return an RDD created by piping elements to a forked external process.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>command</code> - (undocumented)</dd> |
| <dd><code>env</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="pipe-scala.collection.Seq-scala.collection.Map-scala.Function1-scala.Function2-boolean-int-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>pipe</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><String> pipe(scala.collection.Seq<String> command, |
| scala.collection.Map<String,String> env, |
| scala.Function1<scala.Function1<String,scala.runtime.BoxedUnit>,scala.runtime.BoxedUnit> printPipeContext, |
| scala.Function2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,scala.Function1<String,scala.runtime.BoxedUnit>,scala.runtime.BoxedUnit> printRDDElement, |
| boolean separateWorkingDir, |
| int bufferSize, |
| String encoding)</pre> |
| <div class="block">Return an RDD created by piping elements to a forked external process. The resulting RDD |
| is computed by executing the given process once per partition. All elements |
| of each input partition are written to a process's stdin as lines of input separated |
| by a newline. The resulting partition consists of the process's stdout output, with |
| each line of stdout resulting in one element of the output partition. A process is invoked |
| even for empty partitions. |
| <p> |
| The print behavior can be customized by providing two functions. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>command</code> - command to run in forked process.</dd> |
| <dd><code>env</code> - environment variables to set.</dd> |
| <dd><code>printPipeContext</code> - Before piping elements, this function is called as an opportunity |
| to pipe context data. Print line function (like out.println) will be |
| passed as printPipeContext's parameter.</dd> |
| <dd><code>printRDDElement</code> - Use this function to customize how to pipe elements. This function |
| will be called with each RDD element as the 1st parameter, and the |
| print line function (like out.println()) as the 2nd parameter. |
An example of piping the RDD data of groupBy() in a streaming way,
instead of constructing a huge String to concatenate all the elements:
| <pre><code> |
| def printRDDElement(record:(String, Seq[String]), f:String=>Unit) = |
| for (e <- record._2) {f(e)} |
| </code></pre></dd> |
| <dd><code>separateWorkingDir</code> - Use separate working directories for each task.</dd> |
| <dd><code>bufferSize</code> - Buffer size for the stdin writer for the piped process.</dd> |
| <dd><code>encoding</code> - Char encoding used for interacting (via stdin, stdout and stderr) with |
| the piped process</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>the result RDD</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="mapPartitions-scala.Function1-boolean-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>mapPartitions</h4> |
| <pre>public <U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U> mapPartitions(scala.Function1<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<U>> f, |
| boolean preservesPartitioning, |
| scala.reflect.ClassTag<U> evidence$6)</pre> |
| <div class="block">Return a new RDD by applying a function to each partition of this RDD. |
| <p> |
| <code>preservesPartitioning</code> indicates whether the input function preserves the partitioner, which |
| should be <code>false</code> unless this is a pair RDD and the input function doesn't modify the keys.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dd><code>preservesPartitioning</code> - (undocumented)</dd> |
| <dd><code>evidence$6</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="mapPartitionsWithIndex-scala.Function2-boolean-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>mapPartitionsWithIndex</h4> |
| <pre>public <U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U> mapPartitionsWithIndex(scala.Function2<Object,scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<U>> f, |
| boolean preservesPartitioning, |
| scala.reflect.ClassTag<U> evidence$9)</pre> |
| <div class="block">Return a new RDD by applying a function to each partition of this RDD, while tracking the index |
| of the original partition. |
| <p> |
| <code>preservesPartitioning</code> indicates whether the input function preserves the partitioner, which |
| should be <code>false</code> unless this is a pair RDD and the input function doesn't modify the keys.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dd><code>preservesPartitioning</code> - (undocumented)</dd> |
| <dd><code>evidence$9</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="zip-org.apache.spark.rdd.RDD-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>zip</h4> |
| <pre>public <U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U>> zip(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U> other, |
| scala.reflect.ClassTag<U> evidence$11)</pre> |
| <div class="block">Zips this RDD with another one, returning key-value pairs with the first element in each RDD, |
| second element in each RDD, etc. Assumes that the two RDDs have the *same number of |
| partitions* and the *same number of elements in each partition* (e.g. one was made through |
| a map on the other).</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>other</code> - (undocumented)</dd> |
| <dd><code>evidence$11</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="zipPartitions-org.apache.spark.rdd.RDD-boolean-scala.Function2-scala.reflect.ClassTag-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>zipPartitions</h4> |
| <pre>public <B,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V> zipPartitions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| boolean preservesPartitioning, |
| scala.Function2<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$12, |
| scala.reflect.ClassTag<V> evidence$13)</pre> |
| <div class="block">Zip this RDD's partitions with one (or more) RDD(s) and return a new RDD by |
| applying a function to the zipped partitions. Assumes that all the RDDs have the |
| *same number of partitions*, but does *not* require them to have the same number |
| of elements in each partition.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rdd2</code> - (undocumented)</dd> |
| <dd><code>preservesPartitioning</code> - (undocumented)</dd> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dd><code>evidence$12</code> - (undocumented)</dd> |
| <dd><code>evidence$13</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="zipPartitions-org.apache.spark.rdd.RDD-scala.Function2-scala.reflect.ClassTag-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>zipPartitions</h4> |
| <pre>public <B,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V> zipPartitions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| scala.Function2<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$14, |
| scala.reflect.ClassTag<V> evidence$15)</pre> |
| </li> |
| </ul> |
| <a name="zipPartitions-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-boolean-scala.Function3-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>zipPartitions</h4> |
| <pre>public <B,C,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V> zipPartitions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><C> rdd3, |
| boolean preservesPartitioning, |
| scala.Function3<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<C>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$16, |
| scala.reflect.ClassTag<C> evidence$17, |
| scala.reflect.ClassTag<V> evidence$18)</pre> |
| </li> |
| </ul> |
| <a name="zipPartitions-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-scala.Function3-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>zipPartitions</h4> |
| <pre>public <B,C,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V> zipPartitions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><C> rdd3, |
| scala.Function3<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<C>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$19, |
| scala.reflect.ClassTag<C> evidence$20, |
| scala.reflect.ClassTag<V> evidence$21)</pre> |
| </li> |
| </ul> |
| <a name="zipPartitions-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-boolean-scala.Function4-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>zipPartitions</h4> |
| <pre>public <B,C,D,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V> zipPartitions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><C> rdd3, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><D> rdd4, |
| boolean preservesPartitioning, |
| scala.Function4<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<C>,scala.collection.Iterator<D>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$22, |
| scala.reflect.ClassTag<C> evidence$23, |
| scala.reflect.ClassTag<D> evidence$24, |
| scala.reflect.ClassTag<V> evidence$25)</pre> |
| </li> |
| </ul> |
| <a name="zipPartitions-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-org.apache.spark.rdd.RDD-scala.Function4-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>zipPartitions</h4> |
| <pre>public <B,C,D,V> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><V> zipPartitions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><B> rdd2, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><C> rdd3, |
| <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><D> rdd4, |
| scala.Function4<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.collection.Iterator<B>,scala.collection.Iterator<C>,scala.collection.Iterator<D>,scala.collection.Iterator<V>> f, |
| scala.reflect.ClassTag<B> evidence$26, |
| scala.reflect.ClassTag<C> evidence$27, |
| scala.reflect.ClassTag<D> evidence$28, |
| scala.reflect.ClassTag<V> evidence$29)</pre> |
| </li> |
| </ul> |
| <a name="foreach-scala.Function1-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>foreach</h4> |
| <pre>public void foreach(scala.Function1<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,scala.runtime.BoxedUnit> f)</pre> |
| <div class="block">Applies a function f to all elements of this RDD.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="foreachPartition-scala.Function1-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>foreachPartition</h4> |
| <pre>public void foreachPartition(scala.Function1<scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>>,scala.runtime.BoxedUnit> f)</pre> |
| <div class="block">Applies a function f to each partition of this RDD.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="collect--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>collect</h4> |
| <pre>public Object collect()</pre> |
| <div class="block">Return an array that contains all of the elements in this RDD. |
| <p></div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This method should only be used if the resulting array is expected to be small, as |
| all the data is loaded into the driver's memory.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="toLocalIterator--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>toLocalIterator</h4> |
| <pre>public scala.collection.Iterator<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> toLocalIterator()</pre> |
| <div class="block">Return an iterator that contains all of the elements in this RDD. |
| <p> |
| The iterator will consume as much memory as the largest partition in this RDD. |
| <p></div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
<dd>This results in multiple Spark jobs, and if the input RDD is the result
of a wide transformation (e.g. join with different partitioners), the input
RDD should be cached first to avoid recomputing it.</dd>
| </dl> |
| </li> |
| </ul> |
| <a name="collect-scala.PartialFunction-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>collect</h4> |
| <pre>public <U> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><U> collect(scala.PartialFunction<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U> f, |
| scala.reflect.ClassTag<U> evidence$30)</pre> |
| <div class="block">Return an RDD that contains all matching values by applying <code>f</code>.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dd><code>evidence$30</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="subtract-org.apache.spark.rdd.RDD-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>subtract</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> subtract(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other)</pre> |
| <div class="block">Return an RDD with the elements from <code>this</code> that are not in <code>other</code>. |
| <p> |
| Uses <code>this</code> partitioner/partition size, because even if <code>other</code> is huge, the resulting |
| RDD will be &lt;= us.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>other</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="subtract-org.apache.spark.rdd.RDD-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>subtract</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> subtract(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other, |
| int numPartitions)</pre> |
| <div class="block">Return an RDD with the elements from <code>this</code> that are not in <code>other</code>.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>other</code> - (undocumented)</dd> |
| <dd><code>numPartitions</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="subtract-org.apache.spark.rdd.RDD-org.apache.spark.Partitioner-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>subtract</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> subtract(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> other, |
| <a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> p, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Return an RDD with the elements from <code>this</code> that are not in <code>other</code>.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>other</code> - (undocumented)</dd> |
| <dd><code>p</code> - (undocumented)</dd> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="reduce-scala.Function2-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>reduce</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a> reduce(scala.Function2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> f)</pre> |
| <div class="block">Reduces the elements of this RDD using the specified commutative and |
| associative binary operator.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="treeReduce-scala.Function2-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>treeReduce</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a> treeReduce(scala.Function2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> f, |
| int depth)</pre> |
| <div class="block">Reduces the elements of this RDD in a multi-level tree pattern. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>depth</code> - suggested depth of the tree (default: 2)</dd> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../org/apache/spark/rdd/RDD.html#reduce-scala.Function2-"><code>reduce(scala.Function2<T, T, T>)</code></a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="fold-java.lang.Object-scala.Function2-"> |
| <!-- --> |
| </a><a name="fold-T-scala.Function2-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>fold</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a> fold(<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a> zeroValue, |
| scala.Function2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> op)</pre> |
| <div class="block">Aggregate the elements of each partition, and then the results for all the partitions, using a |
| given associative function and a neutral "zero value". The function |
| op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object |
| allocation; however, it should not modify t2. |
| <p> |
| This behaves somewhat differently from fold operations implemented for non-distributed |
collections in functional languages like Scala. This fold operation may be applied to
partitions individually, and then those partial results folded into the final result, rather than
applying the fold to each element sequentially in some defined ordering. For functions
| that are not commutative, the result may differ from that of a fold applied to a |
| non-distributed collection. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>zeroValue</code> - the initial value for the accumulated result of each partition for the <code>op</code> |
| operator, and also the initial value for the combine results from different |
| partitions for the <code>op</code> operator - this will typically be the neutral |
| element (e.g. <code>Nil</code> for list concatenation or <code>0</code> for summation)</dd> |
| <dd><code>op</code> - an operator used to both accumulate results within a partition and combine results |
| from different partitions</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="aggregate-java.lang.Object-scala.Function2-scala.Function2-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a><a name="aggregate-U-scala.Function2-scala.Function2-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>aggregate</h4> |
| <pre>public <U> U aggregate(U zeroValue, |
| scala.Function2<U,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U> seqOp, |
| scala.Function2<U,U,U> combOp, |
| scala.reflect.ClassTag<U> evidence$31)</pre> |
| <div class="block">Aggregate the elements of each partition, and then the results for all the partitions, using |
| given combine functions and a neutral "zero value". This function can return a different result |
| type, U, than the type of this RDD, T. Thus, we need one operation for merging a T into an U |
| and one operation for merging two U's, as in scala.TraversableOnce. Both of these functions are |
| allowed to modify and return their first argument instead of creating a new U to avoid memory |
| allocation. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>zeroValue</code> - the initial value for the accumulated result of each partition for the |
| <code>seqOp</code> operator, and also the initial value for the combine results from |
| different partitions for the <code>combOp</code> operator - this will typically be the |
| neutral element (e.g. <code>Nil</code> for list concatenation or <code>0</code> for summation)</dd> |
| <dd><code>seqOp</code> - an operator used to accumulate results within a partition</dd> |
| <dd><code>combOp</code> - an associative operator used to combine results from different partitions</dd> |
| <dd><code>evidence$31</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="treeAggregate-java.lang.Object-scala.Function2-scala.Function2-int-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a><a name="treeAggregate-U-scala.Function2-scala.Function2-int-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>treeAggregate</h4> |
| <pre>public <U> U treeAggregate(U zeroValue, |
| scala.Function2<U,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U> seqOp, |
| scala.Function2<U,U,U> combOp, |
| int depth, |
| scala.reflect.ClassTag<U> evidence$32)</pre> |
| <div class="block">Aggregates the elements of this RDD in a multi-level tree pattern. |
| This method is semantically identical to <a href="../../../../org/apache/spark/rdd/RDD.html#aggregate-U-scala.Function2-scala.Function2-scala.reflect.ClassTag-"><code>aggregate(U, scala.Function2<U, T, U>, scala.Function2<U, U, U>, scala.reflect.ClassTag<U>)</code></a>. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>depth</code> - suggested depth of the tree (default: 2)</dd> |
| <dd><code>zeroValue</code> - (undocumented)</dd> |
| <dd><code>seqOp</code> - (undocumented)</dd> |
| <dd><code>combOp</code> - (undocumented)</dd> |
| <dd><code>evidence$32</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="treeAggregate-java.lang.Object-scala.Function2-scala.Function2-int-boolean-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a><a name="treeAggregate-U-scala.Function2-scala.Function2-int-boolean-scala.reflect.ClassTag-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>treeAggregate</h4> |
| <pre>public <U> U treeAggregate(U zeroValue, |
| scala.Function2<U,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,U> seqOp, |
| scala.Function2<U,U,U> combOp, |
| int depth, |
| boolean finalAggregateOnExecutor, |
| scala.reflect.ClassTag<U> evidence$33)</pre> |
| <div class="block"><a href="../../../../org/apache/spark/rdd/RDD.html#treeAggregate-U-scala.Function2-scala.Function2-int-scala.reflect.ClassTag-"><code>treeAggregate(U, scala.Function2<U, T, U>, scala.Function2<U, U, U>, int, scala.reflect.ClassTag<U>)</code></a> with a parameter to do the final |
| aggregation on the executor |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>finalAggregateOnExecutor</code> - do final aggregation on executor</dd> |
| <dd><code>zeroValue</code> - (undocumented)</dd> |
| <dd><code>seqOp</code> - (undocumented)</dd> |
| <dd><code>combOp</code> - (undocumented)</dd> |
| <dd><code>depth</code> - (undocumented)</dd> |
| <dd><code>evidence$33</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="count--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>count</h4> |
| <pre>public long count()</pre> |
| <div class="block">Return the number of elements in the RDD.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="countApprox-long-double-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>countApprox</h4> |
| <pre>public <a href="../../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a><<a href="../../../../org/apache/spark/partial/BoundedDouble.html" title="class in org.apache.spark.partial">BoundedDouble</a>> countApprox(long timeout, |
| double confidence)</pre> |
| <div class="block">Approximate version of count() that returns a potentially incomplete result |
| within a timeout, even if not all tasks have finished. |
| <p> |
| The confidence is the probability that the error bounds of the result will |
| contain the true value. That is, if countApprox were called repeatedly |
| with confidence 0.9, we would expect 90% of the results to contain the |
| true count. The confidence must be in the range [0,1] or an exception will |
| be thrown. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>timeout</code> - maximum time to wait for the job, in milliseconds</dd> |
| <dd><code>confidence</code> - the desired statistical confidence in the result</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>a potentially incomplete result, with error bounds</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="countByValue-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>countByValue</h4> |
| <pre>public scala.collection.Map<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,Object> countByValue(scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Return the count of each unique value in this RDD as a local map of (value, count) pairs. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This method should only be used if the resulting map is expected to be small, as |
| the whole thing is loaded into the driver's memory. |
| To handle very large results, consider using |
| <p> |
| <pre><code> |
| rdd.map(x => (x, 1L)).reduceByKey(_ + _) |
| </code></pre> |
<p>
which returns an RDD[T, Long] instead of a map.</dd>
| </dl> |
| </li> |
| </ul> |
| <a name="countByValueApprox-long-double-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>countByValueApprox</h4> |
| <pre>public <a href="../../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a><scala.collection.Map<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,<a href="../../../../org/apache/spark/partial/BoundedDouble.html" title="class in org.apache.spark.partial">BoundedDouble</a>>> countByValueApprox(long timeout, |
| double confidence, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Approximate version of countByValue(). |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>timeout</code> - maximum time to wait for the job, in milliseconds</dd> |
| <dd><code>confidence</code> - the desired statistical confidence in the result</dd> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>a potentially incomplete result, with error bounds</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="countApproxDistinct-int-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>countApproxDistinct</h4> |
| <pre>public long countApproxDistinct(int p, |
| int sp)</pre> |
| <div class="block">Return approximate number of distinct elements in the RDD. |
| <p> |
| The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice: |
| Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available |
| <a href="https://doi.org/10.1145/2452376.2452456">here</a>. |
| <p> |
The relative accuracy is approximately <code>1.054 / sqrt(2^p)</code>. Setting a nonzero <code>sp</code> (greater
than <code>p</code>) triggers a sparse representation of registers, which may reduce the memory
consumption and increase accuracy when the cardinality is small.
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>p</code> - The precision value for the normal set. |
| <code>p</code> must be a value between 4 and <code>sp</code> if <code>sp</code> is not zero (32 max).</dd> |
| <dd><code>sp</code> - The precision value for the sparse set, between 0 and 32. |
| If <code>sp</code> equals 0, the sparse representation is skipped.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="countApproxDistinct-double-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>countApproxDistinct</h4> |
| <pre>public long countApproxDistinct(double relativeSD)</pre> |
| <div class="block">Return approximate number of distinct elements in the RDD. |
| <p> |
| The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice: |
| Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available |
| <a href="https://doi.org/10.1145/2452376.2452456">here</a>. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>relativeSD</code> - Relative accuracy. Smaller values create counters that require more space. |
| It must be greater than 0.000017.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="zipWithIndex--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>zipWithIndex</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,Object>> zipWithIndex()</pre> |
| <div class="block">Zips this RDD with its element indices. The ordering is first based on the partition index |
| and then the ordering of items within each partition. So the first item in the first |
| partition gets index 0, and the last item in the last partition receives the largest index. |
| <p> |
| This is similar to Scala's zipWithIndex but it uses Long instead of Int as the index type. |
This method needs to trigger a Spark job when this RDD contains more than one partition.
| <p></div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Some RDDs, such as those returned by groupBy(), do not guarantee order of |
| elements in a partition. The index assigned to each element is therefore not guaranteed, |
| and may even change if the RDD is reevaluated. If a fixed ordering is required to guarantee |
| the same index assignments, you should sort the RDD with sortByKey() or save it to a file.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="zipWithUniqueId--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>zipWithUniqueId</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,Object>> zipWithUniqueId()</pre> |
| <div class="block">Zips this RDD with generated unique Long ids. Items in the kth partition will get ids k, n+k, |
| 2*n+k, ..., where n is the number of partitions. So there may exist gaps, but this method |
| won't trigger a spark job, which is different from <a href="../../../../org/apache/spark/rdd/RDD.html#zipWithIndex--"><code>zipWithIndex()</code></a>. |
| <p></div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Some RDDs, such as those returned by groupBy(), do not guarantee order of |
| elements in a partition. The unique ID assigned to each element is therefore not guaranteed, |
| and may even change if the RDD is reevaluated. If a fixed ordering is required to guarantee |
| the same index assignments, you should sort the RDD with sortByKey() or save it to a file.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="take-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>take</h4> |
| <pre>public Object take(int num)</pre> |
| <div class="block">Take the first num elements of the RDD. It works by first scanning one partition, and use the |
| results from that partition to estimate the number of additional partitions needed to satisfy |
| the limit. |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>num</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This method should only be used if the resulting array is expected to be small, as |
| all the data is loaded into the driver's memory. |
<p>Due to complications in the internal implementation, this method will raise
an exception if called on an RDD of <code>Nothing</code> or <code>Null</code>.</dd>
| </dl> |
| </li> |
| </ul> |
| <a name="first--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>first</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a> first()</pre> |
| <div class="block">Return the first element in this RDD.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="top-int-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>top</h4> |
| <pre>public Object top(int num, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Returns the top k (largest) elements from this RDD as defined by the specified |
| implicit Ordering[T] and maintains the ordering. This does the opposite of |
| <code>takeOrdered</code>. For example: |
| <pre><code> |
| sc.parallelize(Seq(10, 4, 2, 12, 3)).top(1) |
| // returns Array(12) |
| |
| sc.parallelize(Seq(2, 3, 4, 5, 6)).top(2) |
| // returns Array(6, 5) |
| </code></pre> |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>num</code> - k, the number of top elements to return</dd> |
| <dd><code>ord</code> - the implicit ordering for T</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>an array of top elements</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This method should only be used if the resulting array is expected to be small, as |
| all the data is loaded into the driver's memory. |
| <p></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="takeOrdered-int-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>takeOrdered</h4> |
| <pre>public Object takeOrdered(int num, |
| scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Returns the first k (smallest) elements from this RDD as defined by the specified |
| implicit Ordering[T] and maintains the ordering. This does the opposite of <code>top</code>. |
| For example: |
| <pre><code> |
| sc.parallelize(Seq(10, 4, 2, 12, 3)).takeOrdered(1) |
| // returns Array(2) |
| |
| sc.parallelize(Seq(2, 3, 4, 5, 6)).takeOrdered(2) |
| // returns Array(2, 3) |
| </code></pre> |
| <p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>num</code> - k, the number of elements to return</dd> |
| <dd><code>ord</code> - the implicit ordering for T</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>an array of top elements</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>This method should only be used if the resulting array is expected to be small, as |
| all the data is loaded into the driver's memory. |
| <p></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="max-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>max</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a> max(scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Returns the max of this RDD as defined by the implicit Ordering[T].</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>the maximum element of the RDD</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="min-scala.math.Ordering-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>min</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a> min(scala.math.Ordering<<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> ord)</pre> |
| <div class="block">Returns the min of this RDD as defined by the implicit Ordering[T].</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>ord</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>the minimum element of the RDD</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="isEmpty--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>isEmpty</h4> |
| <pre>public boolean isEmpty()</pre> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>true if and only if the RDD contains no elements at all. Note that an RDD |
| may be empty even when it has at least 1 partition.</dd> |
| <dt><span class="simpleTagLabel">Note:</span></dt> |
| <dd>Due to complications in the internal implementation, this method will raise an |
exception if called on an RDD of <code>Nothing</code> or <code>Null</code>. This may come up in practice
| because, for example, the type of <code>parallelize(Seq())</code> is <code>RDD[Nothing]</code>. |
| (<code>parallelize(Seq())</code> should be avoided anyway in favor of <code>parallelize(Seq[T]())</code>.)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="saveAsTextFile-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>saveAsTextFile</h4> |
| <pre>public void saveAsTextFile(String path)</pre> |
| <div class="block">Save this RDD as a text file, using string representations of elements.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="saveAsTextFile-java.lang.String-java.lang.Class-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>saveAsTextFile</h4> |
| <pre>public void saveAsTextFile(String path, |
| Class<? extends org.apache.hadoop.io.compress.CompressionCodec> codec)</pre> |
| <div class="block">Save this RDD as a compressed text file, using string representations of elements.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - (undocumented)</dd> |
| <dd><code>codec</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="saveAsObjectFile-java.lang.String-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>saveAsObjectFile</h4> |
| <pre>public void saveAsObjectFile(String path)</pre> |
| <div class="block">Save this RDD as a SequenceFile of serialized objects.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>path</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="keyBy-scala.Function1-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>keyBy</h4> |
| <pre>public &lt;K&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;K,<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>&gt;&gt; keyBy(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>,K&gt; f)</pre> |
| <div class="block">Creates tuples of the elements in this RDD by applying <code>f</code>.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>f</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="checkpoint--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>checkpoint</h4> |
| <pre>public void checkpoint()</pre> |
| <div class="block">Mark this RDD for checkpointing. It will be saved to a file inside the checkpoint |
| directory set with <code>SparkContext#setCheckpointDir</code> and all references to its parent |
| RDDs will be removed. This function must be called before any job has been |
| executed on this RDD. It is strongly recommended that this RDD is persisted in |
| memory, otherwise saving it on a file will require recomputation.</div> |
| </li> |
| </ul> |
| <a name="localCheckpoint--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>localCheckpoint</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> localCheckpoint()</pre> |
| <div class="block">Mark this RDD for local checkpointing using Spark's existing caching layer. |
| <p> |
| This method is for users who wish to truncate RDD lineages while skipping the expensive |
| step of replicating the materialized data in a reliable distributed file system. This is |
| useful for RDDs with long lineages that need to be truncated periodically (e.g. GraphX). |
| <p> |
| Local checkpointing sacrifices fault-tolerance for performance. In particular, checkpointed |
| data is written to ephemeral local storage in the executors instead of to a reliable, |
| fault-tolerant storage. The effect is that if an executor fails during the computation, |
| the checkpointed data may no longer be accessible, causing an irrecoverable job failure. |
| <p> |
| This is NOT safe to use with dynamic allocation, which removes executors along |
| with their cached blocks. If you must use both features, you are advised to set |
| <code>spark.dynamicAllocation.cachedExecutorIdleTimeout</code> to a high value. |
| <p> |
| The checkpoint directory set through <code>SparkContext#setCheckpointDir</code> is not used.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="isCheckpointed--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>isCheckpointed</h4> |
| <pre>public boolean isCheckpointed()</pre> |
| <div class="block">Return whether this RDD is checkpointed and materialized, either reliably or locally.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="getCheckpointFile--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getCheckpointFile</h4> |
| <pre>public scala.Option&lt;String&gt; getCheckpointFile()</pre> |
| <div class="block">Gets the name of the directory to which this RDD was checkpointed. |
| This is not defined if the RDD is checkpointed locally.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="cleanShuffleDependencies-boolean-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>cleanShuffleDependencies</h4> |
| <pre>public void cleanShuffleDependencies(boolean blocking)</pre> |
| <div class="block">Removes an RDD's shuffles and its non-persisted ancestors. |
| When running without a shuffle service, cleaning up shuffle files enables downscaling. |
| If you use the RDD after this call, you should checkpoint and materialize it first. |
| If you are uncertain of what you are doing, please do not use this feature. |
| Additional techniques for mitigating orphaned shuffle files: |
| * Tuning the driver GC to be more aggressive, so the regular context cleaner is triggered |
| * Setting an appropriate TTL for shuffle files to be auto cleaned</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>blocking</code> - (undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="barrier--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>barrier</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDDBarrier.html" title="class in org.apache.spark.rdd">RDDBarrier</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> barrier()</pre> |
| <div class="block">:: Experimental :: |
| Marks the current stage as a barrier stage, where Spark must launch all tasks together. |
| In case of a task failure, instead of only restarting the failed task, Spark will abort the |
| entire stage and re-launch all tasks for this stage. |
| The barrier execution mode feature is experimental and it only handles limited scenarios. |
| Please read the linked SPIP and design docs to understand the limitations and future plans.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>an <a href="../../../../org/apache/spark/rdd/RDDBarrier.html" title="class in org.apache.spark.rdd"><code>RDDBarrier</code></a> instance that provides actions within a barrier stage</dd> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../org/apache/spark/BarrierTaskContext.html" title="class in org.apache.spark"><code>BarrierTaskContext</code></a>, |
| <a href="https://jira.apache.org/jira/browse/SPARK-24374">SPIP: Barrier Execution Mode</a>, |
| <a href="https://jira.apache.org/jira/browse/SPARK-24582">Design Doc</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="withResources-org.apache.spark.resource.ResourceProfile-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>withResources</h4> |
| <pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> withResources(<a href="../../../../org/apache/spark/resource/ResourceProfile.html" title="class in org.apache.spark.resource">ResourceProfile</a> rp)</pre> |
| <div class="block">Specify a ResourceProfile to use when calculating this RDD. This is only supported on |
| certain cluster managers and currently requires dynamic allocation to be enabled. |
| It will result in new executors with the resources specified being acquired to |
| calculate the RDD.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>rp</code> - (undocumented)</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>(undocumented)</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="getResourceProfile--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getResourceProfile</h4> |
| <pre>public <a href="../../../../org/apache/spark/resource/ResourceProfile.html" title="class in org.apache.spark.resource">ResourceProfile</a> getResourceProfile()</pre> |
| <div class="block">Get the ResourceProfile specified with this RDD or null if it wasn't specified.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>the user specified ResourceProfile or null (for Java compatibility) if |
| none was specified</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="context--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>context</h4> |
| <pre>public <a href="../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark">SparkContext</a> context()</pre> |
| <div class="block">The <a href="../../../../org/apache/spark/SparkContext.html" title="class in org.apache.spark"><code>SparkContext</code></a> that this RDD was created on.</div> |
| </li> |
| </ul> |
| <a name="toDebugString--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>toDebugString</h4> |
| <pre>public String toDebugString()</pre> |
| <div class="block">A description of this RDD and its recursive dependencies for debugging.</div> |
| </li> |
| </ul> |
| <a name="toString--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>toString</h4> |
| <pre>public String toString()</pre> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Overrides:</span></dt> |
| <dd><code>toString</code> in class <code>Object</code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="toJavaRDD--"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>toJavaRDD</h4> |
| <pre>public <a href="../../../../org/apache/spark/api/java/JavaRDD.html" title="class in org.apache.spark.api.java">JavaRDD</a><<a href="../../../../org/apache/spark/rdd/RDD.html" title="type parameter in RDD">T</a>> toJavaRDD()</pre> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../org/apache/spark/rdd/PartitionPruningRDD.html" title="class in org.apache.spark.rdd"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../org/apache/spark/rdd/RDDBarrier.html" title="class in org.apache.spark.rdd"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/spark/rdd/RDD.html" target="_top">Frames</a></li> |
| <li><a href="RDD.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| // Toggle visibility of the "All Classes" link in the bottom navbar: |
| // show it only when this page is loaded as the top-level window |
| // (window == top); hide it when the page is embedded in a frame, |
| // where window differs from top (presumably the frameset view |
| // provides its own class list -- behavior inherited from javadoc). |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li>Field | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| <script defer="defer" type="text/javascript" src="../../../../lib/jquery.js"></script><script defer="defer" type="text/javascript" src="../../../../lib/api-javadocs.js"></script></body> |
| </html> |