| |
| <!DOCTYPE html> |
| |
| <html lang="en"> |
| <head> |
| <meta charset="utf-8" /> |
| <title>Spark Streaming — PySpark 3.2.3 documentation</title> |
| |
| <link rel="stylesheet" href="../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css"> |
| |
| |
| <link rel="stylesheet" |
| href="../_static/vendor/fontawesome/5.13.0/css/all.min.css"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2"> |
| |
| |
| |
| <link rel="stylesheet" |
| href="../_static/vendor/open-sans_all/1.44.1/index.css"> |
| <link rel="stylesheet" |
| href="../_static/vendor/lato_latin-ext/1.44.1/index.css"> |
| |
| |
| <link rel="stylesheet" href="../_static/basic.css" type="text/css" /> |
| <link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> |
| <link rel="stylesheet" type="text/css" href="../_static/css/pyspark.css" /> |
| |
| <link rel="preload" as="script" href="../_static/js/index.3da636dd464baa7582d2.js"> |
| |
| <script id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script> |
| <script src="../_static/jquery.js"></script> |
| <script src="../_static/underscore.js"></script> |
| <script src="../_static/doctools.js"></script> |
| <script src="../_static/language_data.js"></script> |
| <script src="../_static/copybutton.js"></script> |
| <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script> |
| <script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script> |
| <script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script> |
| <link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/reference/pyspark.streaming.html" /> |
| <link rel="search" title="Search" href="../search.html" /> |
| <link rel="next" title="pyspark.streaming.StreamingContext" href="api/pyspark.streaming.StreamingContext.html" /> |
| <link rel="prev" title="MLWriter" href="api/pyspark.ml.util.MLWriter.html" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> |
| <meta name="docsearch:language" content="en" /> |
| </head> |
| <body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80"> |
| |
| <nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main"> |
| <div class="container-xl"> |
| |
| <a class="navbar-brand" href="../index.html"> |
| |
| <img src="../_static/spark-logo-reverse.png" class="logo" alt="logo" /> |
| |
| </a> |
| <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation"> |
| <span class="navbar-toggler-icon"></span> |
| </button> |
| |
| <div id="navbar-menu" class="col-lg-9 collapse navbar-collapse"> |
| <ul id="navbar-main-elements" class="navbar-nav mr-auto"> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="../getting_started/index.html">Getting Started</a> |
| </li> |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="../user_guide/index.html">User Guide</a> |
| </li> |
| |
| <li class="nav-item active"> |
| <a class="nav-link" href="index.html">API Reference</a> |
| </li> |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="../development/index.html">Development</a> |
| </li> |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="../migration_guide/index.html">Migration Guide</a> |
| </li> |
| |
| |
| </ul> |
| |
| |
| |
| |
| <ul class="navbar-nav"> |
| |
| |
| </ul> |
| </div> |
| </div> |
| </nav> |
| |
| |
| <div class="container-xl"> |
| <div class="row"> |
| |
| <div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../search.html" method="get"> |
| <i class="icon fas fa-search"></i> |
| <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" > |
| </form> |
| <nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation"> |
| |
| <div class="bd-toc-item active"> |
| |
| |
| <ul class="nav bd-sidenav"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""> |
| <a href="pyspark.sql.html">Spark SQL</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="pyspark.pandas/index.html">Pandas API on Spark</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="pyspark.ss.html">Structured Streaming</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="pyspark.ml.html">MLlib (DataFrame-based)</a> |
| </li> |
| |
| |
| |
| <li class="active"> |
| <a href="">Spark Streaming</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="pyspark.mllib.html">MLlib (RDD-based)</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="pyspark.html">Spark Core</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="pyspark.resource.html">Resource Management</a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| </ul> |
| |
| </nav> |
| </div> |
| |
| |
| |
| <div class="d-none d-xl-block col-xl-2 bd-toc"> |
| |
| <div class="tocsection onthispage pt-5 pb-3"> |
| <i class="fas fa-list"></i> On this page |
| </div> |
| |
| <nav id="bd-toc-nav"> |
| <ul class="nav section-nav flex-column"> |
| |
| <li class="nav-item toc-entry toc-h2"> |
| <a href="#core-classes" class="nav-link">Core Classes</a> |
| </li> |
| |
| <li class="nav-item toc-entry toc-h2"> |
| <a href="#streaming-management" class="nav-link">Streaming Management</a> |
| </li> |
| |
| <li class="nav-item toc-entry toc-h2"> |
| <a href="#input-and-output" class="nav-link">Input and Output</a> |
| </li> |
| |
| <li class="nav-item toc-entry toc-h2"> |
| <a href="#transformations-and-actions" class="nav-link">Transformations and Actions</a> |
| </li> |
| |
| <li class="nav-item toc-entry toc-h2"> |
| <a href="#kinesis" class="nav-link">Kinesis</a> |
| </li> |
| |
| </ul> |
| </nav> |
| |
| |
| |
| </div> |
| |
| |
| |
| <main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main"> |
| |
| <div> |
| |
| <div class="section" id="spark-streaming"> |
| <h1>Spark Streaming<a class="headerlink" href="#spark-streaming" title="Permalink to this headline">¶</a></h1> |
| <div class="section" id="core-classes"> |
| <h2>Core Classes<a class="headerlink" href="#core-classes" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.html#pyspark.streaming.StreamingContext" title="pyspark.streaming.StreamingContext"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext</span></code></a>(sparkContext[, …])</p></td> |
| <td><p>Main entry point for Spark Streaming functionality.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.html#pyspark.streaming.DStream" title="pyspark.streaming.DStream"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream</span></code></a>(jdstream, ssc, jrdd_deserializer)</p></td> |
| <td><p>A Discretized Stream (DStream), the basic abstraction in Spark Streaming, is a continuous sequence of RDDs (of the same type) representing a continuous stream of data (see <code class="xref py py-class docutils literal notranslate"><span class="pre">RDD</span></code> in the Spark core documentation for more details on RDDs).</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="section" id="streaming-management"> |
| <h2>Streaming Management<a class="headerlink" href="#streaming-management" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.addStreamingListener.html#pyspark.streaming.StreamingContext.addStreamingListener" title="pyspark.streaming.StreamingContext.addStreamingListener"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.addStreamingListener</span></code></a>(…)</p></td> |
| <td><p>Add a <code class="docutils literal notranslate"><span class="pre">org.apache.spark.streaming.scheduler.StreamingListener</span></code> object for receiving system events related to streaming.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.awaitTermination.html#pyspark.streaming.StreamingContext.awaitTermination" title="pyspark.streaming.StreamingContext.awaitTermination"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.awaitTermination</span></code></a>([timeout])</p></td> |
| <td><p>Wait for the execution to stop.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.awaitTerminationOrTimeout.html#pyspark.streaming.StreamingContext.awaitTerminationOrTimeout" title="pyspark.streaming.StreamingContext.awaitTerminationOrTimeout"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.awaitTerminationOrTimeout</span></code></a>(timeout)</p></td> |
| <td><p>Wait for the execution to stop.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.checkpoint.html#pyspark.streaming.StreamingContext.checkpoint" title="pyspark.streaming.StreamingContext.checkpoint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.checkpoint</span></code></a>(directory)</p></td> |
| <td><p>Sets the context to periodically checkpoint the DStream operations for master fault-tolerance.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.getActive.html#pyspark.streaming.StreamingContext.getActive" title="pyspark.streaming.StreamingContext.getActive"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.getActive</span></code></a>()</p></td> |
| <td><p>Return either the currently active StreamingContext (i.e., if there is a context started but not stopped) or None.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.getActiveOrCreate.html#pyspark.streaming.StreamingContext.getActiveOrCreate" title="pyspark.streaming.StreamingContext.getActiveOrCreate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.getActiveOrCreate</span></code></a>(…)</p></td> |
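| <td><p>Either return the active StreamingContext (i.e., currently started but not stopped), or recreate a StreamingContext from checkpoint data, or create a new StreamingContext using the provided setupFunc function.</p></td> |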
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.getOrCreate.html#pyspark.streaming.StreamingContext.getOrCreate" title="pyspark.streaming.StreamingContext.getOrCreate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.getOrCreate</span></code></a>(checkpointPath, …)</p></td> |
| <td><p>Either recreate a StreamingContext from checkpoint data or create a new StreamingContext.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.remember.html#pyspark.streaming.StreamingContext.remember" title="pyspark.streaming.StreamingContext.remember"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.remember</span></code></a>(duration)</p></td> |
| <td><p>Set each DStream in this context to remember RDDs it generated in the last given duration.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.sparkContext.html#pyspark.streaming.StreamingContext.sparkContext" title="pyspark.streaming.StreamingContext.sparkContext"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.sparkContext</span></code></a></p></td> |
| <td><p>Return SparkContext which is associated with this StreamingContext.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.start.html#pyspark.streaming.StreamingContext.start" title="pyspark.streaming.StreamingContext.start"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.start</span></code></a>()</p></td> |
| <td><p>Start the execution of the streams.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.stop.html#pyspark.streaming.StreamingContext.stop" title="pyspark.streaming.StreamingContext.stop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.stop</span></code></a>([stopSparkContext, …])</p></td> |
| <td><p>Stop the execution of the streams, with option of ensuring all received data has been processed.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.transform.html#pyspark.streaming.StreamingContext.transform" title="pyspark.streaming.StreamingContext.transform"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.transform</span></code></a>(dstreams, …)</p></td> |
| <td><p>Create a new DStream in which each RDD is generated by applying a function on RDDs of the DStreams.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.union.html#pyspark.streaming.StreamingContext.union" title="pyspark.streaming.StreamingContext.union"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.union</span></code></a>(*dstreams)</p></td> |
| <td><p>Create a unified DStream from multiple DStreams of the same type and same slide duration.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="section" id="input-and-output"> |
| <h2>Input and Output<a class="headerlink" href="#input-and-output" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.binaryRecordsStream.html#pyspark.streaming.StreamingContext.binaryRecordsStream" title="pyspark.streaming.StreamingContext.binaryRecordsStream"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.binaryRecordsStream</span></code></a>(…)</p></td> |
| <td><p>Create an input stream that monitors a Hadoop-compatible file system for new files and reads them as flat binary files with records of fixed length.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.queueStream.html#pyspark.streaming.StreamingContext.queueStream" title="pyspark.streaming.StreamingContext.queueStream"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.queueStream</span></code></a>(rdds[, …])</p></td> |
| <td><p>Create an input stream from a queue of RDDs or list.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.socketTextStream.html#pyspark.streaming.StreamingContext.socketTextStream" title="pyspark.streaming.StreamingContext.socketTextStream"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.socketTextStream</span></code></a>(hostname, port)</p></td> |
| <td><p>Create an input stream from TCP source hostname:port.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.StreamingContext.textFileStream.html#pyspark.streaming.StreamingContext.textFileStream" title="pyspark.streaming.StreamingContext.textFileStream"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingContext.textFileStream</span></code></a>(directory)</p></td> |
| <td><p>Create an input stream that monitors a Hadoop-compatible file system for new files and reads them as text files.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.pprint.html#pyspark.streaming.DStream.pprint" title="pyspark.streaming.DStream.pprint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.pprint</span></code></a>([num])</p></td> |
| <td><p>Print the first num elements of each RDD generated in this DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.saveAsTextFiles.html#pyspark.streaming.DStream.saveAsTextFiles" title="pyspark.streaming.DStream.saveAsTextFiles"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.saveAsTextFiles</span></code></a>(prefix[, suffix])</p></td> |
| <td><p>Save each RDD in this DStream as a text file, using string representation of elements.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="section" id="transformations-and-actions"> |
| <h2>Transformations and Actions<a class="headerlink" href="#transformations-and-actions" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.cache.html#pyspark.streaming.DStream.cache" title="pyspark.streaming.DStream.cache"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.cache</span></code></a>()</p></td> |
| <td><p>Persist the RDDs of this DStream with the default storage level (<cite>MEMORY_ONLY</cite>).</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.checkpoint.html#pyspark.streaming.DStream.checkpoint" title="pyspark.streaming.DStream.checkpoint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.checkpoint</span></code></a>(interval)</p></td> |
| <td><p>Enable periodic checkpointing of RDDs of this DStream.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.cogroup.html#pyspark.streaming.DStream.cogroup" title="pyspark.streaming.DStream.cogroup"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.cogroup</span></code></a>(other[, numPartitions])</p></td> |
| <td><p>Return a new DStream by applying ‘cogroup’ between RDDs of this DStream and <cite>other</cite> DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.combineByKey.html#pyspark.streaming.DStream.combineByKey" title="pyspark.streaming.DStream.combineByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.combineByKey</span></code></a>(createCombiner, …[, …])</p></td> |
| <td><p>Return a new DStream by applying combineByKey to each RDD.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.context.html#pyspark.streaming.DStream.context" title="pyspark.streaming.DStream.context"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.context</span></code></a>()</p></td> |
| <td><p>Return the StreamingContext associated with this DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.count.html#pyspark.streaming.DStream.count" title="pyspark.streaming.DStream.count"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.count</span></code></a>()</p></td> |
| <td><p>Return a new DStream in which each RDD has a single element generated by counting each RDD of this DStream.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.countByValue.html#pyspark.streaming.DStream.countByValue" title="pyspark.streaming.DStream.countByValue"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.countByValue</span></code></a>()</p></td> |
| <td><p>Return a new DStream in which each RDD contains the counts of each distinct value in each RDD of this DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.countByValueAndWindow.html#pyspark.streaming.DStream.countByValueAndWindow" title="pyspark.streaming.DStream.countByValueAndWindow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.countByValueAndWindow</span></code></a>(…[, …])</p></td> |
| <td><p>Return a new DStream in which each RDD contains the count of distinct elements in RDDs in a sliding window over this DStream.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.countByWindow.html#pyspark.streaming.DStream.countByWindow" title="pyspark.streaming.DStream.countByWindow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.countByWindow</span></code></a>(windowDuration, …)</p></td> |
| <td><p>Return a new DStream in which each RDD has a single element generated by counting the number of elements in a window over this DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.filter.html#pyspark.streaming.DStream.filter" title="pyspark.streaming.DStream.filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.filter</span></code></a>(f)</p></td> |
| <td><p>Return a new DStream containing only the elements that satisfy predicate.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.flatMap.html#pyspark.streaming.DStream.flatMap" title="pyspark.streaming.DStream.flatMap"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.flatMap</span></code></a>(f[, preservesPartitioning])</p></td> |
| <td><p>Return a new DStream by applying a function to all elements of this DStream, and then flattening the results.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.flatMapValues.html#pyspark.streaming.DStream.flatMapValues" title="pyspark.streaming.DStream.flatMapValues"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.flatMapValues</span></code></a>(f)</p></td> |
| <td><p>Return a new DStream by applying a flatmap function to the value of each key-value pair in this DStream without changing the key.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.foreachRDD.html#pyspark.streaming.DStream.foreachRDD" title="pyspark.streaming.DStream.foreachRDD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.foreachRDD</span></code></a>(func)</p></td> |
| <td><p>Apply a function to each RDD in this DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.fullOuterJoin.html#pyspark.streaming.DStream.fullOuterJoin" title="pyspark.streaming.DStream.fullOuterJoin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.fullOuterJoin</span></code></a>(other[, numPartitions])</p></td> |
| <td><p>Return a new DStream by applying ‘full outer join’ between RDDs of this DStream and <cite>other</cite> DStream.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.glom.html#pyspark.streaming.DStream.glom" title="pyspark.streaming.DStream.glom"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.glom</span></code></a>()</p></td> |
| <td><p>Return a new DStream in which each RDD is generated by applying glom() to each RDD of this DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.groupByKey.html#pyspark.streaming.DStream.groupByKey" title="pyspark.streaming.DStream.groupByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.groupByKey</span></code></a>([numPartitions])</p></td> |
| <td><p>Return a new DStream by applying groupByKey on each RDD.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.groupByKeyAndWindow.html#pyspark.streaming.DStream.groupByKeyAndWindow" title="pyspark.streaming.DStream.groupByKeyAndWindow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.groupByKeyAndWindow</span></code></a>(windowDuration, …)</p></td> |
| <td><p>Return a new DStream by applying <cite>groupByKey</cite> over a sliding window.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.join.html#pyspark.streaming.DStream.join" title="pyspark.streaming.DStream.join"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.join</span></code></a>(other[, numPartitions])</p></td> |
| <td><p>Return a new DStream by applying ‘join’ between RDDs of this DStream and <cite>other</cite> DStream.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.leftOuterJoin.html#pyspark.streaming.DStream.leftOuterJoin" title="pyspark.streaming.DStream.leftOuterJoin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.leftOuterJoin</span></code></a>(other[, numPartitions])</p></td> |
| <td><p>Return a new DStream by applying ‘left outer join’ between RDDs of this DStream and <cite>other</cite> DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.map.html#pyspark.streaming.DStream.map" title="pyspark.streaming.DStream.map"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.map</span></code></a>(f[, preservesPartitioning])</p></td> |
| <td><p>Return a new DStream by applying a function to each element of DStream.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.mapPartitions.html#pyspark.streaming.DStream.mapPartitions" title="pyspark.streaming.DStream.mapPartitions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.mapPartitions</span></code></a>(f[, preservesPartitioning])</p></td> |
| <td><p>Return a new DStream in which each RDD is generated by applying mapPartitions() to each RDD of this DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.mapPartitionsWithIndex.html#pyspark.streaming.DStream.mapPartitionsWithIndex" title="pyspark.streaming.DStream.mapPartitionsWithIndex"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.mapPartitionsWithIndex</span></code></a>(f[, …])</p></td> |
| <td><p>Return a new DStream in which each RDD is generated by applying mapPartitionsWithIndex() to each RDD of this DStream.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.mapValues.html#pyspark.streaming.DStream.mapValues" title="pyspark.streaming.DStream.mapValues"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.mapValues</span></code></a>(f)</p></td> |
| <td><p>Return a new DStream by applying a map function to the value of each key-value pair in this DStream without changing the key.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.partitionBy.html#pyspark.streaming.DStream.partitionBy" title="pyspark.streaming.DStream.partitionBy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.partitionBy</span></code></a>(numPartitions[, …])</p></td> |
| <td><p>Return a copy of the DStream in which each RDD is partitioned using the specified partitioner.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.persist.html#pyspark.streaming.DStream.persist" title="pyspark.streaming.DStream.persist"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.persist</span></code></a>(storageLevel)</p></td> |
| <td><p>Persist the RDDs of this DStream with the given storage level.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.reduce.html#pyspark.streaming.DStream.reduce" title="pyspark.streaming.DStream.reduce"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.reduce</span></code></a>(func)</p></td> |
| <td><p>Return a new DStream in which each RDD has a single element generated by reducing each RDD of this DStream.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.reduceByKey.html#pyspark.streaming.DStream.reduceByKey" title="pyspark.streaming.DStream.reduceByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.reduceByKey</span></code></a>(func[, numPartitions])</p></td> |
| <td><p>Return a new DStream by applying reduceByKey to each RDD.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.reduceByKeyAndWindow.html#pyspark.streaming.DStream.reduceByKeyAndWindow" title="pyspark.streaming.DStream.reduceByKeyAndWindow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.reduceByKeyAndWindow</span></code></a>(func, invFunc, …)</p></td> |
| <td><p>Return a new DStream by applying incremental <cite>reduceByKey</cite> over a sliding window.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.reduceByWindow.html#pyspark.streaming.DStream.reduceByWindow" title="pyspark.streaming.DStream.reduceByWindow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.reduceByWindow</span></code></a>(reduceFunc, …)</p></td> |
| <td><p>Return a new DStream in which each RDD has a single element generated by reducing all elements in a sliding window over this DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.repartition.html#pyspark.streaming.DStream.repartition" title="pyspark.streaming.DStream.repartition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.repartition</span></code></a>(numPartitions)</p></td> |
| <td><p>Return a new DStream with an increased or decreased level of parallelism.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.rightOuterJoin.html#pyspark.streaming.DStream.rightOuterJoin" title="pyspark.streaming.DStream.rightOuterJoin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.rightOuterJoin</span></code></a>(other[, numPartitions])</p></td> |
| <td><p>Return a new DStream by applying ‘right outer join’ between RDDs of this DStream and <cite>other</cite> DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.slice.html#pyspark.streaming.DStream.slice" title="pyspark.streaming.DStream.slice"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.slice</span></code></a>(begin, end)</p></td> |
| <td><p>Return all the RDDs between ‘begin’ and ‘end’ (both included)</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.transform.html#pyspark.streaming.DStream.transform" title="pyspark.streaming.DStream.transform"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.transform</span></code></a>(func)</p></td> |
| <td><p>Return a new DStream in which each RDD is generated by applying a function on each RDD of this DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.transformWith.html#pyspark.streaming.DStream.transformWith" title="pyspark.streaming.DStream.transformWith"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.transformWith</span></code></a>(func, other[, …])</p></td> |
| <td><p>Return a new DStream in which each RDD is generated by applying a function on each RDD of this DStream and ‘other’ DStream.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.union.html#pyspark.streaming.DStream.union" title="pyspark.streaming.DStream.union"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.union</span></code></a>(other)</p></td> |
| <td><p>Return a new DStream by unifying data of another DStream with this DStream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.updateStateByKey.html#pyspark.streaming.DStream.updateStateByKey" title="pyspark.streaming.DStream.updateStateByKey"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.updateStateByKey</span></code></a>(updateFunc[, …])</p></td> |
| <td><p>Return a new “state” DStream where the state for each key is updated by applying the given function on the previous state of the key and the new values of the key.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.DStream.window.html#pyspark.streaming.DStream.window" title="pyspark.streaming.DStream.window"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DStream.window</span></code></a>(windowDuration[, slideDuration])</p></td> |
| <td><p>Return a new DStream in which each RDD contains all the elements seen in a sliding window of time over this DStream.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="section" id="kinesis"> |
| <h2>Kinesis<a class="headerlink" href="#kinesis" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.kinesis.KinesisUtils.createStream.html#pyspark.streaming.kinesis.KinesisUtils.createStream" title="pyspark.streaming.kinesis.KinesisUtils.createStream"><code class="xref py py-obj docutils literal notranslate"><span class="pre">KinesisUtils.createStream</span></code></a>(ssc, …[, …])</p></td> |
| <td><p>Create an input stream that pulls messages from a Kinesis stream.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.streaming.kinesis.InitialPositionInStream.LATEST.html#pyspark.streaming.kinesis.InitialPositionInStream.LATEST" title="pyspark.streaming.kinesis.InitialPositionInStream.LATEST"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InitialPositionInStream.LATEST</span></code></a></p></td> |
| <td><p></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.streaming.kinesis.InitialPositionInStream.TRIM_HORIZON.html#pyspark.streaming.kinesis.InitialPositionInStream.TRIM_HORIZON" title="pyspark.streaming.kinesis.InitialPositionInStream.TRIM_HORIZON"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InitialPositionInStream.TRIM_HORIZON</span></code></a></p></td> |
| <td><p></p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| |
| |
| </div> |
| |
| |
| <div class='prev-next-bottom'> |
| |
| <a class='left-prev' id="prev-link" href="api/pyspark.ml.util.MLWriter.html" title="previous page">MLWriter</a> |
| <a class='right-next' id="next-link" href="api/pyspark.streaming.StreamingContext.html" title="next page">pyspark.streaming.StreamingContext</a> |
| |
| </div> |
| |
| </main> |
| |
| |
| </div> |
| </div> |
| |
| |
| <script src="../_static/js/index.3da636dd464baa7582d2.js"></script> |
| |
| |
| <footer class="footer mt-5 mt-md-0"> |
| <div class="container"> |
| <p> |
| © Copyright .<br/> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br/> |
| </p> |
| </div> |
| </footer> |
| </body> |
| </html> |