<!-- blob: d53c4d526a6a0c7e4f451c606dfb07dd5c555bc4 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>StreamingLogisticRegressionWithSGD &#8212; PySpark 3.4.3 documentation</title>
<link rel="stylesheet" href="../../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
<link rel="stylesheet"
href="../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet"
href="../../_static/vendor/open-sans_all/1.44.1/index.css">
<link rel="stylesheet"
href="../../_static/vendor/lato_latin-ext/1.44.1/index.css">
<link rel="stylesheet" href="../../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../../_static/js/index.3da636dd464baa7582d2.js">
<script id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
<script src="../../_static/jquery.js"></script>
<script src="../../_static/underscore.js"></script>
<script src="../../_static/doctools.js"></script>
<script src="../../_static/language_data.js"></script>
<script src="../../_static/clipboard.min.js"></script>
<script src="../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="BisectingKMeansModel" href="pyspark.mllib.clustering.BisectingKMeansModel.html" />
<link rel="prev" title="NaiveBayes" href="pyspark.mllib.classification.NaiveBayes.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en" />
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main">
<div class="container-xl">
<a class="navbar-brand" href="../../index.html">
<img src="../../_static/spark-logo-reverse.png" class="logo" alt="PySpark documentation home" />
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
<ul id="navbar-main-elements" class="navbar-nav mr-auto">
<li class="nav-item ">
<a class="nav-link" href="../../index.html">Overview</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../getting_started/index.html">Getting Started</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../user_guide/index.html">User Guides</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="../index.html">API Reference</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../development/index.html">Development</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../migration_guide/index.html">Migration Guides</a>
</li>
</ul>
<ul class="navbar-nav">
</ul>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../../search.html" method="get">
<i class="icon fas fa-search" aria-hidden="true"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="">
<a href="../pyspark.sql/index.html">Spark SQL</a>
</li>
<li class="">
<a href="../pyspark.pandas/index.html">Pandas API on Spark</a>
</li>
<li class="">
<a href="../pyspark.ss/index.html">Structured Streaming</a>
</li>
<li class="">
<a href="../pyspark.ml.html">MLlib (DataFrame-based)</a>
</li>
<li class="">
<a href="../pyspark.streaming.html">Spark Streaming (Legacy)</a>
</li>
<li class="active">
<a href="../pyspark.mllib.html">MLlib (RDD-based)</a>
</li>
<li class="">
<a href="../pyspark.html">Spark Core</a>
</li>
<li class="">
<a href="../pyspark.resource.html">Resource Management</a>
</li>
<li class="">
<a href="../pyspark.errors.html">Errors</a>
</li>
</ul>
</div>
</nav>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<nav id="bd-toc-nav">
<ul class="nav section-nav flex-column">
</ul>
</nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="streaminglogisticregressionwithsgd">
<h1>StreamingLogisticRegressionWithSGD<a class="headerlink" href="#streaminglogisticregressionwithsgd" title="Permalink to this headline"></a></h1>
<dl class="py class">
<dt id="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD">
<em class="property">class </em><code class="sig-prename descclassname">pyspark.mllib.classification.</code><code class="sig-name descname">StreamingLogisticRegressionWithSGD</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">stepSize</span><span class="p">:</span> <span class="n">float</span> <span class="o">=</span> <span class="default_value">0.1</span></em>, <em class="sig-param"><span class="n">numIterations</span><span class="p">:</span> <span class="n">int</span> <span class="o">=</span> <span class="default_value">50</span></em>, <em class="sig-param"><span class="n">miniBatchFraction</span><span class="p">:</span> <span class="n">float</span> <span class="o">=</span> <span class="default_value">1.0</span></em>, <em class="sig-param"><span class="n">regParam</span><span class="p">:</span> <span class="n">float</span> <span class="o">=</span> <span class="default_value">0.0</span></em>, <em class="sig-param"><span class="n">convergenceTol</span><span class="p">:</span> <span class="n">float</span> <span class="o">=</span> <span class="default_value">0.001</span></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/pyspark/mllib/classification.html#StreamingLogisticRegressionWithSGD"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD" title="Permalink to this definition"></a></dt>
<dd><p>Train or predict a logistic regression model on streaming data.
Training uses Stochastic Gradient Descent to update the model based on
each new batch of incoming data from a DStream.</p>
<p>Each batch of data is assumed to be an RDD of LabeledPoints.
The number of data points per batch can vary, but the number
of features must be constant. An initial weight
vector must be provided.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.5.0.</span></p>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><dl class="simple">
<dt><strong>stepSize</strong><span class="classifier">float, optional</span></dt><dd><p>Step size for each iteration of gradient descent.
(default: 0.1)</p>
</dd>
<dt><strong>numIterations</strong><span class="classifier">int, optional</span></dt><dd><p>Number of iterations run for each batch of data.
(default: 50)</p>
</dd>
<dt><strong>miniBatchFraction</strong><span class="classifier">float, optional</span></dt><dd><p>Fraction of each batch of data to use for updates.
(default: 1.0)</p>
</dd>
<dt><strong>regParam</strong><span class="classifier">float, optional</span></dt><dd><p>L2 Regularization parameter.
(default: 0.0)</p>
</dd>
<dt><strong>convergenceTol</strong><span class="classifier">float, optional</span></dt><dd><p>Value used to determine when to terminate iterations.
(default: 0.001)</p>
</dd>
</dl>
</dd>
</dl>
<p class="rubric">Methods</p>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.latestModel" title="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.latestModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">latestModel</span></code></a>()</p></td>
<td><p>Returns the latest model.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.predictOn" title="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.predictOn"><code class="xref py py-obj docutils literal notranslate"><span class="pre">predictOn</span></code></a>(dstream)</p></td>
<td><p>Use the model to make predictions on batches of data from a DStream.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.predictOnValues" title="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.predictOnValues"><code class="xref py py-obj docutils literal notranslate"><span class="pre">predictOnValues</span></code></a>(dstream)</p></td>
<td><p>Use the model to make predictions on the values of a DStream and carry over its keys.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.setInitialWeights" title="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.setInitialWeights"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setInitialWeights</span></code></a>(initialWeights)</p></td>
<td><p>Set the initial value of weights.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.trainOn" title="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.trainOn"><code class="xref py py-obj docutils literal notranslate"><span class="pre">trainOn</span></code></a>(dstream)</p></td>
<td><p>Train the model on the incoming dstream.</p></td>
</tr>
</tbody>
</table>
<p class="rubric">Methods Documentation</p>
<dl class="py method">
<dt id="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.latestModel">
<code class="sig-name descname">latestModel</code><span class="sig-paren">(</span><span class="sig-paren">)</span> &#x2192; Optional<span class="p">[</span><a class="reference internal" href="pyspark.mllib.regression.LinearModel.html#pyspark.mllib.regression.LinearModel" title="pyspark.mllib.regression.LinearModel">pyspark.mllib.regression.LinearModel</a><span class="p">]</span><a class="headerlink" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.latestModel" title="Permalink to this definition"></a></dt>
<dd><p>Returns the latest model.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.5.0.</span></p>
</div>
</dd></dl>
<dl class="py method">
<dt id="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.predictOn">
<code class="sig-name descname">predictOn</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">dstream</span><span class="p">:</span> <span class="n">DStream<span class="p">[</span>VectorLike<span class="p">]</span></span></em><span class="sig-paren">)</span> &#x2192; DStream<span class="p">[</span>float<span class="p">]</span><a class="headerlink" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.predictOn" title="Permalink to this definition"></a></dt>
<dd><p>Use the model to make predictions on batches of data from a
DStream.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.5.0.</span></p>
</div>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><dl class="simple">
<dt><a class="reference internal" href="pyspark.streaming.DStream.html#pyspark.streaming.DStream" title="pyspark.streaming.DStream"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.streaming.DStream</span></code></a></dt><dd><p>DStream containing predictions.</p>
</dd>
</dl>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.predictOnValues">
<code class="sig-name descname">predictOnValues</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">dstream</span><span class="p">:</span> <span class="n">DStream<span class="p">[</span>Tuple<span class="p">[</span>K<span class="p">, </span>VectorLike<span class="p">]</span><span class="p">]</span></span></em><span class="sig-paren">)</span> &#x2192; DStream<span class="p">[</span>Tuple<span class="p">[</span>K<span class="p">, </span>float<span class="p">]</span><span class="p">]</span><a class="headerlink" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.predictOnValues" title="Permalink to this definition"></a></dt>
<dd><p>Use the model to make predictions on the values of a DStream and
carry over its keys.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.5.0.</span></p>
</div>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><dl class="simple">
<dt><a class="reference internal" href="pyspark.streaming.DStream.html#pyspark.streaming.DStream" title="pyspark.streaming.DStream"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.streaming.DStream</span></code></a></dt><dd><p>DStream containing predictions.</p>
</dd>
</dl>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt id="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.setInitialWeights">
<code class="sig-name descname">setInitialWeights</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">initialWeights</span><span class="p">:</span> <span class="n">VectorLike</span></em><span class="sig-paren">)</span> &#x2192; StreamingLogisticRegressionWithSGD<a class="reference internal" href="../../_modules/pyspark/mllib/classification.html#StreamingLogisticRegressionWithSGD.setInitialWeights"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.setInitialWeights" title="Permalink to this definition"></a></dt>
<dd><p>Set the initial value of weights.</p>
<p>This must be set before running trainOn and predictOn.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.5.0.</span></p>
</div>
</dd></dl>
<dl class="py method">
<dt id="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.trainOn">
<code class="sig-name descname">trainOn</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">dstream</span><span class="p">:</span> <span class="n">pyspark.streaming.dstream.DStream<span class="p">[</span><a class="reference internal" href="pyspark.mllib.regression.LabeledPoint.html#pyspark.mllib.regression.LabeledPoint" title="pyspark.mllib.regression.LabeledPoint">pyspark.mllib.regression.LabeledPoint</a><span class="p">]</span></span></em><span class="sig-paren">)</span> &#x2192; None<a class="reference internal" href="../../_modules/pyspark/mllib/classification.html#StreamingLogisticRegressionWithSGD.trainOn"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.trainOn" title="Permalink to this definition"></a></dt>
<dd><p>Train the model on the incoming dstream.</p>
<div class="versionadded">
<p><span class="versionmodified added">New in version 1.5.0.</span></p>
</div>
</dd></dl>
</dd></dl>
</div>
</div>
<div class='prev-next-bottom'>
<a class='left-prev' id="prev-link" href="pyspark.mllib.classification.NaiveBayes.html" title="previous page">NaiveBayes</a>
<a class='right-next' id="next-link" href="pyspark.mllib.clustering.BisectingKMeansModel.html" title="next page">BisectingKMeansModel</a>
</div>
</main>
</div>
</div>
<script src="../../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
&copy; Copyright .<br/>
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>