| |
| <!DOCTYPE html> |
| |
| <html> |
| <head> |
|   <!-- Document metadata: encoding first, then the page title. --> |
|   <meta charset="utf-8"> |
|   <title>MLlib (RDD-based) — PySpark 3.3.1 documentation</title> |
| |
|   <!-- Stylesheets. Order is unchanged because later sheets override earlier ones: hashed index bundle, Font Awesome 5.13.0 (its two woff2 faces are preloaded), Open Sans and Lato, then basic.css, pygments.css and pyspark.css. --> |
|   <link rel="stylesheet" href="../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css"> |
|   <link rel="stylesheet" href="../_static/vendor/fontawesome/5.13.0/css/all.min.css"> |
|   <link rel="preload" as="font" type="font/woff2" href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2" crossorigin> |
|   <link rel="preload" as="font" type="font/woff2" href="../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2" crossorigin> |
|   <link rel="stylesheet" href="../_static/vendor/open-sans_all/1.44.1/index.css"> |
|   <link rel="stylesheet" href="../_static/vendor/lato_latin-ext/1.44.1/index.css"> |
|   <link rel="stylesheet" href="../_static/basic.css"> |
|   <link rel="stylesheet" href="../_static/pygments.css"> |
|   <link rel="stylesheet" href="../_static/css/pyspark.css"> |
| |
|   <!-- Early fetch hint for the hashed index script bundle. --> |
|   <link rel="preload" as="script" href="../_static/js/index.3da636dd464baa7582d2.js"> |
| |
|   <!-- Local scripts are loaded synchronously (no async/defer), so their relative order is kept exactly. --> |
|   <script id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script> |
|   <script src="../_static/jquery.js"></script> |
|   <script src="../_static/underscore.js"></script> |
|   <script src="../_static/doctools.js"></script> |
|   <script src="../_static/language_data.js"></script> |
|   <script src="../_static/copybutton.js"></script> |
| |
|   <!-- CDN scripts: require.js 2.3.4 is pinned with a subresource-integrity hash; MathJax 2.7.5 loads asynchronously and is followed by its inline configuration block. --> |
|   <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" crossorigin="anonymous"></script> |
|   <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML" async></script> |
|   <script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script> |
| |
|   <!-- Relational links: search page plus the next/previous documents in reading order. --> |
|   <link rel="search" href="../search.html" title="Search"> |
|   <link rel="next" href="api/pyspark.mllib.classification.LogisticRegressionModel.html" title="LogisticRegressionModel"> |
|   <link rel="prev" href="api/pyspark.streaming.kinesis.InitialPositionInStream.TRIM_HORIZON.html" title="pyspark.streaming.kinesis.InitialPositionInStream.TRIM_HORIZON"> |
| |
|   <!-- Responsive viewport and the language tag read by the docs search. --> |
|   <meta name="viewport" content="width=device-width, initial-scale=1"> |
|   <meta name="docsearch:language" content="en"> |
| </head> |
| <body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80"> |
| |
| <nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main" aria-label="Site navigation"> |
|   <!-- Fixed top bar: brand link, collapse toggle for narrow screens, and the top-level documentation areas. The landmark is labelled so it can be told apart from the other nav elements on the page. --> |
|   <div class="container-xl"> |
| |
|     <!-- The image is the only content of the home link, so its alt text names the link destination rather than describing the picture. --> |
|     <a class="navbar-brand" href="../index.html"> |
|       <img class="logo" src="../_static/spark-logo-reverse.png" alt="PySpark documentation home"> |
|     </a> |
| |
|     <!-- Toggles #navbar-menu when the bar is collapsed. --> |
|     <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation"> |
|       <span class="navbar-toggler-icon"></span> |
|     </button> |
| |
|     <div id="navbar-menu" class="col-lg-9 collapse navbar-collapse"> |
|       <ul id="navbar-main-elements" class="navbar-nav mr-auto"> |
|         <li class="nav-item"> |
|           <a class="nav-link" href="../getting_started/index.html">Getting Started</a> |
|         </li> |
|         <li class="nav-item"> |
|           <a class="nav-link" href="../user_guide/index.html">User Guide</a> |
|         </li> |
|         <!-- "active" marks the area that contains the current page. --> |
|         <li class="nav-item active"> |
|           <a class="nav-link" href="index.html">API Reference</a> |
|         </li> |
|         <li class="nav-item"> |
|           <a class="nav-link" href="../development/index.html">Development</a> |
|         </li> |
|         <li class="nav-item"> |
|           <a class="nav-link" href="../migration_guide/index.html">Migration Guide</a> |
|         </li> |
|       </ul> |
| |
|       <!-- Secondary list is empty in this build; kept so the layout structure is unchanged. --> |
|       <ul class="navbar-nav"> |
|       </ul> |
|     </div> |
|   </div> |
| </nav> |
| |
| |
| <div class="container-xl"> |
| <div class="row"> |
| |
| <div class="col-12 col-md-3 bd-sidebar"> |
|   <!-- Left sidebar: documentation search form followed by the API Reference section list. --> |
|   <form class="bd-search d-flex align-items-center" action="../search.html" method="get"> |
|     <!-- Purely decorative icon; the input carries its own accessible name. --> |
|     <i class="icon fas fa-search" aria-hidden="true"></i> |
|     <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off"> |
|   </form> |
|   <nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation"> |
|     <div class="bd-toc-item active"> |
|       <ul class="nav bd-sidenav"> |
|         <li> |
|           <a href="pyspark.sql/index.html">Spark SQL</a> |
|         </li> |
|         <li> |
|           <a href="pyspark.pandas/index.html">Pandas API on Spark</a> |
|         </li> |
|         <li> |
|           <a href="pyspark.ss/index.html">Structured Streaming</a> |
|         </li> |
|         <li> |
|           <a href="pyspark.ml.html">MLlib (DataFrame-based)</a> |
|         </li> |
|         <li> |
|           <a href="pyspark.streaming.html">Spark Streaming</a> |
|         </li> |
|         <!-- Current page: the empty href resolves to this document; aria-current exposes that state to assistive technology. --> |
|         <li class="active"> |
|           <a href="" aria-current="page">MLlib (RDD-based)</a> |
|         </li> |
|         <li> |
|           <a href="pyspark.html">Spark Core</a> |
|         </li> |
|         <li> |
|           <a href="pyspark.resource.html">Resource Management</a> |
|         </li> |
|       </ul> |
|     <!-- This closing tag was missing: the div was previously closed only implicitly by the nav end tag. --> |
|     </div> |
|   </nav> |
| </div> |
| |
| |
| |
| <div class="d-none d-xl-block col-xl-2 bd-toc"> |
|   <!-- In-page table of contents. The body element's data-target points at #bd-toc-nav, so the id and the nav-link classes are left untouched. --> |
| |
|   <div class="tocsection onthispage pt-5 pb-3"> |
|     <!-- Decorative icon, hidden from assistive technology; the adjacent text is the visible heading. --> |
|     <i class="fas fa-list" aria-hidden="true"></i> On this page |
|   </div> |
| |
|   <!-- Labelled so this landmark can be told apart from the other nav elements on the page. --> |
|   <nav id="bd-toc-nav" aria-label="On this page"> |
|     <ul class="nav section-nav flex-column"> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#classification" class="nav-link">Classification</a> |
|       </li> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#clustering" class="nav-link">Clustering</a> |
|       </li> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#evaluation" class="nav-link">Evaluation</a> |
|       </li> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#feature" class="nav-link">Feature</a> |
|       </li> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#frequency-pattern-mining" class="nav-link">Frequency Pattern Mining</a> |
|       </li> |
|       <!-- The only entry with a nested sub-list. --> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#vector-and-matrix" class="nav-link">Vector and Matrix</a> |
|         <ul class="nav section-nav flex-column"> |
|           <li class="nav-item toc-entry toc-h3"> |
|             <a href="#distributed-representation" class="nav-link">Distributed Representation</a> |
|           </li> |
|         </ul> |
|       </li> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#random" class="nav-link">Random</a> |
|       </li> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#recommendation" class="nav-link">Recommendation</a> |
|       </li> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#regression" class="nav-link">Regression</a> |
|       </li> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#statistics" class="nav-link">Statistics</a> |
|       </li> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#tree" class="nav-link">Tree</a> |
|       </li> |
|       <li class="nav-item toc-entry toc-h2"> |
|         <a href="#utilities" class="nav-link">Utilities</a> |
|       </li> |
|     </ul> |
|   </nav> |
| |
| </div> |
| |
| |
| |
| <main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main"> |
| |
| <div> |
| |
| <div class="section" id="mllib-rdd-based"> |
| <h1>MLlib (RDD-based)<a class="headerlink" href="#mllib-rdd-based" title="Permalink to this headline">¶</a></h1> |
| <div class="section" id="classification"> |
|   <!-- Summary table: one row per pyspark.mllib.classification class, linking to its API page, with a one-line description in the second column. --> |
|   <h2>Classification<a class="headerlink" href="#classification" title="Permalink to this headline">¶</a></h2> |
|   <table class="longtable table autosummary"> |
|     <colgroup> |
|       <col style="width: 10%"> |
|       <col style="width: 90%"> |
|     </colgroup> |
|     <tbody> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.classification.LogisticRegressionModel.html#pyspark.mllib.classification.LogisticRegressionModel" title="pyspark.mllib.classification.LogisticRegressionModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LogisticRegressionModel</span></code></a>(weights, intercept, …)</p></td> |
|         <td><p>Classification model trained using Multinomial/Binary Logistic Regression.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.classification.LogisticRegressionWithSGD.html#pyspark.mllib.classification.LogisticRegressionWithSGD" title="pyspark.mllib.classification.LogisticRegressionWithSGD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LogisticRegressionWithSGD</span></code></a></p></td> |
|         <td><p>Train a classification model for Binary Logistic Regression using Stochastic Gradient Descent.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.classification.LogisticRegressionWithLBFGS.html#pyspark.mllib.classification.LogisticRegressionWithLBFGS" title="pyspark.mllib.classification.LogisticRegressionWithLBFGS"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LogisticRegressionWithLBFGS</span></code></a></p></td> |
|         <td><p>Train a classification model for Multinomial/Binary Logistic Regression using Limited-memory BFGS.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.classification.SVMModel.html#pyspark.mllib.classification.SVMModel" title="pyspark.mllib.classification.SVMModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SVMModel</span></code></a>(weights, intercept)</p></td> |
|         <td><p>Model for Support Vector Machines (SVMs).</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.classification.SVMWithSGD.html#pyspark.mllib.classification.SVMWithSGD" title="pyspark.mllib.classification.SVMWithSGD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SVMWithSGD</span></code></a></p></td> |
|         <td><p>Train a Support Vector Machine (SVM) using Stochastic Gradient Descent.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.classification.NaiveBayesModel.html#pyspark.mllib.classification.NaiveBayesModel" title="pyspark.mllib.classification.NaiveBayesModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">NaiveBayesModel</span></code></a>(labels, pi, theta)</p></td> |
|         <td><p>Model for Naive Bayes classifiers.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.classification.NaiveBayes.html#pyspark.mllib.classification.NaiveBayes" title="pyspark.mllib.classification.NaiveBayes"><code class="xref py py-obj docutils literal notranslate"><span class="pre">NaiveBayes</span></code></a></p></td> |
|         <td><p>Train a Multinomial Naive Bayes model.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.html#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD" title="pyspark.mllib.classification.StreamingLogisticRegressionWithSGD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingLogisticRegressionWithSGD</span></code></a>([…])</p></td> |
|         <td><p>Train or predict a logistic regression model on streaming data.</p></td> |
|       </tr> |
|     </tbody> |
|   </table> |
| </div> |
| <div class="section" id="clustering"> |
|   <!-- Summary table: one row per pyspark.mllib.clustering class, linking to its API page, with a one-line description in the second column. --> |
|   <h2>Clustering<a class="headerlink" href="#clustering" title="Permalink to this headline">¶</a></h2> |
|   <table class="longtable table autosummary"> |
|     <colgroup> |
|       <col style="width: 10%"> |
|       <col style="width: 90%"> |
|     </colgroup> |
|     <tbody> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.BisectingKMeansModel.html#pyspark.mllib.clustering.BisectingKMeansModel" title="pyspark.mllib.clustering.BisectingKMeansModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BisectingKMeansModel</span></code></a>(java_model)</p></td> |
|         <td><p>A clustering model derived from the bisecting k-means method.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.BisectingKMeans.html#pyspark.mllib.clustering.BisectingKMeans" title="pyspark.mllib.clustering.BisectingKMeans"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BisectingKMeans</span></code></a></p></td> |
|         <td><p>A bisecting k-means algorithm based on the paper “A comparison of document clustering techniques” by Steinbach, Karypis, and Kumar, with modification to fit Spark.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.KMeansModel.html#pyspark.mllib.clustering.KMeansModel" title="pyspark.mllib.clustering.KMeansModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">KMeansModel</span></code></a>(centers)</p></td> |
|         <td><p>A clustering model derived from the k-means method.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.KMeans.html#pyspark.mllib.clustering.KMeans" title="pyspark.mllib.clustering.KMeans"><code class="xref py py-obj docutils literal notranslate"><span class="pre">KMeans</span></code></a></p></td> |
|         <td><p>K-means clustering.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.GaussianMixtureModel.html#pyspark.mllib.clustering.GaussianMixtureModel" title="pyspark.mllib.clustering.GaussianMixtureModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GaussianMixtureModel</span></code></a>(java_model)</p></td> |
|         <td><p>A clustering model derived from the Gaussian Mixture Model method.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.GaussianMixture.html#pyspark.mllib.clustering.GaussianMixture" title="pyspark.mllib.clustering.GaussianMixture"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GaussianMixture</span></code></a></p></td> |
|         <td><p>Learning algorithm for Gaussian Mixtures using the expectation-maximization algorithm.</p></td> |
|       </tr> |
|       <!-- The description cell of this row itself links to the PowerIterationClustering API page. --> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.PowerIterationClusteringModel.html#pyspark.mllib.clustering.PowerIterationClusteringModel" title="pyspark.mllib.clustering.PowerIterationClusteringModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PowerIterationClusteringModel</span></code></a>(java_model)</p></td> |
|         <td><p>Model produced by <a class="reference internal" href="api/pyspark.mllib.clustering.PowerIterationClustering.html#pyspark.mllib.clustering.PowerIterationClustering" title="pyspark.mllib.clustering.PowerIterationClustering"><code class="xref py py-class docutils literal notranslate"><span class="pre">PowerIterationClustering</span></code></a>.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.PowerIterationClustering.html#pyspark.mllib.clustering.PowerIterationClustering" title="pyspark.mllib.clustering.PowerIterationClustering"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PowerIterationClustering</span></code></a></p></td> |
|         <td><p>Power Iteration Clustering (PIC), a scalable graph clustering algorithm.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.StreamingKMeans.html#pyspark.mllib.clustering.StreamingKMeans" title="pyspark.mllib.clustering.StreamingKMeans"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingKMeans</span></code></a>([k, decayFactor, timeUnit])</p></td> |
|         <td><p>Provides methods to set k, decayFactor, timeUnit to configure the KMeans algorithm for fitting and predicting on incoming dstreams.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.StreamingKMeansModel.html#pyspark.mllib.clustering.StreamingKMeansModel" title="pyspark.mllib.clustering.StreamingKMeansModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingKMeansModel</span></code></a>(clusterCenters, …)</p></td> |
|         <td><p>Clustering model which can perform an online update of the centroids.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.LDA.html#pyspark.mllib.clustering.LDA" title="pyspark.mllib.clustering.LDA"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LDA</span></code></a></p></td> |
|         <td><p>Train Latent Dirichlet Allocation (LDA) model.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.clustering.LDAModel.html#pyspark.mllib.clustering.LDAModel" title="pyspark.mllib.clustering.LDAModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LDAModel</span></code></a>(java_model)</p></td> |
|         <td><p>A clustering model derived from the LDA method.</p></td> |
|       </tr> |
|     </tbody> |
|   </table> |
| </div> |
| <div class="section" id="evaluation"> |
|   <!-- Summary table: one row per pyspark.mllib.evaluation metrics class, linking to its API page, with a one-line description in the second column. --> |
|   <h2>Evaluation<a class="headerlink" href="#evaluation" title="Permalink to this headline">¶</a></h2> |
|   <table class="longtable table autosummary"> |
|     <colgroup> |
|       <col style="width: 10%"> |
|       <col style="width: 90%"> |
|     </colgroup> |
|     <tbody> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.evaluation.BinaryClassificationMetrics.html#pyspark.mllib.evaluation.BinaryClassificationMetrics" title="pyspark.mllib.evaluation.BinaryClassificationMetrics"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BinaryClassificationMetrics</span></code></a>(scoreAndLabels)</p></td> |
|         <td><p>Evaluator for binary classification.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.evaluation.RegressionMetrics.html#pyspark.mllib.evaluation.RegressionMetrics" title="pyspark.mllib.evaluation.RegressionMetrics"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RegressionMetrics</span></code></a>(predictionAndObservations)</p></td> |
|         <td><p>Evaluator for regression.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.evaluation.MulticlassMetrics.html#pyspark.mllib.evaluation.MulticlassMetrics" title="pyspark.mllib.evaluation.MulticlassMetrics"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MulticlassMetrics</span></code></a>(predictionAndLabels)</p></td> |
|         <td><p>Evaluator for multiclass classification.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.evaluation.RankingMetrics.html#pyspark.mllib.evaluation.RankingMetrics" title="pyspark.mllib.evaluation.RankingMetrics"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RankingMetrics</span></code></a>(predictionAndLabels)</p></td> |
|         <td><p>Evaluator for ranking algorithms.</p></td> |
|       </tr> |
|     </tbody> |
|   </table> |
| </div> |
| <div class="section" id="feature"> |
|   <!-- Summary table: one row per pyspark.mllib.feature class, linking to its API page, with a one-line description in the second column. --> |
|   <h2>Feature<a class="headerlink" href="#feature" title="Permalink to this headline">¶</a></h2> |
|   <table class="longtable table autosummary"> |
|     <colgroup> |
|       <col style="width: 10%"> |
|       <col style="width: 90%"> |
|     </colgroup> |
|     <tbody> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.Normalizer.html#pyspark.mllib.feature.Normalizer" title="pyspark.mllib.feature.Normalizer"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Normalizer</span></code></a>([p])</p></td> |
|         <td><p>Normalizes samples individually to unit L<sup>p</sup> norm</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.StandardScalerModel.html#pyspark.mllib.feature.StandardScalerModel" title="pyspark.mllib.feature.StandardScalerModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StandardScalerModel</span></code></a>(java_model)</p></td> |
|         <td><p>Represents a StandardScaler model that can transform vectors.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.StandardScaler.html#pyspark.mllib.feature.StandardScaler" title="pyspark.mllib.feature.StandardScaler"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StandardScaler</span></code></a>([withMean, withStd])</p></td> |
|         <td><p>Standardizes features by removing the mean and scaling to unit variance using column summary statistics on the samples in the training set.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.HashingTF.html#pyspark.mllib.feature.HashingTF" title="pyspark.mllib.feature.HashingTF"><code class="xref py py-obj docutils literal notranslate"><span class="pre">HashingTF</span></code></a>([numFeatures])</p></td> |
|         <td><p>Maps a sequence of terms to their term frequencies using the hashing trick.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.IDFModel.html#pyspark.mllib.feature.IDFModel" title="pyspark.mllib.feature.IDFModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IDFModel</span></code></a>(java_model)</p></td> |
|         <td><p>Represents an IDF model that can transform term frequency vectors.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.IDF.html#pyspark.mllib.feature.IDF" title="pyspark.mllib.feature.IDF"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IDF</span></code></a>([minDocFreq])</p></td> |
|         <td><p>Inverse document frequency (IDF).</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.Word2Vec.html#pyspark.mllib.feature.Word2Vec" title="pyspark.mllib.feature.Word2Vec"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Word2Vec</span></code></a>()</p></td> |
|         <td><p>Word2Vec creates vector representation of words in a text corpus.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.Word2VecModel.html#pyspark.mllib.feature.Word2VecModel" title="pyspark.mllib.feature.Word2VecModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Word2VecModel</span></code></a>(java_model)</p></td> |
|         <td><p>class for Word2Vec model</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.ChiSqSelector.html#pyspark.mllib.feature.ChiSqSelector" title="pyspark.mllib.feature.ChiSqSelector"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ChiSqSelector</span></code></a>([numTopFeatures, …])</p></td> |
|         <td><p>Creates a ChiSquared feature selector.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.ChiSqSelectorModel.html#pyspark.mllib.feature.ChiSqSelectorModel" title="pyspark.mllib.feature.ChiSqSelectorModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ChiSqSelectorModel</span></code></a>(java_model)</p></td> |
|         <td><p>Represents a Chi Squared selector model.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.feature.ElementwiseProduct.html#pyspark.mllib.feature.ElementwiseProduct" title="pyspark.mllib.feature.ElementwiseProduct"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ElementwiseProduct</span></code></a>(scalingVector)</p></td> |
|         <td><p>Scales each column of the vector, with the supplied weight vector.</p></td> |
|       </tr> |
|     </tbody> |
|   </table> |
| </div> |
| <div class="section" id="frequency-pattern-mining"> |
|   <!-- Summary table: one row per pyspark.mllib.fpm class, linking to its API page, with a one-line description in the second column. --> |
|   <h2>Frequency Pattern Mining<a class="headerlink" href="#frequency-pattern-mining" title="Permalink to this headline">¶</a></h2> |
|   <table class="longtable table autosummary"> |
|     <colgroup> |
|       <col style="width: 10%"> |
|       <col style="width: 90%"> |
|     </colgroup> |
|     <tbody> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.fpm.FPGrowth.html#pyspark.mllib.fpm.FPGrowth" title="pyspark.mllib.fpm.FPGrowth"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FPGrowth</span></code></a></p></td> |
|         <td><p>A Parallel FP-growth algorithm to mine frequent itemsets.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.fpm.FPGrowthModel.html#pyspark.mllib.fpm.FPGrowthModel" title="pyspark.mllib.fpm.FPGrowthModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FPGrowthModel</span></code></a>(java_model)</p></td> |
|         <td><p>A FP-Growth model for mining frequent itemsets using the Parallel FP-Growth algorithm.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.fpm.PrefixSpan.html#pyspark.mllib.fpm.PrefixSpan" title="pyspark.mllib.fpm.PrefixSpan"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PrefixSpan</span></code></a></p></td> |
|         <td><p>A parallel PrefixSpan algorithm to mine frequent sequential patterns.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.fpm.PrefixSpanModel.html#pyspark.mllib.fpm.PrefixSpanModel" title="pyspark.mllib.fpm.PrefixSpanModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PrefixSpanModel</span></code></a>(java_model)</p></td> |
|         <td><p>Model fitted by PrefixSpan</p></td> |
|       </tr> |
|     </tbody> |
|   </table> |
| </div> |
| <div class="section" id="vector-and-matrix"> |
|   <!-- Two summary tables: pyspark.mllib.linalg classes first, then the nested "Distributed Representation" subsection for pyspark.mllib.linalg.distributed. The Vector, Matrix and Matrices rows have an empty description cell in this build. --> |
|   <h2>Vector and Matrix<a class="headerlink" href="#vector-and-matrix" title="Permalink to this headline">¶</a></h2> |
|   <table class="longtable table autosummary"> |
|     <colgroup> |
|       <col style="width: 10%"> |
|       <col style="width: 90%"> |
|     </colgroup> |
|     <tbody> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.Vector.html#pyspark.mllib.linalg.Vector" title="pyspark.mllib.linalg.Vector"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Vector</span></code></a></p></td> |
|         <td><p></p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.DenseVector.html#pyspark.mllib.linalg.DenseVector" title="pyspark.mllib.linalg.DenseVector"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DenseVector</span></code></a>(ar)</p></td> |
|         <td><p>A dense vector represented by a value array.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.SparseVector.html#pyspark.mllib.linalg.SparseVector" title="pyspark.mllib.linalg.SparseVector"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparseVector</span></code></a>(size, *args)</p></td> |
|         <td><p>A simple sparse vector class for passing data to MLlib.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.Vectors.html#pyspark.mllib.linalg.Vectors" title="pyspark.mllib.linalg.Vectors"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Vectors</span></code></a></p></td> |
|         <td><p>Factory methods for working with vectors.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.Matrix.html#pyspark.mllib.linalg.Matrix" title="pyspark.mllib.linalg.Matrix"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Matrix</span></code></a>(numRows, numCols[, isTransposed])</p></td> |
|         <td><p></p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.DenseMatrix.html#pyspark.mllib.linalg.DenseMatrix" title="pyspark.mllib.linalg.DenseMatrix"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DenseMatrix</span></code></a>(numRows, numCols, values[, …])</p></td> |
|         <td><p>Column-major dense matrix.</p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.SparseMatrix.html#pyspark.mllib.linalg.SparseMatrix" title="pyspark.mllib.linalg.SparseMatrix"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SparseMatrix</span></code></a>(numRows, numCols, colPtrs, …)</p></td> |
|         <td><p>Sparse Matrix stored in CSC format.</p></td> |
|       </tr> |
|       <tr class="row-even"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.Matrices.html#pyspark.mllib.linalg.Matrices" title="pyspark.mllib.linalg.Matrices"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Matrices</span></code></a></p></td> |
|         <td><p></p></td> |
|       </tr> |
|       <tr class="row-odd"> |
|         <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.QRDecomposition.html#pyspark.mllib.linalg.QRDecomposition" title="pyspark.mllib.linalg.QRDecomposition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">QRDecomposition</span></code></a>(Q, R)</p></td> |
|         <td><p>Represents QR factors.</p></td> |
|       </tr> |
|     </tbody> |
|   </table> |
|   <div class="section" id="distributed-representation"> |
|     <h3>Distributed Representation<a class="headerlink" href="#distributed-representation" title="Permalink to this headline">¶</a></h3> |
|     <table class="longtable table autosummary"> |
|       <colgroup> |
|         <col style="width: 10%"> |
|         <col style="width: 90%"> |
|       </colgroup> |
|       <tbody> |
|         <tr class="row-odd"> |
|           <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.distributed.BlockMatrix.html#pyspark.mllib.linalg.distributed.BlockMatrix" title="pyspark.mllib.linalg.distributed.BlockMatrix"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BlockMatrix</span></code></a>(blocks, rowsPerBlock, colsPerBlock)</p></td> |
|           <td><p>Represents a distributed matrix in blocks of local matrices.</p></td> |
|         </tr> |
|         <tr class="row-even"> |
|           <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.distributed.CoordinateMatrix.html#pyspark.mllib.linalg.distributed.CoordinateMatrix" title="pyspark.mllib.linalg.distributed.CoordinateMatrix"><code class="xref py py-obj docutils literal notranslate"><span class="pre">CoordinateMatrix</span></code></a>(entries[, numRows, numCols])</p></td> |
|           <td><p>Represents a matrix in coordinate format.</p></td> |
|         </tr> |
|         <tr class="row-odd"> |
|           <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.distributed.DistributedMatrix.html#pyspark.mllib.linalg.distributed.DistributedMatrix" title="pyspark.mllib.linalg.distributed.DistributedMatrix"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DistributedMatrix</span></code></a></p></td> |
|           <td><p>Represents a distributively stored matrix backed by one or more RDDs.</p></td> |
|         </tr> |
|         <tr class="row-even"> |
|           <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.distributed.IndexedRow.html#pyspark.mllib.linalg.distributed.IndexedRow" title="pyspark.mllib.linalg.distributed.IndexedRow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IndexedRow</span></code></a>(index, vector)</p></td> |
|           <td><p>Represents a row of an IndexedRowMatrix.</p></td> |
|         </tr> |
|         <tr class="row-odd"> |
|           <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.distributed.IndexedRowMatrix.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix" title="pyspark.mllib.linalg.distributed.IndexedRowMatrix"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IndexedRowMatrix</span></code></a>(rows[, numRows, numCols])</p></td> |
|           <td><p>Represents a row-oriented distributed Matrix with indexed rows.</p></td> |
|         </tr> |
|         <tr class="row-even"> |
|           <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.distributed.MatrixEntry.html#pyspark.mllib.linalg.distributed.MatrixEntry" title="pyspark.mllib.linalg.distributed.MatrixEntry"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MatrixEntry</span></code></a>(i, j, value)</p></td> |
|           <td><p>Represents an entry of a CoordinateMatrix.</p></td> |
|         </tr> |
|         <tr class="row-odd"> |
|           <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.distributed.RowMatrix.html#pyspark.mllib.linalg.distributed.RowMatrix" title="pyspark.mllib.linalg.distributed.RowMatrix"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RowMatrix</span></code></a>(rows[, numRows, numCols])</p></td> |
|           <td><p>Represents a row-oriented distributed Matrix with no meaningful row indices.</p></td> |
|         </tr> |
|         <tr class="row-even"> |
|           <td><p><a class="reference internal" href="api/pyspark.mllib.linalg.distributed.SingularValueDecomposition.html#pyspark.mllib.linalg.distributed.SingularValueDecomposition" title="pyspark.mllib.linalg.distributed.SingularValueDecomposition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SingularValueDecomposition</span></code></a>(java_model)</p></td> |
|           <td><p>Represents singular value decomposition (SVD) factors.</p></td> |
|         </tr> |
|       </tbody> |
|     </table> |
|   </div> |
| </div> |
| <div class="section" id="random"> |
| <h2>Random<a class="headerlink" href="#random" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.random.RandomRDDs.html#pyspark.mllib.random.RandomRDDs" title="pyspark.mllib.random.RandomRDDs"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RandomRDDs</span></code></a></p></td> |
| <td><p>Generator methods for creating RDDs composed of i.i.d. samples from some distribution.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="section" id="recommendation"> |
| <h2>Recommendation<a class="headerlink" href="#recommendation" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.recommendation.MatrixFactorizationModel.html#pyspark.mllib.recommendation.MatrixFactorizationModel" title="pyspark.mllib.recommendation.MatrixFactorizationModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MatrixFactorizationModel</span></code></a>(java_model)</p></td> |
| <td><p>A matrix factorization model trained by regularized alternating least-squares.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.recommendation.ALS.html#pyspark.mllib.recommendation.ALS" title="pyspark.mllib.recommendation.ALS"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ALS</span></code></a></p></td> |
| <td><p>Alternating Least Squares matrix factorization.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.recommendation.Rating.html#pyspark.mllib.recommendation.Rating" title="pyspark.mllib.recommendation.Rating"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Rating</span></code></a></p></td> |
| <td><p>Represents a (user, product, rating) tuple.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="section" id="regression"> |
| <h2>Regression<a class="headerlink" href="#regression" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.LabeledPoint.html#pyspark.mllib.regression.LabeledPoint" title="pyspark.mllib.regression.LabeledPoint"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LabeledPoint</span></code></a>(label, features)</p></td> |
| <td><p>Class that represents the features and labels of a data point.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.LinearModel.html#pyspark.mllib.regression.LinearModel" title="pyspark.mllib.regression.LinearModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LinearModel</span></code></a>(weights, intercept)</p></td> |
| <td><p>A linear model that has a vector of coefficients and an intercept.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.LinearRegressionModel.html#pyspark.mllib.regression.LinearRegressionModel" title="pyspark.mllib.regression.LinearRegressionModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LinearRegressionModel</span></code></a>(weights, intercept)</p></td> |
| <td><p>A linear regression model derived from a least-squares fit.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.LinearRegressionWithSGD.html#pyspark.mllib.regression.LinearRegressionWithSGD" title="pyspark.mllib.regression.LinearRegressionWithSGD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LinearRegressionWithSGD</span></code></a></p></td> |
| <td><p>Train a linear regression model with no regularization using Stochastic Gradient Descent.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.RidgeRegressionModel.html#pyspark.mllib.regression.RidgeRegressionModel" title="pyspark.mllib.regression.RidgeRegressionModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RidgeRegressionModel</span></code></a>(weights, intercept)</p></td> |
| <td><p>A linear regression model derived from a least-squares fit with an L2 penalty term.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.RidgeRegressionWithSGD.html#pyspark.mllib.regression.RidgeRegressionWithSGD" title="pyspark.mllib.regression.RidgeRegressionWithSGD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RidgeRegressionWithSGD</span></code></a></p></td> |
| <td><p>Train a regression model with L2-regularization using Stochastic Gradient Descent.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.LassoModel.html#pyspark.mllib.regression.LassoModel" title="pyspark.mllib.regression.LassoModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LassoModel</span></code></a>(weights, intercept)</p></td> |
| <td><p>A linear regression model derived from a least-squares fit with an L1 penalty term.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.LassoWithSGD.html#pyspark.mllib.regression.LassoWithSGD" title="pyspark.mllib.regression.LassoWithSGD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LassoWithSGD</span></code></a></p></td> |
| <td><p>Train a regression model with L1-regularization using Stochastic Gradient Descent.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.IsotonicRegressionModel.html#pyspark.mllib.regression.IsotonicRegressionModel" title="pyspark.mllib.regression.IsotonicRegressionModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IsotonicRegressionModel</span></code></a>(boundaries, …)</p></td> |
| <td><p>Regression model for isotonic regression.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.IsotonicRegression.html#pyspark.mllib.regression.IsotonicRegression" title="pyspark.mllib.regression.IsotonicRegression"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IsotonicRegression</span></code></a></p></td> |
| <td><p>Isotonic regression.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.StreamingLinearAlgorithm.html#pyspark.mllib.regression.StreamingLinearAlgorithm" title="pyspark.mllib.regression.StreamingLinearAlgorithm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingLinearAlgorithm</span></code></a>(model)</p></td> |
| <td><p>Base class that has to be inherited by any StreamingLinearAlgorithm.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.regression.StreamingLinearRegressionWithSGD.html#pyspark.mllib.regression.StreamingLinearRegressionWithSGD" title="pyspark.mllib.regression.StreamingLinearRegressionWithSGD"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StreamingLinearRegressionWithSGD</span></code></a>([stepSize, …])</p></td> |
| <td><p>Train or predict a linear regression model on streaming data.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="section" id="statistics"> |
| <h2>Statistics<a class="headerlink" href="#statistics" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.stat.Statistics.html#pyspark.mllib.stat.Statistics" title="pyspark.mllib.stat.Statistics"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Statistics</span></code></a></p></td> |
| <td><p>Static methods for computing column summary statistics, correlations, and hypothesis tests on RDDs.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.stat.MultivariateStatisticalSummary.html#pyspark.mllib.stat.MultivariateStatisticalSummary" title="pyspark.mllib.stat.MultivariateStatisticalSummary"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MultivariateStatisticalSummary</span></code></a>(java_model)</p></td> |
| <td><p>Trait for multivariate statistical summary of a data matrix.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.stat.ChiSqTestResult.html#pyspark.mllib.stat.ChiSqTestResult" title="pyspark.mllib.stat.ChiSqTestResult"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ChiSqTestResult</span></code></a>(java_model)</p></td> |
| <td><p>Contains test results for the chi-squared hypothesis test.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.stat.MultivariateGaussian.html#pyspark.mllib.stat.MultivariateGaussian" title="pyspark.mllib.stat.MultivariateGaussian"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MultivariateGaussian</span></code></a></p></td> |
| <td><p>Represents a (mu, sigma) tuple.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.stat.KernelDensity.html#pyspark.mllib.stat.KernelDensity" title="pyspark.mllib.stat.KernelDensity"><code class="xref py py-obj docutils literal notranslate"><span class="pre">KernelDensity</span></code></a>()</p></td> |
| <td><p>Estimate probability density at required points given an RDD of samples from the population.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.stat.ChiSqTestResult.html#pyspark.mllib.stat.ChiSqTestResult" title="pyspark.mllib.stat.ChiSqTestResult"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ChiSqTestResult</span></code></a>(java_model)</p></td> |
| <td><p>Contains test results for the chi-squared hypothesis test.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.stat.KolmogorovSmirnovTestResult.html#pyspark.mllib.stat.KolmogorovSmirnovTestResult" title="pyspark.mllib.stat.KolmogorovSmirnovTestResult"><code class="xref py py-obj docutils literal notranslate"><span class="pre">KolmogorovSmirnovTestResult</span></code></a>(java_model)</p></td> |
| <td><p>Contains test results for the Kolmogorov-Smirnov test.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="section" id="tree"> |
| <h2>Tree<a class="headerlink" href="#tree" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.tree.DecisionTreeModel.html#pyspark.mllib.tree.DecisionTreeModel" title="pyspark.mllib.tree.DecisionTreeModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DecisionTreeModel</span></code></a>(java_model)</p></td> |
| <td><p>A decision tree model for classification or regression.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.tree.DecisionTree.html#pyspark.mllib.tree.DecisionTree" title="pyspark.mllib.tree.DecisionTree"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DecisionTree</span></code></a></p></td> |
| <td><p>Learning algorithm for a decision tree model for classification or regression.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.tree.RandomForestModel.html#pyspark.mllib.tree.RandomForestModel" title="pyspark.mllib.tree.RandomForestModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RandomForestModel</span></code></a>(java_model)</p></td> |
| <td><p>Represents a random forest model.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.tree.RandomForest.html#pyspark.mllib.tree.RandomForest" title="pyspark.mllib.tree.RandomForest"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RandomForest</span></code></a></p></td> |
| <td><p>Learning algorithm for a random forest model for classification or regression.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.tree.GradientBoostedTreesModel.html#pyspark.mllib.tree.GradientBoostedTreesModel" title="pyspark.mllib.tree.GradientBoostedTreesModel"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GradientBoostedTreesModel</span></code></a>(java_model)</p></td> |
| <td><p>Represents a gradient-boosted tree model.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.tree.GradientBoostedTrees.html#pyspark.mllib.tree.GradientBoostedTrees" title="pyspark.mllib.tree.GradientBoostedTrees"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GradientBoostedTrees</span></code></a></p></td> |
| <td><p>Learning algorithm for a gradient boosted trees model for classification or regression.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| <div class="section" id="utilities"> |
| <h2>Utilities<a class="headerlink" href="#utilities" title="Permalink to this headline">¶</a></h2> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.util.JavaLoader.html#pyspark.mllib.util.JavaLoader" title="pyspark.mllib.util.JavaLoader"><code class="xref py py-obj docutils literal notranslate"><span class="pre">JavaLoader</span></code></a></p></td> |
| <td><p>Mixin for classes which can load saved models using its Scala implementation.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.util.JavaSaveable.html#pyspark.mllib.util.JavaSaveable" title="pyspark.mllib.util.JavaSaveable"><code class="xref py py-obj docutils literal notranslate"><span class="pre">JavaSaveable</span></code></a></p></td> |
| <td><p>Mixin for models that provide save() through their Scala implementation.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.util.LinearDataGenerator.html#pyspark.mllib.util.LinearDataGenerator" title="pyspark.mllib.util.LinearDataGenerator"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LinearDataGenerator</span></code></a></p></td> |
| <td><p>Utils for generating linear data.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.util.Loader.html#pyspark.mllib.util.Loader" title="pyspark.mllib.util.Loader"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Loader</span></code></a></p></td> |
| <td><p>Mixin for classes which can load saved models from files.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="api/pyspark.mllib.util.MLUtils.html#pyspark.mllib.util.MLUtils" title="pyspark.mllib.util.MLUtils"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MLUtils</span></code></a></p></td> |
| <td><p>Helper methods to load, save and pre-process data used in MLlib.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="api/pyspark.mllib.util.Saveable.html#pyspark.mllib.util.Saveable" title="pyspark.mllib.util.Saveable"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Saveable</span></code></a></p></td> |
| <td><p>Mixin for models and transformers which may be saved as files.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| </div> |
| </div> |
| |
| |
| </div> |
| |
| |
| <div class='prev-next-bottom'> |
| |
| <a class='left-prev' id="prev-link" href="api/pyspark.streaming.kinesis.InitialPositionInStream.TRIM_HORIZON.html" title="previous page">pyspark.streaming.kinesis.InitialPositionInStream.TRIM_HORIZON</a> |
| <a class='right-next' id="next-link" href="api/pyspark.mllib.classification.LogisticRegressionModel.html" title="next page">LogisticRegressionModel</a> |
| |
| </div> |
| |
| </main> |
| |
| |
| </div> |
| </div> |
| |
| |
| <script src="../_static/js/index.3da636dd464baa7582d2.js"></script> |
| |
| |
| <footer class="footer mt-5 mt-md-0"> |
| <div class="container"> |
| <p> |
| © Copyright .<br/> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br/> |
| </p> |
| </div> |
| </footer> |
| </body> |
| </html> |