blob: 58cac1c238a24ac57cbe49d6687e80ce8a125a54 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Vectors &#8212; PySpark 3.3.4 documentation</title>
<link rel="stylesheet" href="../../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
<link rel="stylesheet"
href="../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet"
href="../../_static/vendor/open-sans_all/1.44.1/index.css">
<link rel="stylesheet"
href="../../_static/vendor/lato_latin-ext/1.44.1/index.css">
<link rel="stylesheet" href="../../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../../_static/js/index.3da636dd464baa7582d2.js">
<script id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
<script src="../../_static/jquery.js"></script>
<script src="../../_static/underscore.js"></script>
<script src="../../_static/doctools.js"></script>
<script src="../../_static/language_data.js"></script>
<script src="../../_static/clipboard.min.js"></script>
<script src="../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.linalg.Vectors.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="Matrix" href="pyspark.ml.linalg.Matrix.html" />
<link rel="prev" title="SparseVector" href="pyspark.ml.linalg.SparseVector.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en" />
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main">
<div class="container-xl">
<a class="navbar-brand" href="../../index.html">
<img src="../../_static/spark-logo-reverse.png" class="logo" alt="logo" />
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
<ul id="navbar-main-elements" class="navbar-nav mr-auto">
<li class="nav-item ">
<a class="nav-link" href="../../getting_started/index.html">Getting Started</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../user_guide/index.html">User Guide</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="../index.html">API Reference</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../development/index.html">Development</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../migration_guide/index.html">Migration Guide</a>
</li>
</ul>
<ul class="navbar-nav">
</ul>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="">
<a href="../pyspark.sql/index.html">Spark SQL</a>
</li>
<li class="">
<a href="../pyspark.pandas/index.html">Pandas API on Spark</a>
</li>
<li class="">
<a href="../pyspark.ss/index.html">Structured Streaming</a>
</li>
<li class="active">
<a href="../pyspark.ml.html">MLlib (DataFrame-based)</a>
</li>
<li class="">
<a href="../pyspark.streaming.html">Spark Streaming</a>
</li>
<li class="">
<a href="../pyspark.mllib.html">MLlib (RDD-based)</a>
</li>
<li class="">
<a href="../pyspark.html">Spark Core</a>
</li>
<li class="">
<a href="../pyspark.resource.html">Resource Management</a>
</li>
</ul>
</nav>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<nav id="bd-toc-nav">
<ul class="nav section-nav flex-column">
</ul>
</nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="vectors">
<h1>Vectors<a class="headerlink" href="#vectors" title="Permalink to this headline"></a></h1>
<dl class="py class">
<dt id="pyspark.ml.linalg.Vectors">
<em class="property">class </em><code class="sig-prename descclassname">pyspark.ml.linalg.</code><code class="sig-name descname">Vectors</code><a class="reference internal" href="../../_modules/pyspark/ml/linalg.html#Vectors"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.linalg.Vectors" title="Permalink to this definition"></a></dt>
<dd><p>Factory methods for working with vectors.</p>
<p class="rubric">Notes</p>
<p>Dense vectors are simply represented as NumPy array objects,
so there is no need to convert them for use in MLlib. For sparse vectors,
the factory methods in this class create an MLlib-compatible type, or users
can pass in SciPy’s <cite>scipy.sparse</cite> column vectors.</p>
<p class="rubric">Methods</p>
<table class="longtable table autosummary">
<colgroup>
<col style="width: 10%" />
<col style="width: 90%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.linalg.Vectors.dense" title="pyspark.ml.linalg.Vectors.dense"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dense</span></code></a>(*elements)</p></td>
<td><p>Create a dense vector of 64-bit floats from a Python list or numbers.</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.linalg.Vectors.norm" title="pyspark.ml.linalg.Vectors.norm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">norm</span></code></a>(vector, p)</p></td>
<td><p>Find norm of the given vector.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.linalg.Vectors.sparse" title="pyspark.ml.linalg.Vectors.sparse"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sparse</span></code></a>(size, *args)</p></td>
<td><p>Create a sparse vector, using either a dictionary, a list of (index, value) pairs, or two separate arrays of indices and values (sorted by index).</p></td>
</tr>
<tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.linalg.Vectors.squared_distance" title="pyspark.ml.linalg.Vectors.squared_distance"><code class="xref py py-obj docutils literal notranslate"><span class="pre">squared_distance</span></code></a>(v1, v2)</p></td>
<td><p>Squared distance between two vectors.</p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.linalg.Vectors.zeros" title="pyspark.ml.linalg.Vectors.zeros"><code class="xref py py-obj docutils literal notranslate"><span class="pre">zeros</span></code></a>(size)</p></td>
<td><p></p></td>
</tr>
</tbody>
</table>
<p class="rubric">Methods Documentation</p>
<dl class="py method">
<dt id="pyspark.ml.linalg.Vectors.dense">
<em class="property">static </em><code class="sig-name descname">dense</code><span class="sig-paren">(</span><em class="sig-param"><span class="o">*</span><span class="n">elements</span><span class="p">:</span> <span class="n">Union<span class="p">[</span>float<span class="p">, </span>bytes<span class="p">, </span>numpy.ndarray<span class="p">, </span>Iterable<span class="p">[</span>float<span class="p">]</span><span class="p">]</span></span></em><span class="sig-paren">)</span> &#x2192; <a class="reference internal" href="pyspark.ml.linalg.DenseVector.html#pyspark.ml.linalg.DenseVector" title="pyspark.ml.linalg.DenseVector">pyspark.ml.linalg.DenseVector</a><a class="reference internal" href="../../_modules/pyspark/ml/linalg.html#Vectors.dense"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.linalg.Vectors.dense" title="Permalink to this definition"></a></dt>
<dd><p>Create a dense vector of 64-bit floats from a Python list or numbers.</p>
<p class="rubric">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">Vectors</span><span class="o">.</span><span class="n">dense</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="go">DenseVector([1.0, 2.0, 3.0])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">Vectors</span><span class="o">.</span><span class="n">dense</span><span class="p">(</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">2.0</span><span class="p">)</span>
<span class="go">DenseVector([1.0, 2.0])</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt id="pyspark.ml.linalg.Vectors.norm">
<em class="property">static </em><code class="sig-name descname">norm</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">vector</span><span class="p">:</span> <span class="n"><a class="reference internal" href="pyspark.ml.linalg.Vector.html#pyspark.ml.linalg.Vector" title="pyspark.ml.linalg.Vector">pyspark.ml.linalg.Vector</a></span></em>, <em class="sig-param"><span class="n">p</span><span class="p">:</span> <span class="n">NormType</span></em><span class="sig-paren">)</span> &#x2192; numpy.float64<a class="reference internal" href="../../_modules/pyspark/ml/linalg.html#Vectors.norm"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.linalg.Vectors.norm" title="Permalink to this definition"></a></dt>
<dd><p>Find norm of the given vector.</p>
</dd></dl>
<dl class="py method">
<dt id="pyspark.ml.linalg.Vectors.sparse">
<em class="property">static </em><code class="sig-name descname">sparse</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">size</span><span class="p">:</span> <span class="n">int</span></em>, <em class="sig-param"><span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Union<span class="p">[</span>bytes<span class="p">, </span>Tuple<span class="p">[</span>int<span class="p">, </span>float<span class="p">]</span><span class="p">, </span>Iterable<span class="p">[</span>float<span class="p">]</span><span class="p">, </span>Iterable<span class="p">[</span>Tuple<span class="p">[</span>int<span class="p">, </span>float<span class="p">]</span><span class="p">]</span><span class="p">, </span>Dict<span class="p">[</span>int<span class="p">, </span>float<span class="p">]</span><span class="p">]</span></span></em><span class="sig-paren">)</span> &#x2192; <a class="reference internal" href="pyspark.ml.linalg.SparseVector.html#pyspark.ml.linalg.SparseVector" title="pyspark.ml.linalg.SparseVector">pyspark.ml.linalg.SparseVector</a><a class="reference internal" href="../../_modules/pyspark/ml/linalg.html#Vectors.sparse"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.linalg.Vectors.sparse" title="Permalink to this definition"></a></dt>
<dd><p>Create a sparse vector, using either a dictionary, a list of
(index, value) pairs, or two separate arrays of indices and
values (sorted by index).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><dl class="simple">
<dt><strong>size</strong><span class="classifier">int</span></dt><dd><p>Size of the vector.</p>
</dd>
<dt><strong>args</strong></dt><dd><p>Non-zero entries, as a dictionary, list of tuples,
or two sorted lists containing indices and values.</p>
</dd>
</dl>
</dd>
</dl>
<p class="rubric">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">Vectors</span><span class="o">.</span><span class="n">sparse</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span> <span class="p">{</span><span class="mi">1</span><span class="p">:</span> <span class="mf">1.0</span><span class="p">,</span> <span class="mi">3</span><span class="p">:</span> <span class="mf">5.5</span><span class="p">})</span>
<span class="go">SparseVector(4, {1: 1.0, 3: 5.5})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">Vectors</span><span class="o">.</span><span class="n">sparse</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span> <span class="p">[(</span><span class="mi">1</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">),</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mf">5.5</span><span class="p">)])</span>
<span class="go">SparseVector(4, {1: 1.0, 3: 5.5})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">Vectors</span><span class="o">.</span><span class="n">sparse</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="p">[</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">5.5</span><span class="p">])</span>
<span class="go">SparseVector(4, {1: 1.0, 3: 5.5})</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt id="pyspark.ml.linalg.Vectors.squared_distance">
<em class="property">static </em><code class="sig-name descname">squared_distance</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">v1</span><span class="p">:</span> <span class="n"><a class="reference internal" href="pyspark.ml.linalg.Vector.html#pyspark.ml.linalg.Vector" title="pyspark.ml.linalg.Vector">pyspark.ml.linalg.Vector</a></span></em>, <em class="sig-param"><span class="n">v2</span><span class="p">:</span> <span class="n"><a class="reference internal" href="pyspark.ml.linalg.Vector.html#pyspark.ml.linalg.Vector" title="pyspark.ml.linalg.Vector">pyspark.ml.linalg.Vector</a></span></em><span class="sig-paren">)</span> &#x2192; numpy.float64<a class="reference internal" href="../../_modules/pyspark/ml/linalg.html#Vectors.squared_distance"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.linalg.Vectors.squared_distance" title="Permalink to this definition"></a></dt>
<dd><p>Squared distance between two vectors.
v1 and v2 can be of type SparseVector, DenseVector, np.ndarray
or array.array.</p>
<p class="rubric">Examples</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">Vectors</span><span class="o">.</span><span class="n">sparse</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span> <span class="p">[(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">)])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">Vectors</span><span class="o">.</span><span class="n">dense</span><span class="p">([</span><span class="mi">2</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">squared_distance</span><span class="p">(</span><span class="n">b</span><span class="p">)</span>
<span class="go">51.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="py method">
<dt id="pyspark.ml.linalg.Vectors.zeros">
<em class="property">static </em><code class="sig-name descname">zeros</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">size</span><span class="p">:</span> <span class="n">int</span></em><span class="sig-paren">)</span> &#x2192; <a class="reference internal" href="pyspark.ml.linalg.DenseVector.html#pyspark.ml.linalg.DenseVector" title="pyspark.ml.linalg.DenseVector">pyspark.ml.linalg.DenseVector</a><a class="reference internal" href="../../_modules/pyspark/ml/linalg.html#Vectors.zeros"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.linalg.Vectors.zeros" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</div>
</div>
<div class='prev-next-bottom'>
<a class='left-prev' id="prev-link" href="pyspark.ml.linalg.SparseVector.html" title="previous page">SparseVector</a>
<a class='right-next' id="next-link" href="pyspark.ml.linalg.Matrix.html" title="next page">Matrix</a>
</div>
</main>
</div>
</div>
<script src="../../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
&copy; Copyright .<br/>
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>