blob: a3396108ed843913f304e531fe6bae6642447672 [file] [log] [blame]
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>QuickStart &mdash; SystemDS 3.1.0-dev documentation</title>
<link rel="stylesheet" href="../static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../static/pygments.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../static/js/html5shiv.min.js"></script>
<![endif]-->
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../static/documentation_options.js"></script>
<script src="../static/jquery.js"></script>
<script src="../static/underscore.js"></script>
<script src="../static/doctools.js"></script>
<script src="../static/language_data.js"></script>
<script type="text/javascript" src="../static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Federated Environment" href="../guide/federated.html" />
<link rel="prev" title="Install SystemDS" href="install.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home" title="Documentation Home"> SystemDS
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Getting Started:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="install.html">Install SystemDS</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">QuickStart</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#matrix-operations">Matrix Operations</a></li>
<li class="toctree-l2"><a class="reference internal" href="#more-complex-operations">More complex operations</a></li>
</ul>
</li>
</ul>
<p class="caption"><span class="caption-text">Guides</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../guide/federated.html">Federated Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../guide/algorithms_basics.html">Built-in Algorithms</a></li>
<li class="toctree-l1"><a class="reference internal" href="../guide/python_end_to_end_tut.html">Python end-to-end tutorial</a></li>
</ul>
<p class="caption"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../api/context/systemds_context.html">SystemDSContext</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/algorithms.html">Algorithms</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/node/matrix.html">Matrix</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/node/frame.html">Frame</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/node/list.html">List</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/node/scalar.html">Scalar</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/node/source.html">Source</a></li>
</ul>
<p class="caption"><span class="caption-text">Internals API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/operation_node.html">Operation Node</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/script_building/dag.html">Dag</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/script_building/script.html">Script</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/utils/converters.html">Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/utils/helpers.html">Helpers</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">SystemDS</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a> &raquo;</li>
<li>QuickStart</li>
<li class="wy-breadcrumbs-aside">
<a href="../sources/getting_started/simple_examples.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="quickstart">
<h1>QuickStart<a class="headerlink" href="#quickstart" title="Permalink to this headline"></a></h1>
<p>Let’s take a look at some code examples.</p>
<div class="section" id="matrix-operations">
<h2>Matrix Operations<a class="headerlink" href="#matrix-operations" title="Permalink to this headline"></a></h2>
<p>Making use of SystemDS, let us multiply a matrix by a scalar:</p>
<pre class="code python literal-block"><code><span class="keyword namespace">import</span> <span class="name namespace">logging</span>
<span class="keyword namespace">from</span> <span class="name namespace">systemds.context</span> <span class="keyword namespace">import</span> <span class="name">SystemDSContext</span>
<span class="comment single"># Create a context and if necessary (no SystemDS py4j instance running)</span>
<span class="comment single"># it starts a subprocess which does the execution in SystemDS</span>
<span class="keyword">with</span> <span class="name">SystemDSContext</span><span class="punctuation">()</span> <span class="keyword">as</span> <span class="name">sds</span><span class="punctuation">:</span>
<span class="comment single"># Full generates a matrix completely filled with one number.</span>
<span class="comment single"># Generate a 5x10 matrix filled with 4.2</span>
<span class="name">m</span> <span class="operator">=</span> <span class="name">sds</span><span class="operator">.</span><span class="name">full</span><span class="punctuation">((</span><span class="literal number integer">5</span><span class="punctuation">,</span> <span class="literal number integer">10</span><span class="punctuation">),</span> <span class="literal number float">4.20</span><span class="punctuation">)</span>
<span class="comment single"># multiply with scalar. Nothing is executed yet!</span>
<span class="name">m_res</span> <span class="operator">=</span> <span class="name">m</span> <span class="operator">*</span> <span class="literal number float">3.1</span>
<span class="comment single"># Do the calculation in SystemDS by calling compute().</span>
<span class="comment single"># The returned value is an numpy array that can be directly printed.</span>
<span class="name">logging</span><span class="operator">.</span><span class="name">info</span><span class="punctuation">(</span><span class="name">m_res</span><span class="operator">.</span><span class="name">compute</span><span class="punctuation">())</span>
<span class="comment single"># context will automatically be closed and process stopped</span></code></pre>
<p>As output we get</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[[</span><span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span><span class="p">]</span>
<span class="p">[</span><span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span><span class="p">]</span>
<span class="p">[</span><span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span><span class="p">]</span>
<span class="p">[</span><span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span><span class="p">]</span>
<span class="p">[</span><span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span><span class="p">]]</span>
</pre></div>
</div>
<p>The Python SystemDS package is compatible with numpy arrays.
Let us do a quick element-wise matrix multiplication of numpy arrays with SystemDS.
Remember to first start up a new terminal:</p>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">systemds.context</span> <span class="kn">import</span> <span class="n">SystemDSContext</span>
<span class="c1"># create a random array</span>
<span class="n">m1</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">5</span> <span class="o">*</span> <span class="mi">5</span><span class="p">)</span> <span class="o">+</span> <span class="mf">1.01</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">double</span><span class="p">)</span>
<span class="n">m1</span><span class="o">.</span><span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
<span class="c1"># create another random array</span>
<span class="n">m2</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">5</span> <span class="o">*</span> <span class="mi">5</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">double</span><span class="p">)</span>
<span class="n">m2</span><span class="o">.</span><span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
<span class="c1"># Create a context</span>
<span class="k">with</span> <span class="n">SystemDSContext</span><span class="p">()</span> <span class="k">as</span> <span class="n">sds</span><span class="p">:</span>
<span class="c1"># element-wise matrix multiplication, note that nothing is executed yet!</span>
<span class="n">m_res</span> <span class="o">=</span> <span class="n">sds</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">m1</span><span class="p">)</span> <span class="o">*</span> <span class="n">sds</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">m2</span><span class="p">)</span>
<span class="c1"># lets do the actual computation in SystemDS! The result is an numpy array</span>
<span class="n">m_res_np</span> <span class="o">=</span> <span class="n">m_res</span><span class="o">.</span><span class="n">compute</span><span class="p">()</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">m_res_np</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="more-complex-operations">
<h2>More complex operations<a class="headerlink" href="#more-complex-operations" title="Permalink to this headline"></a></h2>
<p>SystemDS provides algorithm-level functions as built-in functions to simplify development.
One example of this is l2SVM, a high-level function for data scientists. Let’s take a look at l2svm:</p>
<pre class="code python literal-block"><code><span class="keyword namespace">import</span> <span class="name namespace">logging</span>
<span class="keyword namespace">import</span> <span class="name namespace">numpy</span> <span class="keyword">as</span> <span class="name namespace">np</span>
<span class="keyword namespace">from</span> <span class="name namespace">systemds.context</span> <span class="keyword namespace">import</span> <span class="name">SystemDSContext</span>
<span class="keyword namespace">from</span> <span class="name namespace">systemds.operator.algorithm</span> <span class="keyword namespace">import</span> <span class="name">l2svm</span>
<span class="comment single"># Set a seed</span>
<span class="name">np</span><span class="operator">.</span><span class="name">random</span><span class="operator">.</span><span class="name">seed</span><span class="punctuation">(</span><span class="literal number integer">0</span><span class="punctuation">)</span>
<span class="comment single"># Generate random features and labels in numpy</span>
<span class="comment single"># This can easily be exchanged with a data set.</span>
<span class="name">features</span> <span class="operator">=</span> <span class="name">np</span><span class="operator">.</span><span class="name">array</span><span class="punctuation">(</span><span class="name">np</span><span class="operator">.</span><span class="name">random</span><span class="operator">.</span><span class="name">randint</span><span class="punctuation">(</span>
<span class="literal number integer">100</span><span class="punctuation">,</span> <span class="name">size</span><span class="operator">=</span><span class="literal number integer">10</span> <span class="operator">*</span> <span class="literal number integer">10</span><span class="punctuation">)</span> <span class="operator">+</span> <span class="literal number float">1.01</span><span class="punctuation">,</span> <span class="name">dtype</span><span class="operator">=</span><span class="name">np</span><span class="operator">.</span><span class="name">double</span><span class="punctuation">)</span>
<span class="name">features</span><span class="operator">.</span><span class="name">shape</span> <span class="operator">=</span> <span class="punctuation">(</span><span class="literal number integer">10</span><span class="punctuation">,</span> <span class="literal number integer">10</span><span class="punctuation">)</span>
<span class="name">labels</span> <span class="operator">=</span> <span class="name">np</span><span class="operator">.</span><span class="name">zeros</span><span class="punctuation">((</span><span class="literal number integer">10</span><span class="punctuation">,</span> <span class="literal number integer">1</span><span class="punctuation">))</span>
<span class="comment single"># l2svm labels can only be 0 or 1</span>
<span class="keyword">for</span> <span class="name">i</span> <span class="operator word">in</span> <span class="name builtin">range</span><span class="punctuation">(</span><span class="literal number integer">10</span><span class="punctuation">):</span>
<span class="keyword">if</span> <span class="name">np</span><span class="operator">.</span><span class="name">random</span><span class="operator">.</span><span class="name">random</span><span class="punctuation">()</span> <span class="operator">&gt;</span> <span class="literal number float">0.5</span><span class="punctuation">:</span>
<span class="name">labels</span><span class="punctuation">[</span><span class="name">i</span><span class="punctuation">][</span><span class="literal number integer">0</span><span class="punctuation">]</span> <span class="operator">=</span> <span class="literal number integer">1</span>
<span class="comment single"># compute our model</span>
<span class="keyword">with</span> <span class="name">SystemDSContext</span><span class="punctuation">()</span> <span class="keyword">as</span> <span class="name">sds</span><span class="punctuation">:</span>
<span class="name">model</span> <span class="operator">=</span> <span class="name">l2svm</span><span class="punctuation">(</span><span class="name">sds</span><span class="operator">.</span><span class="name">from_numpy</span><span class="punctuation">(</span><span class="name">features</span><span class="punctuation">),</span>
<span class="name">sds</span><span class="operator">.</span><span class="name">from_numpy</span><span class="punctuation">(</span><span class="name">labels</span><span class="punctuation">),</span> <span class="name">verbose</span><span class="operator">=</span><span class="keyword constant">False</span><span class="punctuation">)</span><span class="operator">.</span><span class="name">compute</span><span class="punctuation">()</span>
<span class="name">logging</span><span class="operator">.</span><span class="name">info</span><span class="punctuation">(</span><span class="name">model</span><span class="punctuation">)</span></code></pre>
<p>The output should be similar to</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[[</span> <span class="mf">0.02033445</span><span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.00324092</span><span class="p">]</span>
<span class="p">[</span> <span class="mf">0.0014692</span> <span class="p">]</span>
<span class="p">[</span> <span class="mf">0.02649209</span><span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.00616902</span><span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.0095087</span> <span class="p">]</span>
<span class="p">[</span> <span class="mf">0.01039221</span><span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.0011352</span> <span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.01686351</span><span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.03839821</span><span class="p">]]</span>
</pre></div>
</div>
<p>To get the full performance of SystemDS one can modify the script to only use internal functionality,
instead of using numpy arrays that have to be transferred into SystemDS.
Transformed in this way, the above script looks like this:</p>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">from</span> <span class="nn">systemds.context</span> <span class="kn">import</span> <span class="n">SystemDSContext</span>
<span class="kn">from</span> <span class="nn">systemds.operator.algorithm</span> <span class="kn">import</span> <span class="n">l2svm</span>
<span class="k">with</span> <span class="n">SystemDSContext</span><span class="p">()</span> <span class="k">as</span> <span class="n">sds</span><span class="p">:</span>
<span class="c1"># Generate 10 by 10 matrix with values in range 0 to 100.</span>
<span class="n">features</span> <span class="o">=</span> <span class="n">sds</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
<span class="c1"># Add value to all cells in features</span>
<span class="n">features</span> <span class="o">+=</span> <span class="mf">1.1</span>
<span class="c1"># Generate labels of all ones and zeros</span>
<span class="n">labels</span> <span class="o">=</span> <span class="n">sds</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">sparsity</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">l2svm</span><span class="p">(</span><span class="n">features</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">compute</span><span class="p">()</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
</pre></div>
</div>
<p>When reading in datasets for processing, it is highly recommended that you read from inside SystemDS using
<code>sds.read("file")</code>, since this avoids transferring numpy arrays.</p>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="../guide/federated.html" class="btn btn-neutral float-right" title="Federated Environment" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="install.html" class="btn btn-neutral float-left" title="Install SystemDS" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2022, Apache SystemDS
</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
// Passing a function to jQuery() is its shorthand for registering a
// DOM-ready callback, so the body below runs once the document is parsed.
jQuery(function () {
// Invoke the theme's Navigation.enable with `true`.
// NOTE(review): SphinxRtdTheme is presumably defined by ../static/js/theme.js
// loaded in <head>; the meaning of the boolean flag is not visible here.
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>