blob: 8dfea759771cab643cea2715759e1f44f154eddf [file]
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>QuickStart &mdash; SystemDS 3.3.0-dev documentation</title>
<link rel="stylesheet" type="text/css" href="../static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../static/jquery.js?v=5d32c60e"></script>
<script src="../static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../static/documentation_options.js?v=492a8e1f"></script>
<script src="../static/doctools.js?v=9a2dae69"></script>
<script src="../static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Federated Environment" href="../guide/federated.html" />
<link rel="prev" title="Install SystemDS" href="install.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
SystemDS
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Getting Started:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="install.html">Install SystemDS</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">QuickStart</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#matrix-operations">Matrix Operations</a></li>
<li class="toctree-l2"><a class="reference internal" href="#more-complex-operations">More complex operations</a></li>
</ul>
</li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Guides</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../guide/federated.html">Federated Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../guide/algorithms_basics.html">Built-in Algorithms</a></li>
<li class="toctree-l1"><a class="reference internal" href="../guide/python_end_to_end_tut.html">Python end-to-end tutorial</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../api/context/systemds_context.html">SystemDSContext</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/algorithms.html">Algorithms</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/node/matrix.html">Matrix</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/node/frame.html">Frame</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/node/list.html">List</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/node/scalar.html">Scalar</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/node/source.html">Source</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Internals API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/operation_node.html">Operation Node</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/script_building/dag.html">Dag</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/script_building/script.html">Script</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/utils/converters.html">Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/utils/helpers.html">Helpers</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">SystemDS</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">QuickStart</li>
<li class="wy-breadcrumbs-aside">
<a href="../sources/getting_started/simple_examples.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="quickstart">
<h1>QuickStart<a class="headerlink" href="#quickstart" title="Link to this heading"></a></h1>
<p>Let’s take a look at some code examples.</p>
<section id="matrix-operations">
<h2>Matrix Operations<a class="headerlink" href="#matrix-operations" title="Link to this heading"></a></h2>
<p>Making use of SystemDS, let us multiply a matrix by a scalar:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">from</span> <span class="nn">systemds.context</span> <span class="kn">import</span> <span class="n">SystemDSContext</span>
<span class="c1"># Create a context and if necessary (no SystemDS py4j instance running)</span>
<span class="c1"># it starts a subprocess which does the execution in SystemDS</span>
<span class="k">with</span> <span class="n">SystemDSContext</span><span class="p">()</span> <span class="k">as</span> <span class="n">sds</span><span class="p">:</span>
<span class="c1"># Full generates a matrix completely filled with one number.</span>
<span class="c1"># Generate a 5x10 matrix filled with 4.2</span>
<span class="n">m</span> <span class="o">=</span> <span class="n">sds</span><span class="o">.</span><span class="n">full</span><span class="p">((</span><span class="mi">5</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span> <span class="mf">4.20</span><span class="p">)</span>
<span class="c1"># multiply with scalar. Nothing is executed yet!</span>
<span class="n">m_res</span> <span class="o">=</span> <span class="n">m</span> <span class="o">*</span> <span class="mf">3.1</span>
<span class="c1"># Do the calculation in SystemDS by calling compute().</span>
<span class="c1"># The returned value is an numpy array that can be directly printed.</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">m_res</span><span class="o">.</span><span class="n">compute</span><span class="p">())</span>
<span class="c1"># context will automatically be closed and process stopped</span>
</pre></div>
</div>
<p>As output we get</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[[</span><span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span><span class="p">]</span>
<span class="p">[</span><span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span><span class="p">]</span>
<span class="p">[</span><span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span><span class="p">]</span>
<span class="p">[</span><span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span><span class="p">]</span>
<span class="p">[</span><span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span> <span class="mf">13.02</span><span class="p">]]</span>
</pre></div>
</div>
<p>The Python SystemDS package is compatible with numpy arrays.
Let us do a quick element-wise matrix multiplication of numpy arrays with SystemDS.
Remember to first start up a new terminal:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">systemds.context</span> <span class="kn">import</span> <span class="n">SystemDSContext</span>
<span class="c1"># create a random array</span>
<span class="n">m1</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">5</span> <span class="o">*</span> <span class="mi">5</span><span class="p">)</span> <span class="o">+</span> <span class="mf">1.01</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">double</span><span class="p">)</span>
<span class="n">m1</span><span class="o">.</span><span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
<span class="c1"># create another random array</span>
<span class="n">m2</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">5</span> <span class="o">*</span> <span class="mi">5</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">double</span><span class="p">)</span>
<span class="n">m2</span><span class="o">.</span><span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
<span class="c1"># Create a context</span>
<span class="k">with</span> <span class="n">SystemDSContext</span><span class="p">()</span> <span class="k">as</span> <span class="n">sds</span><span class="p">:</span>
<span class="c1"># element-wise matrix multiplication, note that nothing is executed yet!</span>
<span class="n">m_res</span> <span class="o">=</span> <span class="n">sds</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">m1</span><span class="p">)</span> <span class="o">*</span> <span class="n">sds</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">m2</span><span class="p">)</span>
<span class="c1"># lets do the actual computation in SystemDS! The result is an numpy array</span>
<span class="n">m_res_np</span> <span class="o">=</span> <span class="n">m_res</span><span class="o">.</span><span class="n">compute</span><span class="p">()</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">m_res_np</span><span class="p">)</span>
</pre></div>
</div>
</section>
<section id="more-complex-operations">
<h2>More complex operations<a class="headerlink" href="#more-complex-operations" title="Link to this heading"></a></h2>
<p>SystemDS provides algorithm-level functions as built-in functions to simplify development.
One example of this is l2svm, a high-level function for data scientists. Let’s take a look at l2svm:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">systemds.context</span> <span class="kn">import</span> <span class="n">SystemDSContext</span>
<span class="kn">from</span> <span class="nn">systemds.operator.algorithm</span> <span class="kn">import</span> <span class="n">l2svm</span>
<span class="c1"># Set a seed</span>
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="c1"># Generate random features and labels in numpy</span>
<span class="c1"># This can easily be exchanged with a data set.</span>
<span class="n">features</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span>
<span class="mi">100</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">10</span> <span class="o">*</span> <span class="mi">10</span><span class="p">)</span> <span class="o">+</span> <span class="mf">1.01</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">double</span><span class="p">)</span>
<span class="n">features</span><span class="o">.</span><span class="n">shape</span> <span class="o">=</span> <span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
<span class="n">labels</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">10</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span>
<span class="c1"># l2svm labels can only be 0 or 1</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">()</span> <span class="o">&gt;</span> <span class="mf">0.5</span><span class="p">:</span>
<span class="n">labels</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
<span class="c1"># compute our model</span>
<span class="k">with</span> <span class="n">SystemDSContext</span><span class="p">()</span> <span class="k">as</span> <span class="n">sds</span><span class="p">:</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">l2svm</span><span class="p">(</span><span class="n">sds</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">features</span><span class="p">),</span>
<span class="n">sds</span><span class="o">.</span><span class="n">from_numpy</span><span class="p">(</span><span class="n">labels</span><span class="p">),</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">compute</span><span class="p">()</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
</pre></div>
</div>
<p>The output should be similar to</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[[</span> <span class="mf">0.02033445</span><span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.00324092</span><span class="p">]</span>
<span class="p">[</span> <span class="mf">0.0014692</span> <span class="p">]</span>
<span class="p">[</span> <span class="mf">0.02649209</span><span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.00616902</span><span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.0095087</span> <span class="p">]</span>
<span class="p">[</span> <span class="mf">0.01039221</span><span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.0011352</span> <span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.01686351</span><span class="p">]</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.03839821</span><span class="p">]]</span>
</pre></div>
</div>
<p>To get the full performance of SystemDS, one can modify the script to use only internal functionality,
instead of using numpy arrays that have to be transferred into SystemDS.
The above script, transformed accordingly, looks like this:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">from</span> <span class="nn">systemds.context</span> <span class="kn">import</span> <span class="n">SystemDSContext</span>
<span class="kn">from</span> <span class="nn">systemds.operator.algorithm</span> <span class="kn">import</span> <span class="n">l2svm</span>
<span class="k">with</span> <span class="n">SystemDSContext</span><span class="p">()</span> <span class="k">as</span> <span class="n">sds</span><span class="p">:</span>
<span class="c1"># Generate 10 by 10 matrix with values in range 0 to 100.</span>
<span class="n">features</span> <span class="o">=</span> <span class="n">sds</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
<span class="c1"># Add value to all cells in features</span>
<span class="n">features</span> <span class="o">+=</span> <span class="mf">1.1</span>
<span class="c1"># Generate labels of all ones and zeros</span>
<span class="n">labels</span> <span class="o">=</span> <span class="n">sds</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">sparsity</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">l2svm</span><span class="p">(</span><span class="n">features</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">compute</span><span class="p">()</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
</pre></div>
</div>
<p>When reading in datasets for processing, it is highly recommended that you read them from inside SystemDS using
sds.read(“file”), since this avoids transferring numpy arrays.</p>
</section>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="install.html" class="btn btn-neutral float-left" title="Install SystemDS" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="../guide/federated.html" class="btn btn-neutral float-right" title="Federated Environment" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Apache SystemDS.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>