blob: 8795d07c64cb43541fb2261bee4bf79f1e3ce039 [file] [log] [blame]
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Design &mdash; SystemDS 2.0.0 documentation</title>
<link rel="stylesheet" href="../static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../static/pygments.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../static/js/html5shiv.min.js"></script>
<![endif]-->
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../static/documentation_options.js"></script>
<script src="../static/jquery.js"></script>
<script src="../static/underscore.js"></script>
<script src="../static/doctools.js"></script>
<script src="../static/language_data.js"></script>
<script type="text/javascript" src="../static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Algorithms" href="../api/operator/algorithms.html" />
<link rel="prev" title="QuickStart Onnx" href="onnx_systemds.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home" alt="Documentation Home"> SystemDS
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<p class="caption"><span class="caption-text">Getting Started:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../getting_started/install.html">Install SystemDS</a></li>
<li class="toctree-l1"><a class="reference internal" href="../getting_started/simple_examples.html">QuickStart</a></li>
</ul>
<p class="caption"><span class="caption-text">Guides</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../guide/federated.html">Federated Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../guide/algorithms_basics.html">Built-in Algorithms</a></li>
</ul>
<p class="caption"><span class="caption-text">Onnx SystemDS</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="onnx_systemds.html">QuickStart Onnx</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Design</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#goals">Goals</a></li>
<li class="toctree-l2"><a class="reference internal" href="#limitations">Limitations</a></li>
<li class="toctree-l2"><a class="reference internal" href="#onnx-operators">Onnx - Operators</a></li>
<li class="toctree-l2"><a class="reference internal" href="#onnx-files">Onnx - Files</a></li>
<li class="toctree-l2"><a class="reference internal" href="#traversing-the-graph">Traversing the Graph</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#graph-traversal">Graph traversal</a></li>
<li class="toctree-l3"><a class="reference internal" href="#example">Example</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#rendering-dml-scripts">Rendering DML scripts</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#dml-script">1. <cite>dml_script</cite></a></li>
<li class="toctree-l3"><a class="reference internal" href="#imports">2. <cite>imports</cite></a></li>
<li class="toctree-l3"><a class="reference internal" href="#sub-graphs">3. <cite>sub_graphs</cite></a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#final-script">Final Script</a></li>
<li class="toctree-l2"><a class="reference internal" href="#implementing-new-operators">Implementing new operators</a></li>
<li class="toctree-l2"><a class="reference internal" href="#testing-models">Testing models</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#creating-a-testcase">Creating a Testcase</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#steps">Steps:</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#tools">Tools</a></li>
</ul>
</li>
</ul>
<p class="caption"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/algorithms.html">Algorithms</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/context/systemds_context.html">SystemDSContext</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/matrix/matrix.html">Matrix</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/matrix/federated.html">Federated</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/operator/operation_node.html">Operation Node</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/onnx_systemds/convert.html">Convert</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/onnx_systemds/onnx_helper.html">Onnx Helper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/onnx_systemds/operator_gen.html">Operator Gen</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/onnx_systemds/render.html">Render</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/onnx_systemds/util.html">Util</a></li>
</ul>
<p class="caption"><span class="caption-text">Internals API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../api/script_building/dag.html">Dag</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/script_building/script.html">Script</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/utils/converters.html">Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/utils/helpers.html">Helpers</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">SystemDS</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>
<li>Design</li>
<li class="wy-breadcrumbs-aside">
<a href="../sources/onnx_systemds/onnx_systemds_design.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="design">
<h1>Design<a class="headerlink" href="#design" title="Permalink to this headline"></a></h1>
<p>This document describes the initial design of <cite>onnx-systemds</cite></p>
<p>For dealing with different operator-set versions of onnx the current strategy is to use the
<a class="reference external" href="https://github.com/onnx/onnx/blob/master/docs/PythonAPIOverview.md#converting-version-of-an-onnx-model-within-default-domain-aionnx">converter provided by onnx</a> to convert to a common version.</p>
<p>However, the converter does not support adapters for all op-sets/operators so this conversion will fail for many models.
On the onnx repository you can find a list of
<a class="reference external" href="https://github.com/onnx/onnx/blob/master/onnx/version_converter.py#L21">currently supported adapters</a>.</p>
<div class="section" id="goals">
<h2>Goals<a class="headerlink" href="#goals" title="Permalink to this headline"></a></h2>
<blockquote>
<div><ul class="simple">
<li><p>Support for importing <a class="reference external" href="https://github.com/onnx/onnx/blob/master/docs/Operators.md">operators of the ONNX base definition</a></p></li>
<li><p>Support for importing <a class="reference external" href="https://github.com/onnx/onnx/blob/master/docs/Operators-ml.md">operators defined by ONNX-ML</a></p></li>
<li><p>Support for exporting DML script to ONNX graphs</p></li>
</ul>
</div></blockquote>
</div>
<div class="section" id="limitations">
<h2>Limitations<a class="headerlink" href="#limitations" title="Permalink to this headline"></a></h2>
<blockquote>
<div><ul class="simple">
<li><p>Not able to support all data types / operators as they are not currently supported by SystemDS</p></li>
</ul>
</div></blockquote>
</div>
<div class="section" id="onnx-operators">
<h2>Onnx - Operators<a class="headerlink" href="#onnx-operators" title="Permalink to this headline"></a></h2>
<p>Onnx includes several very simple and also more complex operators.
When implementing an operator it’s best to have a look at the
<a class="reference external" href="https://github.com/onnx/onnx/blob/master/docs/Operators.md">operator schemas</a>,
which precisely define the inputs, outputs and attributes of the operation.</p>
<p>Besides the standard onnx definition, there also exists onnx-ML, whose operator schemas are defined in a
<a class="reference external" href="https://github.com/onnx/onnx/blob/master/docs/Operators-ml.md">separate document</a>.
It is an extension of the standard onnx format; however, currently only onnx standard operators are supported.</p>
</div>
<div class="section" id="onnx-files">
<h2>Onnx - Files<a class="headerlink" href="#onnx-files" title="Permalink to this headline"></a></h2>
<p>Onnx uses the <a class="reference external" href="https://developers.google.com/protocol-buffers/">ProtoBuf format</a>.
It specifies this representation in several <code class="docutils literal notranslate"><span class="pre">.proto</span></code>/<code class="docutils literal notranslate"><span class="pre">.proto3</span></code>
<a class="reference external" href="https://github.com/onnx/onnx/tree/master/onnx">files</a> again with dedicated files for onnx-ML.
These files are helpful to understand the underlying structure and values that are possible.</p>
<p>Protobuf creates the underlying structure such that you can access elements of the onnx graph as if they were
class members. For more information take a look at
<a class="reference external" href="https://developers.google.com/protocol-buffers/docs/pythontutorial#the-protocol-buffer-api">Google’s protocol-buffer documentation</a>.</p>
<p>This is also why in its current form, this converter does not convert the protobuf-structure into an internal format,
as the provided protobuf structure can already be conveniently used. Instead,
there exist a number of onnx-helper functions/classes (see <code class="docutils literal notranslate"><span class="pre">onnx_helper.py</span></code>).</p>
</div>
<div class="section" id="traversing-the-graph">
<h2>Traversing the Graph<a class="headerlink" href="#traversing-the-graph" title="Permalink to this headline"></a></h2>
<p>For creating the script, it is essential to insert computations in the right order into the dml-script.
To do this, the converter builds a tree-structure (DAG) from the protobuf-nodes
(see <cite>render.gen_graph_functions</cite>).</p>
<blockquote>
<div><ul class="simple">
<li><p>For traversing the graph, we start from the bottom.</p></li>
<li><p>The converter starts with the graph-outputs as available outputs.</p></li>
<li><p>It generates the dml snippets in reverse-order</p></li>
</ul>
</div></blockquote>
<div class="section" id="graph-traversal">
<h3>Graph traversal<a class="headerlink" href="#graph-traversal" title="Permalink to this headline"></a></h3>
<ol class="arabic">
<li><p>Find a node for which all outputs are available.</p></li>
<li><p>Process the node:</p>
<blockquote>
<div><ul class="simple">
<li><p>Generate the dml parts for this node</p></li>
<li><p>add its inputs to the list of available outputs</p></li>
<li><p>remove the node from the graph</p></li>
</ul>
</div></blockquote>
</li>
<li><p>If there are nodes left, restart at step 1.</p></li>
</ol>
</div>
<div class="section" id="example">
<h3>Example<a class="headerlink" href="#example" title="Permalink to this headline"></a></h3>
<p>In the example below with the nodes <code class="docutils literal notranslate"><span class="pre">Add</span></code>, <code class="docutils literal notranslate"><span class="pre">MatMul</span></code> and <code class="docutils literal notranslate"><span class="pre">Sub</span></code>, we would start with <code class="docutils literal notranslate"><span class="pre">F</span></code> as available output.
Therefore the first node to insert would be <code class="docutils literal notranslate"><span class="pre">Sub</span></code>. After inserting <code class="docutils literal notranslate"><span class="pre">Sub</span></code> its inputs become available outputs,
therefore all outputs of <code class="docutils literal notranslate"><span class="pre">MatMul</span></code> become available. Finally, after removing <code class="docutils literal notranslate"><span class="pre">MatMul</span></code> from the graph all outputs
of <code class="docutils literal notranslate"><span class="pre">Add</span></code> are available, and it can be removed from the graph as well.</p>
<a class="reference internal image-reference" href="../images/sample_graph.png"><img alt="sample graph" class="align-center" src="../images/sample_graph.png" style="width: 200px;" /></a>
</div>
</div>
<div class="section" id="rendering-dml-scripts">
<h2>Rendering DML scripts<a class="headerlink" href="#rendering-dml-scripts" title="Permalink to this headline"></a></h2>
<p>The main idea of this converter is that the logic for generating the actual dml-syntax is handled by
<a class="reference external" href="https://jinja.palletsprojects.com/en/2.11.x/">Jinja templates</a> (located in <code class="docutils literal notranslate"><span class="pre">/templates</span></code>).
Therefore the python code stays uncluttered, because it does not have to merge strings together to produce valid
dml-syntax and instead simply provides the elements that are needed to render the script.</p>
<p>The template-engine then takes these inputs and renders a human readable script with valid dml syntax.
To improve readability the generator also automatically adds the doc-strings which are part of the onnx-definitions as
comments to the script.</p>
<p>When traversing the graph, a script part is generated for each node consisting of three elements:</p>
<blockquote>
<div><ul class="simple">
<li><p><cite>dml_script</cite> The actual script snippet for the node</p></li>
<li><p><cite>imports</cite> Imports required for the node</p></li>
<li><p><cite>sub_graphs</cite> Any sub_graphs of the node that need to be handled</p></li>
</ul>
</div></blockquote>
<p>The function that is called for rendering a specific operator is defined in the dictionary
<code class="docutils literal notranslate"><span class="pre">operator_generators</span></code> in <code class="docutils literal notranslate"><span class="pre">render.py</span></code>.</p>
<div class="section" id="dml-script">
<h3>1. <cite>dml_script</cite><a class="headerlink" href="#dml-script" title="Permalink to this headline"></a></h3>
<p>Depending on the operator this can be a function call or a more complex dml-snippet.
This part is generated by the template-engine when the corresponding template is rendered.</p>
<p>Many onnx-operators can be handled by a single template file. There exists a <code class="docutils literal notranslate"><span class="pre">function_call.dml.jinja</span></code>
template which should be able to handle a large number of operators.</p>
</div>
<div class="section" id="imports">
<h3>2. <cite>imports</cite><a class="headerlink" href="#imports" title="Permalink to this headline"></a></h3>
<p>Some operators are handled by calling scripts provided by systemds located in <code class="docutils literal notranslate"><span class="pre">$SYSTEMDS_ROOT/scripts</span></code>.
To enable these imports, the converter automatically resolves the <code class="docutils literal notranslate"><span class="pre">$SYSTEMDS_ROOT</span></code>
environment variable and adds a <code class="docutils literal notranslate"><span class="pre">setwd($SYSTEMDS_ROOT/scripts)</span></code> to the script.</p>
</div>
<div class="section" id="sub-graphs">
<h3>3. <cite>sub_graphs</cite><a class="headerlink" href="#sub-graphs" title="Permalink to this headline"></a></h3>
<p>Since sub-graphs have their own variable scope and are independent, they are handled as separate functions.
The converter generates a function for each graph in the model.
In the main-graph, the sub-graph is replaced by a function call to the sub-graph function.
To handle this the function <code class="docutils literal notranslate"><span class="pre">render.gen_graph_functions</span></code> recursively calls itself to render sub-graph functions
(and also the sub-graph functions of sub-graphs and so on…).</p>
</div>
</div>
<div class="section" id="final-script">
<h2>Final Script<a class="headerlink" href="#final-script" title="Permalink to this headline"></a></h2>
<p>In the final render all required imports, the sub-functions and the main-function are combined in a single dml-file.</p>
</div>
<div class="section" id="implementing-new-operators">
<h2>Implementing new operators<a class="headerlink" href="#implementing-new-operators" title="Permalink to this headline"></a></h2>
<p>When implementing an operator it’s best to have a look at the
<a class="reference external" href="https://github.com/onnx/onnx/blob/master/docs/Operators.md">operator schemas</a>
which exactly define the inputs, outputs and attributes of the operation.</p>
<p>It is also nice to have a test-model to work with; to generate one, refer to
<code class="docutils literal notranslate"><span class="pre">tests/onnx/test_models/model_generate.py</span></code>.</p>
<p>To implement a new operator, the function that handles the operator needs to be defined in the <code class="docutils literal notranslate"><span class="pre">operator_generators</span></code>
located in <code class="docutils literal notranslate"><span class="pre">render.py</span></code>.
All functions listed in this dictionary need to have the same call structure.</p>
<p>If there exists a dml-script (in <code class="docutils literal notranslate"><span class="pre">$SYSTEMDS_ROOT/scripts</span></code>) that provides the functionality, the operator
can be implemented by translating the arguments/inputs, adding the import-render and function-call-render to this script.</p>
</div>
<div class="section" id="testing-models">
<h2>Testing models<a class="headerlink" href="#testing-models" title="Permalink to this headline"></a></h2>
<p>onnx provides a convenient way for
<a class="reference external" href="https://github.com/onnx/onnx/blob/master/docs/PythonAPIOverview.md#checking-an-onnx-model">creating models</a>
using helper functions in python. All current test-models are produced like this (see <code class="docutils literal notranslate"><span class="pre">tests/onnx/test_models</span></code>).</p>
<div class="section" id="creating-a-testcase">
<h3>Creating a Testcase<a class="headerlink" href="#creating-a-testcase" title="Permalink to this headline"></a></h3>
<p>The current test-system takes a model, converts it to dml using the converter and then runs a
<code class="docutils literal notranslate"><span class="pre">dml_wrapper</span></code> which calls the model-function using the script <code class="docutils literal notranslate"><span class="pre">$SYSTEMDS_ROOT/bin/systemds</span></code>.
Finally, the output (stored by the dml-wrapper) is compared to a reference output.</p>
<p>When creating files stick to the naming conventions of other files in the same folder.</p>
<div class="section" id="steps">
<h4>Steps:<a class="headerlink" href="#steps" title="Permalink to this headline"></a></h4>
<ol class="arabic">
<li><p>Create a model in <code class="docutils literal notranslate"><span class="pre">tests/onnx/test_models</span></code>, e.g. <code class="docutils literal notranslate"><span class="pre">sample_model.onnx</span></code></p></li>
<li><p>Create a dml wrapper that calls the model-function in <code class="docutils literal notranslate"><span class="pre">tests/onnx/dml_wrapper/sample_model_wrapper.dml</span></code></p>
<blockquote>
<div><ul class="simple">
<li><p>The wrapper needs to call the model-function and store the output to <code class="docutils literal notranslate"><span class="pre">output_test/sample_model.out</span></code></p></li>
<li><p>The name of the model-function is generated from the model-name (see <code class="docutils literal notranslate"><span class="pre">util.generate_function_name</span></code> )</p></li>
</ul>
</div></blockquote>
</li>
<li><p>Provide a reference output in <code class="docutils literal notranslate"><span class="pre">tests/onnx/output_reference/sample_model_reference.out</span></code></p></li>
<li><p>Create the unit test function.</p></li>
</ol>
</div>
</div>
</div>
<div class="section" id="tools">
<h2>Tools<a class="headerlink" href="#tools" title="Permalink to this headline"></a></h2>
<blockquote>
<div><ul class="simple">
<li><p><a class="reference external" href="https://www.jetbrains.com/pycharm/">Pycharm</a> in the professional version allows you to <a class="reference external" href="https://www.jetbrains.com/help/pycharm/templates.html#debug">debug template files</a> which can be handy</p></li>
<li><p><a class="reference external" href="https://github.com/lutzroeder/netron">Netron</a> is a nice free tool for viewing onnx-graphs</p></li>
</ul>
</div></blockquote>
</div>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="../api/operator/algorithms.html" class="btn btn-neutral float-right" title="Algorithms" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="onnx_systemds.html" class="btn btn-neutral float-left" title="QuickStart Onnx" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright 2020, Apache SystemDS
</p>
</div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>