blob: 61a4835786c76a559c9c3e275b28a88ccd10ce9a [file] [log] [blame]
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<!-- Page metadata: viewport, generator, blog feeds, title, social/SEO tags, stylesheet, and script preloads. -->
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-alpha.70">
<link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="Apache Submarine Blog RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="Apache Submarine Blog Atom Feed">
<title data-react-helmet="true">Architecture and Requirements | Apache Submarine</title>
<meta data-react-helmet="true" name="twitter:card" content="summary_large_image">
<meta data-react-helmet="true" name="docusaurus_locale" content="en">
<meta data-react-helmet="true" name="docusaurus_version" content="0.6.0">
<meta data-react-helmet="true" name="docusaurus_tag" content="docs-default-0.6.0">
<meta data-react-helmet="true" property="og:title" content="Architecture and Requirements | Apache Submarine">
<!-- Human-readable summary of this page; the same text is used for og:description. -->
<meta data-react-helmet="true" name="description" content="Architecture and requirements of Apache Submarine: terminology, background, machine learning workflow pain points, and the requirements for notebooks and experiments.">
<meta data-react-helmet="true" property="og:description" content="Architecture and requirements of Apache Submarine: terminology, background, machine learning workflow pain points, and the requirements for notebooks and experiments.">
<!-- og:url and the canonical link carry the same absolute URL, with a single slash after the host. -->
<meta data-react-helmet="true" property="og:url" content="https://submarine.apache.org/docs/designDocs/architecture-and-requirements">
<link data-react-helmet="true" rel="shortcut icon" href="/img/submarine.ico">
<link data-react-helmet="true" rel="canonical" href="https://submarine.apache.org/docs/designDocs/architecture-and-requirements">
<link rel="stylesheet" href="/styles.39775f96.css">
<!-- Preload hints for script bundles. -->
<link rel="preload" href="/styles.6ddab7ad.js" as="script">
<link rel="preload" href="/runtime~main.943b9f3a.js" as="script">
<link rel="preload" href="/main.92db4fb0.js" as="script">
<link rel="preload" href="/1.dab80c77.js" as="script">
<link rel="preload" href="/2.6a1a8326.js" as="script">
<link rel="preload" href="/1f391b9e.f22e8160.js" as="script">
<link rel="preload" href="/128.10c43f42.js" as="script">
<link rel="preload" href="/58f10d9f.87a860a5.js" as="script">
<link rel="preload" href="/17896441.be8f95bf.js" as="script">
<link rel="preload" href="/ae045997.52baff71.js" as="script">
</head>
<body>
<script>
/* Inline theme bootstrap: copies the "theme" value from localStorage onto
   the root element's data-theme attribute, using "light" when nothing is stored. */
(function () {
  var storedTheme = null;
  try {
    storedTheme = localStorage.getItem("theme");
  } catch (err) {
    /* Any exception from reading localStorage is ignored on purpose;
       storedTheme stays null and the "light" fallback below applies. */
  }
  var themeToApply = storedTheme !== null ? storedTheme : "light";
  document.documentElement.setAttribute("data-theme", themeToApply);
})();
</script><div id="__docusaurus">
<nav aria-label="Skip navigation links"><button type="button" tabindex="0" class="skipToContent_11B0">Skip to main content</button></nav><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><div aria-label="Navigation bar toggle" class="navbar__toggle" role="button" tabindex="0"><svg aria-label="Menu" width="30" height="30" viewBox="0 0 30 30" role="img" focusable="false"><title>Menu</title><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></div><a class="navbar__brand" href="/"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--light_3CMI navbar__logo"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--dark_3ARp navbar__logo"><strong class="navbar__title">Apache Submarine</strong></a><a class="navbar__item navbar__link navbar__link--active" href="/docs/gettingStarted/quickstart">Docs</a><a class="navbar__item navbar__link" href="/docs/api/environment">API</a><a class="navbar__item navbar__link" href="/docs/download">Download</a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__item navbar__link" href="/docs/">0.6.0</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/designDocs/architecture-and-requirements">master 🏃</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/designDocs/architecture-and-requirements">0.6.0</a></li><li><a class="dropdown__link" href="/versions">All versions</a></li></ul></div><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__item navbar__link">Apache</a><ul class="dropdown__menu"><li><a 
href="https://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache Software Foundation</a></li><li><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events</a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache License</a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks</a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security</a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship</a></li></ul></div><div class="react-toggle react-toggle--disabled displayOnlyInLargeViewport_2N3Q"><div class="react-toggle-track"><div class="react-toggle-track-check"><span class="toggle_3NWk">🌜</span></div><div class="react-toggle-track-x"><span class="toggle_3NWk">🌞</span></div></div><div class="react-toggle-thumb"></div><input type="checkbox" disabled="" aria-label="Dark mode toggle" class="react-toggle-screenreader-only"></div><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Search" aria-label="Search" class="navbar__search-input search-bar"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--light_3CMI navbar__logo"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--dark_3ARp navbar__logo"><strong class="navbar__title">Apache 
Submarine</strong></a></div><div class="navbar-sidebar__items"><div class="menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link navbar__link--active" href="/docs/gettingStarted/quickstart">Docs</a></li><li class="menu__list-item"><a class="menu__link" href="/docs/api/environment">API</a></li><li class="menu__list-item"><a class="menu__link" href="/docs/download">Download</a></li><li class="menu__list-item"><a role="button" class="menu__link menu__link--sublist">Versions</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/next/designDocs/architecture-and-requirements">master 🏃</a></li><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active" href="/docs/designDocs/architecture-and-requirements">0.6.0</a></li><li class="menu__list-item"><a class="menu__link" href="/versions">All versions</a></li></ul></li><li class="menu__list-item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="menu__link">GitHub</a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist">Apache</a><ul class="menu__list"><li class="menu__list-item"><a href="https://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="menu__link">Apache Software Foundation</a></li><li class="menu__list-item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="menu__link">Events</a></li><li class="menu__list-item"><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="menu__link">Apache License</a></li><li class="menu__list-item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="menu__link">Thanks</a></li><li class="menu__list-item"><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" 
class="menu__link">Security</a></li><li class="menu__list-item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="menu__link">Sponsorship</a></li></ul></li></ul></div></div></div></nav><div class="main-wrapper"><div class="docPage_vMrn"><div class="docSidebarContainer_3Ak5" role="complementary"><div class="sidebar_3gvy"><div class="menu menu--responsive thin-scrollbar menu_1yIk"><button aria-label="Open Menu" aria-haspopup="true" class="button button--secondary button--sm menu__button" type="button"><svg aria-label="Menu" class="sidebarMenuIcon_1CUI" width="24" height="24" viewBox="0 0 30 30" role="img" focusable="false"><title>Menu</title><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><ul class="menu__list"><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!">Getting Started</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/gettingStarted/quickstart">Quickstart</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/gettingStarted/notebook">Jupyter Notebook</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!">User Docs</a><ul class="menu__list"><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" 
tabindex="-1">API documentation</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/api/experiment">Experiment REST API</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/api/environment">Environment REST API</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/api/experiment-template">Experiment Template REST API</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/api/notebook">Notebook REST API</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" tabindex="-1">Submarine SDK</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/submarine-sdk/experiment-client">Experiment Client</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/submarine-sdk/model-client">Model Client</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/submarine-sdk/tracking">Tracking</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" tabindex="-1">Submarine Security</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/submarine-security/spark-security/README">Submarine Spark Security Plugin</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/submarine-security/spark-security/build-submarine-spark-security-plugin">Building Submarine Spark Security Plugin</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" 
tabindex="-1">Others</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/others/mlflow">MLflow UI</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/others/tensorboard">Tensorboard</a></li></ul></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!">Administrator Docs</a><ul class="menu__list"><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" tabindex="-1">Submarine on Yarn</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/adminDocs/yarn/README">Running Submarine on YARN</a></li></ul></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!">Developer Docs</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/README">Project Architecture</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/Dependencies">Dependencies for Submarine</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/BuildFromCode">How to Build Submarine</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/Development">Development Guide</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/IntegrationTestK8s">How to Run Integration K8s Test</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/IntegrationTestE2E">How to Run Frontend Integration Test</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!">Community</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/community/README">Apache Submarine 
Community</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/community/HowToCommit">Guide for Apache Submarine Committers</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/community/contributing">How To Contribute to Submarine</a></li></ul></li><li class="menu__list-item"><a class="menu__link menu__link--sublist menu__link--active" href="#!">Design Docs</a><ul class="menu__list"><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active active" tabindex="0" href="/docs/designDocs/architecture-and-requirements">Architecture and Requirements</a></li><li class="menu__list-item"><a class="menu__link" tabindex="0" href="/docs/designDocs/implementation-notes">Implementation Notes</a></li><li class="menu__list-item"><a class="menu__link" tabindex="0" href="/docs/designDocs/environments-implementation">Environments Implementation</a></li><li class="menu__list-item"><a class="menu__link" tabindex="0" href="/docs/designDocs/experiment-implementation">Experiment Implementation</a></li><li class="menu__list-item"><a class="menu__link" tabindex="0" href="/docs/designDocs/notebook-implementation">Notebook Implementation</a></li><li class="menu__list-item"><a class="menu__link" tabindex="0" href="/docs/designDocs/storage-implementation">Storage Implementation</a></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" tabindex="0">Submarine Server</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/submarine-server/architecture">Submarine Server Implementation</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/submarine-server/experimentSpec">Generic Experiment Spec</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" 
tabindex="0">WIP Design Docs</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/wip-designs/submarine-launcher">Submarine Launcher</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/wip-designs/submarine-clusterServer">Cluster Server Design - High-Availability</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/wip-designs/security-implementation">Security Implementation</a></li></ul></li></ul></li></ul></div></div></div><main class="docMainContainer_2iGs"><div class="container padding-vert--lg docItemWrapper_1bxp"><div class="row"><div class="col docItemCol_U38p"><div class="docItemContainer_a7m4"><article><div><span class="badge badge--secondary">Version: 0.6.0</span></div><header><h1 class="docTitle_Oumm">Architecture and Requirements</h1></header><div class="markdown"><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="terminology"></a>Terminology<a class="hash-link" href="#terminology" title="Direct link to heading">#</a></h2><table><thead><tr><th>Term</th><th>Description</th></tr></thead><tbody><tr><td>User</td><td>A single data scientist/data engineer. A user has a resource quota and credentials.</td></tr><tr><td>Team</td><td>A user belongs to one or more teams; teams have ACLs for sharing artifacts such as notebook content, models, etc.</td></tr><tr><td>Admin</td><td>Also called SRE; manages users&#x27; quotas, credentials, teams, and other components.</td></tr></tbody></table><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="background"></a>Background<a class="hash-link" href="#background" title="Direct link to heading">#</a></h2><p>Everybody talks about machine learning today, and lots of companies are trying to leverage machine learning to push the business to the next level. 
Nowadays, as more and more developers and infrastructure software companies come to this field, machine learning is becoming more and more achievable. </p><p>In the last decade, the software industry has built many open source tools for machine learning to solve the pain points: </p><ol><li><p>It was not easy to build machine learning algorithms manually, such as logistic regression, GBDT, and many other algorithms:
<strong>Answer to that:</strong> Industries have open sourced many algorithm libraries, tools, and even pre-trained models so that data scientists can directly reuse these building blocks to hook up to their data without knowing intricate details inside these algorithms and models. </p></li><li><p>It was not easy to achieve &quot;WYSIWYG, what you see is what you get&quot; from IDEs: not easy to get output, visualization, troubleshooting experiences at the same place.
<strong>Answer to that:</strong> The notebook concept was added to this picture; notebooks brought the experience of interactive coding, sharing, visualization, and debugging under the same user interface. There are popular open-source notebooks like Apache Zeppelin/Jupyter.</p></li><li><p>It was not easy to manage dependencies: an ML application that runs on one machine is hard to deploy on another machine because it has lots of library dependencies.
<strong>Answer to that:</strong> Containerization has become popular and a standard way of packaging dependencies, making it easier to &quot;build once, run anywhere&quot;. </p></li><li><p>Fragmented tools and libraries were hard for ML engineers to learn. Experience gained in one company is not naturally transferable to another company.
<strong>Answer to that:</strong> A few dominant open-source frameworks reduced the overhead of learning too many different frameworks and concepts. A data scientist who learns a few libraries such as Tensorflow/PyTorch and a few high-level wrappers like Keras will be able to create machine learning applications from other open-source building blocks.</p></li><li><p>Similarly, models built by one library (such as libsvm) were hard to integrate into a machine learning pipeline since there was no standard format.
<strong>Answer to that:</strong> Industry has built successful open-source standard machine learning frameworks such as Tensorflow/PyTorch/Keras, so their formats can be easily shared. There are also efforts to build an even more general model format such as ONNX.</p></li><li><p>It was hard to build a data pipeline that flows/transforms data from a raw data source to whatever is required by ML applications.
<strong>Answer to that:</strong> Open source big data industry plays an important role in providing, simplify, unify processes and building blocks for data flows, transformations, etc.</p></li></ol><p>The machine learning industry is moving on the right track to solve major roadblocks. So what are the pain points now for companies which have machine learning needs? What can we help here? To answer this question, let&#x27;s look at machine learning workflow first. </p><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="machine-learning-workflows--pain-points"></a>Machine Learning Workflows &amp; Pain points<a class="hash-link" href="#machine-learning-workflows--pain-points" title="Direct link to heading">#</a></h2><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">1) From different data sources such as edge, clickstream, logs, etc.</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Land to data lakes </span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2) From data lake, data transformation: </span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Data transformations: Cleanup, remove invalid rows/columns, </span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> select columns, sampling, split train/test</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> data-set, join table, etc.</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Data prepared for training.</span></div><div class="token-line" 
style="color:#bfc7d5"><span class="token plain"> </span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">3) From prepared data: </span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Training, model hyper-parameter tuning, cross-validation, etc. </span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Models saved to storage. </span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">4) From saved models: </span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Model assurance, deployment, A/B testing, etc.</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Model deployed for online serving or offline scoring.</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p>Typically data scientists responsible for item 2)-4), 1) typically handled by a different team (called Data Engineering team in many companies, some Data Engineering team also responsible for part of data transformation)</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="pain-1-complex-workflowsteps-from-raw-data-to-model-different-tools-needed-by-different-steps-hard-to-make-changes-to-workflow-and-not-error-proof"></a>Pain #1 Complex workflow/steps from raw data to model, different tools needed by different steps, hard to make changes to workflow, and not error-proof<a class="hash-link" href="#pain-1-complex-workflowsteps-from-raw-data-to-model-different-tools-needed-by-different-steps-hard-to-make-changes-to-workflow-and-not-error-proof" title="Direct link to heading">#</a></h3><p>It is a complex workflow from raw data to usable models, after talking to many different data scientists, we have learned that a 
typical procedure to train a new model and push it to production can take from months to 1-2 years. </p><p>The workflow also requires a wide skill set. For example, data transformation needs tools like Spark/Hive for large scale and tools like Pandas for small scale. Model training needs to switch between XGBoost, Tensorflow, Keras, and PyTorch. Building a data pipeline requires Apache Airflow or Oozie. </p><p>Yes, there are great, standardized open-source tools built for many of these purposes. But what about changes that need to be made to a particular part of the data pipeline? How about adding a few columns to the training data for experiments? How about training models and pushing them to validation and A/B testing before rolling out to production? All these steps require jumping between different tools and UIs, changes are very hard to make, and the procedures are not error-proof.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="pain-2-dependencies-of-underlying-resource-management-platform"></a>Pain #2 Dependencies of underlying resource management platform<a class="hash-link" href="#pain-2-dependencies-of-underlying-resource-management-platform" title="Direct link to heading">#</a></h3><p>To run the jobs/services required by a machine learning platform, we need an underlying resource management platform. There are several choices of resource management platform, and they have distinct advantages and disadvantages. </p><p>For example, there are many machine learning platforms built on top of K8s. It is relatively easy to get a K8s cluster from a cloud vendor and to orchestrate the services/daemons required by machine learning on K8s. However, K8s doesn&#x27;t offer good support for jobs like Spark/Flink/Hive. So if your company has Spark/Flink/Hive running on YARN, there are gaps and a significant amount of work to move the required jobs from YARN to K8s. 
Maintaining a separate K8s cluster is also overhead to Hadoop-based data infrastructure.</p><p>Similarly, if your company&#x27;s data pipelines are mostly built on top of cloud resources and SaaS offerings, asking you to install a separate YARN cluster to run a new machine learning platform doesn&#x27;t make a lot of sense.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="pain-3-data-scientist-are-forced-to-interact-with-lower-level-platform-components"></a>Pain #3 Data scientist are forced to interact with lower-level platform components<a class="hash-link" href="#pain-3-data-scientist-are-forced-to-interact-with-lower-level-platform-components" title="Direct link to heading">#</a></h3><p>In addition to the above pain, we do see Data Scientists are forced to learn underlying platform knowledge to be able to build a real-world machine learning workflow.</p><p>For most of the data scientists we talked with, they&#x27;re experts of ML algorithms/libraries, feature engineering, etc. They&#x27;re also most familiar with Python, R, and some of them understand Spark, Hive, etc. </p><p>If they&#x27;re asked to do interactions with lower-level components like fine-tuning a Spark job&#x27;s performance; or troubleshooting job failed to launch because of resource constraints; or write a K8s/YARN job spec and mount volumes, set networks properly. 
They will scratch their heads and typically cannot perform these operations efficiently.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="pain-4-comply-with-data-securitygovernance-requirements"></a>Pain #4 Comply with data security/governance requirements<a class="hash-link" href="#pain-4-comply-with-data-securitygovernance-requirements" title="Direct link to heading">#</a></h3><p>TODO: Add more details.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="pain-5-no-good-way-to-reduce-routine-ml-code-development"></a>Pain #5 No good way to reduce routine ML code development<a class="hash-link" href="#pain-5-no-good-way-to-reduce-routine-ml-code-development" title="Direct link to heading">#</a></h3><p>After the data is prepared, the data scientist needs to do several routine tasks to build the ML pipeline. To get a sense of the existing data set, they usually need to split the data set and compute its statistics. These tasks share a lot of duplicated code, which reduces the efficiency of data scientists.</p><p>An abstraction layer/framework that helps developers speed up ML pipeline development could be valuable. 
It would be better if the developer only needed to fill in callback functions and could focus on their key logic.</p><h1><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="submarine"></a>Submarine<a class="hash-link" href="#submarine" title="Direct link to heading">#</a></h1><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="overview"></a>Overview<a class="hash-link" href="#overview" title="Direct link to heading">#</a></h2><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="a-little-bit-history"></a>A little bit of history<a class="hash-link" href="#a-little-bit-history" title="Direct link to heading">#</a></h3><p>Initially, Submarine was built to solve the problems of running deep learning jobs like Tensorflow/PyTorch on Apache Hadoop YARN, allowing admins to monitor launched deep learning jobs and manage generated models. </p><p>It was part of YARN initially, and the code resided under <code>hadoop-yarn-applications</code>. Later, the community decided to convert it to a subproject within Hadoop (a sibling project of YARN, HDFS, etc.) because we wanted to support other resource management platforms like K8s. And finally, we reconsidered Submarine&#x27;s charter, and the Hadoop community voted that it was time to move Submarine to a separate Apache TLP.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="why-submarine"></a>Why Submarine?<a class="hash-link" href="#why-submarine" title="Direct link to heading">#</a></h3><p><code>ONE PLATFORM</code></p><p>Submarine is the ONE PLATFORM to allow Data Scientists to create end-to-end machine learning workflows. <code>ONE PLATFORM</code> means it supports Data Scientists and data engineers in finishing their jobs on the same platform without frequently switching their toolsets. This covers dataset exploration, data pipeline creation, model training and tuning, and pushing models to production. 
All these steps can be completed within the <code>ONE PLATFORM</code>.</p><p><code>Resource Management Independent</code></p><p>It is also designed to be resource management independent: no matter whether you have Apache Hadoop YARN, K8s, or just a container service, you will be able to run Submarine on top of it.</p><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="requirements-and-non-requirements"></a>Requirements and non-requirements<a class="hash-link" href="#requirements-and-non-requirements" title="Direct link to heading">#</a></h2><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="notebook"></a>Notebook<a class="hash-link" href="#notebook" title="Direct link to heading">#</a></h3><p>1) Users should be able to create, edit, and delete a notebook. (P0)
2) Notebooks can be persisted to storage and can be recovered if failure happens. (P0)
3) Users can trace back to previous versions of a notebook. (P1)
4) Notebooks can be shared with different users. (P1)
5) Users can define a list of parameters of a notebook (looks like parameters of the notebook&#x27;s main function) to allow executing a notebook like a job. (P1)
6) Different users can collaborate on the same notebook at the same time. (P2)</p><p>A running notebook instance is called a notebook session (or session for short).</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="experiment"></a>Experiment<a class="hash-link" href="#experiment" title="Direct link to heading">#</a></h3><p>An experiment in Submarine is an offline task. It could be a shell command, a Python command, a Spark job, a SQL query, or even a workflow. </p><p>The primary purpose of experiments in Submarine&#x27;s context is to run training tasks, offline scoring, etc. However, experiments can be generalized to do other tasks as well.</p><p>Major requirements of experiments: </p><p>1) Experiments can be submitted from UI/CLI/SDK.
2) Experiments can be monitored/managed from UI/CLI/SDK.
3) Experiments should not bind to one resource management platform (K8s/YARN).</p><h4><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="type-of-experiments"></a>Type of experiments<a class="hash-link" href="#type-of-experiments" title="Direct link to heading">#</a></h4><p><img src="/assets/images/experiments-7a09831687ecbc0e1dcf01b0c6f45445.png"></p><p>There&#x27;re two types of experiments:
<code>Adhoc experiments</code>: which includes a Python/R/notebook, or even an adhoc Tensorflow/PyTorch task, etc. </p><p><code>Predefined experiment library</code>: This is specialized experiments, which including developed libraries such as CTR, BERT, etc. Users are only required to specify a few parameters such as input, output, hyper parameters, etc. Instead of worrying about where&#x27;s training script/dependencies located.</p><h4><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="adhoc-experiment"></a>Adhoc experiment<a class="hash-link" href="#adhoc-experiment" title="Direct link to heading">#</a></h4><p>Requirements:</p><ul><li>Allow run adhoc scripts.</li><li>Allow model engineer, data scientist to run Tensorflow/Pytorch programs on YARN/K8s/Container-cloud. </li><li>Allow jobs easy access data/models in HDFS/s3, etc. </li><li>Support run distributed Tensorflow/Pytorch jobs with simple configs.</li><li>Support run user-specified Docker images.</li><li>Support specify GPU and other resources.</li></ul><h4><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="predefined-experiment-library"></a>Predefined experiment library<a class="hash-link" href="#predefined-experiment-library" title="Direct link to heading">#</a></h4><p>Here&#x27;s an example of predefined experiment library to train deepfm model: </p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">{</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;input&quot;: {</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;train_data&quot;: [&quot;hdfs:///user/submarine/data/tr.libsvm&quot;],</span></div><div class="token-line" 
style="color:#bfc7d5"><span class="token plain"> &quot;valid_data&quot;: [&quot;hdfs:///user/submarine/data/va.libsvm&quot;],</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;test_data&quot;: [&quot;hdfs:///user/submarine/data/te.libsvm&quot;],</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;type&quot;: &quot;libsvm&quot;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> },</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;output&quot;: {</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;save_model_dir&quot;: &quot;hdfs:///user/submarine/deepfm&quot;,</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;metric&quot;: &quot;auc&quot;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> },</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;training&quot;: {</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;batch_size&quot; : 512,</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;field_size&quot;: 39,</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;num_epochs&quot;: 3,</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;feature_size&quot;: 117581,</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> ...</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> }</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">}</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p>Predefined experiment libraries can be shared across users on the 
same platform, users can also add new or modified predefined experiment library via UI/REST API.</p><p>We will also model AutoML, auto hyper-parameter tuning to predefined experiment library.</p><h4><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="pipeline"></a>Pipeline<a class="hash-link" href="#pipeline" title="Direct link to heading">#</a></h4><p>Pipeline is a special kind of experiment:</p><ul><li>A pipeline is a DAG of experiments. </li><li>Can be also treated as a special kind of experiment.</li><li>Users can submit/terminate a pipeline.</li><li>Pipeline can be created/submitted via UI/API.</li></ul><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="environment-profiles"></a>Environment Profiles<a class="hash-link" href="#environment-profiles" title="Direct link to heading">#</a></h3><p>Environment profiles (or environment for short) defines a set of libraries and when Docker is being used, a Docker image in order to run an experiment or a notebook. </p><p>Docker or VM image (such as AMI: Amazon Machine Images) defines the base layer of the environment. </p><p>On top of that, users can define a set of libraries (such as Python/R) to install.</p><p>Users can save different environment configs which can be also shared across the platform. Environment profiles can be used to run a notebook (e.g. by choosing different kernel from Jupyter), or an experiment. 
A predefined experiment library specifies which environment to use, so users don&#x27;t have to choose one.</p><p>Environments can be added/listed/deleted/selected through CLI/SDK.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="model"></a>Model<a class="hash-link" href="#model" title="Direct link to heading">#</a></h3><h4><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="model-management"></a>Model management<a class="hash-link" href="#model-management" title="Direct link to heading">#</a></h4><ul><li>Model artifacts are generated by experiments or notebooks.</li><li>A model consists of artifacts from one or multiple files. </li><li>Users can choose to save, tag, and version a produced model.</li><li>Once a model is saved, users can do online model serving or offline scoring with the model.</li></ul><h4><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="model-serving"></a>Model serving<a class="hash-link" href="#model-serving" title="Direct link to heading">#</a></h4><p>After a model is saved, users can specify a serving script and a model, and create a web service to serve the model. </p><p>We call this web service an &quot;endpoint&quot;. 
Users can manage (add/stop) model serving endpoints via CLI/API/UI.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="metrics-for-training-job-and-model"></a>Metrics for training job and model<a class="hash-link" href="#metrics-for-training-job-and-model" title="Direct link to heading">#</a></h3><p>Submarine-SDK provides tracking/metrics APIs, which allows developers to add tracking/metrics and view tracking/metrics from Submarine Workbench UI.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="deployment"></a>Deployment<a class="hash-link" href="#deployment" title="Direct link to heading">#</a></h3><p>Submarine Services (See architecture overview below) should be deployed easily on-prem / on-cloud. Since there&#x27;re more and more public cloud offering for compute/storage management on cloud, we need to support deploy Submarine compute-related workloads (such as notebook session, experiments, etc.) to cloud-managed clusters. </p><p>This also include Submarine may need to take input parameters from customers and create/manage clusters if needed. It is also a common requirement to use hybrid of on-prem/on-cloud clusters.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="security--access-control--user-management--quota-management"></a>Security / Access Control / User Management / Quota Management<a class="hash-link" href="#security--access-control--user-management--quota-management" title="Direct link to heading">#</a></h3><p>There&#x27;re 4 kinds of objects need access-control: </p><ul><li>Assets belong to Submarine system, which includes notebook, experiments and results, models, predefined experiment libraries, environment profiles.</li><li>Data security. (Who owns what data, and what data can be accessed by each users). </li><li>User credentials. 
(Such as LDAP).</li><li>Other security, such as Git repo access, etc.</li></ul><p>Data security / user credentials / other security will be delegated to 3rd-party libraries such as Apache Ranger, IAM roles, etc. </p><p>Assets belonging to the Submarine system will be handled by Submarine itself.</p><p>Here are the operations a Submarine admin can perform for users / teams which can be used to access Submarine&#x27;s assets. </p><p><strong>Operations for admins</strong> </p><ul><li>Admins use the &quot;User Management System&quot; to onboard new users, upload user credentials, assign resource quotas, etc. </li><li>Admins can create new users and new teams, update user/team mappings, or remove users/teams. </li><li>Admins can set resource quotas (if different from the system default) and permissions, and upload/update necessary credentials (like a Kerberos keytab) for a user.</li><li>A DE/DS can also be an admin if the DE/DS has admin access (like a privileged user). This will be useful when a cluster is used exclusively by one user or only shared by a small team.</li><li><code>Resource Quota Management System</code> helps admins manage the resource quotas of teams and organizations. Resources can be machine resources like CPU/Memory/Disk, etc. They can also include non-machine resources like $$-based budgets.</li></ul><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="dataset"></a>Dataset<a class="hash-link" href="#dataset" title="Direct link to heading">#</a></h3><p>There&#x27;s also a need to tag datasets that will be used for training and shared across the platform by different users. 
</p><p>Like mentioned above, access to the actual data will be handled by 3rd party system like Apache Ranger / Hive Metastore which is out of the Submarine&#x27;s scope.</p><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="architecture-overview"></a>Architecture Overview<a class="hash-link" href="#architecture-overview" title="Direct link to heading">#</a></h2><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="architecture-diagram"></a>Architecture Diagram<a class="hash-link" href="#architecture-diagram" title="Direct link to heading">#</a></h3><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain"> +-----------------------------------------------------------------+</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | Submarine UI / CLI / REST API / SDK |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | Mini-Submarine |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> +-----------------------------------------------------------------+</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> +--------------------Submarine Server-----------------------------+</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | +---------+ +---------+ +----------+ +----------+ +------------+|</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | |Data set | |Notebooks| |Experiment| |Models | |Servings ||</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | +---------+ +---------+ +----------+ +----------+ +------------+|</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> |-----------------------------------------------------------------|</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | +-----------------+ +-----------------+ +---------------------+ |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | |Experiment | |Compute Resource | |Other Management | |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | |Manager | | Manager | |Services | |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | +-----------------+ +-----------------+ +---------------------+ |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | Spark, template YARN/K8s/Docker |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | TF, PyTorch, pipeline |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> + +-----------------+ +</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | |Submarine Meta | |</span></div><div class="token-line" 
style="color:#bfc7d5"><span class="token plain"> | | Store | |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | +-----------------+ |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> | |</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> +-----------------------------------------------------------------+</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> (You can use http://stable.ascii-flow.appspot.com/#Draw</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> to draw such diagrams)</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p><code>Compute Resource Manager</code> Helps to manage compute resources on-prem/on-cloud, this module can also handle cluster creation / management, etc.</p><p><code>Experiment Manager</code> Work with &quot;Compute Resource Manager&quot; to submit different kinds of workloads such as (distributed) Tensorflow / Pytorch, etc.</p><p><code>Submarine SDK</code> provides Java/Python/REST API to allow DS or other engineers to integrate into Submarine services. It also includes a <code>mini-submarine</code> component that launches Submarine components from a single Docker container (or a VM image).</p><p>Details of Submarine Server design can be found at <a href="/docs/designDocs/submarine-server/architecture">submarine-server-design</a>.</p></div></article><div class="margin-vert--xl"><div class="row"><div class="col"><a href="https://github.com/apache/submarine/edit/master/website/versioned_docs/version-0.6.0/designDocs/architecture-and-requirements.md" target="_blank" rel="noreferrer noopener"><svg fill="currentColor" height="1.2em" width="1.2em" preserveAspectRatio="xMidYMid meet" role="img" viewBox="0 0 40 40" class="iconEdit_2LL7"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div></div></div><div class="margin-vert--lg"><nav class="pagination-nav" aria-label="Blog list page navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/docs/community/contributing"><div class="pagination-nav__sublabel">Previous</div><div 
class="pagination-nav__label">« How To Contribute to Submarine</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/docs/designDocs/implementation-notes"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Implementation Notes »</div></a></div></nav></div></div></div><div class="col col--3"><div class="tableOfContents_2xL- thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#terminology" class="table-of-contents__link">Terminology</a></li><li><a href="#background" class="table-of-contents__link">Background</a></li><li><a href="#machine-learning-workflows--pain-points" class="table-of-contents__link">Machine Learning Workflows &amp; Pain points</a><ul><li><a href="#pain-1-complex-workflowsteps-from-raw-data-to-model-different-tools-needed-by-different-steps-hard-to-make-changes-to-workflow-and-not-error-proof" class="table-of-contents__link">Pain #1 Complex workflow/steps from raw data to model, different tools needed by different steps, hard to make changes to workflow, and not error-proof</a></li><li><a href="#pain-2-dependencies-of-underlying-resource-management-platform" class="table-of-contents__link">Pain #2 Dependencies of underlying resource management platform</a></li><li><a href="#pain-3-data-scientist-are-forced-to-interact-with-lower-level-platform-components" class="table-of-contents__link">Pain #3 Data scientist are forced to interact with lower-level platform components</a></li><li><a href="#pain-4-comply-with-data-securitygovernance-requirements" class="table-of-contents__link">Pain #4 Comply with data security/governance requirements</a></li><li><a href="#pain-5-no-good-way-to-reduce-routine-ml-code-development" class="table-of-contents__link">Pain #5 No good way to reduce routine ML code development</a></li></ul></li><li><a href="#overview" class="table-of-contents__link">Overview</a><ul><li><a 
href="#a-little-bit-history" class="table-of-contents__link">A little bit history</a></li><li><a href="#why-submarine" class="table-of-contents__link">Why Submarine?</a></li></ul></li><li><a href="#requirements-and-non-requirements" class="table-of-contents__link">Requirements and non-requirements</a><ul><li><a href="#notebook" class="table-of-contents__link">Notebook</a></li><li><a href="#experiment" class="table-of-contents__link">Experiment</a></li><li><a href="#environment-profiles" class="table-of-contents__link">Environment Profiles</a></li><li><a href="#model" class="table-of-contents__link">Model</a></li><li><a href="#metrics-for-training-job-and-model" class="table-of-contents__link">Metrics for training job and model</a></li><li><a href="#deployment" class="table-of-contents__link">Deployment</a></li><li><a href="#security--access-control--user-management--quota-management" class="table-of-contents__link">Security / Access Control / User Management / Quota Management</a></li><li><a href="#dataset" class="table-of-contents__link">Dataset</a></li></ul></li><li><a href="#architecture-overview" class="table-of-contents__link">Architecture Overview</a><ul><li><a href="#architecture-diagram" class="table-of-contents__link">Architecture Diagram</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container"><div class="row footer__links"><div class="col footer__col"><h4 class="footer__title">Docs</h4><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/gettingStarted/quickstart">Getting Started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/api/environment">API docs</a></li></ul></div><div class="col footer__col"><h4 class="footer__title">Community</h4><ul class="footer__items"><li class="footer__item"><a href="https://stackoverflow.com/questions/tagged/apache-submarine" target="_blank" rel="noopener noreferrer" 
class="footer__link-item">Stack Overflow</a></li><li class="footer__item"><a href="https://s.apache.org/slack-invite" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack</a></li></ul></div><div class="col footer__col"><h4 class="footer__title">More</h4><ul class="footer__items"><li class="footer__item"><a href="https://medium.com/@apache.submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog</a></li><li class="footer__item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_31Aa"><img class="footer__logo" alt="Apache Open Source Logo" src="https://hadoop.apache.org/asf_logo_wide.png"></a></div><div class="footer__copyright">Apache Submarine, Submarine, Apache, the Apache feather logo, and the Apache Submarine project logo are
either registered trademarks or trademarks of the Apache Software Foundation in the United States and other
countries.<br> Copyright © 2022 Apache Submarine is Apache2 Licensed software.</div></div></div></footer></div>
<script src="/styles.6ddab7ad.js"></script>
<script src="/runtime~main.943b9f3a.js"></script>
<script src="/main.92db4fb0.js"></script>
<script src="/1.dab80c77.js"></script>
<script src="/2.6a1a8326.js"></script>
<script src="/1f391b9e.f22e8160.js"></script>
<script src="/128.10c43f42.js"></script>
<script src="/58f10d9f.87a860a5.js"></script>
<script src="/17896441.be8f95bf.js"></script>
<script src="/ae045997.52baff71.js"></script>
</body>
</html>