blob: 3bd6883f897a019f0f1149195e94639611097cd1 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-0.6.0 plugin-docs plugin-id-default docs-doc-id-designDocs/architecture-and-requirements">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.18">
<title data-rh="true">Architecture and Requirement | Apache Submarine</title><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://submarine.apache.org//docs/designDocs/architecture-and-requirements"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="0.6.0"><meta data-rh="true" name="docusaurus_tag" content="docs-default-0.6.0"><meta data-rh="true" name="docsearch:version" content="0.6.0"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-0.6.0"><meta data-rh="true" property="og:title" content="Architecture and Requirement | Apache Submarine"><meta data-rh="true" name="description" content="&lt;!--"><meta data-rh="true" property="og:description" content="&lt;!--"><link data-rh="true" rel="icon" href="/img/submarine.ico"><link data-rh="true" rel="canonical" href="https://submarine.apache.org//docs/designDocs/architecture-and-requirements"><link data-rh="true" rel="alternate" href="https://submarine.apache.org//docs/designDocs/architecture-and-requirements" hreflang="en"><link data-rh="true" rel="alternate" href="https://submarine.apache.org//docs/designDocs/architecture-and-requirements" hreflang="x-default"><link rel="stylesheet" href="/assets/css/styles.80258812.css">
<link rel="preload" href="/assets/js/runtime~main.c55a74e9.js" as="script">
<link rel="preload" href="/assets/js/main.47923baa.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus">
<div role="region"><a href="#" class="skipToContent_ZgBM">Skip to main content</a></div><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_W2Cr themedImage--light_TfLj"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_W2Cr themedImage--dark_oUvU"></div><b class="navbar__title">Apache Submarine</b></a><a class="navbar__item navbar__link navbar__link--active" href="/docs/gettingStarted/quickstart">Docs</a><a class="navbar__item navbar__link" href="/docs/api/environment">API</a><a class="navbar__item navbar__link" href="/docs/download">Download</a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link" aria-haspopup="true" aria-expanded="false" role="button" href="/docs/gettingStarted/quickstart">0.6.0</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/designDocs/architecture-and-requirements">master 🏃</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/designDocs/architecture-and-requirements">0.6.0</a></li><li><a class="dropdown__link" href="/versions">All versions</a></li></ul></div><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 
13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache Software Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" 
class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="toggle_S7eR colorModeToggle_vKtC"><button class="clean-btn toggleButton_rCf9 toggleButtonDisabled_Pu9x" type="button" disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_v35p"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z 
M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_nQuB"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Search" aria-label="Search" class="navbar__search-input search-bar"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div class="main-wrapper"><div class="docPage_P2Lg"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_RiI4" type="button"></button><aside class="theme-doc-sidebar-container docSidebarContainer_rKC_"><div class="sidebar_RiAD"><nav class="menu thin-scrollbar menu_izAj"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/gettingStarted/quickstart">Getting Started</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/userDocs/api/experiment">User Docs</a></div></li><li 
class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/adminDocs/yarn/">Administrator Docs</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/devDocs/">Developer Docs</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" href="/docs/community/">Community</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret menu__link--active" aria-expanded="true" href="/docs/designDocs/architecture-and-requirements">Design Docs</a></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/docs/designDocs/architecture-and-requirements">Architecture and Requirment</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/designDocs/implementation-notes">Implementation Notes</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/designDocs/environments-implementation">Environments 
Implementation</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/designDocs/experiment-implementation">Experiment Implementation</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/designDocs/notebook-implementation">Notebook Implementation</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/designDocs/storage-implementation">Storage Implementation</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/designDocs/submarine-server/architecture">Submarine Server</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" aria-expanded="false" tabindex="0" href="/docs/designDocs/wip-designs/submarine-launcher">WIP Design Docs</a></div></li></ul></li></ul></nav></div></aside><main class="docMainContainer_TCnq"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_DM6M"><div class="docItemContainer_vinB"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Xlws" aria-label="breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a class="breadcrumbs__link" href="/">🏠</a></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item"><span class="breadcrumbs__link" itemprop="item name">Design 
Docs</span><meta itemprop="position" content="1"></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="item name">Architecture and Requirement</span><meta itemprop="position" content="2"></li></ul></nav><span class="theme-doc-version-badge badge badge--secondary">Version: 0.6.0</span><div class="tocCollapsible_jdIR theme-doc-toc-mobile tocMobile_TmEX"><button type="button" class="clean-btn tocCollapsibleButton_Fzxq">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Architecture and Requirement</h1></header><h2 class="anchor anchorWithStickyNavbar_mojV" id="terminology">Terminology<a class="hash-link" href="#terminology" title="Direct link to heading"></a></h2><table><thead><tr><th>Term</th><th>Description</th></tr></thead><tbody><tr><td>User</td><td>A single data-scientist/data-engineer. User has resource quota, credentials</td></tr><tr><td>Team</td><td>User belongs to one or more teams, teams have ACLs for artifacts sharing such as notebook content, model, etc.</td></tr><tr><td>Admin</td><td>Also called SRE, who manages user&#x27;s quotas, credentials, team, and other components.</td></tr></tbody></table><h2 class="anchor anchorWithStickyNavbar_mojV" id="background">Background<a class="hash-link" href="#background" title="Direct link to heading"></a></h2><p>Everybody talks about machine learning today, and lots of companies are trying to leverage machine learning to push the business to the next level. Nowadays, as more and more developers and infrastructure software companies come to this field, machine learning becomes more and more achievable. </p><p>In the last decade, the software industry has built many open source tools for machine learning to solve the pain points: </p><ol><li><p>It was not easy to build machine learning algorithms manually, such as logistic regression, GBDT, and many other algorithms:
<strong>Answer to that:</strong> Industries have open sourced many algorithm libraries, tools, and even pre-trained models so that data scientists can directly reuse these building blocks to hook up to their data without knowing intricate details inside these algorithms and models. </p></li><li><p>It was not easy to achieve &quot;WYSIWYG, what you see is what you get&quot; from IDEs: not easy to get output, visualization, troubleshooting experiences at the same place.
<strong>Answer to that:</strong> The notebook concept was added to this picture; notebooks brought the experience of interactive coding, sharing, visualization, and debugging under the same user interface. There are popular open-source notebooks like Apache Zeppelin/Jupyter.</p></li><li><p>It was not easy to manage dependencies: ML applications that run on one machine are hard to deploy on another machine because they have many library dependencies.
<strong>Answer to that:</strong> Containerization became popular and a standard way to package dependencies, making it easier to &quot;build once, run anywhere&quot;. </p></li><li><p>Fragmented tools and libraries were hard for ML engineers to learn. Experience gained in one company was not naturally transferable to another company.
<strong>Answer to that:</strong> A few dominant open-source frameworks reduced the overhead of learning too many different frameworks and concepts. Data scientists who learn a few libraries such as Tensorflow/PyTorch and a few high-level wrappers like Keras will be able to create their machine learning applications from other open-source building blocks.</p></li><li><p>Similarly, models built by one library (such as libsvm) were hard to integrate into a machine learning pipeline since there was no standard format.
<strong>Answer to that:</strong> Industry has built successful open-source standard machine learning frameworks such as Tensorflow/PyTorch/Keras so their formats can be easily shared. There are also efforts to build an even more general model format such as ONNX.</p></li><li><p>It was hard to build a data pipeline that flows/transforms data from a raw data source to whatever is required by ML applications.
<strong>Answer to that:</strong> Open source big data industry plays an important role in providing, simplify, unify processes and building blocks for data flows, transformations, etc.</p></li></ol><p>The machine learning industry is moving on the right track to solve major roadblocks. So what are the pain points now for companies which have machine learning needs? What can we help here? To answer this question, let&#x27;s look at machine learning workflow first. </p><h2 class="anchor anchorWithStickyNavbar_mojV" id="machine-learning-workflows--pain-points">Machine Learning Workflows &amp; Pain points<a class="hash-link" href="#machine-learning-workflows--pain-points" title="Direct link to heading"></a></h2><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">1) From different data sources such as edge, clickstream, logs, etc.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Land to data lakes </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2) From data lake, data transformation: </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Data transformations: Cleanup, remove invalid rows/columns, </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> select columns, sampling, split train/test</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> data-set, join table, etc.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Data prepared for 
training.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">3) From prepared data: </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Training, model hyper-parameter tuning, cross-validation, etc. </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Models saved to storage. </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">4) From saved models: </span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Model assurance, deployment, A/B testing, etc.</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> =&gt; Model deployed for online serving or offline scoring.</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>Typically data scientists responsible for item 2)-4), 1) typically handled by a different team (called Data Engineering team in many companies, some Data Engineering team also responsible for part of data transformation)</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="pain-1-complex-workflowsteps-from-raw-data-to-model-different-tools-needed-by-different-steps-hard-to-make-changes-to-workflow-and-not-error-proof">Pain <!-- -->#<!-- 
-->1 Complex workflow/steps from raw data to model, different tools needed by different steps, hard to make changes to workflow, and not error-proof<a class="hash-link" href="#pain-1-complex-workflowsteps-from-raw-data-to-model-different-tools-needed-by-different-steps-hard-to-make-changes-to-workflow-and-not-error-proof" title="Direct link to heading"></a></h3><p>It is a complex workflow from raw data to usable models, after talking to many different data scientists, we have learned that a typical procedure to train a new model and push to production can take months to 1-2 years. </p><p>It is also a wide skill set required by this workflow. For example, data transformation needs tools like Spark/Hive for large scale and tools like Pandas for a small scale. And model training needs to be switched between XGBoost, Tensorflow, Keras, PyTorch. Building a data pipeline requires Apache Airflow or Oozie. </p><p>Yes, there are great, standardized open-source tools built for many of such purposes. But how about changes need to be made for a particular part of the data pipeline? How about adding a few columns to the training data for experiments? How about training models, and push models to validation, A/B testing before rolling to production? All these steps need jumping between different tools, UIs, and very hard to make changes, and it is not error-proof during these procedures.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="pain-2-dependencies-of-underlying-resource-management-platform">Pain <!-- -->#<!-- -->2 Dependencies of underlying resource management platform<a class="hash-link" href="#pain-2-dependencies-of-underlying-resource-management-platform" title="Direct link to heading"></a></h3><p>To make jobs/services required by a machine learning platform to be able to run, we need an underlying resource management platform. There&#x27;re some choices of resource management platform, and they have distinct advantages and disadvantages. 
</p><p>For example, there&#x27;re many machine learning platform built on top of K8s. It is relatively easy to get a K8s from a cloud vendor, easy to orchestrate machine learning required services/daemons run on K8s. However, K8s doesn&#x27;t offer good support jobs like Spark/Flink/Hive. So if your company has Spark/Flink/Hive running on YARN, there&#x27;re gaps and a significant amount of work to move required jobs from YARN to K8s. Maintaining a separate K8s cluster is also overhead to Hadoop-based data infrastructure.</p><p>Similarly, if your company&#x27;s data pipelines are mostly built on top of cloud resources and SaaS offerings, asking you to install a separate YARN cluster to run a new machine learning platform doesn&#x27;t make a lot of sense.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="pain-3-data-scientist-are-forced-to-interact-with-lower-level-platform-components">Pain <!-- -->#<!-- -->3 Data scientist are forced to interact with lower-level platform components<a class="hash-link" href="#pain-3-data-scientist-are-forced-to-interact-with-lower-level-platform-components" title="Direct link to heading"></a></h3><p>In addition to the above pain, we do see Data Scientists are forced to learn underlying platform knowledge to be able to build a real-world machine learning workflow.</p><p>For most of the data scientists we talked with, they&#x27;re experts of ML algorithms/libraries, feature engineering, etc. They&#x27;re also most familiar with Python, R, and some of them understand Spark, Hive, etc. </p><p>If they&#x27;re asked to do interactions with lower-level components like fine-tuning a Spark job&#x27;s performance; or troubleshooting job failed to launch because of resource constraints; or write a K8s/YARN job spec and mount volumes, set networks properly. 
They will scratch their heads and typically cannot perform these operations efficiently.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="pain-4-comply-with-data-securitygovernance-requirements">Pain <!-- -->#<!-- -->4 Comply with data security/governance requirements<a class="hash-link" href="#pain-4-comply-with-data-securitygovernance-requirements" title="Direct link to heading"></a></h3><p>TODO: Add more details.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="pain-5-no-good-way-to-reduce-routine-ml-code-development">Pain <!-- -->#<!-- -->5 No good way to reduce routine ML code development<a class="hash-link" href="#pain-5-no-good-way-to-reduce-routine-ml-code-development" title="Direct link to heading"></a></h3><p>After the data is prepared, the data scientist needs to do several routine tasks to build the ML pipeline. To get a sense of the existing the data set, it usually needs a split of the data set, the statistics of data set. These tasks have a common duplicate part of code, which reduces the efficiency of data scientists.</p><p>An abstraction layer/framework to help the developer to boost ML pipeline development could be valuable. It&#x27;s better than the developer only needs to fill callback function to focus on their key logic.</p><h1>Submarine</h1><h2 class="anchor anchorWithStickyNavbar_mojV" id="overview">Overview<a class="hash-link" href="#overview" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="a-little-bit-history">A little bit history<a class="hash-link" href="#a-little-bit-history" title="Direct link to heading"></a></h3><p>Initially, Submarine is built to solve problems of running deep learning jobs like Tensorflow/PyTorch on Apache Hadoop YARN, allows admin to monitor launched deep learning jobs, and manage generated models. </p><p>It was part of YARN initially, and code resides under <code>hadoop-yarn-applications</code>. 
Later, the community decided to convert it to be a subproject within Hadoop (Sibling project of YARN, HDFS, etc.) because we want to support other resource management platforms like K8s. And finally, we&#x27;re reconsidering Submarine&#x27;s charter, and the Hadoop community voted that it was time to move Submarine to a separate Apache TLP.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="why-submarine">Why Submarine?<a class="hash-link" href="#why-submarine" title="Direct link to heading"></a></h3><p><code>ONE PLATFORM</code></p><p>Submarine is the ONE PLATFORM to allow Data Scientists to create end-to-end machine learning workflows. <code>ONE PLATFORM</code> means it supports Data Scientists and data engineers to finish their jobs on the same platform without frequently switching their toolsets. From dataset exploration and data pipeline creation to model training, tuning, and pushing models to production, all these steps can be completed within the <code>ONE PLATFORM</code>.</p><p><code>Resource Management Independent</code></p><p>It is also designed to be resource management independent, no matter if you have Apache Hadoop YARN, K8s, or just a container service, you will be able to run Submarine on top of it.</p><h2 class="anchor anchorWithStickyNavbar_mojV" id="requirements-and-non-requirements">Requirements and non-requirements<a class="hash-link" href="#requirements-and-non-requirements" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="notebook">Notebook<a class="hash-link" href="#notebook" title="Direct link to heading"></a></h3><p>1) Users should be able to create, edit, and delete a notebook. (P0)
2) Notebooks can be persisted to storage and can be recovered if failure happens. (P0)
3) Users can trace back to historical versions of a notebook. (P1)
4) Notebooks can be shared with different users. (P1)
5) Users can define a list of parameters of a notebook (looks like parameters of the notebook&#x27;s main function) to allow executing a notebook like a job. (P1)
6) Different users can collaborate on the same notebook at the same time. (P2)</p><p>A running notebook instance is called a notebook session (or session for short).</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="experiment">Experiment<a class="hash-link" href="#experiment" title="Direct link to heading"></a></h3><p>An experiment in Submarine is an offline task. It could be a shell command, a Python command, a Spark job, a SQL query, or even a workflow. </p><p>The primary purpose of experiments in Submarine&#x27;s context is to do training tasks, offline scoring, etc. However, experiments can be generalized to do other tasks as well.</p><p>Major requirements of experiments: </p><p>1) Experiments can be submitted from UI/CLI/SDK.
2) Experiments can be monitored/managed from UI/CLI/SDK.
3) Experiments should not bind to one resource management platform (K8s/YARN).</p><h4 class="anchor anchorWithStickyNavbar_mojV" id="type-of-experiments">Type of experiments<a class="hash-link" href="#type-of-experiments" title="Direct link to heading"></a></h4><p><img loading="lazy" src="/assets/images/experiments-7a09831687ecbc0e1dcf01b0c6f45445.png" width="946" height="734" class="img_E7b_"></p><p>There&#x27;re two types of experiments:
<code>Adhoc experiments</code>: which includes a Python/R/notebook, or even an adhoc Tensorflow/PyTorch task, etc. </p><p><code>Predefined experiment library</code>: This is specialized experiments, which including developed libraries such as CTR, BERT, etc. Users are only required to specify a few parameters such as input, output, hyper parameters, etc. Instead of worrying about where&#x27;s training script/dependencies located.</p><h4 class="anchor anchorWithStickyNavbar_mojV" id="adhoc-experiment">Adhoc experiment<a class="hash-link" href="#adhoc-experiment" title="Direct link to heading"></a></h4><p>Requirements:</p><ul><li>Allow run adhoc scripts.</li><li>Allow model engineer, data scientist to run Tensorflow/Pytorch programs on YARN/K8s/Container-cloud. </li><li>Allow jobs easy access data/models in HDFS/s3, etc. </li><li>Support run distributed Tensorflow/Pytorch jobs with simple configs.</li><li>Support run user-specified Docker images.</li><li>Support specify GPU and other resources.</li></ul><h4 class="anchor anchorWithStickyNavbar_mojV" id="predefined-experiment-library">Predefined experiment library<a class="hash-link" href="#predefined-experiment-library" title="Direct link to heading"></a></h4><p>Here&#x27;s an example of predefined experiment library to train deepfm model: </p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">{</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;input&quot;: {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;train_data&quot;: [&quot;hdfs:///user/submarine/data/tr.libsvm&quot;],</span><br></span><span class="token-line" style="color:#bfc7d5"><span 
class="token plain"> &quot;valid_data&quot;: [&quot;hdfs:///user/submarine/data/va.libsvm&quot;],</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;test_data&quot;: [&quot;hdfs:///user/submarine/data/te.libsvm&quot;],</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;type&quot;: &quot;libsvm&quot;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> },</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;output&quot;: {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;save_model_dir&quot;: &quot;hdfs:///user/submarine/deepfm&quot;,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;metric&quot;: &quot;auc&quot;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> },</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;training&quot;: {</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;batch_size&quot; : 512,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;field_size&quot;: 39,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;num_epochs&quot;: 3,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> &quot;feature_size&quot;: 117581,</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> ...</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> }</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">}</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN 
clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>Predefined experiment libraries can be shared across users on the same platform; users can also add new or modified predefined experiment libraries via UI/REST API.</p><p>We will also model AutoML and auto hyper-parameter tuning as predefined experiment libraries.</p><h4 class="anchor anchorWithStickyNavbar_mojV" id="pipeline">Pipeline<a class="hash-link" href="#pipeline" title="Direct link to heading"></a></h4><p>A pipeline is a special kind of experiment:</p><ul><li>A pipeline is a DAG of experiments. </li><li>It can also be treated as a special kind of experiment.</li><li>Users can submit/terminate a pipeline.</li><li>A pipeline can be created/submitted via UI/API.</li></ul><h3 class="anchor anchorWithStickyNavbar_mojV" id="environment-profiles">Environment Profiles<a class="hash-link" href="#environment-profiles" title="Direct link to heading"></a></h3><p>An environment profile (or environment for short) defines a set of libraries and, when Docker is being used, a Docker image in order to run an experiment or a notebook. </p><p>A Docker or VM image (such as AMI: Amazon Machine Images) defines the base layer of the environment. </p><p>On top of that, users can define a set of libraries (such as Python/R) to install.</p><p>Users can save different environment configs which can also be shared across the platform. Environment profiles can be used to run a notebook (e.g. by choosing a different kernel from Jupyter), or an experiment. 
A predefined experiment library includes what environment to use so users don&#x27;t have to choose which environment to use.</p><p>Environments can be added/listed/deleted/selected through CLI/SDK.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="model">Model<a class="hash-link" href="#model" title="Direct link to heading"></a></h3><h4 class="anchor anchorWithStickyNavbar_mojV" id="model-management">Model management<a class="hash-link" href="#model-management" title="Direct link to heading"></a></h4><ul><li>Model artifacts are generated by experiments or notebooks.</li><li>A model consists of artifacts from one or multiple files. </li><li>Users can choose to save, tag, and version a produced model.</li><li>Once the model is saved, users can do online model serving or offline scoring of the model.</li></ul><h4 class="anchor anchorWithStickyNavbar_mojV" id="model-serving">Model serving<a class="hash-link" href="#model-serving" title="Direct link to heading"></a></h4><p>After a model is saved, users can specify a serving script and a model, and create a web service to serve the model. </p><p>We call the web service an &quot;endpoint&quot;. Users can manage (add/stop) model serving endpoints via CLI/API/UI.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="metrics-for-training-job-and-model">Metrics for training job and model<a class="hash-link" href="#metrics-for-training-job-and-model" title="Direct link to heading"></a></h3><p>Submarine-SDK provides tracking/metrics APIs, which allow developers to add tracking/metrics and view tracking/metrics from Submarine Workbench UI.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="deployment">Deployment<a class="hash-link" href="#deployment" title="Direct link to heading"></a></h3><p>Submarine Services (see architecture overview below) should be deployed easily on-prem / on-cloud. 
Since there are more and more public cloud offerings for compute/storage management on cloud, we need to support deploying Submarine compute-related workloads (such as notebook session, experiments, etc.) to cloud-managed clusters. </p><p>This also means Submarine may need to take input parameters from customers and create/manage clusters if needed. It is also a common requirement to use a hybrid of on-prem/on-cloud clusters.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="security--access-control--user-management--quota-management">Security / Access Control / User Management / Quota Management<a class="hash-link" href="#security--access-control--user-management--quota-management" title="Direct link to heading"></a></h3><p>There are 4 kinds of objects that need access control: </p><ul><li>Assets belonging to the Submarine system, which include notebooks, experiments and results, models, predefined experiment libraries, environment profiles.</li><li>Data security. (Who owns what data, and what data can be accessed by each user). </li><li>User credentials. (Such as LDAP).</li><li>Other security, such as Git repo access, etc.</li></ul><p>Data security / user credentials / other security will be delegated to 3rd-party libraries such as Apache Ranger, IAM roles, etc. </p><p>Assets belonging to the Submarine system will be handled by Submarine itself.</p><p>Here are operations which a Submarine admin can do for users / teams which can be used to access Submarine&#x27;s assets. </p><p><strong>Operations for admins</strong> </p><ul><li>An admin uses &quot;User Management System&quot; to onboard new users, upload user credentials, assign resource quotas, etc. </li><li>Admins can create new users and new teams, update user/team mappings, or remove users/teams. 
</li><li>Admin can set resource quotas (if different from system default), permissions, upload/update necessary credentials (like Kerberos keytab) of a user.</li><li>A DE/DS can also be an admin if the DE/DS has admin access. (Like a privileged user). This will be useful when a cluster is exclusively shared by a user or only shared by a small team.</li><li><code>Resource Quota Management System</code> helps admin to manage resources quotas of teams, organizations. Resources can be machine resources like CPU/Memory/Disk, etc. It can also include non-machine resources like $$-based budgets.</li></ul><h3 class="anchor anchorWithStickyNavbar_mojV" id="dataset">Dataset<a class="hash-link" href="#dataset" title="Direct link to heading"></a></h3><p>There&#x27;s also need to tag dataset which will be used for training and shared across the platform by different users. </p><p>Like mentioned above, access to the actual data will be handled by 3rd party system like Apache Ranger / Hive Metastore which is out of the Submarine&#x27;s scope.</p><h2 class="anchor anchorWithStickyNavbar_mojV" id="architecture-overview">Architecture Overview<a class="hash-link" href="#architecture-overview" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="architecture-diagram">Architecture Diagram<a class="hash-link" href="#architecture-diagram" title="Direct link to heading"></a></h3><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain"> +-----------------------------------------------------------------+</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | Submarine UI / CLI / REST API / SDK |</span><br></span><span class="token-line" 
style="color:#bfc7d5"><span class="token plain"> | Mini-Submarine |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> +-----------------------------------------------------------------+</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> +--------------------Submarine Server-----------------------------+</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | +---------+ +---------+ +----------+ +----------+ +------------+|</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | |Data set | |Notebooks| |Experiment| |Models | |Servings ||</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | +---------+ +---------+ +----------+ +----------+ +------------+|</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> |-----------------------------------------------------------------|</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | +-----------------+ +-----------------+ +---------------------+ |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | |Experiment | |Compute Resource | |Other Management | |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | |Manager | | Manager | |Services | |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | +-----------------+ +-----------------+ +---------------------+ |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | Spark, template YARN/K8s/Docker |</span><br></span><span class="token-line" 
style="color:#bfc7d5"><span class="token plain"> | TF, PyTorch, pipeline |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> + +-----------------+ +</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | |Submarine Meta | |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | | Store | |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | +-----------------+ |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> | |</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> +-----------------------------------------------------------------+</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> (You can use http://stable.ascii-flow.appspot.com/#Draw</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> to draw such diagrams)</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p><code>Compute Resource Manager</code> Helps to manage compute resources on-prem/on-cloud, this module can also handle cluster creation / management, etc.</p><p><code>Experiment 
Manager</code> Work with &quot;Compute Resource Manager&quot; to submit different kinds of workloads such as (distributed) Tensorflow / Pytorch, etc.</p><p><code>Submarine SDK</code> provides Java/Python/REST API to allow DS or other engineers to integrate into Submarine services. It also includes a <code>mini-submarine</code> component that launches Submarine components from a single Docker container (or a VM image).</p><p>Details of Submarine Server design can be found at <a href="/docs/designDocs/submarine-server/architecture">submarine-server-design</a>.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/submarine/edit/master/website/versioned_docs/version-0.6.0/designDocs/architecture-and-requirements.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_dcUD" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_foO9"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"><a class="pagination-nav__link" href="/docs/community/contributing"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">How To Contribute to Submarine</div></a></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/docs/designDocs/implementation-notes"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Implementation Notes</div></a></div></nav></div></div><div class="col col--3"><div class="tableOfContents_cNA8 thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents 
table-of-contents__left-border"><li><a href="#terminology" class="table-of-contents__link toc-highlight">Terminology</a></li><li><a href="#background" class="table-of-contents__link toc-highlight">Background</a></li><li><a href="#machine-learning-workflows--pain-points" class="table-of-contents__link toc-highlight">Machine Learning Workflows &amp; Pain points</a><ul><li><a href="#pain-1-complex-workflowsteps-from-raw-data-to-model-different-tools-needed-by-different-steps-hard-to-make-changes-to-workflow-and-not-error-proof" class="table-of-contents__link toc-highlight">Pain #1 Complex workflow/steps from raw data to model, different tools needed by different steps, hard to make changes to workflow, and not error-proof</a></li><li><a href="#pain-2-dependencies-of-underlying-resource-management-platform" class="table-of-contents__link toc-highlight">Pain #2 Dependencies of underlying resource management platform</a></li><li><a href="#pain-3-data-scientist-are-forced-to-interact-with-lower-level-platform-components" class="table-of-contents__link toc-highlight">Pain #3 Data scientist are forced to interact with lower-level platform components</a></li><li><a href="#pain-4-comply-with-data-securitygovernance-requirements" class="table-of-contents__link toc-highlight">Pain #4 Comply with data security/governance requirements</a></li><li><a href="#pain-5-no-good-way-to-reduce-routine-ml-code-development" class="table-of-contents__link toc-highlight">Pain #5 No good way to reduce routine ML code development</a></li></ul></li><li><a href="#overview" class="table-of-contents__link toc-highlight">Overview</a><ul><li><a href="#a-little-bit-history" class="table-of-contents__link toc-highlight">A little bit history</a></li><li><a href="#why-submarine" class="table-of-contents__link toc-highlight">Why Submarine?</a></li></ul></li><li><a href="#requirements-and-non-requirements" class="table-of-contents__link toc-highlight">Requirements and non-requirements</a><ul><li><a 
href="#notebook" class="table-of-contents__link toc-highlight">Notebook</a></li><li><a href="#experiment" class="table-of-contents__link toc-highlight">Experiment</a></li><li><a href="#environment-profiles" class="table-of-contents__link toc-highlight">Environment Profiles</a></li><li><a href="#model" class="table-of-contents__link toc-highlight">Model</a></li><li><a href="#metrics-for-training-job-and-model" class="table-of-contents__link toc-highlight">Metrics for training job and model</a></li><li><a href="#deployment" class="table-of-contents__link toc-highlight">Deployment</a></li><li><a href="#security--access-control--user-management--quota-management" class="table-of-contents__link toc-highlight">Security / Access Control / User Management / Quota Management</a></li><li><a href="#dataset" class="table-of-contents__link toc-highlight">Dataset</a></li></ul></li><li><a href="#architecture-overview" class="table-of-contents__link toc-highlight">Architecture Overview</a><ul><li><a href="#architecture-diagram" class="table-of-contents__link toc-highlight">Architecture Diagram</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Docs</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/gettingStarted/quickstart">Getting Started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/api/environment">API docs</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a href="https://stackoverflow.com/questions/tagged/apache-submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Stack Overflow<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 
13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://s.apache.org/slack-invite" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">More</div><ul class="footer__items"><li class="footer__item"><a href="https://medium.com/@apache.submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog</a></li><li class="footer__item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_gHmE"><img src="https://hadoop.apache.org/asf_logo_wide.png" alt="Apache Open Source Logo" class="themedImage_W2Cr themedImage--light_TfLj footer__logo"><img src="https://hadoop.apache.org/asf_logo_wide.png" alt="Apache Open Source Logo" class="themedImage_W2Cr themedImage--dark_oUvU footer__logo"></a></div><div class="footer__copyright">Apache Submarine, Submarine, Apache, the Apache feather logo, and the Apache Submarine project logo are
either registered trademarks or trademarks of the Apache Software Foundation in the United States and other
countries.<br> Copyright © 2022 Apache Submarine is Apache2 Licensed software.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.c55a74e9.js"></script>
<script src="/assets/js/main.47923baa.js"></script>
</body>
</html>