<!-- blob: 0f7ba8f89444ee3c45a7dc309fe1fdb767c84a50 [file] [log] [blame] -->
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-alpha.70">
<link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="Apache Submarine Blog RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="Apache Submarine Blog Atom Feed"><title data-react-helmet="true">Quickstart | Apache Submarine</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" name="docusaurus_locale" content="en"><meta data-react-helmet="true" name="docusaurus_version" content="0.6.0"><meta data-react-helmet="true" name="docusaurus_tag" content="docs-default-0.6.0"><meta data-react-helmet="true" property="og:title" content="Quickstart | Apache Submarine"><meta data-react-helmet="true" name="description" content="A quick overview of the basic usage of the Apache Submarine platform."><meta data-react-helmet="true" property="og:description" content="A quick overview of the basic usage of the Apache Submarine platform."><meta data-react-helmet="true" property="og:url" content="https://submarine.apache.org/docs/gettingStarted/quickstart"><link data-react-helmet="true" rel="shortcut icon" href="/img/submarine.ico"><link data-react-helmet="true" rel="canonical" href="https://submarine.apache.org/docs/gettingStarted/quickstart"><link rel="stylesheet" href="/styles.39775f96.css">
<link rel="preload" href="/styles.f6b0c2f2.js" as="script">
<link rel="preload" href="/runtime~main.13a9404d.js" as="script">
<link rel="preload" href="/main.1c145c17.js" as="script">
<link rel="preload" href="/1.d23d1451.js" as="script">
<link rel="preload" href="/2.45bcb8a0.js" as="script">
<link rel="preload" href="/1f391b9e.785b37ba.js" as="script">
<link rel="preload" href="/127.875bba76.js" as="script">
<link rel="preload" href="/58f10d9f.e974ccf6.js" as="script">
<link rel="preload" href="/17896441.faf04472.js" as="script">
<link rel="preload" href="/a9744973.fb870be8.js" as="script">
</head>
<body>
<script>
// Inline theme bootstrap: copies the persisted "theme" value (or "light" when
// nothing is stored) onto the root element's data-theme attribute.
(function () {
  var savedTheme = null;
  try {
    savedTheme = localStorage.getItem("theme");
  } catch (storageError) {
    // Any failure while reading localStorage is deliberately ignored;
    // savedTheme stays null and the "light" fallback below applies.
  }

  var themeToApply;
  if (savedTheme === null) {
    themeToApply = "light";
  } else {
    themeToApply = savedTheme;
  }

  document.documentElement.setAttribute("data-theme", themeToApply);
})();
</script><div id="__docusaurus">
<nav aria-label="Skip navigation links"><button type="button" tabindex="0" class="skipToContent_11B0">Skip to main content</button></nav><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><div aria-label="Navigation bar toggle" class="navbar__toggle" role="button" tabindex="0"><svg aria-label="Menu" width="30" height="30" viewBox="0 0 30 30" role="img" focusable="false"><title>Menu</title><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></div><a class="navbar__brand" href="/"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--light_3CMI navbar__logo"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--dark_3ARp navbar__logo"><strong class="navbar__title">Apache Submarine</strong></a><a aria-current="page" class="navbar__item navbar__link navbar__link--active navbar__link--active" href="/docs/gettingStarted/quickstart">Docs</a><a class="navbar__item navbar__link" href="/docs/api/environment">API</a><a class="navbar__item navbar__link" href="/docs/download">Download</a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__item navbar__link" href="/docs/gettingStarted/quickstart">0.6.0</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/gettingStarted/quickstart">master 🏃</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/gettingStarted/quickstart">0.6.0</a></li><li><a class="dropdown__link" href="/versions">All versions</a></li></ul></div><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__item navbar__link">Apache</a><ul 
class="dropdown__menu"><li><a href="https://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache Software Foundation</a></li><li><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events</a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache License</a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks</a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security</a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship</a></li></ul></div><div class="react-toggle react-toggle--disabled displayOnlyInLargeViewport_2N3Q"><div class="react-toggle-track"><div class="react-toggle-track-check"><span class="toggle_3NWk">🌜</span></div><div class="react-toggle-track-x"><span class="toggle_3NWk">🌞</span></div></div><div class="react-toggle-thumb"></div><input type="checkbox" disabled="" aria-label="Dark mode toggle" class="react-toggle-screenreader-only"></div><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Search" aria-label="Search" class="navbar__search-input search-bar"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--light_3CMI navbar__logo"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--dark_3ARp navbar__logo"><strong 
class="navbar__title">Apache Submarine</strong></a></div><div class="navbar-sidebar__items"><div class="menu"><ul class="menu__list"><li class="menu__list-item"><a aria-current="page" class="menu__link navbar__link--active navbar__link--active" href="/docs/gettingStarted/quickstart">Docs</a></li><li class="menu__list-item"><a class="menu__link" href="/docs/api/environment">API</a></li><li class="menu__list-item"><a class="menu__link" href="/docs/download">Download</a></li><li class="menu__list-item"><a role="button" class="menu__link menu__link--sublist">Versions</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/next/gettingStarted/quickstart">master 🏃</a></li><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active" href="/docs/gettingStarted/quickstart">0.6.0</a></li><li class="menu__list-item"><a class="menu__link" href="/versions">All versions</a></li></ul></li><li class="menu__list-item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="menu__link">GitHub</a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist">Apache</a><ul class="menu__list"><li class="menu__list-item"><a href="https://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="menu__link">Apache Software Foundation</a></li><li class="menu__list-item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="menu__link">Events</a></li><li class="menu__list-item"><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="menu__link">Apache License</a></li><li class="menu__list-item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="menu__link">Thanks</a></li><li class="menu__list-item"><a href="https://www.apache.org/security/" target="_blank" 
rel="noopener noreferrer" class="menu__link">Security</a></li><li class="menu__list-item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="menu__link">Sponsorship</a></li></ul></li></ul></div></div></div></nav><div class="main-wrapper"><div class="docPage_vMrn"><div class="docSidebarContainer_3Ak5" role="complementary"><div class="sidebar_3gvy"><div class="menu menu--responsive thin-scrollbar menu_1yIk"><button aria-label="Open Menu" aria-haspopup="true" class="button button--secondary button--sm menu__button" type="button"><svg aria-label="Menu" class="sidebarMenuIcon_1CUI" width="24" height="24" viewBox="0 0 30 30" role="img" focusable="false"><title>Menu</title><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><ul class="menu__list"><li class="menu__list-item"><a class="menu__link menu__link--sublist menu__link--active" href="#!">Getting Started</a><ul class="menu__list"><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active active" tabindex="0" href="/docs/gettingStarted/quickstart">Quickstart</a></li><li class="menu__list-item"><a class="menu__link" tabindex="0" href="/docs/gettingStarted/notebook">Jupyter Notebook</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!">User Docs</a><ul class="menu__list"><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" 
tabindex="-1">API documentation</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/api/experiment">Experiment REST API</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/api/environment">Environment REST API</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/api/experiment-template">Experiment Template REST API</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/api/notebook">Notebook REST API</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" tabindex="-1">Submarine SDK</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/submarine-sdk/experiment-client">Experiment Client</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/submarine-sdk/model-client">Model Client</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/submarine-sdk/tracking">Tracking</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" tabindex="-1">Submarine Security</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/submarine-security/spark-security/README">Submarine Spark Security Plugin</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/submarine-security/spark-security/build-submarine-spark-security-plugin">Building Submarine Spark Security Plugin</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" 
tabindex="-1">Others</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/others/mlflow">MLflow UI</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/userDocs/others/tensorboard">Tensorboard</a></li></ul></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!">Administrator Docs</a><ul class="menu__list"><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" tabindex="-1">Submarine on Yarn</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/adminDocs/yarn/README">Running Submarine on YARN</a></li></ul></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!">Developer Docs</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/README">Project Architecture</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/Dependencies">Dependencies for Submarine</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/BuildFromCode">How to Build Submarine</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/Development">Development Guide</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/IntegrationTestK8s">How to Run Integration K8s Test</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/devDocs/IntegrationTestE2E">How to Run Frontend Integration Test</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!">Community</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/community/README">Apache Submarine 
Community</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/community/HowToCommit">Guide for Apache Submarine Committers</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/community/contributing">How To Contribute to Submarine</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!">Design Docs</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/architecture-and-requirements">Architecture and Requirements</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/implementation-notes">Implementation Notes</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/environments-implementation">Environments Implementation</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/experiment-implementation">Experiment Implementation</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/notebook-implementation">Notebook Implementation</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/storage-implementation">Storage Implementation</a></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" tabindex="-1">Submarine Server</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/submarine-server/architecture">Submarine Server Implementation</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/submarine-server/experimentSpec">Generic Experiment Spec</a></li></ul></li><li class="menu__list-item menu__list-item--collapsed"><a class="menu__link menu__link--sublist" href="#!" 
tabindex="-1">WIP Design Docs</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/wip-designs/submarine-launcher">Submarine Launcher</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/wip-designs/submarine-clusterServer">Cluster Server Design - High-Availability</a></li><li class="menu__list-item"><a class="menu__link" tabindex="-1" href="/docs/designDocs/wip-designs/security-implementation">Security Implementation</a></li></ul></li></ul></li></ul></div></div></div><main class="docMainContainer_2iGs"><div class="container padding-vert--lg docItemWrapper_1bxp"><div class="row"><div class="col docItemCol_U38p"><div class="docItemContainer_a7m4"><article><div><span class="badge badge--secondary">Version: 0.6.0</span></div><header><h1 class="docTitle_Oumm">Quickstart</h1></header><div class="markdown"><p>This document gives you a quick overview of the basic usage of the Submarine platform. You can complete each step of the ML model lifecycle on the platform without wrestling with troublesome environment problems.</p><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="installation"></a>Installation<a class="hash-link" href="#installation" title="Direct link to heading">#</a></h2><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="prepare-a-kubernetes-cluster"></a>Prepare a Kubernetes cluster<a class="hash-link" href="#prepare-a-kubernetes-cluster" title="Direct link to heading">#</a></h3><ol><li>Prerequisite</li></ol><ul><li>Check the <a href="/docs/devDocs/Dependencies">dependency page</a> for the compatible version</li><li><a href="https://kubernetes.io/docs/tasks/tools/install-kubectl/" target="_blank" rel="noopener noreferrer">kubectl</a></li><li><a href="https://helm.sh/docs/intro/install/" target="_blank" rel="noopener noreferrer">helm</a> (Helm v3 is the minimum requirement.)</li><li><a 
href="https://minikube.sigs.k8s.io/docs/start/" target="_blank" rel="noopener noreferrer">minikube</a>.</li></ul><ol start="2"><li>Start minikube cluster</li></ol><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">$ minikube start --vm-driver=docker --cpus 8 --memory 4096 --kubernetes-version v1.15.11</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="launch-submarine-in-the-cluster"></a>Launch submarine in the cluster<a class="hash-link" href="#launch-submarine-in-the-cluster" title="Direct link to heading">#</a></h3><ol><li>Clone the project</li></ol><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">$ git clone https://github.com/apache/submarine.git</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><ol start="2"><li>Install the resources by helm chart</li></ol><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">$ cd submarine</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">$ helm install submarine 
./helm-charts/submarine</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="ensure-submarine-is-ready"></a>Ensure submarine is ready<a class="hash-link" href="#ensure-submarine-is-ready" title="Direct link to heading">#</a></h3><ol><li>Use kubectl to query the status of pods</li></ol><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">$ kubectl get pods</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><ol start="2"><li>Make sure each pod is <code>Running</code></li></ol><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">NAME READY STATUS RESTARTS AGE</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">notebook-controller-deployment-5d4f5f874c-vwds8 1/1 Running 0 3h33m</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">pytorch-operator-844c866d54-q5ztd 1/1 Running 0 3h33m</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">submarine-database-674987ff7d-r8zqs 1/1 Running 0 3h33m</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">submarine-minio-5fdd957785-xd987 1/1 Running 0 3h33m</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">submarine-mlflow-76bbf5c7b-g2ntd 1/1 Running 0 
3h33m</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">submarine-server-66f7b8658b-sfmv8 1/1 Running 0 3h33m</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">submarine-tensorboard-6c44944dfb-tvbr9 1/1 Running 0 3h33m</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">submarine-traefik-7cbcfd4bd9-4bczn 1/1 Running 0 3h33m</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">tf-job-operator-6bb69fd44-mc8ww 1/1 Running 0 3h33m</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="connect-to-workbench"></a>Connect to workbench<a class="hash-link" href="#connect-to-workbench" title="Direct link to heading">#</a></h3><ol><li>Port-forwarding</li></ol><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain"># using port-forwarding</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">$ kubectl port-forward --address 0.0.0.0 service/submarine-traefik 32080:80</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><ol start="2"><li>Open <code>http://0.0.0.0:32080</code></li></ol><p><img src="/assets/images/quickstart-worbench-0d8c2f6217f22460d4cf8e9b05d06f6b.png" alt="Submarine workbench"></p><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="example-submit-a-mnist-distributed-example"></a>Example: Submit a mnist distributed example<a class="hash-link" href="#example-submit-a-mnist-distributed-example" title="Direct link to 
heading">#</a></h2><p>We put the code of this example <a href="https://github.com/apache/submarine/tree/master/dev-support/examples/quickstart" target="_blank" rel="noopener noreferrer">here</a>. <code>train.py</code> is our training script, and <code>build.sh</code> is the script to build a docker image.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="1-write-a-python-script-for-distributed-training"></a>1. Write a python script for distributed training<a class="hash-link" href="#1-write-a-python-script-for-distributed-training" title="Direct link to heading">#</a></h3><p>Take a simple mnist tensorflow script as an example. We choose <code>MultiWorkerMirroredStrategy</code> as our distributed strategy.</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-python codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token triple-quoted-string string" style="color:rgb(195, 232, 141)">&quot;&quot;&quot;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token triple-quoted-string string" style="color:rgb(195, 232, 141)">./dev-support/examples/quickstart/train.py</span></div><div class="token-line" style="color:#bfc7d5"><span class="token triple-quoted-string string" style="color:rgb(195, 232, 141)">Reference: https://github.com/kubeflow/tf-operator/blob/master/examples/v1/distribution_strategy/keras-API/multi_worker_strategy-with-keras.py</span></div><div class="token-line" style="color:#bfc7d5"><span class="token triple-quoted-string string" style="color:rgb(195, 232, 141)">&quot;&quot;&quot;</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">import</span><span class="token plain"> tensorflow_datasets </span><span class="token keyword" style="font-style:italic">as</span><span class="token plain"> tfds</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">import</span><span class="token plain"> tensorflow </span><span class="token keyword" style="font-style:italic">as</span><span class="token plain"> tf</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">from</span><span class="token plain"> tensorflow</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">keras </span><span class="token keyword" style="font-style:italic">import</span><span class="token plain"> layers</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> models</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">from</span><span class="token plain"> submarine </span><span class="token keyword" style="font-style:italic">import</span><span class="token plain"> ModelsClient</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">def</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">make_datasets_unbatched</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> BUFFER_SIZE </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">10000</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Scaling MNIST data from (0, 255] to (0., 1.]</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">def</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">scale</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">image</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> label</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> image </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> tf</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">cast</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">image</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> tf</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">float32</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> image </span><span class="token operator" style="color:rgb(137, 221, 255)">/=</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">255</span><span class="token plain"></span></div><div class="token-line" 
style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">return</span><span class="token plain"> image</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> label</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> datasets</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> _ </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> tfds</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">load</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">name</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;mnist&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> with_info</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token boolean" style="color:rgb(255, 88, 116)">True</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> as_supervised</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token boolean" style="color:rgb(255, 88, 116)">True</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">return</span><span class="token plain"> datasets</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;train&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token builtin" style="color:rgb(130, 170, 255)">map</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">scale</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">cache</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">shuffle</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">BUFFER_SIZE</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">def</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">build_and_compile_cnn_model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> model </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> models</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">Sequential</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">add</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> layers</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">Conv2D</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">32</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token punctuation" style="color:rgb(199, 146, 
234)">,</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> activation</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;relu&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> input_shape</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">28</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">28</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">1</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">add</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">layers</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">MaxPooling2D</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" 
style="color:rgb(247, 140, 108)">2</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">2</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">add</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">layers</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">Conv2D</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">64</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> activation</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;relu&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div 
class="token-line" style="color:#bfc7d5"><span class="token plain"> model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">add</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">layers</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">MaxPooling2D</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">2</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">2</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">add</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">layers</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">Conv2D</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">64</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token 
number" style="color:rgb(247, 140, 108)">3</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> activation</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;relu&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">add</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">layers</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">Flatten</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">add</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">layers</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">Dense</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">64</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> activation</span><span class="token operator" style="color:rgb(137, 221, 
255)">=</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;relu&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">add</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">layers</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">Dense</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token number" style="color:rgb(247, 140, 108)">10</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> activation</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;softmax&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">summary</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token builtin" style="color:rgb(130, 170, 255)">compile</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">optimizer</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;adam&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> loss</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;sparse_categorical_crossentropy&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> metrics</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;accuracy&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">return</span><span class="token plain"> model</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">def</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">main</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> strategy </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> tf</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">distribute</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">experimental</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">MultiWorkerMirroredStrategy</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> communication</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain">tf</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">distribute</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">experimental</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">CollectiveCommunication</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">AUTO</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div 
class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> BATCH_SIZE_PER_REPLICA </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">4</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> BATCH_SIZE </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> BATCH_SIZE_PER_REPLICA </span><span class="token operator" style="color:rgb(137, 221, 255)">*</span><span class="token plain"> strategy</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">num_replicas_in_sync</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">with</span><span class="token plain"> strategy</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">scope</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> ds_train </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> make_datasets_unbatched</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">batch</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">BATCH_SIZE</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">repeat</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> options </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> tf</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">data</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">Options</span><span class="token punctuation" style="color:rgb(199, 
146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> options</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">experimental_distribute</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">auto_shard_policy </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> tf</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">data</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">experimental</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">AutoShardPolicy</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">DATA</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> ds_train </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> ds_train</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">with_options</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">options</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># Model building/compiling need to be within `strategy.scope()`.</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span 
class="token plain"> multi_worker_model </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> build_and_compile_cnn_model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">class</span><span class="token plain"> </span><span class="token class-name" style="color:rgb(255, 203, 107)">MyCallback</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">tf</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">keras</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">callbacks</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">Callback</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">def</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">on_epoch_end</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">self</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> epoch</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> logs</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token boolean" style="color:rgb(255, 88, 116)">None</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token comment" style="color:rgb(105, 112, 152);font-style:italic"># monitor the 
loss and accuracy</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">print</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">logs</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> modelClient</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">log_metrics</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;loss&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> logs</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;loss&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&quot;accuracy&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> logs</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(195, 232, 141)">&quot;accuracy&quot;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> epoch</span><span class="token punctuation" style="color:rgb(199, 146, 
234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> </span><span class="token keyword" style="font-style:italic">with</span><span class="token plain"> modelClient</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">start</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token keyword" style="font-style:italic">as</span><span class="token plain"> run</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> multi_worker_model</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">fit</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">ds_train</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> epochs</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">10</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> steps_per_epoch</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token number" style="color:rgb(247, 140, 108)">70</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> callbacks</span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain">MyCallback</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span 
class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"></span><span class="token keyword" style="font-style:italic">if</span><span class="token plain"> __name__ </span><span class="token operator" style="color:rgb(137, 221, 255)">==</span><span class="token plain"> </span><span class="token string" style="color:rgb(195, 232, 141)">&#x27;__main__&#x27;</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> modelClient </span><span class="token operator" style="color:rgb(137, 221, 255)">=</span><span class="token plain"> ModelsClient</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> main</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="2-prepare-an-environment-compatible-with-the-training"></a>2. 
Prepare an environment compatible with the training<a class="hash-link" href="#2-prepare-an-environment-compatible-with-the-training" title="Direct link to heading">#</a></h3><p>Build a Docker image that provides the environment required by the training script.</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-bash codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">$ ./dev-support/examples/quickstart/build.sh </span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="3-submit-the-experiment"></a>3. Submit the experiment<a class="hash-link" href="#3-submit-the-experiment" title="Direct link to heading">#</a></h3><ol><li><p>Open the Submarine workbench and click <code>+ New Experiment</code></p></li><li><p>Fill in the form as shown below. Here we set 3 workers.</p><ol><li>Step 1
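<img src="/assets/images/quickstart-submit-1-666fc27e30765ab0ddad117e3a354814.png" alt="Step 1 of the new experiment form in the Submarine workbench"></li><li>Step 2
<img src="/assets/images/quickstart-submit-2-9c78114774db9e88702cc8e72722ceca.png" alt="Step 2 of the new experiment form in the Submarine workbench"></li><li>Step 3
<img src="/assets/images/quickstart-submit-3-c412f456d672e509be26040750826a76.png" alt="Step 3 of the new experiment form in the Submarine workbench"></li><li>The experiment is successfully submitted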
<img src="/assets/images/quickstart-submit-4-00c8dfb8d7d8a1bd1fe52f43712fc4a9.png" alt="Submarine workbench after the experiment is successfully submitted"></li></ol></li></ol><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="4-monitor-the-process-modelclient"></a>4. Monitor the process (modelClient)<a class="hash-link" href="#4-monitor-the-process-modelclient" title="Direct link to heading">#</a></h3><ol><li><p>In our code, we use <code>modelClient</code> from <code>submarine-sdk</code> to record the metrics. To see the result, click <code>MLflow UI</code> in the workbench.</p></li><li><p>To compare the metrics of each worker, you can select all workers and then click <code>compare</code></p><p><img src="/assets/images/quickstart-mlflow-055f1780671b88621e0c6324f408dbbe.png" alt="MLflow UI showing the metrics recorded by the workers"></p><p><img src="/assets/images/quickstart-mlflow-2-d83b03b1ba4bdefc55540a54f6214a13.png" alt="MLflow UI comparing the metrics of the selected workers"></p></li></ol><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="5-serve-the-model-in-development"></a>5. Serve the model (In development)<a class="hash-link" href="#5-serve-the-model-in-development" title="Direct link to heading">#</a></h3></div></article><div class="margin-vert--xl"><div class="row"><div class="col"><a href="https://github.com/apache/submarine/edit/master/website/versioned_docs/version-0.6.0/gettingStarted/quickstart.md" target="_blank" rel="noreferrer noopener"><svg fill="currentColor" height="1.2em" width="1.2em" preserveAspectRatio="xMidYMid meet" role="img" viewBox="0 0 40 40" class="iconEdit_2LL7"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div></div></div><div class="margin-vert--lg"><nav class="pagination-nav" aria-label="Docs pages navigation"><div class="pagination-nav__item"></div><div class="pagination-nav__item pagination-nav__item--next"><a class="pagination-nav__link" href="/docs/gettingStarted/notebook"><div 
class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Jupyter Notebook »</div></a></div></nav></div></div></div><div class="col col--3"><div class="tableOfContents_2xL- thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#installation" class="table-of-contents__link">Installation</a><ul><li><a href="#prepare-a-kubernetes-cluster" class="table-of-contents__link">Prepare a Kubernetes cluster</a></li><li><a href="#launch-submarine-in-the-cluster" class="table-of-contents__link">Launch submarine in the cluster</a></li><li><a href="#ensure-submarine-is-ready" class="table-of-contents__link">Ensure submarine is ready</a></li><li><a href="#connect-to-workbench" class="table-of-contents__link">Connect to workbench</a></li></ul></li><li><a href="#example-submit-a-mnist-distributed-example" class="table-of-contents__link">Example: Submit a mnist distributed example</a><ul><li><a href="#1-write-a-python-script-for-distributed-training" class="table-of-contents__link">1. Write a python script for distributed training</a></li><li><a href="#2-prepare-an-environment-compatible-with-the-training" class="table-of-contents__link">2. Prepare an environment compatible with the training</a></li><li><a href="#3-submit-the-experiment" class="table-of-contents__link">3. Submit the experiment</a></li><li><a href="#4-monitor-the-process-modelclient" class="table-of-contents__link">4. Monitor the process (modelClient)</a></li><li><a href="#5-serve-the-model-in-development" class="table-of-contents__link">5. 
Serve the model (In development)</a></li></ul></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container"><div class="row footer__links"><div class="col footer__col"><h4 class="footer__title">Docs</h4><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/gettingStarted/quickstart">Getting Started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/api/environment">API docs</a></li></ul></div><div class="col footer__col"><h4 class="footer__title">Community</h4><ul class="footer__items"><li class="footer__item"><a href="https://stackoverflow.com/questions/tagged/apache-submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Stack Overflow</a></li><li class="footer__item"><a href="https://s.apache.org/slack-invite" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack</a></li></ul></div><div class="col footer__col"><h4 class="footer__title">More</h4><ul class="footer__items"><li class="footer__item"><a href="https://medium.com/@apache.submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog</a></li><li class="footer__item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_31Aa"><img class="footer__logo" alt="Apache Open Source Logo" src="https://hadoop.apache.org/asf_logo_wide.png"></a></div><div class="footer__copyright">Apache Submarine, Submarine, Apache, the Apache feather logo, and the Apache Submarine project logo are
either registered trademarks or trademarks of the Apache Software Foundation in the United States and other
countries.<br> Copyright © 2022 Apache Submarine is Apache2 Licensed software.</div></div></div></footer></div>
<script src="/styles.f6b0c2f2.js"></script>
<script src="/runtime~main.13a9404d.js"></script>
<script src="/main.1c145c17.js"></script>
<script src="/1.d23d1451.js"></script>
<script src="/2.45bcb8a0.js"></script>
<script src="/1f391b9e.785b37ba.js"></script>
<script src="/127.875bba76.js"></script>
<script src="/58f10d9f.e974ccf6.js"></script>
<script src="/17896441.faf04472.js"></script>
<script src="/a9744973.fb870be8.js"></script>
</body>
</html>