| <!doctype html> |
| <html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-0.6.0 plugin-docs plugin-id-default docs-doc-id-userDocs/yarn/YARNRuntimeGuide"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width,initial-scale=1"> |
| <meta name="generator" content="Docusaurus v2.0.0-beta.18"> |
| <title data-rh="true">YARN Runtime Quick Start Guide | Apache Submarine</title><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://submarine.apache.org/docs/userDocs/yarn/YARNRuntimeGuide"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="0.6.0"><meta data-rh="true" name="docusaurus_tag" content="docs-default-0.6.0"><meta data-rh="true" name="docsearch:version" content="0.6.0"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-0.6.0"><meta data-rh="true" property="og:title" content="YARN Runtime Quick Start Guide | Apache Submarine"><meta data-rh="true" name="description" content="Quick start guide for launching TensorFlow and PyTorch applications with Apache Submarine on the YARN runtime."><meta data-rh="true" property="og:description" content="Quick start guide for launching TensorFlow and PyTorch applications with Apache Submarine on the YARN runtime."><link data-rh="true" rel="icon" href="/img/submarine.ico"><link data-rh="true" rel="canonical" href="https://submarine.apache.org/docs/userDocs/yarn/YARNRuntimeGuide"><link data-rh="true" rel="alternate" href="https://submarine.apache.org/docs/userDocs/yarn/YARNRuntimeGuide" hreflang="en"><link data-rh="true" rel="alternate" href="https://submarine.apache.org/docs/userDocs/yarn/YARNRuntimeGuide" hreflang="x-default"><link rel="stylesheet" href="/assets/css/styles.80258812.css"> |
| <link rel="preload" href="/assets/js/runtime~main.c55a74e9.js" as="script"> |
| <link rel="preload" href="/assets/js/main.47923baa.js" as="script"> |
| </head> |
| <body class="navigation-with-keyboard"> |
| <!-- Theme bootstrap: reads the "theme" key from localStorage (any error while reading is ignored) and writes the result to the data-theme attribute of the root element, using "light" when no value is stored. --><script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus"> |
| <div role="region"><a href="#" class="skipToContent_ZgBM">Skip to main content</a></div><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_W2Cr themedImage--light_TfLj"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_W2Cr themedImage--dark_oUvU"></div><b class="navbar__title">Apache Submarine</b></a><a class="navbar__item navbar__link" href="/docs/gettingStarted/quickstart">Docs</a><a class="navbar__item navbar__link" href="/docs/api/environment">API</a><a class="navbar__item navbar__link" href="/docs/download">Download</a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link" aria-haspopup="true" aria-expanded="false" role="button" href="/docs/gettingStarted/quickstart">0.6.0</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/gettingStarted/quickstart">master 🏃</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/userDocs/yarn/YARNRuntimeGuide">0.6.0</a></li><li><a class="dropdown__link" href="/versions">All versions</a></li></ul></div><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 
4.172v-11z"></path></svg></a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache Software Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" 
class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="toggle_S7eR colorModeToggle_vKtC"><button class="clean-btn toggleButton_rCf9 toggleButtonDisabled_Pu9x" type="button" disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_v35p"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 
c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_nQuB"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Search" aria-label="Search" class="navbar__search-input search-bar"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div class="main-wrapper"><div class="docPage_P2Lg"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_RiI4" type="button"></button><main class="docMainContainer_TCnq docMainContainerEnhanced_WDCb"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_DM6M"><div class="docItemContainer_vinB"><article><span class="theme-doc-version-badge badge badge--secondary">Version: 0.6.0</span><div class="tocCollapsible_jdIR theme-doc-toc-mobile tocMobile_TmEX"><button type="button" class="clean-btn tocCollapsibleButton_Fzxq">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>YARN Runtime Quick Start Guide</h1></header><h2 class="anchor anchorWithStickyNavbar_mojV" id="prerequisite">Prerequisite<a class="hash-link" href="#prerequisite" title="Direct link to heading"></a></h2><p>Check out the <a href="/docs/adminDocs/yarn/">Running Submarine on YARN</a></p><h2 class="anchor anchorWithStickyNavbar_mojV" id="build-your-own-docker-image">Build your own Docker image<a class="hash-link" 
href="#build-your-own-docker-image" title="Direct link to heading"></a></h2><p>If you want to build your own Docker image for TensorFlow/PyTorch/MXNet while following the documents below, please check out <a href="/docs/userDocs/yarn/Dockerfiles">Build your Docker image</a> for more details.</p><h2 class="anchor anchorWithStickyNavbar_mojV" id="launch-tensorflow-application">Launch TensorFlow Application:<a class="hash-link" href="#launch-tensorflow-application" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="without-docker">Without Docker<a class="hash-link" href="#without-docker" title="Direct link to heading"></a></h3><p>You need:</p><ul><li>Build a Python virtual environment with TensorFlow 1.13.1 installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3 class="anchor anchorWithStickyNavbar_mojV" id="building-a-python-virtual-environment-with-tensorflow">Building a Python virtual environment with TensorFlow<a class="hash-link" href="#building-a-python-virtual-environment-with-tensorflow" title="Direct link to heading"></a></h3><p>TonY requires a Python virtual environment zip with TensorFlow and any needed Python libraries already installed.</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span 
class="token plain"># Make sure to install using Python 3, as TensorFlow only provides Python 3 artifacts</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">. venv/bin/activate</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">pip install tensorflow==1.13.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>The above commands will produce a myvenv.zip file, which is used in the example below. There's no need to copy it to other nodes. 
And it is not needed when using Docker to run the job.</p><p><strong>Note:</strong> If you require a version of TensorFlow and TensorBoard prior to <code>1.13.1</code>, take a look at <a href="https://github.com/linkedin/TonY/issues/42" target="_blank" rel="noopener noreferrer">this</a> issue.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="get-the-training-examples">Get the training examples<a class="hash-link" href="#get-the-training-examples" title="Direct link to heading"></a></h3><p>Get mnist_distributed.py from <a href="https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-tensorflow" target="_blank" rel="noopener noreferrer">https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-tensorflow</a></p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name tf-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework tensorflow \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --verbose \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path "" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token 
plain"> --num_workers 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=1G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=1G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd "myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd "myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/myvenv.zip#archive,path-to/mnist_distributed.py,path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code 
language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,611 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for ps 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span 
class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,626 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="with-docker">With Docker<a class="hash-link" href="#with-docker" title="Direct link to heading"></a></h3><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath 
--glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name tf-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework tensorflow \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image hadoopsubmarine/tf-1.8.0-cpu:0.0.1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path hdfs://pi-aw:9000/dataset/cifar-10-data \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd "export CLASSPATH=\$(/hadoop-3.1.0/bin/hadoop classpath --glob) && cd /test/models/tutorials/image/cifar10_estimator && python cifar10_main.py --data-dir=%input_path% --job-dir=%checkpoint_path% --train-steps=10000 --eval-batch-size=16 --train-batch-size=16 --variable-strategy=CPU --num-gpus=0 --sync" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HOME=/hadoop-3.1.0 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_YARN_HOME=/hadoop-3.1.0 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env 
HADOOP_COMMON_HOME=/hadoop-3.1.0 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HDFS_HOME=/hadoop-3.1.0 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_CONF_DIR=/hadoop-3.1.0/etc/hadoop \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h4 class="anchor anchorWithStickyNavbar_mojV" id="notes">Notes:<a class="hash-link" href="#notes" title="Direct link to heading"></a></h4><p>1) <code>DOCKER_JAVA_HOME</code> points to JAVA_HOME inside Docker image.</p><p>2) <code>DOCKER_HADOOP_HDFS_HOME</code> points to HADOOP_HDFS_HOME inside Docker image.</p><p>We removed the TonY submodule after applying <a href="https://issues.apache.org/jira/browse/SUBMARINE-371" target="_blank" rel="noopener noreferrer">SUBMARINE-371</a> and now use the TonY dependency directly.</p><p>After Submarine v0.2.0, there is an uber jar <code>submarine-all-${SUBMARINE_VERSION}-hadoop-${HADOOP_VERSION}.jar</code> released together with |
| the <code>submarine-core-${SUBMARINE_VERSION}.jar</code>, <code>submarine-yarnservice-runtime-${SUBMARINE_VERSION}.jar</code> and <code>submarine-tony-runtime-${SUBMARINE_VERSION}.jar</code>.</p><br><h2 class="anchor anchorWithStickyNavbar_mojV" id="launch-pytorch-application">Launch PyTorch Application:<a class="hash-link" href="#launch-pytorch-application" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="without-docker-1">Without Docker<a class="hash-link" href="#without-docker-1" title="Direct link to heading"></a></h3><p>You need:</p><ul><li>Build a Python virtual environment with PyTorch 0.4.0+ installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3 class="anchor anchorWithStickyNavbar_mojV" id="building-a-python-virtual-environment-with-pytorch">Building a Python virtual environment with PyTorch<a class="hash-link" href="#building-a-python-virtual-environment-with-pytorch" title="Direct link to heading"></a></h3><p>TonY requires a Python virtual environment zip with PyTorch and any needed Python libraries already installed.</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span><br></span><span class="token-line" 
style="color:#bfc7d5"><span class="token plain">. venv/bin/activate</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">pip install pytorch==0.4.0</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="get-the-training-examples-1">Get the training examples<a class="hash-link" href="#get-the-training-examples-1" title="Direct link to heading"></a></h3><p>Get mnist_distributed.py from <a href="https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-pytorch" target="_blank" rel="noopener noreferrer">https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-pytorch</a></p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath 
--glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name PyTorch-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework pytorch \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd "myvenv.zip/venv/bin/python mnist_distributed.py" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd "myvenv.zip/venv/bin/python mnist_distributed.py" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/myvenv.zip#archive,path-to/mnist_distributed.py,\</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 
2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,611 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for ps 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs 
for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,626 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="with-docker-1">With Docker<a class="hash-link" href="#with-docker-1" title="Direct link to heading"></a></h3><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text 
codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name PyTorch-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework pytorch \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image pytorch-latest-gpu:0.0.1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path "" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd "cd /test/ && python cifar10_tutorial.py" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HOME=/hadoop-3.1.2 \</span><br></span><span 
class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_YARN_HOME=/hadoop-3.1.2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_COMMON_HOME=/hadoop-3.1.2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HDFS_HOME=/hadoop-3.1.2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_CONF_DIR=/hadoop-3.1.2/etc/hadoop \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h2 class="anchor anchorWithStickyNavbar_mojV" id="launch-mxnet-application">Launch MXNet Application:<a class="hash-link" href="#launch-mxnet-application" title="Direct link to heading"></a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="without-docker-2">Without Docker<a class="hash-link" href="#without-docker-2" title="Direct link to heading"></a></h3><p>You need:</p><ul><li>Build a Python virtual environment with MXNet installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3 class="anchor anchorWithStickyNavbar_mojV" id="building-a-python-virtual-environment-with-mxnet">Building a Python virtual environment with MXNet<a class="hash-link" href="#building-a-python-virtual-environment-with-mxnet" 
title="Direct link to heading"></a></h3><p>TonY requires a Python virtual environment zip with MXNet and any needed Python libraries already installed.</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">. 
venv/bin/activate</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">pip install mxnet==1.5.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="get-the-training-examples-2">Get the training examples<a class="hash-link" href="#get-the-training-examples-2" title="Direct link to heading"></a></h3><p>Get <a href="https://github.com/apache/submarine/blob/master/dev-support/mini-submarine/submarine/image_classification.py" target="_blank" rel="noopener noreferrer">image_classification.py</a> from the Apache Submarine repository.</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath 
--glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name MXNet-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework mxnet \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path "" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd "myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd "myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_schedulers=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_resources memory=1G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_launch_cmd="myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure 
\</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/myvenv.zip#archive,path-to/image_classification.py,\</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>You should then be able to see links and status of the jobs from the command line:</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, 
index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: scheduler, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,839 INFO tony.TonyClient: Logs for scheduler 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,839 INFO tony.TonyClient: Logs for server 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for server 1 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,723 
INFO tony.TonyClient: Task status updated: [TaskInfo] name: scheduler, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi status: SUCCEEDED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,736 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi status: SUCCEEDED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi status: SUCCEEDED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi status: SUCCEEDED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi status: SUCCEEDED</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="with-docker-2">With Docker<a 
class="hash-link" href="#with-docker-2" title="Direct link to heading"></a></h3><p>You can refer to this <a target="_blank" href="/assets/files/Dockerfile.cifar10.mx_1.5-cff207e9070bfca947922e0977637093.1">sample Dockerfile</a> for building your own Docker image.</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name MXNet-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework mxnet \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image &lt;your_docker_image&gt; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path "" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_schedulers 1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_resources memory=1G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_launch_cmd "/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync" \</span><br></span><span class="token-line" 
style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=2G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd "/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=2G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd "/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync" \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --verbose \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/image_classification.py,path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h2 class="anchor anchorWithStickyNavbar_mojV" id="use-yarn-service-to-run-submarine-deprecated">Use YARN Service to run Submarine: 
Deprecated<a class="hash-link" href="#use-yarn-service-to-run-submarine-deprecated" title="Direct link to heading"></a></h2><p>Historically, Submarine supported using <a href="https://hadoop.apache.org/docs/r3.1.0/hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html" target="_blank" rel="noopener noreferrer">YARN Service</a> to submit deep learning jobs. We have stopped supporting it because YARN service is not actively developed by the community, and extra dependencies such as RegistryDNS/ATS-v2 cause many setup issues.</p><p>As of now, you can still use YARN service to run Submarine, but the code will be removed in a future release. Going forward, only TonY will be supported for running Submarine on YARN.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/submarine/edit/master/website/versioned_docs/version-0.6.0/userDocs/yarn/YARNRuntimeGuide.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_dcUD" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_foO9"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"></div><div class="pagination-nav__item pagination-nav__item--next"></div></nav></div></div><div class="col col--3"><div class="tableOfContents_cNA8 thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#prerequisite" class="table-of-contents__link toc-highlight">Prerequisite</a></li><li><a href="#build-your-own-docker-image" class="table-of-contents__link toc-highlight">Build your own Docker image</a></li><li><a 
href="#launch-tensorflow-application" class="table-of-contents__link toc-highlight">Launch TensorFlow Application:</a><ul><li><a href="#without-docker" class="table-of-contents__link toc-highlight">Without Docker</a></li><li><a href="#building-a-python-virtual-environment-with-tensorflow" class="table-of-contents__link toc-highlight">Building a Python virtual environment with TensorFlow</a></li><li><a href="#get-the-training-examples" class="table-of-contents__link toc-highlight">Get the training examples</a></li><li><a href="#with-docker" class="table-of-contents__link toc-highlight">With Docker</a></li></ul></li><li><a href="#launch-pytorch-application" class="table-of-contents__link toc-highlight">Launch PyTorch Application:</a><ul><li><a href="#without-docker-1" class="table-of-contents__link toc-highlight">Without Docker</a></li><li><a href="#building-a-python-virtual-environment-with-pytorch" class="table-of-contents__link toc-highlight">Building a Python virtual environment with PyTorch</a></li><li><a href="#get-the-training-examples-1" class="table-of-contents__link toc-highlight">Get the training examples</a></li><li><a href="#with-docker-1" class="table-of-contents__link toc-highlight">With Docker</a></li></ul></li><li><a href="#launch-mxnet-application" class="table-of-contents__link toc-highlight">Launch MXNet Application:</a><ul><li><a href="#without-docker-2" class="table-of-contents__link toc-highlight">Without Docker</a></li><li><a href="#building-a-python-virtual-environment-with-mxnet" class="table-of-contents__link toc-highlight">Building a Python virtual environment with MXNet</a></li><li><a href="#get-the-training-examples-2" class="table-of-contents__link toc-highlight">Get the training examples</a></li><li><a href="#with-docker-2" class="table-of-contents__link toc-highlight">With Docker</a></li></ul></li><li><a href="#use-yarn-service-to-run-submarine-deprecated" class="table-of-contents__link toc-highlight">Use YARN Service to run 
Submarine: Deprecated</a></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Docs</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/gettingStarted/quickstart">Getting Started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/api/environment">API docs</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a href="https://stackoverflow.com/questions/tagged/apache-submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Stack Overflow<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://s.apache.org/slack-invite" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">More</div><ul class="footer__items"><li class="footer__item"><a href="https://medium.com/@apache.submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog</a></li><li class="footer__item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 
13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_gHmE"><img src="https://hadoop.apache.org/asf_logo_wide.png" alt="Apache Open Source Logo" class="themedImage_W2Cr themedImage--light_TfLj footer__logo"><img src="https://hadoop.apache.org/asf_logo_wide.png" alt="Apache Open Source Logo" class="themedImage_W2Cr themedImage--dark_oUvU footer__logo"></a></div><div class="footer__copyright">Apache Submarine, Submarine, Apache, the Apache feather logo, and the Apache Submarine project logo are |
| either registered trademarks or trademarks of the Apache Software Foundation in the United States and other |
| countries.<br> Copyright © 2022 Apache Submarine is Apache2 Licensed software.</div></div></div></footer></div> |
| <script src="/assets/js/runtime~main.c55a74e9.js"></script> |
| <script src="/assets/js/main.47923baa.js"></script> |
| </body> |
| </html> |