blob: bed9889f6b0dcf90283bcea75fd298228d54218a [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper docs-doc-page docs-version-0.6.0 plugin-docs plugin-id-default docs-doc-id-userDocs/yarn/YARNRuntimeGuide">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-beta.18">
<title data-rh="true">YARN Runtime Quick Start Guide | Apache Submarine</title><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://submarine.apache.org/docs/0.6.0/userDocs/yarn/YARNRuntimeGuide"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="0.6.0"><meta data-rh="true" name="docusaurus_tag" content="docs-default-0.6.0"><meta data-rh="true" name="docsearch:version" content="0.6.0"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-0.6.0"><meta data-rh="true" property="og:title" content="YARN Runtime Quick Start Guide | Apache Submarine"><meta data-rh="true" name="description" content="Quick start guide for launching TensorFlow and PyTorch applications with Apache Submarine on the YARN runtime."><meta data-rh="true" property="og:description" content="Quick start guide for launching TensorFlow and PyTorch applications with Apache Submarine on the YARN runtime."><link data-rh="true" rel="icon" href="/img/submarine.ico"><link data-rh="true" rel="canonical" href="https://submarine.apache.org/docs/0.6.0/userDocs/yarn/YARNRuntimeGuide"><link data-rh="true" rel="alternate" href="https://submarine.apache.org/docs/0.6.0/userDocs/yarn/YARNRuntimeGuide" hreflang="en"><link data-rh="true" rel="alternate" href="https://submarine.apache.org/zh-cn/docs/0.6.0/userDocs/yarn/YARNRuntimeGuide" hreflang="zh-cn"><link data-rh="true" rel="alternate" href="https://submarine.apache.org/docs/0.6.0/userDocs/yarn/YARNRuntimeGuide" hreflang="x-default"><link rel="stylesheet" href="/assets/css/styles.80258812.css">
<link rel="preload" href="/assets/js/runtime~main.9d177e25.js" as="script">
<link rel="preload" href="/assets/js/main.7cd2eed3.js" as="script">
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){var t=null;try{t=localStorage.getItem("theme")}catch(t){}return t}();t(null!==e?e:"light")}()</script><div id="__docusaurus">
<div role="region"><a href="#" class="skipToContent_ZgBM">Skip to main content</a></div><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_W2Cr themedImage--light_TfLj"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_W2Cr themedImage--dark_oUvU"></div><b class="navbar__title">Apache Submarine</b></a><a class="navbar__item navbar__link" href="/docs/0.6.0/gettingStarted/quickstart">Docs</a><a class="navbar__item navbar__link" href="/docs/0.6.0/api/environment">API</a><a class="navbar__item navbar__link" href="/docs/0.6.0/download">Download</a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link"><span><svg viewBox="0 0 24 24" width="20" height="20" aria-hidden="true" class="iconLanguage_dNtB"><path fill="currentColor" d="M12.87 15.07l-2.54-2.51.03-.03c1.74-1.94 2.98-4.17 3.71-6.53H17V4h-7V2H8v2H1v1.99h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11.76-2.04zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2l-4.5-12zm-2.62 7l1.62-4.33L19.12 17h-3.24z"></path></svg><span>English</span></span></a><ul class="dropdown__menu"><li><a href="/docs/0.6.0/userDocs/yarn/YARNRuntimeGuide" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active">English</a></li><li><a 
href="/zh-cn/docs/0.6.0/userDocs/yarn/YARNRuntimeGuide" target="_self" rel="noopener noreferrer" class="dropdown__link">中文</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__link" aria-haspopup="true" aria-expanded="false" role="button" href="/docs/0.6.0/gettingStarted/quickstart">0.6.0</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/gettingStarted/quickstart">master 🏃</a></li><li><a class="dropdown__link" href="/docs/gettingStarted/quickstart">0.8.0</a></li><li><a class="dropdown__link" href="/docs/0.7.0/gettingStarted/quickstart">0.7.0</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/0.6.0/userDocs/yarn/YARNRuntimeGuide">0.6.0</a></li><li><a class="dropdown__link" href="/versions">All versions</a></li></ul></div><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Apache</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache Software Foundation<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events<svg 
width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache License<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="toggle_S7eR colorModeToggle_vKtC"><button class="clean-btn toggleButton_rCf9 toggleButtonDisabled_Pu9x" type="button" disabled="" title="Switch between dark and 
light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_v35p"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_nQuB"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Search" aria-label="Search" class="navbar__search-input search-bar"></div></div></div><div role="presentation" 
class="navbar-sidebar__backdrop"></div></nav><div class="main-wrapper"><div class="docPage_P2Lg"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_RiI4" type="button"></button><main class="docMainContainer_TCnq docMainContainerEnhanced_WDCb"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_DM6M"><div class="theme-doc-version-banner alert alert--warning margin-bottom--md" role="alert"><div>This is documentation for <!-- -->Apache Submarine<!-- --> <b>0.6.0</b>, which is no longer actively maintained.</div><div class="margin-top--md">For up-to-date documentation, see the <b><a href="/docs/gettingStarted/quickstart">latest version</a></b> (<!-- -->0.8.0<!-- -->).</div></div><div class="docItemContainer_vinB"><article><span class="theme-doc-version-badge badge badge--secondary">Version: 0.6.0</span><div class="tocCollapsible_jdIR theme-doc-toc-mobile tocMobile_TmEX"><button type="button" class="clean-btn tocCollapsibleButton_Fzxq">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>YARN Runtime Quick Start Guide</h1></header><h2 class="anchor anchorWithStickyNavbar_mojV" id="prerequisite">Prerequisite<a class="hash-link" href="#prerequisite" title="Direct link to heading">​</a></h2><p>Check out the <a href="/docs/0.6.0/adminDocs/yarn/">Running Submarine on YARN</a></p><h2 class="anchor anchorWithStickyNavbar_mojV" id="build-your-own-docker-image">Build your own Docker image<a class="hash-link" href="#build-your-own-docker-image" title="Direct link to heading">​</a></h2><p>Want to build your own Docker image for TensorFlow/PyTorch/MXNet while following the steps below? 
Please check out <a href="/docs/0.6.0/userDocs/yarn/Dockerfiles">Build your Docker image</a> for more details.</p><h2 class="anchor anchorWithStickyNavbar_mojV" id="launch-tensorflow-application">Launch TensorFlow Application:<a class="hash-link" href="#launch-tensorflow-application" title="Direct link to heading">​</a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="without-docker">Without Docker<a class="hash-link" href="#without-docker" title="Direct link to heading">​</a></h3><p>You need:</p><ul><li>Build a Python virtual environment with TensorFlow 1.13.1 installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3 class="anchor anchorWithStickyNavbar_mojV" id="building-a-python-virtual-environment-with-tensorflow">Building a Python virtual environment with TensorFlow<a class="hash-link" href="#building-a-python-virtual-environment-with-tensorflow" title="Direct link to heading">​</a></h3><p>TonY requires a Python virtual environment zip with TensorFlow and any needed Python libraries already installed.</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"># Make sure to install using Python 3, as TensorFlow only provides Python 3 artifacts</span><br></span><span class="token-line" style="color:#bfc7d5"><span 
class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">. venv/bin/activate</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">pip install tensorflow==1.13.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>The above commands will produce a myvenv.zip, which will be used in the example below. There&#x27;s no need to copy it to other nodes. 
And it is not needed when using Docker to run the job.</p><p><strong>Note:</strong> If you require a version of TensorFlow and TensorBoard prior to <code>1.13.1</code>, take a look at <a href="https://github.com/linkedin/TonY/issues/42" target="_blank" rel="noopener noreferrer">this</a> issue.</p><h3 class="anchor anchorWithStickyNavbar_mojV" id="get-the-training-examples">Get the training examples<a class="hash-link" href="#get-the-training-examples" title="Direct link to heading">​</a></h3><p>Get mnist_distributed.py from <a href="https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-tensorflow" target="_blank" rel="noopener noreferrer">https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-tensorflow</a></p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name tf-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework tensorflow \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --verbose \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span><br></span><span class="token-line" 
style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=1G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=1G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/myvenv.zip#archive,path-to/mnist_distributed.py,path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" 
style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,611 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for ps 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: 
FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,626 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="with-docker">With Docker<a class="hash-link" href="#with-docker" title="Direct link to heading">​</a></h3><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token 
plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name tf-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework tensorflow \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image hadoopsubmarine/tf-1.8.0-cpu:0.0.1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path hdfs://pi-aw:9000/dataset/cifar-10-data \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;export CLASSPATH=\$(/hadoop-3.1.0/bin/hadoop classpath --glob) &amp;&amp; cd /test/models/tutorials/image/cifar10_estimator &amp;&amp; python cifar10_main.py --data-dir=%input_path% --job-dir=%checkpoint_path% --train-steps=10000 --eval-batch-size=16 --train-batch-size=16 --variable-strategy=CPU --num-gpus=0 --sync&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HOME=/hadoop-3.1.0 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_YARN_HOME=/hadoop-3.1.0 \</span><br></span><span class="token-line" 
style="color:#bfc7d5"><span class="token plain"> --env HADOOP_COMMON_HOME=/hadoop-3.1.0 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HDFS_HOME=/hadoop-3.1.0 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_CONF_DIR=/hadoop-3.1.0/etc/hadoop \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h4 class="anchor anchorWithStickyNavbar_mojV" id="notes">Notes:<a class="hash-link" href="#notes" title="Direct link to heading">​</a></h4><p>1) <code>DOCKER_JAVA_HOME</code> points to JAVA_HOME inside Docker image.</p><p>2) <code>DOCKER_HADOOP_HDFS_HOME</code> points to HADOOP_HDFS_HOME inside Docker image.</p><p>We removed TonY submodule after applying <a href="https://issues.apache.org/jira/browse/SUBMARINE-371" target="_blank" rel="noopener noreferrer">SUBMARINE-371</a> and changed to use TonY dependency directly.</p><p>After Submarine v0.2.0, there is a uber jar <code>submarine-all-${SUBMARINE_VERSION}-hadoop-${HADOOP_VERSION}.jar</code> released together with
the <code>submarine-core-${SUBMARINE_VERSION}.jar</code>, <code>submarine-yarnservice-runtime-${SUBMARINE_VERSION}.jar</code> and <code>submarine-tony-runtime-${SUBMARINE_VERSION}.jar</code>.</p><br><h2 class="anchor anchorWithStickyNavbar_mojV" id="launch-pytorch-application">Launch PyTorch Application:<a class="hash-link" href="#launch-pytorch-application" title="Direct link to heading">​</a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="without-docker-1">Without Docker<a class="hash-link" href="#without-docker-1" title="Direct link to heading">​</a></h3><p>You need:</p><ul><li>Build a Python virtual environment with PyTorch 0.4.0+ installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3 class="anchor anchorWithStickyNavbar_mojV" id="building-a-python-virtual-environment-with-pytorch">Building a Python virtual environment with PyTorch<a class="hash-link" href="#building-a-python-virtual-environment-with-pytorch" title="Direct link to heading">​</a></h3><p>TonY requires a Python virtual environment zip with PyTorch and any needed Python libraries already installed.</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span><br></span><span class="token-line" 
style="color:#bfc7d5"><span class="token plain">. venv/bin/activate</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">pip install torch==0.4.0</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="get-the-training-examples-1">Get the training examples<a class="hash-link" href="#get-the-training-examples-1" title="Direct link to heading">​</a></h3><p>Get mnist_distributed.py from <a href="https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-pytorch" target="_blank" rel="noopener noreferrer">https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-pytorch</a></p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath 
--glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name PyTorch-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework pytorch \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/myvenv.zip#archive,path-to/mnist_distributed.py,\</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 
2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,611 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for ps 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs 
for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,626 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: FINISHED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="with-docker-1">With Docker<a class="hash-link" href="#with-docker-1" title="Direct link to heading">​</a></h3><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text 
codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name PyTorch-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework pytorch \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image pytorch-latest-gpu:0.0.1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;cd /test/ &amp;&amp; python cifar10_tutorial.py&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env 
HADOOP_HOME=/hadoop-3.1.2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_YARN_HOME=/hadoop-3.1.2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_COMMON_HOME=/hadoop-3.1.2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HDFS_HOME=/hadoop-3.1.2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_CONF_DIR=/hadoop-3.1.2/etc/hadoop \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h2 class="anchor anchorWithStickyNavbar_mojV" id="launch-mxnet-application">Launch MXNet Application:<a class="hash-link" href="#launch-mxnet-application" title="Direct link to heading">​</a></h2><h3 class="anchor anchorWithStickyNavbar_mojV" id="without-docker-2">Without Docker<a class="hash-link" href="#without-docker-2" title="Direct link to heading">​</a></h3><p>You need:</p><ul><li>Build a Python virtual environment with MXNet installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3 class="anchor anchorWithStickyNavbar_mojV" id="building-a-python-virtual-environment-with-mxnet">Building a Python virtual environment with MXNet<a class="hash-link" 
href="#building-a-python-virtual-environment-with-mxnet" title="Direct link to heading">​</a></h3><p>TonY requires a Python virtual environment zip with MXNet and any needed Python libraries already installed.</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">. 
venv/bin/activate</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">pip install mxnet==1.5.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" id="get-the-training-examples-2">Get the training examples<a class="hash-link" href="#get-the-training-examples-2" title="Direct link to heading">​</a></h3><p>Get image_classification.py from this <a href="https://github.com/apache/submarine/blob/master/dev-support/mini-submarine/submarine/image_classification.py" target="_blank" rel="noopener noreferrer">link</a></p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath 
--glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name MXNet-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework mxnet \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=3G,vcores=2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_schedulers=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_resources memory=1G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_launch_cmd=&quot;myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token 
plain"> --insecure \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/myvenv.zip#archive,path-to/image_classification.py,\</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: 
[TaskInfo] name: worker, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: scheduler, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi status: RUNNING</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,839 INFO tony.TonyClient: Logs for scheduler 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,839 INFO tony.TonyClient: Logs for server 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for server 1 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token 
plain">2020-04-16 21:02:09,723 INFO tony.TonyClient: Task status updated: [TaskInfo] name: scheduler, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi status: SUCCEEDED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,736 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi status: SUCCEEDED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi status: SUCCEEDED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi status: SUCCEEDED</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi status: SUCCEEDED</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h3 class="anchor anchorWithStickyNavbar_mojV" 
id="with-docker-2">With Docker<a class="hash-link" href="#with-docker-2" title="Direct link to heading">​</a></h3><p>You could refer to this <a target="_blank" href="/assets/files/Dockerfile.cifar10.mx_1.5-cff207e9070bfca947922e0977637093.1">sample Dockerfile</a> for building your own Docker image.</p><div class="codeBlockContainer_I0IT theme-code-block"><div class="codeBlockContent_wNvx" style="color:#bfc7d5;background-color:#292d3e"><pre tabindex="0" class="prism-code language-text codeBlock_jd64 thin-scrollbar"><code class="codeBlockLines_mRuA"><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name MXNet-job-001 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework mxnet \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image &lt;your_docker_image&gt; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_schedulers 1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_resources memory=1G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_launch_cmd &quot;/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; 
\</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=2G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=2G,vcores=1 \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --verbose \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span><br></span><span class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/image_classification.py,path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span><br></span></code></pre><button type="button" aria-label="Copy code to clipboard" title="Copy" class="copyButton_eDfN clean-btn"><span class="copyButtonIcons_W9eQ" aria-hidden="true"><svg class="copyButtonIcon_XEyF" viewBox="0 0 24 24"><path d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg class="copyButtonSuccessIcon_i9w9" viewBox="0 0 24 24"><path d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div><h2 class="anchor anchorWithStickyNavbar_mojV" 
id="use-yarn-service-to-run-submarine-deprecated">Use YARN Service to run Submarine: Deprecated<a class="hash-link" href="#use-yarn-service-to-run-submarine-deprecated" title="Direct link to heading">​</a></h2><p>Historically, Submarine supported using <a href="https://hadoop.apache.org/docs/r3.1.0/hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html" target="_blank" rel="noopener noreferrer">YARN Service</a> to submit deep learning jobs. We have stopped supporting it because YARN Service is not actively developed by the community, and extra dependencies such as RegistryDNS/ATS-v2 cause lots of issues during setup.</p><p>As of now, you can still use YARN Service to run Submarine, but the code will be removed in a future release. We will only support TonY when using Submarine on YARN.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="theme-doc-footer-edit-meta-row row"><div class="col"><a href="https://github.com/apache/submarine/edit/master/website/versioned_docs/version-0.6.0/userDocs/yarn/YARNRuntimeGuide.md" target="_blank" rel="noreferrer noopener" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_dcUD" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_foO9"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages navigation"><div class="pagination-nav__item"></div><div class="pagination-nav__item pagination-nav__item--next"></div></nav></div></div><div class="col col--3"><div class="tableOfContents_cNA8 thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#prerequisite" class="table-of-contents__link toc-highlight">Prerequisite</a></li><li><a href="#build-your-own-docker-image" 
class="table-of-contents__link toc-highlight">Build your own Docker image</a></li><li><a href="#launch-tensorflow-application" class="table-of-contents__link toc-highlight">Launch TensorFlow Application:</a><ul><li><a href="#without-docker" class="table-of-contents__link toc-highlight">Without Docker</a></li><li><a href="#building-a-python-virtual-environment-with-tensorflow" class="table-of-contents__link toc-highlight">Building a Python virtual environment with TensorFlow</a></li><li><a href="#get-the-training-examples" class="table-of-contents__link toc-highlight">Get the training examples</a></li><li><a href="#with-docker" class="table-of-contents__link toc-highlight">With Docker</a></li></ul></li><li><a href="#launch-pytorch-application" class="table-of-contents__link toc-highlight">Launch PyTorch Application:</a><ul><li><a href="#without-docker-1" class="table-of-contents__link toc-highlight">Without Docker</a></li><li><a href="#building-a-python-virtual-environment-with-pytorch" class="table-of-contents__link toc-highlight">Building a Python virtual environment with PyTorch</a></li><li><a href="#get-the-training-examples-1" class="table-of-contents__link toc-highlight">Get the training examples</a></li><li><a href="#with-docker-1" class="table-of-contents__link toc-highlight">With Docker</a></li></ul></li><li><a href="#launch-mxnet-application" class="table-of-contents__link toc-highlight">Launch MXNet Application:</a><ul><li><a href="#without-docker-2" class="table-of-contents__link toc-highlight">Without Docker</a></li><li><a href="#building-a-python-virtual-environment-with-mxnet" class="table-of-contents__link toc-highlight">Building a Python virtual environment with MXNet</a></li><li><a href="#get-the-training-examples-2" class="table-of-contents__link toc-highlight">Get the training examples</a></li><li><a href="#with-docker-2" class="table-of-contents__link toc-highlight">With Docker</a></li></ul></li><li><a 
href="#use-yarn-service-to-run-submarine-deprecated" class="table-of-contents__link toc-highlight">Use YARN Service to run Submarine: Deprecated</a></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Docs</div><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/gettingStarted/quickstart">Getting Started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/api/environment">API docs</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items"><li class="footer__item"><a href="https://stackoverflow.com/questions/tagged/apache-submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Stack Overflow<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://s.apache.org/slack-invite" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">More</div><ul class="footer__items"><li class="footer__item"><a href="https://medium.com/@apache.submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog</a></li><li class="footer__item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" 
height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_I5OW"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_gHmE"><img src="https://hadoop.apache.org/asf_logo_wide.png" alt="Apache Open Source Logo" class="themedImage_W2Cr themedImage--light_TfLj footer__logo"><img src="https://hadoop.apache.org/asf_logo_wide.png" alt="Apache Open Source Logo" class="themedImage_W2Cr themedImage--dark_oUvU footer__logo"></a></div><div class="footer__copyright">Apache Submarine, Submarine, Apache, the Apache feather logo, and the Apache Submarine project logo are
either registered trademarks or trademarks of the Apache Software Foundation in the United States and other
countries.<br> Copyright © 2023 Apache Submarine is Apache2 Licensed software.</div></div></div></footer></div>
<script src="/assets/js/runtime~main.9d177e25.js"></script>
<script src="/assets/js/main.7cd2eed3.js"></script>
</body>
</html>