blob: b658eee80da18dea96565e1dddb274d47cf1fbe0 [file] [log] [blame]
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-alpha.70">
<link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="Apache Submarine Blog RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="Apache Submarine Blog Atom Feed"><title data-react-helmet="true">YARN Runtime Quick Start Guide | Apache Submarine</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" name="docusaurus_locale" content="en"><meta data-react-helmet="true" name="docusaurus_version" content="0.6.0"><meta data-react-helmet="true" name="docusaurus_tag" content="docs-default-0.6.0"><meta data-react-helmet="true" property="og:title" content="YARN Runtime Quick Start Guide | Apache Submarine"><meta data-react-helmet="true" name="description" content="Quick start guide for launching TensorFlow, PyTorch and MXNet applications with Apache Submarine on the YARN runtime, with or without Docker."><meta data-react-helmet="true" property="og:description" content="Quick start guide for launching TensorFlow, PyTorch and MXNet applications with Apache Submarine on the YARN runtime, with or without Docker."><meta data-react-helmet="true" property="og:url" content="https://submarine.apache.org//docs/userDocs/yarn/YARNRuntimeGuide"><link data-react-helmet="true" rel="shortcut icon" href="/img/submarine.ico"><link data-react-helmet="true" rel="canonical" href="https://submarine.apache.org//docs/userDocs/yarn/YARNRuntimeGuide"><link rel="stylesheet" href="/styles.39775f96.css">
<link rel="preload" href="/styles.6ddab7ad.js" as="script">
<link rel="preload" href="/runtime~main.943b9f3a.js" as="script">
<link rel="preload" href="/main.92db4fb0.js" as="script">
<link rel="preload" href="/1.dab80c77.js" as="script">
<link rel="preload" href="/2.6a1a8326.js" as="script">
<link rel="preload" href="/1f391b9e.f22e8160.js" as="script">
<link rel="preload" href="/128.10c43f42.js" as="script">
<link rel="preload" href="/58f10d9f.87a860a5.js" as="script">
<link rel="preload" href="/17896441.be8f95bf.js" as="script">
<link rel="preload" href="/27f12fe0.7cd2b4d7.js" as="script">
</head>
<body>
<script>
// Inline bootstrap: set the "data-theme" attribute on <html> from the value
// stored under the "theme" key in localStorage, defaulting to "light".
(function () {
  var storedTheme = null;
  try {
    storedTheme = localStorage.getItem("theme");
  } catch (err) {
    // Any error while reading localStorage is ignored; the default is used.
  }
  var activeTheme = storedTheme !== null ? storedTheme : "light";
  document.documentElement.setAttribute("data-theme", activeTheme);
})();
</script><div id="__docusaurus">
<nav aria-label="Skip navigation links"><button type="button" tabindex="0" class="skipToContent_11B0">Skip to main content</button></nav><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><div aria-label="Navigation bar toggle" class="navbar__toggle" role="button" tabindex="0"><svg aria-label="Menu" width="30" height="30" viewBox="0 0 30 30" role="img" focusable="false"><title>Menu</title><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></div><a class="navbar__brand" href="/"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--light_3CMI navbar__logo"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--dark_3ARp navbar__logo"><strong class="navbar__title">Apache Submarine</strong></a><a class="navbar__item navbar__link" href="/docs/gettingStarted/quickstart">Docs</a><a class="navbar__item navbar__link" href="/docs/api/environment">API</a><a class="navbar__item navbar__link navbar__link--active" href="/docs/download">Download</a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__item navbar__link" href="/docs/">0.6.0</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/next/">master 🏃</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/userDocs/yarn/YARNRuntimeGuide">0.6.0</a></li><li><a class="dropdown__link" href="/versions">All versions</a></li></ul></div><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__item navbar__link">Apache</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/foundation/how-it-works.html" 
target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache Software Foundation</a></li><li><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events</a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache License</a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks</a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security</a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship</a></li></ul></div><div class="react-toggle react-toggle--disabled displayOnlyInLargeViewport_2N3Q"><div class="react-toggle-track"><div class="react-toggle-track-check"><span class="toggle_3NWk">🌜</span></div><div class="react-toggle-track-x"><span class="toggle_3NWk">🌞</span></div></div><div class="react-toggle-thumb"></div><input type="checkbox" disabled="" aria-label="Dark mode toggle" class="react-toggle-screenreader-only"></div><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Search" aria-label="Search" class="navbar__search-input search-bar"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--light_3CMI navbar__logo"><img src="/img/icons/128.png" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--dark_3ARp navbar__logo"><strong class="navbar__title">Apache Submarine</strong></a></div><div class="navbar-sidebar__items"><div 
class="menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/gettingStarted/quickstart">Docs</a></li><li class="menu__list-item"><a class="menu__link" href="/docs/api/environment">API</a></li><li class="menu__list-item"><a class="menu__link navbar__link--active" href="/docs/download">Download</a></li><li class="menu__list-item"><a role="button" class="menu__link menu__link--sublist">Versions</a><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/next/">master 🏃</a></li><li class="menu__list-item"><a aria-current="page" class="menu__link menu__link--active" href="/docs/userDocs/yarn/YARNRuntimeGuide">0.6.0</a></li><li class="menu__list-item"><a class="menu__link" href="/versions">All versions</a></li></ul></li><li class="menu__list-item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="menu__link">GitHub</a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist">Apache</a><ul class="menu__list"><li class="menu__list-item"><a href="https://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="menu__link">Apache Software Foundation</a></li><li class="menu__list-item"><a href="https://www.apache.org/events/current-event" target="_blank" rel="noopener noreferrer" class="menu__link">Events</a></li><li class="menu__list-item"><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="menu__link">Apache License</a></li><li class="menu__list-item"><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="menu__link">Thanks</a></li><li class="menu__list-item"><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="menu__link">Security</a></li><li class="menu__list-item"><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" 
rel="noopener noreferrer" class="menu__link">Sponsorship</a></li></ul></li></ul></div></div></div></nav><div class="main-wrapper"><div class="docPage_vMrn"><main class="docMainContainer_2iGs"><div class="container padding-vert--lg docItemWrapper_1bxp"><div class="row"><div class="col docItemCol_U38p"><div class="docItemContainer_a7m4"><article><div><span class="badge badge--secondary">Version: 0.6.0</span></div><header><h1 class="docTitle_Oumm">YARN Runtime Quick Start Guide</h1></header><div class="markdown"><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="prerequisite"></a>Prerequisite<a class="hash-link" href="#prerequisite" title="Direct link to heading">#</a></h2><p>Check out the <a href="/docs/adminDocs/yarn/README">Running Submarine on YARN</a></p><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="build-your-own-docker-image"></a>Build your own Docker image<a class="hash-link" href="#build-your-own-docker-image" title="Direct link to heading">#</a></h2><p>When you follow the documents below, and want to build your own Docker image for Tensorflow/PyTorch/MXNet? 
Please check out <a href="/docs/userDocs/yarn/Dockerfiles">Build your Docker image</a> for more details.</p><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="launch-tensorflow-application"></a>Launch TensorFlow Application:<a class="hash-link" href="#launch-tensorflow-application" title="Direct link to heading">#</a></h2><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="without-docker"></a>Without Docker<a class="hash-link" href="#without-docker" title="Direct link to heading">#</a></h3><p>You need:</p><ul><li>Build a Python virtual environment with TensorFlow 1.13.1 installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="building-a-python-virtual-environment-with-tensorflow"></a>Building a Python virtual environment with TensorFlow<a class="hash-link" href="#building-a-python-virtual-environment-with-tensorflow" title="Direct link to heading">#</a></h3><p>TonY requires a Python virtual environment zip with TensorFlow and any needed Python libraries already installed.</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"># Make sure to install using Python 3, as TensorFlow only provides Python 3 artifacts</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">. venv/bin/activate</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">pip install tensorflow==1.13.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p>The above commands will produce a myvenv.zip, which is used in the example below. There&#x27;s no need to copy it to other nodes, and it is not needed when using Docker to run the job.</p><p><strong>Note:</strong> If you require a version of TensorFlow and TensorBoard prior to <code>1.13.1</code>, take a look at <a href="https://github.com/linkedin/TonY/issues/42" target="_blank" rel="noopener noreferrer">this</a> issue.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="get-the-training-examples"></a>Get the training examples<a class="hash-link" href="#get-the-training-examples" title="Direct link to heading">#</a></h3><p>Get mnist_distributed.py from <a href="https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-tensorflow" target="_blank" rel="noopener noreferrer">https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-tensorflow</a></p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div 
class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name tf-job-001 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework tensorflow \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --verbose \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=1G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=1G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf 
tony.containers.resources=path-to/myvenv.zip#archive,path-to/mnist_distributed.py,path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,611 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for ps 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token 
plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,626 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="with-docker"></a>With Docker<a class="hash-link" href="#with-docker" title="Direct link to heading">#</a></h3><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name tf-job-001 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework tensorflow \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image hadoopsubmarine/tf-1.8.0-cpu:0.0.1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path hdfs://pi-aw:9000/dataset/cifar-10-data \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;export CLASSPATH=\$(/hadoop-3.1.0/bin/hadoop classpath --glob) &amp;&amp; cd /test/models/tutorials/image/cifar10_estimator &amp;&amp; python cifar10_main.py --data-dir=%input_path% --job-dir=%checkpoint_path% --train-steps=10000 
--eval-batch-size=16 --train-batch-size=16 --variable-strategy=CPU --num-gpus=0 --sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HOME=/hadoop-3.1.0 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_YARN_HOME=/hadoop-3.1.0 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_COMMON_HOME=/hadoop-3.1.0 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HDFS_HOME=/hadoop-3.1.0 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_CONF_DIR=/hadoop-3.1.0/etc/hadoop \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h4><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="notes"></a>Notes:<a class="hash-link" href="#notes" title="Direct link to heading">#</a></h4><p>1) <code>DOCKER_JAVA_HOME</code> points to JAVA_HOME inside Docker image.</p><p>2) <code>DOCKER_HADOOP_HDFS_HOME</code> points to HADOOP_HDFS_HOME inside Docker image.</p><p>We removed TonY submodule after applying <a href="https://issues.apache.org/jira/browse/SUBMARINE-371" target="_blank" rel="noopener noreferrer">SUBMARINE-371</a> and changed to use 
TonY dependency directly.</p><p>After Submarine v0.2.0, there is an uber jar <code>submarine-all-${SUBMARINE_VERSION}-hadoop-${HADOOP_VERSION}.jar</code> released together with
the <code>submarine-core-${SUBMARINE_VERSION}.jar</code>, <code>submarine-yarnservice-runtime-${SUBMARINE_VERSION}.jar</code> and <code>submarine-tony-runtime-${SUBMARINE_VERSION}.jar</code>.</p><br><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="launch-pytorch-application"></a>Launch PyTorch Application:<a class="hash-link" href="#launch-pytorch-application" title="Direct link to heading">#</a></h2><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="without-docker-1"></a>Without Docker<a class="hash-link" href="#without-docker-1" title="Direct link to heading">#</a></h3><p>You need:</p><ul><li>Build a Python virtual environment with PyTorch 0.4.0+ installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="building-a-python-virtual-environment-with-pytorch"></a>Building a Python virtual environment with PyTorch<a class="hash-link" href="#building-a-python-virtual-environment-with-pytorch" title="Direct link to heading">#</a></h3><p>TonY requires a Python virtual environment zip with PyTorch and any needed Python libraries already installed.</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">. venv/bin/activate</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">pip install torch==0.4.0</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="get-the-training-examples-1"></a>Get the training examples<a class="hash-link" href="#get-the-training-examples-1" title="Direct link to heading">#</a></h3><p>Get mnist_distributed.py from <a href="https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-pytorch" target="_blank" rel="noopener noreferrer">https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-pytorch</a></p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name PyTorch-job-001 \</span></div><div 
class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework pytorch \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/myvenv.zip#archive,path-to/mnist_distributed.py,path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,611 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: 
http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for ps 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token 
plain">2019-04-22 20:30:44,626 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="with-docker-1"></a>With Docker<a class="hash-link" href="#with-docker-1" title="Direct link to heading">#</a></h3><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name PyTorch-job-001 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework pytorch \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image pytorch-latest-gpu:0.0.1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;cd /test/ &amp;&amp; python cifar10_tutorial.py&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env 
JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HOME=/hadoop-3.1.2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_YARN_HOME=/hadoop-3.1.2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_COMMON_HOME=/hadoop-3.1.2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HDFS_HOME=/hadoop-3.1.2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_CONF_DIR=/hadoop-3.1.2/etc/hadoop \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="launch-mxnet-application"></a>Launch MXNet Application:<a class="hash-link" href="#launch-mxnet-application" title="Direct link to heading">#</a></h2><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="without-docker-2"></a>Without Docker<a class="hash-link" href="#without-docker-2" title="Direct link to heading">#</a></h3><p>You need:</p><ul><li>Build a Python virtual environment with MXNet installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="building-a-python-virtual-environment-with-mxnet"></a>Building a Python 
virtual environment with MXNet<a class="hash-link" href="#building-a-python-virtual-environment-with-mxnet" title="Direct link to heading">#</a></h3><p>TonY requires a Python virtual environment zip with MXNet and any needed Python libraries already installed.</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">. venv/bin/activate</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">pip install mxnet==1.5.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="get-the-training-examples-2"></a>Get the training examples<a class="hash-link" href="#get-the-training-examples-2" title="Direct link to heading">#</a></h3><p>Get image_classification.py from this <a href="https://github.com/apache/submarine/blob/master/dev-support/mini-submarine/submarine/image_classification.py" target="_blank" rel="noopener noreferrer">link</a></p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name MXNet-job-001 \</span></div><div class="token-line" 
style="color:#bfc7d5"><span class="token plain"> --framework mxnet \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_schedulers=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_resources memory=1G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_launch_cmd=&quot;myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/myvenv.zip#archive,path-to/image_classification.py,\</span></div><div class="token-line" style="color:#bfc7d5"><span class="token 
plain">path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: scheduler, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi status: RUNNING</span></div><div class="token-line" 
style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,839 INFO tony.TonyClient: Logs for scheduler 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,839 INFO tony.TonyClient: Logs for server 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for server 1 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,723 INFO tony.TonyClient: Task status updated: [TaskInfo] name: scheduler, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi status: SUCCEEDED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,736 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi status: SUCCEEDED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi status: 
SUCCEEDED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi status: SUCCEEDED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi status: SUCCEEDED</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="with-docker-2"></a>With Docker<a class="hash-link" href="#with-docker-2" title="Direct link to heading">#</a></h3><p>You could refer to this <a target="_blank" href="/assets/files/Dockerfile.cifar10.mx_1.5-cff207e9070bfca947922e0977637093.1">sample Dockerfile</a> for building your own Docker image.</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name MXNet-job-001 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token 
plain"> --framework mxnet \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image &lt;your_docker_image&gt; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_schedulers 1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_resources memory=1G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_launch_cmd &quot;/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=2G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=2G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --verbose \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf 
tony.containers.resources=path-to/image_classification.py,path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="use-yarn-service-to-run-submarine-deprecated"></a>Use YARN Service to run Submarine: Deprecated<a class="hash-link" href="#use-yarn-service-to-run-submarine-deprecated" title="Direct link to heading">#</a></h2><p>Historically, Submarine supported using <a href="https://hadoop.apache.org/docs/r3.1.0/hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html" target="_blank" rel="noopener noreferrer">YARN Service</a> to submit deep learning jobs. We have stopped supporting it because YARN Service is not actively developed by the community, and extra dependencies such as RegistryDNS/ATS-v2 cause many setup issues.</p><p>As of now, you can still use YARN Service to run Submarine, but the code will be removed in a future release. 
We will only support using TonY when running Submarine on YARN.</p></div></article><div class="margin-vert--xl"><div class="row"><div class="col"><a href="https://github.com/apache/submarine/edit/master/website/versioned_docs/version-0.6.0/userDocs/yarn/YARNRuntimeGuide.md" target="_blank" rel="noreferrer noopener"><svg fill="currentColor" height="1.2em" width="1.2em" preserveAspectRatio="xMidYMid meet" role="img" viewBox="0 0 40 40" class="iconEdit_2LL7"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div></div></div><div class="margin-vert--lg"><nav class="pagination-nav" aria-label="Blog list page navigation"><div class="pagination-nav__item"></div><div class="pagination-nav__item pagination-nav__item--next"></div></nav></div></div></div><div class="col col--3"><div class="tableOfContents_2xL- thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#prerequisite" class="table-of-contents__link">Prerequisite</a></li><li><a href="#build-your-own-docker-image" class="table-of-contents__link">Build your own Docker image</a></li><li><a href="#launch-tensorflow-application" class="table-of-contents__link">Launch TensorFlow Application:</a><ul><li><a href="#without-docker" class="table-of-contents__link">Without Docker</a></li><li><a href="#building-a-python-virtual-environment-with-tensorflow" class="table-of-contents__link">Building a Python virtual environment with TensorFlow</a></li><li><a href="#get-the-training-examples" class="table-of-contents__link">Get the training examples</a></li><li><a href="#with-docker" class="table-of-contents__link">With Docker</a></li></ul></li><li><a href="#launch-pytorch-application" class="table-of-contents__link">Launch PyTorch Application:</a><ul><li><a href="#without-docker-1" class="table-of-contents__link">Without Docker</a></li><li><a 
href="#building-a-python-virtual-environment-with-pytorch" class="table-of-contents__link">Building a Python virtual environment with PyTorch</a></li><li><a href="#get-the-training-examples-1" class="table-of-contents__link">Get the training examples</a></li><li><a href="#with-docker-1" class="table-of-contents__link">With Docker</a></li></ul></li><li><a href="#launch-mxnet-application" class="table-of-contents__link">Launch MXNet Application:</a><ul><li><a href="#without-docker-2" class="table-of-contents__link">Without Docker</a></li><li><a href="#building-a-python-virtual-environment-with-mxnet" class="table-of-contents__link">Building a Python virtual environment with MXNet</a></li><li><a href="#get-the-training-examples-2" class="table-of-contents__link">Get the training examples</a></li><li><a href="#with-docker-2" class="table-of-contents__link">With Docker</a></li></ul></li><li><a href="#use-yarn-service-to-run-submarine-deprecated" class="table-of-contents__link">Use YARN Service to run Submarine: Deprecated</a></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container"><div class="row footer__links"><div class="col footer__col"><h4 class="footer__title">Docs</h4><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/gettingStarted/quickstart">Getting Started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/api/environment">API docs</a></li></ul></div><div class="col footer__col"><h4 class="footer__title">Community</h4><ul class="footer__items"><li class="footer__item"><a href="https://stackoverflow.com/questions/tagged/apache-submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Stack Overflow</a></li><li class="footer__item"><a href="https://s.apache.org/slack-invite" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack</a></li></ul></div><div class="col footer__col"><h4 
class="footer__title">More</h4><ul class="footer__items"><li class="footer__item"><a href="https://medium.com/@apache.submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog</a></li><li class="footer__item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub</a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" class="footerLogoLink_31Aa"><img class="footer__logo" alt="Apache Open Source Logo" src="https://hadoop.apache.org/asf_logo_wide.png"></a></div><div class="footer__copyright">Apache Submarine, Submarine, Apache, the Apache feather logo, and the Apache Submarine project logo are
either registered trademarks or trademarks of the Apache Software Foundation in the United States and other
countries.<br> Copyright © 2022 Apache Submarine is Apache2 Licensed software.</div></div></div></footer></div>
<script src="/styles.6ddab7ad.js"></script>
<script src="/runtime~main.943b9f3a.js"></script>
<script src="/main.92db4fb0.js"></script>
<script src="/1.dab80c77.js"></script>
<script src="/2.6a1a8326.js"></script>
<script src="/1f391b9e.f22e8160.js"></script>
<script src="/128.10c43f42.js"></script>
<script src="/58f10d9f.87a860a5.js"></script>
<script src="/17896441.be8f95bf.js"></script>
<script src="/27f12fe0.7cd2b4d7.js"></script>
</body>
</html>