blob: a7797458f6e0f8cf44042a8be6c09d12b4214bd4 [file] [log] [blame]
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="generator" content="Docusaurus v2.0.0-alpha.70">
<link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="Apache Submarine Blog RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="Apache Submarine Blog Atom Feed"><title data-react-helmet="true">YARN Runtime Quick Start Guide | Apache Submarine</title><meta data-react-helmet="true" name="twitter:card" content="summary_large_image"><meta data-react-helmet="true" name="docusaurus_locale" content="en"><meta data-react-helmet="true" name="docusaurus_version" content="current"><meta data-react-helmet="true" name="docusaurus_tag" content="docs-default-current"><meta data-react-helmet="true" property="og:title" content="YARN Runtime Quick Start Guide | Apache Submarine"><meta data-react-helmet="true" name="description" content="Quick start guide for launching TensorFlow, PyTorch and MXNet applications with the Apache Submarine YARN runtime."><meta data-react-helmet="true" property="og:description" content="Quick start guide for launching TensorFlow, PyTorch and MXNet applications with the Apache Submarine YARN runtime."><meta data-react-helmet="true" property="og:url" content="https://submarine.apache.org/docs/userDocs/yarn/YARNRuntimeGuide"><link data-react-helmet="true" rel="shortcut icon" href="/img/favicon.ico"><link data-react-helmet="true" rel="canonical" href="https://submarine.apache.org/docs/userDocs/yarn/YARNRuntimeGuide"><link rel="stylesheet" href="/styles.058db332.css">
<link rel="preload" href="/styles.d28ad9a6.js" as="script">
<link rel="preload" href="/runtime~main.b7d20d9e.js" as="script">
<link rel="preload" href="/main.4fdf81d8.js" as="script">
<link rel="preload" href="/1.ecdfe063.js" as="script">
<link rel="preload" href="/2.ff74b3cd.js" as="script">
<link rel="preload" href="/1be78505.ae15da12.js" as="script">
<link rel="preload" href="/c4f5d8e4.93f03c17.js" as="script">
<link rel="preload" href="/79.c30128b5.js" as="script">
<link rel="preload" href="/78.25baa806.js" as="script">
<link rel="preload" href="/935f2afb.4fd644c9.js" as="script">
<link rel="preload" href="/17896441.bdc3ce75.js" as="script">
<link rel="preload" href="/fa111d7f.fb61093d.js" as="script">
</head>
<body>
<script>
// Set the data-theme attribute on the root element from the "theme" entry in
// localStorage, falling back to "light" when no value is stored.
(function () {
  var storedTheme = null;
  try {
    storedTheme = localStorage.getItem("theme");
  } catch (err) {
    // Reading localStorage may throw; the stored value then stays null.
  }
  var theme = storedTheme !== null ? storedTheme : "light";
  document.documentElement.setAttribute("data-theme", theme);
})();
</script><div id="__docusaurus">
<nav aria-label="Skip navigation links"><button type="button" tabindex="0" class="skipToContent_11B0">Skip to main content</button></nav><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><div aria-label="Navigation bar toggle" class="navbar__toggle" role="button" tabindex="0"><svg aria-label="Menu" width="30" height="30" viewBox="0 0 30 30" role="img" focusable="false"><title>Menu</title><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></div><a class="navbar__brand" href="/"><img src="https://github.com/apache/submarine/blob/master/website/docs/assets/128-black.png?raw=true" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--light_3CMI navbar__logo"><img src="https://github.com/apache/submarine/blob/master/website/docs/assets/128-black.png?raw=true" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--dark_3ARp navbar__logo"><strong class="navbar__title">Apache Submarine</strong></a><a class="navbar__item navbar__link" href="/docs/">Docs</a><a class="navbar__item navbar__link" href="/docs/api/environment">API</a><a class="navbar__item navbar__link navbar__link--active" href="/docs/download">Download</a></div><div class="navbar__items navbar__items--right"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a class="navbar__item navbar__link">Apache</a><ul class="dropdown__menu"><li><a href="http://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache Software Foundation</a></li><li><a href="http://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Apache License</a></li><li><a href="http://www.apache.org/foundation/sponsorship.html" target="_blank" 
rel="noopener noreferrer" class="dropdown__link">Sponsorship</a></li><li><a href="http://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks</a></li></ul></div><div class="react-toggle react-toggle--disabled displayOnlyInLargeViewport_2N3Q"><div class="react-toggle-track"><div class="react-toggle-track-check"><span class="toggle_3NWk">🌜</span></div><div class="react-toggle-track-x"><span class="toggle_3NWk">🌞</span></div></div><div class="react-toggle-thumb"></div><input type="checkbox" disabled="" aria-label="Dark mode toggle" class="react-toggle-screenreader-only"></div><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input type="search" id="search_input_react" placeholder="Search" aria-label="Search" class="navbar__search-input search-bar"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div><div class="navbar-sidebar"><div class="navbar-sidebar__brand"><a class="navbar__brand" href="/"><img src="https://github.com/apache/submarine/blob/master/website/docs/assets/128-black.png?raw=true" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--light_3CMI navbar__logo"><img src="https://github.com/apache/submarine/blob/master/website/docs/assets/128-black.png?raw=true" alt="Apache Submarine Site Logo" class="themedImage_YANc themedImage--dark_3ARp navbar__logo"><strong class="navbar__title">Apache Submarine</strong></a></div><div class="navbar-sidebar__items"><div class="menu"><ul class="menu__list"><li class="menu__list-item"><a class="menu__link" href="/docs/">Docs</a></li><li class="menu__list-item"><a class="menu__link" href="/docs/api/environment">API</a></li><li class="menu__list-item"><a class="menu__link navbar__link--active" href="/docs/download">Download</a></li><li class="menu__list-item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" 
class="menu__link">GitHub</a></li><li class="menu__list-item menu__list-item--collapsed"><a role="button" class="menu__link menu__link--sublist">Apache</a><ul class="menu__list"><li class="menu__list-item"><a href="http://www.apache.org/foundation/how-it-works.html" target="_blank" rel="noopener noreferrer" class="menu__link">Apache Software Foundation</a></li><li class="menu__list-item"><a href="http://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="menu__link">Apache License</a></li><li class="menu__list-item"><a href="http://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="menu__link">Sponsorship</a></li><li class="menu__list-item"><a href="http://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="menu__link">Thanks</a></li></ul></li></ul></div></div></div></nav><div class="main-wrapper"><div class="docPage_vMrn"><main class="docMainContainer_2iGs"><div class="container padding-vert--lg docItemWrapper_1bxp"><div class="row"><div class="col docItemCol_U38p"><div class="docItemContainer_a7m4"><article><header><h1 class="docTitle_Oumm">YARN Runtime Quick Start Guide</h1></header><div class="markdown"><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="prerequisite"></a>Prerequisite<a class="hash-link" href="#prerequisite" title="Direct link to heading">#</a></h2><p>Check out the <a href="/docs/adminDocs/yarn/README">Running Submarine on YARN</a></p><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="build-your-own-docker-image"></a>Build your own Docker image<a class="hash-link" href="#build-your-own-docker-image" title="Direct link to heading">#</a></h2><p>When you follow the documents below, and want to build your own Docker image for Tensorflow/PyTorch/MXNet? 
Please check out <a href="/docs/userDocs/yarn/Dockerfiles">Build your Docker image</a> for more details.</p><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="launch-tensorflow-application"></a>Launch TensorFlow Application:<a class="hash-link" href="#launch-tensorflow-application" title="Direct link to heading">#</a></h2><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="without-docker"></a>Without Docker<a class="hash-link" href="#without-docker" title="Direct link to heading">#</a></h3><p>You need:</p><ul><li>Build a Python virtual environment with TensorFlow 1.13.1 installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="building-a-python-virtual-environment-with-tensorflow"></a>Building a Python virtual environment with TensorFlow<a class="hash-link" href="#building-a-python-virtual-environment-with-tensorflow" title="Direct link to heading">#</a></h3><p>TonY requires a Python virtual environment zip with TensorFlow and any needed Python libraries already installed.</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"># Make sure to install using Python 3, as TensorFlow only provides Python 3 artifacts</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">. venv/bin/activate</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">pip install tensorflow==1.13.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p>The above commands will produce a myvenv.zip, which is used in the example below. There&#x27;s no need to copy it to other nodes, and it is not needed when using Docker to run the job.</p><p><strong>Note:</strong> If you require a version of TensorFlow and TensorBoard prior to <code>1.13.1</code>, take a look at <a href="https://github.com/linkedin/TonY/issues/42" target="_blank" rel="noopener noreferrer">this</a> issue.</p><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="get-the-training-examples"></a>Get the training examples<a class="hash-link" href="#get-the-training-examples" title="Direct link to heading">#</a></h3><p>Get mnist_distributed.py from <a href="https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-tensorflow" target="_blank" rel="noopener noreferrer">https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-tensorflow</a></p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div 
class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name tf-job-001 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework tensorflow \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --verbose \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=1G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=1G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf 
tony.containers.resources=path-to/myvenv.zip#archive,path-to/mnist_distributed.py,path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,611 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for ps 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token 
plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,626 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="with-docker"></a>With Docker<a class="hash-link" href="#with-docker" title="Direct link to heading">#</a></h3><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name tf-job-001 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework tensorflow \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image hadoopsubmarine/tf-1.8.0-cpu:0.0.1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path hdfs://pi-aw:9000/dataset/cifar-10-data \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;export CLASSPATH=\$(/hadoop-3.1.0/bin/hadoop classpath --glob) &amp;&amp; cd /test/models/tutorials/image/cifar10_estimator &amp;&amp; python cifar10_main.py --data-dir=%input_path% --job-dir=%checkpoint_path% --train-steps=10000 
--eval-batch-size=16 --train-batch-size=16 --variable-strategy=CPU --num-gpus=0 --sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HOME=/hadoop-3.1.0 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_YARN_HOME=/hadoop-3.1.0 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_COMMON_HOME=/hadoop-3.1.0 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HDFS_HOME=/hadoop-3.1.0 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_CONF_DIR=/hadoop-3.1.0/etc/hadoop \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h4><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="notes"></a>Notes:<a class="hash-link" href="#notes" title="Direct link to heading">#</a></h4><p>1) <code>DOCKER_JAVA_HOME</code> points to JAVA_HOME inside Docker image.</p><p>2) <code>DOCKER_HADOOP_HDFS_HOME</code> points to HADOOP_HDFS_HOME inside Docker image.</p><p>We removed TonY submodule after applying <a href="https://issues.apache.org/jira/browse/SUBMARINE-371" target="_blank" rel="noopener noreferrer">SUBMARINE-371</a> and changed to use 
TonY dependency directly.</p><p>After Submarine v0.2.0, there is an uber jar <code>submarine-all-${SUBMARINE_VERSION}-hadoop-${HADOOP_VERSION}.jar</code> released together with
the <code>submarine-core-${SUBMARINE_VERSION}.jar</code>, <code>submarine-yarnservice-runtime-${SUBMARINE_VERSION}.jar</code> and <code>submarine-tony-runtime-${SUBMARINE_VERSION}.jar</code>.</p><br><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="launch-pytorch-application"></a>Launch PyTorch Application:<a class="hash-link" href="#launch-pytorch-application" title="Direct link to heading">#</a></h2><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="without-docker-1"></a>Without Docker<a class="hash-link" href="#without-docker-1" title="Direct link to heading">#</a></h3><p>You need:</p><ul><li>Build a Python virtual environment with PyTorch 0.4.0+ installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="building-a-python-virtual-environment-with-pytorch"></a>Building a Python virtual environment with PyTorch<a class="hash-link" href="#building-a-python-virtual-environment-with-pytorch" title="Direct link to heading">#</a></h3><p>TonY requires a Python virtual environment zip with PyTorch and any needed Python libraries already installed.</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">. venv/bin/activate</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">pip install torch==0.4.0</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="get-the-training-examples-1"></a>Get the training examples<a class="hash-link" href="#get-the-training-examples-1" title="Direct link to heading">#</a></h3><p>Get mnist_distributed.py from <a href="https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-pytorch" target="_blank" rel="noopener noreferrer">https://github.com/linkedin/TonY/tree/master/tony-examples/mnist-pytorch</a></p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name PyTorch-job-001 \</span></div><div 
class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework pytorch \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;myvenv.zip/venv/bin/python mnist_distributed.py&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/myvenv.zip#archive,path-to/mnist_distributed.py,\</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,611 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: 
http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for ps 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:42,612 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: ps index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000002/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2019-04-22 20:30:44,625 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 0 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000003/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token 
plain">2019-04-22 20:30:44,626 INFO tony.TonyClient: Tasks Status Updated: [TaskInfo] name: worker index: 1 url: http://pi-aw:8042/node/containerlogs/container_1555916523933_0030_01_000004/pi status: FINISHED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="with-docker-1"></a>With Docker<a class="hash-link" href="#with-docker-1" title="Direct link to heading">#</a></h3><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name PyTorch-job-001 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --framework pytorch \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image pytorch-latest-gpu:0.0.1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;cd /test/ &amp;&amp; python cifar10_tutorial.py&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env 
JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HOME=/hadoop-3.1.2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_YARN_HOME=/hadoop-3.1.2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_COMMON_HOME=/hadoop-3.1.2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_HDFS_HOME=/hadoop-3.1.2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --env HADOOP_CONF_DIR=/hadoop-3.1.2/etc/hadoop \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="launch-mxnet-application"></a>Launch MXNet Application:<a class="hash-link" href="#launch-mxnet-application" title="Direct link to heading">#</a></h2><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="without-docker-2"></a>Without Docker<a class="hash-link" href="#without-docker-2" title="Direct link to heading">#</a></h3><p>You need:</p><ul><li>Build a Python virtual environment with MXNet installed</li><li>A cluster with Hadoop 2.9 or above.</li></ul><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="building-a-python-virtual-environment-with-mxnet"></a>Building a Python 
virtual environment with MXNet<a class="hash-link" href="#building-a-python-virtual-environment-with-mxnet" title="Direct link to heading">#</a></h3><p>TonY requires a Python virtual environment zip with MXNet and any needed Python libraries already installed.</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">wget https://files.pythonhosted.org/packages/33/bc/fa0b5347139cd9564f0d44ebd2b147ac97c36b2403943dbee8a25fd74012/virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">tar xf virtualenv-16.0.0.tar.gz</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain" style="display:inline-block">
</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">python virtualenv-16.0.0/virtualenv.py venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">. venv/bin/activate</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">pip install mxnet==1.5.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">zip -r myvenv.zip venv</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">deactivate</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="get-the-training-examples-2"></a>Get the training examples<a class="hash-link" href="#get-the-training-examples-2" title="Direct link to heading">#</a></h3><p>Get image_classification.py from this <a href="https://github.com/apache/submarine/blob/master/dev-support/mini-submarine/submarine/image_classification.py" target="_blank" rel="noopener noreferrer">link</a></p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name MXNet-job-001 \</span></div><div class="token-line" 
style="color:#bfc7d5"><span class="token plain"> --framework mxnet \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=3G,vcores=2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_schedulers=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_resources memory=1G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_launch_cmd=&quot;myvenv.zip/venv/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf tony.containers.resources=path-to/myvenv.zip#archive,path-to/image_classification.py,\</span></div><div class="token-line" style="color:#bfc7d5"><span class="token 
plain">path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><p>You should then be able to see links and status of the jobs from command line:</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi status: RUNNING</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,834 INFO tony.TonyClient: Task status updated: [TaskInfo] name: scheduler, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi status: RUNNING</span></div><div class="token-line" 
style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,839 INFO tony.TonyClient: Logs for scheduler 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,839 INFO tony.TonyClient: Logs for server 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for server 1 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for worker 0 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 20:23:43,840 INFO tony.TonyClient: Logs for worker 1 at: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,723 INFO tony.TonyClient: Task status updated: [TaskInfo] name: scheduler, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000002/pi status: SUCCEEDED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,736 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000005/pi status: SUCCEEDED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000004/pi status: 
SUCCEEDED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: worker, index: 1, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000006/pi status: SUCCEEDED</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">2020-04-16 21:02:09,737 INFO tony.TonyClient: Task status updated: [TaskInfo] name: server, index: 0, url: http://pi-aw:8042/node/containerlogs/container_1587037749540_0005_01_000003/pi status: SUCCEEDED</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h3><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="with-docker-2"></a>With Docker<a class="hash-link" href="#with-docker-2" title="Direct link to heading">#</a></h3><p>You could refer to this <a target="_blank" href="/assets/files/Dockerfile.cifar10.mx_1.5-cff207e9070bfca947922e0977637093.1">sample Dockerfile</a> for building your own Docker image.</p><div class="mdxCodeBlock_1zKU"><div class="codeBlockContent_actS"><div tabindex="0" class="prism-code language-undefined codeBlock_tuNs thin-scrollbar"><div class="codeBlockLines_3uvA" style="color:#bfc7d5;background-color:#292d3e"><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_VERSION=&lt;REPLACE_VERSION&gt;</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">SUBMARINE_HADOOP_VERSION=3.1</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">CLASSPATH=$(hadoop classpath --glob):path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain">java org.apache.submarine.client.cli.Cli job run --name MXNet-job-001 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token 
plain"> --framework mxnet \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --docker_image &lt;your_docker_image&gt; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --input_path &quot;&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_schedulers 1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_resources memory=1G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --scheduler_launch_cmd &quot;/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_workers 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_resources memory=2G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --worker_launch_cmd &quot;/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --num_ps 2 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_resources memory=2G,vcores=1 \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --ps_launch_cmd &quot;/usr/bin/python image_classification.py --dataset cifar10 --model vgg11 --epochs 1 --kvstore dist_sync&quot; \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --verbose \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --insecure \</span></div><div class="token-line" style="color:#bfc7d5"><span class="token plain"> --conf 
tony.containers.resources=path-to/image_classification.py,path-to/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar</span></div></div></div><button type="button" aria-label="Copy code to clipboard" class="copyButton_2GIj">Copy</button></div></div><h2><a aria-hidden="true" tabindex="-1" class="anchor enhancedAnchor_prK2" id="use-yarn-service-to-run-submarine-deprecated"></a>Use YARN Service to run Submarine: Deprecated<a class="hash-link" href="#use-yarn-service-to-run-submarine-deprecated" title="Direct link to heading">#</a></h2><p>Historically, Submarine supported using <a href="https://hadoop.apache.org/docs/r3.1.0/hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html" target="_blank" rel="noopener noreferrer">YARN Service</a> to submit deep learning jobs. We have stopped supporting it because YARN Service is not actively developed by the community, and extra dependencies such as RegistryDNS/ATS-v2 cause lots of issues during setup.</p><p>As of now, you can still use YARN Service to run Submarine, but the code will be removed in a future release. 
We will only support using TonY when running Submarine on YARN.</p></div></article><div class="margin-vert--xl"><div class="row"><div class="col"><a href="https://github.com/apache/submarine/edit/master/website/docs/userDocs/yarn/YARNRuntimeGuide.md" target="_blank" rel="noreferrer noopener"><svg fill="currentColor" height="1.2em" width="1.2em" preserveAspectRatio="xMidYMid meet" role="img" viewBox="0 0 40 40" class="iconEdit_2LL7"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div></div></div><div class="margin-vert--lg"><nav class="pagination-nav" aria-label="Blog list page navigation"><div class="pagination-nav__item"></div><div class="pagination-nav__item pagination-nav__item--next"></div></nav></div></div></div><div class="col col--3"><div class="tableOfContents_2xL- thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#prerequisite" class="table-of-contents__link">Prerequisite</a></li><li><a href="#build-your-own-docker-image" class="table-of-contents__link">Build your own Docker image</a></li><li><a href="#launch-tensorflow-application" class="table-of-contents__link">Launch TensorFlow Application:</a><ul><li><a href="#without-docker" class="table-of-contents__link">Without Docker</a></li><li><a href="#building-a-python-virtual-environment-with-tensorflow" class="table-of-contents__link">Building a Python virtual environment with TensorFlow</a></li><li><a href="#get-the-training-examples" class="table-of-contents__link">Get the training examples</a></li><li><a href="#with-docker" class="table-of-contents__link">With Docker</a></li></ul></li><li><a href="#launch-pytorch-application" class="table-of-contents__link">Launch PyTorch Application:</a><ul><li><a href="#without-docker-1" class="table-of-contents__link">Without Docker</a></li><li><a 
href="#building-a-python-virtual-environment-with-pytorch" class="table-of-contents__link">Building a Python virtual environment with PyTorch</a></li><li><a href="#get-the-training-examples-1" class="table-of-contents__link">Get the training examples</a></li><li><a href="#with-docker-1" class="table-of-contents__link">With Docker</a></li></ul></li><li><a href="#launch-mxnet-application" class="table-of-contents__link">Launch MXNet Application:</a><ul><li><a href="#without-docker-2" class="table-of-contents__link">Without Docker</a></li><li><a href="#building-a-python-virtual-environment-with-mxnet" class="table-of-contents__link">Building a Python virtual environment with MXNet</a></li><li><a href="#get-the-training-examples-2" class="table-of-contents__link">Get the training examples</a></li><li><a href="#with-docker-2" class="table-of-contents__link">With Docker</a></li></ul></li><li><a href="#use-yarn-service-to-run-submarine-deprecated" class="table-of-contents__link">Use YARN Service to run Submarine: Deprecated</a></li></ul></div></div></div></div></main></div></div><footer class="footer footer--dark"><div class="container"><div class="row footer__links"><div class="col footer__col"><h4 class="footer__title">Docs</h4><ul class="footer__items"><li class="footer__item"><a class="footer__link-item" href="/docs/">Getting Started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/api/environment">API docs</a></li></ul></div><div class="col footer__col"><h4 class="footer__title">Community</h4><ul class="footer__items"><li class="footer__item"><a href="https://stackoverflow.com/questions/tagged/apache-submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Stack Overflow</a></li><li class="footer__item"><a href="https://s.apache.org/slack-invite" target="_blank" rel="noopener noreferrer" class="footer__link-item">Slack</a></li></ul></div><div class="col footer__col"><h4 class="footer__title">More</h4><ul 
class="footer__items"><li class="footer__item"><a href="https://medium.com/@apache.submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog</a></li><li class="footer__item"><a href="https://github.com/apache/submarine" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub</a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright">Copyright © 2021 Apache Submarine is Apache2 Licensed software.</div></div></div></footer></div>
<script src="/styles.d28ad9a6.js"></script>
<script src="/runtime~main.b7d20d9e.js"></script>
<script src="/main.4fdf81d8.js"></script>
<script src="/1.ecdfe063.js"></script>
<script src="/2.ff74b3cd.js"></script>
<script src="/1be78505.ae15da12.js"></script>
<script src="/c4f5d8e4.93f03c17.js"></script>
<script src="/79.c30128b5.js"></script>
<script src="/78.25baa806.js"></script>
<script src="/935f2afb.4fd644c9.js"></script>
<script src="/17896441.bdc3ce75.js"></script>
<script src="/fa111d7f.fb61093d.js"></script>
</body>
</html>