blob: 3d339504b2138a45ebba0a21c59d7782a6c4bc2d [file] [log] [blame]
<!DOCTYPE html>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<html lang="en"><head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="/versions/1.9.1/assets/img/mxnet-icon.png" rel="icon" type="image/png"><!-- Begin Jekyll SEO tag v2.6.1 -->
<title>MXNet on the Cloud | Apache MXNet</title>
<meta name="generator" content="Jekyll v3.8.6" />
<meta property="og:title" content="MXNet on the Cloud" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="A flexible and efficient library for deep learning." />
<meta property="og:description" content="A flexible and efficient library for deep learning." />
<link rel="canonical" href="" />
<meta property="og:url" content="" />
<meta property="og:site_name" content="Apache MXNet" />
<script type="application/ld+json">
{"headline":"MXNet on the Cloud","description":"A flexible and efficient library for deep learning.","url":"","@type":"WebPage","@context":""}</script>
<!-- End Jekyll SEO tag -->
<link rel="stylesheet" href="/versions/1.9.1/assets/docsearch.min.css" /><link rel="stylesheet" href="/versions/1.9.1/assets/main.css"><link type="application/atom+xml" rel="alternate" href="" title="Apache MXNet" /><!-- Matomo -->
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
(function() {
var u="";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '23']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
<!-- End Matomo Code -->
<script src="/versions/1.9.1/assets/js/jquery-3.3.1.min.js"></script>
<script src="/versions/1.9.1/assets/js/docsearch.min.js"></script><script src="/versions/1.9.1/assets/js/globalSearch.js" defer></script>
<script src="/versions/1.9.1/assets/js/clipboard.js" defer></script>
<script src="/versions/1.9.1/assets/js/copycode.js" defer></script></head>
<body><header class="site-header" role="banner">
$(document).ready(function () {
function opacity_header() {
var value = "rgba(4,140,204," + ($(window).scrollTop() / 300 + 0.4) + ")"
$('.site-header').css("background-color", value)
$(window).scroll(function () {
$('.page-link').each( function () {
if (window.location.href.includes(this.href)) {
<div class="wrapper">
<a class="site-title" rel="author" href="/versions/1.9.1/"><img
src="/versions/1.9.1/assets/img/mxnet_logo.png" class="site-header-logo" alt="Apache MXNet"></a>
<nav class="site-nav">
<input type="checkbox" id="nav-trigger" class="nav-trigger"/>
<label for="nav-trigger">
<span class="menu-icon">
<svg viewBox="0 0 18 15" width="18px" height="15px">
<path d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.032C17.335,0,18,0.665,18,1.484L18,1.484z M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.032C17.335,6.031,18,6.696,18,7.516L18,7.516z M18,13.516C18,14.335,17.335,15,16.516,15H1.484 C0.665,15,0,14.335,0,13.516l0,0c0-0.82,0.665-1.483,1.484-1.483h15.032C17.335,12.031,18,12.695,18,13.516L18,13.516z"/>
<div class="gs-search-border">
<div id="gs-search-icon"></div>
<form id="global-search-form">
<input id="global-search" type="text" title="Search" placeholder="Search" />
<div id="global-search-dropdown-container">
<button class="gs-current-version btn" type="button" data-toggle="dropdown">
<span id="gs-current-version-label">1.9.1</span>
<svg class="gs-dropdown-caret" viewBox="0 0 32 32" aria-hidden="true">
<path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path>
<ul class="gs-opt-group gs-version-dropdown">
<li class="gs-opt gs-versions">master</li>
<li class="gs-opt gs-versions active">1.9.1</li>
<li class="gs-opt gs-versions">1.8.0</li>
<li class="gs-opt gs-versions">1.7.0</li>
<li class="gs-opt gs-versions">1.6.0</li>
<li class="gs-opt gs-versions">1.5.0</li>
<li class="gs-opt gs-versions">1.4.1</li>
<li class="gs-opt gs-versions">1.3.1</li>
<li class="gs-opt gs-versions">1.2.1</li>
<li class="gs-opt gs-versions">1.1.0</li>
<li class="gs-opt gs-versions">1.0.0</li>
<li class="gs-opt gs-versions">0.12.1</li>
<li class="gs-opt gs-versions">0.11.0</li>
<span id="global-search-close">x</span>
<div class="trigger">
<div id="global-search-mobile-border">
<div id="gs-search-icon-mobile"></div>
<input id="global-search-mobile" placeholder="Search..." type="text"/>
<div id="global-search-dropdown-container-mobile">
<button class="gs-current-version-mobile btn" type="button" data-toggle="dropdown">
<svg class="gs-dropdown-caret" viewBox="0 0 32 32" aria-hidden="true">
<path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path>
<ul class="gs-opt-group gs-version-dropdown-mobile">
<li class="gs-opt gs-versions">master</li>
<li class="gs-opt gs-versions active">1.9.1</li>
<li class="gs-opt gs-versions">1.8.0</li>
<li class="gs-opt gs-versions">1.7.0</li>
<li class="gs-opt gs-versions">1.6.0</li>
<li class="gs-opt gs-versions">1.5.0</li>
<li class="gs-opt gs-versions">1.4.1</li>
<li class="gs-opt gs-versions">1.3.1</li>
<li class="gs-opt gs-versions">1.2.1</li>
<li class="gs-opt gs-versions">1.1.0</li>
<li class="gs-opt gs-versions">1.0.0</li>
<li class="gs-opt gs-versions">0.12.1</li>
<li class="gs-opt gs-versions">0.11.0</li>
<a class="page-link" href="/versions/1.9.1/get_started">Get Started</a>
<a class="page-link" href="/versions/1.9.1/features">Features</a>
<a class="page-link" href="/versions/1.9.1/ecosystem">Ecosystem</a>
<a class="page-link" href="/versions/1.9.1/api">Docs &amp; Tutorials</a>
<a class="page-link" href="/versions/1.9.1/trusted_by">Trusted By</a>
<a class="page-link" href="">GitHub</a>
<div class="dropdown" style="min-width:100px">
<span class="dropdown-header">Apache
<svg class="dropdown-caret" viewBox="0 0 32 32" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
<div class="dropdown-content" style="min-width:250px">
<a href="">Apache Software Foundation</a>
<a href="">Apache Incubator</a>
<a href="">License</a>
<a href="/versions/1.9.1/api/faq/security.html">Security</a>
<a href="">Privacy</a>
<a href="">Events</a>
<a href="">Sponsorship</a>
<a href="">Thanks</a>
<div class="dropdown">
<span class="dropdown-header">1.9.1
<svg class="dropdown-caret" viewBox="0 0 32 32" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
<div class="dropdown-content">
<a href="/">master</a>
<a class="dropdown-option-active" href="/versions/1.9.1/">1.9.1</a>
<a href="/versions/1.8.0/">1.8.0</a>
<a href="/versions/1.7.0/">1.7.0</a>
<a href="/versions/1.6.0/">1.6.0</a>
<a href="/versions/1.5.0/">1.5.0</a>
<a href="/versions/1.4.1/">1.4.1</a>
<a href="/versions/1.3.1/">1.3.1</a>
<a href="/versions/1.2.1/">1.2.1</a>
<a href="/versions/1.1.0/">1.1.0</a>
<a href="/versions/1.0.0/">1.0.0</a>
<a href="/versions/0.12.1/">0.12.1</a>
<a href="/versions/0.11.0/">0.11.0</a>
<main class="page-content" aria-label="Content">
<article class="post">
<header class="post-header wrapper">
<h1 class="post-title">MXNet on the Cloud</h1>
<div class="post-content">
<div class="wrapper">
<div class="row">
<div class="col-3 docs-side-bar">
<h3 style="text-transform: capitalize; padding-left:10px">faq</h3>
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/add_op_in_backend">A Beginner's Guide to Implementing Operators in MXNet Backend</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/caffe">Convert from Caffe to MXNet</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/cloud">MXNet on the Cloud</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/distributed_training">Distributed Training in MXNet</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/env_var">Environment Variables</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/float16">Float16</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/large_tensor_support">Using MXNet with Large Tensor Support</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/model_parallel_lstm">Model Parallel</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/multi_device">Data Parallelism with Multiple CPU/GPUs on MXNet</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/new_op">Create New Operators</a></li>
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/nnpack">NNPACK for Multi-Core CPU Support in MXNet</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/perf">Some Tips for Improving MXNet Performance</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/recordio">Create a Dataset Using RecordIO</a></li>
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/s3_integration">Use data from S3 for training</a></li>
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/security">MXNet Security Best Practices</a></li>
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/smart_device">Deep Learning at the Edge</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/visualize_graph">Visualize Neural Networks</a></li>
<!-- page-category -->
<li><a href="/versions/1.9.1/api/faq/why_mxnet">Why MXNet came to be?</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- resource-p -->
<div class="col-9">
<!--- Licensed to the Apache Software Foundation (ASF) under one -->
<!--- or more contributor license agreements. See the NOTICE file -->
<!--- distributed with this work for additional information -->
<!--- regarding copyright ownership. The ASF licenses this file -->
<!--- to you under the Apache License, Version 2.0 (the -->
<!--- "License"); you may not use this file except in compliance -->
<!--- with the License. You may obtain a copy of the License at -->
<!--- -->
<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
<!--- -->
<!--- Unless required by applicable law or agreed to in writing, -->
<!--- software distributed under the License is distributed on an -->
<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
<!--- KIND, either express or implied. See the License for the -->
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->
<h1 id="mxnet-on-the-cloud">MXNet on the Cloud</h1>
<p>Deep learning can require extremely powerful hardware, often for unpredictable durations of time.
Moreover, <em>MXNet</em> can benefit from both multiple GPUs and multiple machines.
Accordingly, cloud computing, as offered by AWS and others,
is especially well suited to training deep learning models.
Using AWS, we can rapidly fire up multiple machines with multiple GPUs each at will
and maintain the resources for precisely the amount of time needed.</p>
<h2 id="set-up-an-aws-gpu-cluster-from-scratch">Set Up an AWS GPU Cluster from Scratch</h2>
<p>In this document, we provide a step-by-step guide that will teach you
how to set up an AWS cluster with <em>MXNet</em>. We show how to:</p>
<li><a href="#use-amazon-s3-to-host-data">Use Amazon S3 to host data</a></li>
<li><a href="#set-up-an-ec2-gpu-instance">Set up an EC2 GPU instance with all dependencies installed</a></li>
<li><a href="#build-and-run-mxnet-on-a-gpu-instance">Build and run MXNet on a single computer</a></li>
<li><a href="#set-up-an-ec2-gpu-cluster-for-distributed-training">Set up an EC2 GPU cluster for distributed training</a></li>
<h3 id="use-amazon-s3-to-host-data">Use Amazon S3 to Host Data</h3>
<p>Amazon S3 provides distributed data storage which proves especially convenient for hosting large datasets.
To use S3, you need <a href="">AWS credentials</a>,
including an <code>ACCESS_KEY_ID</code> and a <code>SECRET_ACCESS_KEY</code>.</p>
<p>To use <em>MXNet</em> with S3, set the environment variables <code>AWS_ACCESS_KEY_ID</code> and
<code>AWS_SECRET_ACCESS_KEY</code> by adding the following two lines in
<code>~/.bashrc</code> (replacing the strings with the correct ones):</p>
<div class="highlight"><pre><code class="language-bash" data-lang="bash"><span class="nb">export </span><span class="nv">AWS_ACCESS_KEY_ID</span><span class="o">=</span>AKIAIOSFODNN7EXAMPLE
<span class="nb">export </span><span class="nv">AWS_SECRET_ACCESS_KEY</span><span class="o">=</span>wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
<p>There are several ways to upload data to S3. One simple way is to use
<a href="">s3cmd</a>. For example:</p>
<div class="highlight"><pre><code class="language-bash" data-lang="bash">wget
unzip <span class="o">&amp;&amp;</span> s3cmd put t<span class="k">*</span><span class="nt">-ubyte</span> s3://dmlc/mnist/
<h3 id="use-pre-installed-ec2-gpu-instance">Use Pre-installed EC2 GPU Instance</h3>
<p>The <a href="">Deep Learning AMI</a> is an Amazon Linux image
supported and maintained by Amazon Web Services for use on Amazon Elastic Compute Cloud (Amazon EC2).
It contains <a href="">MXNet-v0.9.3 tag</a> and the necessary components to get going with deep learning,
including Nvidia drivers, CUDA, cuDNN, Anaconda, Python2 and Python3.<br>
The AMI IDs are the following:</p>
<li>us-east-1: ami-e7c96af1</li>
<li>us-west-2: ami-dfb13ebf</li>
<li>eu-west-1: ami-6e5d6808</li>
<p>Now you can launch <em>MXNet</em> directly on an EC2 GPU instance.<br>
You can also use a <a href="">Jupyter</a> notebook on an EC2 machine.
Here is a <a href="">good tutorial</a>
on how to connect to a Jupyter notebook running on an EC2 instance.</p>
<h3 id="set-up-an-ec2-gpu-instance-from-scratch">Set Up an EC2 GPU Instance from Scratch</h3>
<p><em>MXNet</em> requires the following libraries:</p>
<li>C++ compiler with C++11 support, such as <code>gcc &gt;= 4.8</code></li>
<li><code>CUDA</code> (<code>CUDNN</code> is optional) for GPU linear algebra</li>
<li><code>BLAS</code> (cblas, open-blas, atlas, mkl, or others) for CPU linear algebra</li>
<li><code>opencv</code> for image augmentations</li>
<li><code>curl</code> and <code>openssl</code> for the ability to read/write to Amazon S3</li>
<p>Installing <code>CUDA</code> on EC2 instances requires some effort. Caffe has a good
<a href=",-CUDA-7,-cuDNN-3)">tutorial</a>
on how to install CUDA 7.0 on Ubuntu 14.04.</p>
<p><strong><em>Note:</em></strong> We tried CUDA 7.5 on Nov 7, 2015, but found it problematic.</p>
<p>You can install the rest using the package manager. For example, on Ubuntu:</p>
<div class="highlight"><pre><code class="language-" data-lang="">sudo apt-get update
sudo apt-get install -y build-essential git libcurl4-openssl-dev libatlas-base-dev libopencv-dev python-numpy
<p>The Amazon Machine Image (AMI) <a href="">ami-12fd8178</a> has the packages listed above installed.</p>
<h3 id="build-and-run-mxnet-on-a-gpu-instance">Build and Run MXNet on a GPU Instance</h3>
<p>The following commands build <em>MXNet</em> with CUDA/CUDNN, Amazon S3, and distributed training:</p>
<div class="highlight"><pre><code class="language-bash" data-lang="bash">git clone <span class="nt">--recursive</span>
<span class="nb">cd </span>mxnet<span class="p">;</span> <span class="nb">cp </span>make/ <span class="nb">.</span>
<span class="nb">echo</span> <span class="s2">"USE_CUDA=1"</span> <span class="o">&gt;&gt;</span>
<span class="nb">echo</span> <span class="s2">"USE_CUDA_PATH=/usr/local/cuda"</span> <span class="o">&gt;&gt;</span>
<span class="nb">echo</span> <span class="s2">"USE_CUDNN=1"</span> <span class="o">&gt;&gt;</span>
<span class="nb">echo</span> <span class="s2">"USE_BLAS=atlas"</span> <span class="o">&gt;&gt;</span>
<span class="nb">echo</span> <span class="s2">"USE_DIST_KVSTORE = 1"</span> <span class="o">&gt;&gt;</span>
<span class="nb">echo</span> <span class="s2">"USE_S3=1"</span> <span class="o">&gt;&gt;</span>
make <span class="nt">-j</span><span class="si">$(</span><span class="nb">nproc</span><span class="si">)</span>
<p>To test whether everything is installed properly, we can try training a convolutional neural network (CNN) on the MNIST dataset using a GPU:</p>
<div class="highlight"><pre><code class="language-bash" data-lang="bash">python example/image-classification/
<p>If you&#39;ve placed the MNIST data on <code>s3://dmlc/mnist</code>, you can read the data stored on Amazon S3 directly with the following command:</p>
<div class="highlight"><pre><code class="language-bash" data-lang="bash"><span class="nb">sed</span> <span class="nt">-i</span>.bak <span class="s2">"s!data_dir = 'data'!data_dir = 's3://dmlc/mnist'!"</span> example/image-classification/
<p><strong><em>Note:</em></strong> You can use <code>sudo ln /dev/null /dev/raw1394</code> to fix the opencv error <code>libdc1394 error: Failed to initialize libdc1394</code>.</p>
<h3 id="set-up-an-ec2-gpu-cluster-for-distributed-training">Set Up an EC2 GPU Cluster for Distributed Training</h3>
<p>A cluster consists of multiple computers.
You can use one computer with <em>MXNet</em> installed as the root computer for submitting jobs, and then launch several
slave computers to run the jobs. For example, launch multiple instances using an
AMI, e.g.,
<a href="">ami-12fd8178</a>,
with dependencies installed. There are two options:</p>
<li><p>Make all slaves&#39; ports accessible (same for the root) by setting type: All TCP,
Source: Anywhere in Configure Security Group.</p></li>
<li><p>Use the same <code>pem</code> as the root computer to access all slave computers, and
then copy the <code>pem</code> file into the root computer&#39;s <code>~/.ssh/id_rsa</code>. If you do this, all slave computers can be accessed with SSH from the root.</p></li>
<p>Now, run the CNN on multiple computers. Assume that we are on a working
directory of the root computer, such as <code>~/train</code>, and MXNet is built as <code>~/mxnet</code>.</p>
<li>Pack the <em>MXNet</em> Python library into this working directory for easy synchronization:
<div class="highlight"><pre><code class="language-bash" data-lang="bash"> <span class="nb">cp</span> <span class="nt">-r</span> ~/mxnet/python/mxnet <span class="nb">.</span>
<span class="nb">cp</span> ~/mxnet/lib/ mxnet/
<p>And then copy the training program:</p>
<div class="highlight"><pre><code class="language-bash" data-lang="bash"> <span class="nb">cp</span> ~/mxnet/example/image-classification/<span class="k">*</span>.py <span class="nb">.</span>
<span class="nb">cp</span> <span class="nt">-r</span> ~/mxnet/example/image-classification/common <span class="nb">.</span>
<li>Prepare a host file with all slaves&#39; private IPs. For example, <code>cat hosts</code>:</li>
<div class="highlight"><pre><code class="language-bash" data-lang="bash">
<li>Assuming that there are two computers, train the CNN using two workers:</li>
<div class="highlight"><pre><code class="language-bash" data-lang="bash"> ../../tools/ <span class="nt">-n</span> 2 <span class="nt">-H</span> hosts <span class="nt">--sync-dir</span> /tmp/mxnet python <span class="nt">--kv-store</span> dist_sync
<p><strong><em>Note:</em></strong> Sometimes the jobs linger at the slave computers even though you&#39;ve pressed <code>Ctrl-c</code>
at the root node. To terminate them, use the following command:</p>
<div class="highlight"><pre><code class="language-bash" data-lang="bash"><span class="nb">cat </span>hosts | xargs <span class="nt">-I</span><span class="o">{}</span> ssh <span class="nt">-o</span> <span class="nv">StrictHostKeyChecking</span><span class="o">=</span>no <span class="o">{}</span> <span class="s1">'uname -a; pgrep python | xargs kill -9'</span>
<p><strong><em>Note:</em></strong> The preceding example is very simple to train and therefore isn&#39;t a good
benchmark for distributed training. Consider using other <a href="">examples</a>.</p>
<h3 id="more-options">More Options</h3>
<h4 id="use-multiple-data-shards">Use Multiple Data Shards</h4>
<p>It is common to pack a dataset into multiple files, especially when working in a distributed environment.
<em>MXNet</em> supports direct loading from multiple data shards.
Put all of the record files into a folder, and point the data path to the folder.</p>
<h4 id="use-yarn-and-sge">Use YARN and SGE</h4>
<p>Although using SSH can be simple when you don&#39;t have a cluster scheduling framework,
<em>MXNet</em> is designed to be portable to various platforms.<br>
We provide scripts available in <a href="">tracker</a>
to allow running on other cluster frameworks, including Hadoop (YARN) and SGE.
We welcome community contributions of examples that run <em>MXNet</em> on your favorite distributed platform.</p>
</main><footer class="site-footer h-card">
<div class="wrapper">
<div class="row">
<div class="col-4">
<h4 class="footer-category-title">Resources</h4>
<ul class="contact-list">
<li><a href="/versions/1.9.1/community/contribute#mxnet-dev-communications">Mailing lists</a></li>
<li><a href="">Developer Wiki</a></li>
<li><a href="">Jira Tracker</a></li>
<li><a href="">Github Roadmap</a></li>
<li><a href="">Blog</a></li>
<li><a href="">Forum</a></li>
<li><a href="/versions/1.9.1/community/contribute">Contribute</a></li>
<div class="col-4"><ul class="social-media-list"><li><a href=""><svg class="svg-icon"><use xlink:href="/versions/1.9.1/assets/minima-social-icons.svg#github"></use></svg> <span class="username">apache/incubator-mxnet</span></a></li><li><a href=""><svg class="svg-icon"><use xlink:href="/versions/1.9.1/assets/minima-social-icons.svg#twitter"></use></svg> <span class="username">apachemxnet</span></a></li><li><a href=""><svg class="svg-icon"><use xlink:href="/versions/1.9.1/assets/minima-social-icons.svg#youtube"></use></svg> <span class="username">apachemxnet</span></a></li></ul>
<div class="col-4 footer-text">
<p>A flexible and efficient library for deep learning.</p>
<footer class="site-footer2">
<div class="wrapper">
<div class="row">
<div class="col-3">
<img src="/versions/1.9.1/assets/img/apache_incubator_logo.png" class="footer-logo col-2" alt="Apache Incubator">
<div class="footer-bottom-warning col-9">
<p>Apache MXNet is an effort undergoing incubation at <a href="">The Apache Software Foundation</a> (ASF), <span
style="font-weight:bold">sponsored by the <i>Apache Incubator</i></span>. Incubation is required
of all newly accepted projects until a further review indicates that the infrastructure,
communications, and decision making process have stabilized in a manner consistent with other
successful ASF projects. While incubation status is not necessarily a reflection of the completeness
or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
</p><p>"Copyright © 2017-2022, The Apache Software Foundation. Apache MXNet, MXNet, Apache, the Apache
feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the
Apache Software Foundation."</p>