<!DOCTYPE html>
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<html lang="en"><head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="/versions/1.9.1/assets/img/mxnet-icon.png" rel="icon" type="image/png"><!-- Begin Jekyll SEO tag v2.6.1 -->
<title>Environment Variables | Apache MXNet</title>
<meta name="generator" content="Jekyll v3.8.6" />
<meta property="og:title" content="Environment Variables" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="A flexible and efficient library for deep learning." />
<meta property="og:description" content="A flexible and efficient library for deep learning." />
<link rel="canonical" href="https://mxnet.apache.org/versions/1.9.1/api/faq/env_var" />
<meta property="og:url" content="https://mxnet.apache.org/versions/1.9.1/api/faq/env_var" />
<meta property="og:site_name" content="Apache MXNet" />
<script type="application/ld+json">
{"description":"A flexible and efficient library for deep learning.","headline":"Environment Variables","@type":"WebPage","url":"https://mxnet.apache.org/versions/1.9.1/api/faq/env_var","@context":"https://schema.org"}</script>
<!-- End Jekyll SEO tag -->
<link rel="stylesheet" href="/versions/1.9.1/assets/docsearch.min.css" /><link rel="stylesheet" href="/versions/1.9.1/assets/main.css"><link type="application/atom+xml" rel="alternate" href="https://mxnet.apache.org/versions/1.9.1/feed.xml" title="Apache MXNet" /><!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '23']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
<script src="/versions/1.9.1/assets/js/jquery-3.3.1.min.js"></script>
<script src="/versions/1.9.1/assets/js/docsearch.min.js"></script><script src="/versions/1.9.1/assets/js/globalSearch.js" defer></script>
<script src="/versions/1.9.1/assets/js/clipboard.js" defer></script>
<script src="/versions/1.9.1/assets/js/copycode.js" defer></script></head>
<body><header class="site-header" role="banner">
<script>
$(document).ready(function () {
// HEADER OPACITY LOGIC
function opacity_header() {
var value = "rgba(4,140,204," + ($(window).scrollTop() / 300 + 0.4) + ")"
$('.site-header').css("background-color", value)
}
$(window).scroll(function () {
opacity_header()
})
opacity_header();
// MENU SELECTOR LOGIC
$('.page-link').each( function () {
if (window.location.href.includes(this.href)) {
$(this).addClass("page-current");
}
});
})
</script>
<div class="wrapper">
<a class="site-title" rel="author" href="/versions/1.9.1/"><img
src="/versions/1.9.1/assets/img/mxnet_logo.png" class="site-header-logo"></a>
<nav class="site-nav">
<input type="checkbox" id="nav-trigger" class="nav-trigger"/>
<label for="nav-trigger">
<span class="menu-icon">
<svg viewBox="0 0 18 15" width="18px" height="15px">
<path d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.032C17.335,0,18,0.665,18,1.484L18,1.484z M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.032C17.335,6.031,18,6.696,18,7.516L18,7.516z M18,13.516C18,14.335,17.335,15,16.516,15H1.484 C0.665,15,0,14.335,0,13.516l0,0c0-0.82,0.665-1.483,1.484-1.483h15.032C17.335,12.031,18,12.695,18,13.516L18,13.516z"/>
</svg>
</span>
</label>
<div class="gs-search-border">
<div id="gs-search-icon"></div>
<form id="global-search-form">
<input id="global-search" type="text" title="Search" placeholder="Search" />
<div id="global-search-dropdown-container">
<button class="gs-current-version btn" type="button" data-toggle="dropdown">
<span id="gs-current-version-label">1.9.1</span>
<svg class="gs-dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true">
<path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path>
</svg>
</button>
<ul class="gs-opt-group gs-version-dropdown">
<li class="gs-opt gs-versions">master</li>
<li class="gs-opt gs-versions active">1.9.1</li>
<li class="gs-opt gs-versions">1.8.0</li>
<li class="gs-opt gs-versions">1.7.0</li>
<li class="gs-opt gs-versions">1.6.0</li>
<li class="gs-opt gs-versions">1.5.0</li>
<li class="gs-opt gs-versions">1.4.1</li>
<li class="gs-opt gs-versions">1.3.1</li>
<li class="gs-opt gs-versions">1.2.1</li>
<li class="gs-opt gs-versions">1.1.0</li>
<li class="gs-opt gs-versions">1.0.0</li>
<li class="gs-opt gs-versions">0.12.1</li>
<li class="gs-opt gs-versions">0.11.0</li>
</ul>
</div>
<span id="global-search-close">x</span>
</form>
</div>
<div class="trigger">
<div id="global-search-mobile-border">
<div id="gs-search-icon-mobile"></div>
<input id="global-search-mobile" placeholder="Search..." type="text"/>
<div id="global-search-dropdown-container-mobile">
<button class="gs-current-version-mobile btn" type="button" data-toggle="dropdown">
<svg class="gs-dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true">
<path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path>
</svg>
</button>
<ul class="gs-opt-group gs-version-dropdown-mobile">
<li class="gs-opt gs-versions">master</li>
<li class="gs-opt gs-versions active">1.9.1</li>
<li class="gs-opt gs-versions">1.8.0</li>
<li class="gs-opt gs-versions">1.7.0</li>
<li class="gs-opt gs-versions">1.6.0</li>
<li class="gs-opt gs-versions">1.5.0</li>
<li class="gs-opt gs-versions">1.4.1</li>
<li class="gs-opt gs-versions">1.3.1</li>
<li class="gs-opt gs-versions">1.2.1</li>
<li class="gs-opt gs-versions">1.1.0</li>
<li class="gs-opt gs-versions">1.0.0</li>
<li class="gs-opt gs-versions">0.12.1</li>
<li class="gs-opt gs-versions">0.11.0</li>
</ul>
</div>
</div>
<a class="page-link" href="/versions/1.9.1/get_started">Get Started</a>
<a class="page-link" href="/versions/1.9.1/features">Features</a>
<a class="page-link" href="/versions/1.9.1/ecosystem">Ecosystem</a>
<a class="page-link" href="/versions/1.9.1/api">Docs & Tutorials</a>
<a class="page-link" href="/versions/1.9.1/trusted_by">Trusted By</a>
<a class="page-link" href="https://github.com/apache/mxnet">GitHub</a>
<div class="dropdown" style="min-width:100px">
<span class="dropdown-header">Apache
<svg class="dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content" style="min-width:250px">
<a href="https://www.apache.org/foundation/">Apache Software Foundation</a>
<a href="https://www.apache.org/licenses/">License</a>
<a href="/versions/1.9.1/api/faq/security.html">Security</a>
<a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy</a>
<a href="https://www.apache.org/events/current-event">Events</a>
<a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</div>
</div>
<div class="dropdown">
<span class="dropdown-header">1.9.1
<svg class="dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content">
<a href="/">master</a>
<a class="dropdown-option-active" href="/versions/1.9.1/">1.9.1</a>
<a href="/versions/1.8.0/">1.8.0</a>
<a href="/versions/1.7.0/">1.7.0</a>
<a href="/versions/1.6.0/">1.6.0</a>
<a href="/versions/1.5.0/">1.5.0</a>
<a href="/versions/1.4.1/">1.4.1</a>
<a href="/versions/1.3.1/">1.3.1</a>
<a href="/versions/1.2.1/">1.2.1</a>
<a href="/versions/1.1.0/">1.1.0</a>
<a href="/versions/1.0.0/">1.0.0</a>
<a href="/versions/0.12.1/">0.12.1</a>
<a href="/versions/0.11.0/">0.11.0</a>
</div>
</div>
</div>
</nav>
</div>
</header>
<main class="page-content" aria-label="Content">
<article class="post">
<header class="post-header wrapper">
<h1 class="post-title">Environment Variables</h1>
</header>
<div class="post-content">
<div class="wrapper">
<div class="row">
<div class="col-3 docs-side-bar">
<h3 style="text-transform: capitalize; padding-left:10px">faq</h3>
<ul>
<li><a href="/versions/1.9.1/api/faq/add_op_in_backend">A Beginner's Guide to Implementing Operators in MXNet Backend</a></li>
<li><a href="/versions/1.9.1/api/faq/caffe">Convert from Caffe to MXNet</a></li>
<li><a href="/versions/1.9.1/api/faq/cloud">MXNet on the Cloud</a></li>
<li><a href="/versions/1.9.1/api/faq/distributed_training">Distributed Training in MXNet</a></li>
<li><a href="/versions/1.9.1/api/faq/env_var">Environment Variables</a></li>
<li><a href="/versions/1.9.1/api/faq/float16">Float16</a></li>
<li><a href="/versions/1.9.1/api/faq/large_tensor_support">Using MXNet with Large Tensor Support</a></li>
<li><a href="/versions/1.9.1/api/faq/model_parallel_lstm">Model Parallel</a></li>
<li><a href="/versions/1.9.1/api/faq/multi_device">Data Parallelism with Multiple CPU/GPUs on MXNet</a></li>
<li><a href="/versions/1.9.1/api/faq/new_op">Create New Operators</a></li>
<li><a href="/versions/1.9.1/api/faq/nnpack">NNPACK for Multi-Core CPU Support in MXNet</a></li>
<li><a href="/versions/1.9.1/api/faq/perf">Some Tips for Improving MXNet Performance</a></li>
<li><a href="/versions/1.9.1/api/faq/recordio">Create a Dataset Using RecordIO</a></li>
<li><a href="/versions/1.9.1/api/faq/s3_integration">Use data from S3 for training</a></li>
<li><a href="/versions/1.9.1/api/faq/security">MXNet Security Best Practices</a></li>
<li><a href="/versions/1.9.1/api/faq/smart_device">Deep Learning at the Edge</a></li>
<li><a href="/versions/1.9.1/api/faq/visualize_graph">Visualize Neural Networks</a></li>
<li><a href="/versions/1.9.1/api/faq/why_mxnet">Why MXNet came to be?</a></li>
</ul>
</div>
<div class="col-9">
<h1 id="environment-variables">Environment Variables</h1>
<p>MXNet has several settings that you can change with environment variables.
Typically, you wouldn&#39;t need to change these settings, but they are listed here for reference.</p>
<p>For example, you can set these environment variables in Linux or macOS as follows:
<code>
export MXNET_GPU_WORKER_NTHREADS=3
</code></p>
<p>Or in PowerShell:
<code>
$env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
</code></p>
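<p>Many of these variables are read when the MXNet library is loaded (others when first used), so they can also be set from Python, provided this happens before importing mxnet. A minimal sketch (the particular variables here are just illustrative):</p>
<pre><code>import os

# Must run before `import mxnet`: the library reads these
# variables when it is loaded.
os.environ['MXNET_GPU_WORKER_NTHREADS'] = '3'
os.environ['MXNET_STORAGE_FALLBACK_LOG_VERBOSE'] = '0'

import mxnet as mx
</code></pre>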
<h2 id="variables-controlling-the-execution-environment">Variables controlling the execution environment</h2>
<ul>
<li>MXNET_LIBRARY_PATH
Absolute path indicating where the MXNet dynamic library is located; this is the absolute
path to <code>libmxnet.so</code> or <code>libmxnet.dll</code>, depending on the platform. The logic for loading the
library is in <code>python/mxnet/libinfo.py</code>.</li>
</ul>
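<p>To check which library file your installation would load, you can call the lookup helper in <code>libinfo.py</code> directly; a small sketch (assuming a standard Python installation of MXNet):</p>
<pre><code>from mxnet import libinfo

# find_lib_path() returns the candidate paths to libmxnet;
# the first entry is the library that actually gets loaded.
print(libinfo.find_lib_path()[0])
</code></pre>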
<h2 id="set-the-number-of-threads">Set the Number of Threads</h2>
<ul>
<li>MXNET_GPU_WORKER_NTHREADS
<ul>
<li>Values: Int <code>(default=2)</code></li>
<li>The maximum number of threads to use on each GPU. This parameter is used to parallelize the computation within a single GPU card.</li>
</ul></li>
<li>MXNET_GPU_COPY_NTHREADS
<ul>
<li>Values: Int <code>(default=2)</code></li>
<li>The maximum number of concurrent threads that do the memory copy job on each GPU.</li>
</ul></li>
<li>MXNET_CPU_WORKER_NTHREADS
<ul>
<li>Values: Int <code>(default=1)</code></li>
<li>The maximum number of scheduling threads on CPU. It specifies how many operators can be run in parallel. Note that most CPU operators are parallelized by OpenMP. To change the number of threads used by individual operators, please set <code>OMP_NUM_THREADS</code> instead.</li>
</ul></li>
<li>MXNET_CPU_PRIORITY_NTHREADS
<ul>
<li>Values: Int <code>(default=4)</code></li>
<li>The number of threads given to prioritized CPU jobs.</li>
</ul></li>
<li>MXNET_CPU_NNPACK_NTHREADS
<ul>
<li>Values: Int <code>(default=4)</code></li>
<li>The number of threads used for NNPACK. The NNPACK package aims to provide high-performance implementations of some layers for multi-core CPUs. Check out <a href="/versions/1.9.1/api/faq/nnpack">NNPACK</a> to learn more about it.</li>
</ul></li>
<li>MXNET_MP_WORKER_NTHREADS
<ul>
<li>Values: Int <code>(default=1)</code></li>
<li>The number of scheduling threads on CPU given to multiprocess workers. Enlarging this number allows more operators to run in parallel in individual workers, but please consider reducing the overall <code>num_workers</code> to avoid thread contention (not available on Windows).</li>
</ul></li>
<li>MXNET_MP_OPENCV_NUM_THREADS
<ul>
<li>Values: Int <code>(default=0)</code></li>
<li>The number of OpenCV execution threads given to multiprocess workers. OpenCV multithreading is disabled if <code>MXNET_MP_OPENCV_NUM_THREADS</code> &lt; 1 (default). Enlarging this number may boost the performance of individual workers when executing underlying OpenCV functions, but please consider reducing the overall <code>num_workers</code> to avoid thread contention (not available on Windows).</li>
</ul></li>
</ul>
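<p>For example, on a CPU-only machine you might let two operators run concurrently while capping the OpenMP threads each one uses, so the two settings together do not oversubscribe the cores. A sketch (the thread counts are illustrative, not recommendations):</p>
<pre><code>import os

# Allow two operators to be scheduled in parallel ...
os.environ['MXNET_CPU_WORKER_NTHREADS'] = '2'
# ... and give each OpenMP-parallelized operator four threads,
# so at most 2 * 4 = 8 cores are busy at once.
os.environ['OMP_NUM_THREADS'] = '4'

import mxnet as mx
</code></pre>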
<h2 id="memory-options">Memory Options</h2>
<ul>
<li>MXNET_EXEC_ENABLE_INPLACE
<ul>
<li>Values: true or false <code>(default=true)</code></li>
<li>Whether to enable in-place optimization in symbolic execution. Check out <a href="/versions/1.9.1/api/architecture/note_memory#in-place-operations">in-place optimization</a> to learn more about it.</li>
</ul></li>
<li>NNVM_EXEC_MATCH_RANGE
<ul>
<li>Values: Int <code>(default=16)</code></li>
<li>The approximate matching scale in the symbolic execution memory allocator.</li>
<li>Set this to 0 if you don&#39;t want to enable memory sharing between graph nodes (for debugging purposes).</li>
<li>This variable affects the outcome of memory planning, so MXNet sweeps the range [1, NNVM_EXEC_MATCH_RANGE] and selects the best value.</li>
</ul></li>
<li>MXNET_EXEC_NUM_TEMP
<ul>
<li>Values: Int <code>(default=1)</code></li>
<li>The maximum number of temporary workspaces to allocate to each device. This limits the number of workspace replicas and in turn reduces memory usage.</li>
<li>Setting this to a small number can save GPU memory. It will also likely decrease the level of parallelism, which is usually acceptable.</li>
<li>MXNet internally uses a graph coloring algorithm to <a href="/versions/1.9.1/api/architecture/note_memory">optimize memory consumption</a>.</li>
<li>This parameter also determines the number of matching colors in the graph, and in turn how much parallelism one can get on each GPU. Color-based matching usually costs more memory but also enables more parallelism.</li>
</ul></li>
<li><p>MXNET_GPU_MEM_POOL_RESERVE</p>
<ul>
<li>Values: Int <code>(default=5)</code></li>
<li>The percentage of GPU memory to reserve for things other than the GPU array, such as kernel launches or cuDNN handle space.</li>
<li>If you see a strange out-of-memory error from a kernel launch after multiple iterations, try setting this to a larger value.</li>
</ul></li>
<li><p>MXNET_GPU_MEM_POOL_TYPE</p>
<ul>
<li>Values: String <code>(default=Naive)</code></li>
<li>The type of memory pool.</li>
<li>Choices:</li>
<li>Naive: A simple memory pool that allocates memory of the exact requested size and caches memory buffers. If a buffered memory chunk matches the size of a new request, the chunk from the memory pool will be returned and reused.</li>
<li>Round: A memory pool that always rounds the requested memory size and allocates memory of the rounded size. MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF defines how to round up a memory size. Caching and allocating buffered memory works in the same way as the naive memory pool.</li>
<li>Unpooled: No memory pool is used.</li>
</ul></li>
<li><p>MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF</p>
<ul>
<li>Values: Int <code>(default=24)</code></li>
<li>The cutoff threshold that decides the rounding strategy. Let&#39;s denote the threshold as T. If the memory size is smaller than <code>2 ** T</code> (by default, <code>2 ** 24</code> = 16 MB), it rounds to the smallest <code>2 ** n</code> that is larger than the requested memory size; if the memory size is larger than <code>2 ** T</code>, it rounds to the next <code>k * 2 ** T</code>.</li>
</ul></li>
<li><p>MXNET_GPU_MEM_LARGE_ALLOC_ROUND_SIZE</p>
<ul>
<li>Values: Int <code>(default=2097152)</code></li>
<li>When using the naive pool type, memory allocations larger than this threshold are rounded up to a multiple of this value.</li>
<li>The default was chosen to minimize global memory fragmentation within the GPU driver. Set this to 1 to disable.</li>
</ul></li>
</ul>
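<p>As an illustration of the Round strategy described above, the sketch below reproduces the rounding rule in plain Python (it mirrors the description only, not MXNet&#39;s internal allocator code):</p>
<pre><code>def round_alloc(nbytes, cutoff=24):
    """Rounding rule of the 'Round' pool: below 2**cutoff round up to
    the next power of two, above it round up to a multiple of 2**cutoff."""
    threshold = 2 ** cutoff              # default: 2**24 bytes = 16 MB
    if nbytes &lt; threshold:
        size = 1
        while size &lt; nbytes:             # smallest 2**n covering the request
            size *= 2
        return size
    k = -(-nbytes // threshold)          # ceil(nbytes / threshold)
    return k * threshold                 # next multiple of 2**cutoff

print(round_alloc(5 * 2**20))            # 5 MB request rounds to 8 MB
print(round_alloc(20 * 2**20))           # 20 MB request rounds to 32 MB
</code></pre>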
<h2 id="engine-type">Engine Type</h2>
<ul>
<li>MXNET_ENGINE_TYPE
<ul>
<li>Values: String <code>(default=ThreadedEnginePerDevice)</code></li>
<li>The type of underlying execution engine of MXNet.</li>
<li>Choices:</li>
<li>NaiveEngine: A very simple engine that uses the master thread to do the computation synchronously. Setting this engine disables multi-threading. You can use this type for debugging in case of any error. The backtrace will give you the series of calls that led to the error. Remember to set MXNET_ENGINE_TYPE back to empty after debugging.</li>
<li>ThreadedEngine: A threaded engine that uses a global thread pool to schedule jobs.</li>
<li>ThreadedEnginePerDevice: A threaded engine that allocates a thread per GPU and executes jobs asynchronously.</li>
</ul></li>
</ul>
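<p>A common debugging pattern is to switch to the NaiveEngine so that errors surface synchronously at the operator that caused them; a sketch:</p>
<pre><code>import os

# Run every operator synchronously on the master thread, so the
# Python backtrace points at the failing call.
os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine'

import mxnet as mx
# ... reproduce the failing computation here, then unset
# MXNET_ENGINE_TYPE again once debugging is done.
</code></pre>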
<h2 id="execution-options">Execution Options</h2>
<ul>
<li>MXNET_EXEC_BULK_EXEC_INFERENCE
<ul>
<li>Values: 0(false) or 1(true) <code>(default=1)</code></li>
<li>If set to <code>1</code>, during inference MXNet executes the entire computation graph in bulk mode, which reduces kernel launch gaps in between symbolic operators.</li>
</ul></li>
<li>MXNET_EXEC_BULK_EXEC_TRAIN
<ul>
<li>Values: 0(false) or 1(true) <code>(default=1)</code></li>
<li>If set to <code>1</code>, during training MXNet executes the computation graph as several subgraphs in bulk mode.</li>
</ul></li>
<li>MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN
<ul>
<li>Values: Int <code>(default=15)</code></li>
<li>The maximum number of nodes in the subgraph executed in bulk during training (not inference). Setting this to a larger number may reduce the degree of parallelism for multi-GPU training.</li>
</ul></li>
<li>MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN_FWD
<ul>
<li>Values: Int <code>(default=&lt;value of MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN&gt;)</code></li>
<li>The maximum number of nodes in the subgraph executed in bulk during training (not inference) in the forward pass.</li>
</ul></li>
<li>MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN_BWD
<ul>
<li>Values: Int <code>(default=&lt;value of MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN&gt;)</code></li>
<li>The maximum number of nodes in the subgraph executed in bulk during training (not inference) in the backward pass.</li>
</ul></li>
<li>MXNET_ENABLE_CUDA_GRAPHS
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>If set to <code>1</code>, MXNet will utilize CUDA graphs when executing models on the GPU when possible.</li>
<li>For CUDA graphs execution, one needs to use either a symbolic model or a Gluon model hybridized with the options <code>static_alloc</code> and <code>static_shape</code> set to True.</li>
</ul></li>
<li>MXNET_CUDA_GRAPHS_VERBOSE
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>If set to <code>1</code>, the CUDA graphs executor will provide information about the graph being captured and executed.</li>
</ul></li>
<li>MXNET_CUDA_GRAPHS_MAX_LOG_ENTRIES
<ul>
<li>Values: Int <code>(default=0)</code></li>
<li>The maximum number of log messages generated by the CUDA graphs executor.</li>
</ul></li>
</ul>
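<p>For instance, to try CUDA graphs with a Gluon model, enable the variable and hybridize the model with <code>static_alloc</code> and <code>static_shape</code> as described above. A sketch (the model and input shape are placeholders):</p>
<pre><code>import os
os.environ['MXNET_ENABLE_CUDA_GRAPHS'] = '1'   # read at library load time

import mxnet as mx
from mxnet.gluon.model_zoo import vision

net = vision.resnet18_v2(pretrained=True, ctx=mx.gpu(0))
# CUDA graphs require static allocation and static shapes.
net.hybridize(static_alloc=True, static_shape=True)
out = net(mx.nd.zeros((1, 3, 224, 224), ctx=mx.gpu(0)))
</code></pre>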
<h2 id="control-the-data-communication">Control the Data Communication</h2>
<ul>
<li><p>MXNET_KVSTORE_REDUCTION_NTHREADS</p>
<ul>
<li>Values: Int <code>(default=4)</code></li>
<li>The number of CPU threads used for summing up big arrays on a single machine.</li>
<li>This will also be used for <code>dist_sync</code> kvstore to sum up arrays from different contexts on a single machine.</li>
<li>This does not affect summing up of arrays from different machines on servers.</li>
<li>Summing up of arrays for <code>dist_sync_device</code> kvstore is also unaffected as that happens on GPUs.</li>
</ul></li>
<li><p>MXNET_KVSTORE_BIGARRAY_BOUND</p>
<ul>
<li>Values: Int <code>(default=1000000)</code></li>
<li>The minimum size of a &quot;big array&quot;.</li>
<li>When the array size is bigger than this threshold, MXNET_KVSTORE_REDUCTION_NTHREADS threads are used for reduction.</li>
<li>This parameter is also used as a load balancer in kvstore. It controls when to partition a single weight across all the servers. If the size of a single weight is less than MXNET_KVSTORE_BIGARRAY_BOUND, it is sent to a single randomly picked server; otherwise, it is partitioned across all the servers.</li>
</ul></li>
<li><p>MXNET_KVSTORE_USETREE</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>If true, MXNet tries to use tree reduction for Push and Pull communication.</li>
<li>Otherwise, MXNet uses the default Push and Pull implementation.</li>
<li>Tree reduction technology has been shown to be faster than the standard <code>--kv-store device</code> Push/Pull and <code>--kv-store nccl</code> Push/Pull for small batch sizes.</li>
</ul></li>
<li><p>MXNET_KVSTORE_LOGTREE</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>If true and MXNET_KVSTORE_USETREE is set to 1, MXNet will log the reduction trees that have been generated.</li>
</ul></li>
<li><p>MXNET_KVSTORE_TREE_ARRAY_BOUND</p>
<ul>
<li>Values: Int <code>(default=10000000)</code></li>
<li>The minimum size of a &quot;big array&quot;.</li>
<li>When the array size is bigger than this threshold and MXNET_KVSTORE_USETREE is set to 1, multiple trees are used to load balance the big gradient being communicated in order to better saturate link bandwidth.</li>
<li>Note: This environment variable only takes effect if Tree KVStore is being used (MXNET_KVSTORE_USETREE=1).</li>
</ul></li>
<li><p>MXNET_KVSTORE_TREE_BACKTRACK</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>If true and MXNET_KVSTORE_USETREE is set to 1, MXNet tries to use backtracking to generate the trees required for tree reduction.</li>
<li>If false and MXNET_KVSTORE_USETREE is set to 1, MXNet tries to use the Kernighan-Lin heuristic to generate the trees required for tree reduction.</li>
</ul></li>
<li><p>MXNET_KVSTORE_TREE_LINK_USAGE_PENALTY</p>
<ul>
<li>Values: Float <code>(default=0.7)</code></li>
<li>The multiplicative penalty term applied to a link that has already been used once.</li>
</ul></li>
<li><p>MXNET_ENABLE_GPU_P2P</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=1)</code></li>
<li>If true, MXNet tries to use GPU peer-to-peer communication, if available on your device,
when kvstore&#39;s type is <code>device</code>.</li>
</ul></li>
<li><p>MXNET_UPDATE_ON_KVSTORE</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=1)</code></li>
<li>If true, weight updates are performed during the communication step, if possible.</li>
</ul></li>
<li><p>MXNET_KVSTORE_SLICE_THRESHOLD</p>
<ul>
<li>Values: Int <code>(default=40000)</code></li>
<li>The maximum size of an NDArray slice in terms of number of parameters.</li>
<li>This parameter is used to slice an NDArray before synchronizing through P3Store (dist_p3).</li>
</ul></li>
</ul>
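<p>For example, to experiment with tree reduction for multi-GPU training, enable it before the kvstore is created; a sketch:</p>
<pre><code>import os

# Use tree reduction for Push/Pull and log the generated trees.
os.environ['MXNET_KVSTORE_USETREE'] = '1'
os.environ['MXNET_KVSTORE_LOGTREE'] = '1'

import mxnet as mx
kv = mx.kvstore.create('device')   # tree reduction applies to this kvstore type
</code></pre>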
<h2 id="memonger">Memonger</h2>
<ul>
<li>MXNET_BACKWARD_DO_MIRROR
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>MXNet uses the mirroring concept to save memory. Normally, the backward pass needs some forward inputs, which are kept in memory, but you can choose to release these saved inputs and recompute them during the backward pass when needed. This trades computation for memory consumption.</li>
<li>This parameter decides whether to do <code>mirror</code> during training for saving device memory.</li>
<li>When set to <code>1</code>, during forward propagation, the graph executor will <code>mirror</code> some layers&#39; feature maps and drop others, re-computing the dropped feature maps when needed.</li>
<li><code>MXNET_BACKWARD_DO_MIRROR=1</code> will save 30%~50% of device memory while retaining about 95% of the running speed.</li>
<li>One extension of <code>mirror</code> in MXNet, called <a href="https://arxiv.org/abs/1604.06174">memonger technology</a>, uses only O(sqrt(N)) memory at 75% running speed. Check out the code <a href="https://github.com/dmlc/mxnet-memonger">here</a>.</li>
</ul></li>
</ul>
<h2 id="control-the-profiler">Control the profiler</h2>
<p>The following environment variables can be used to profile the application without changing code. Execution options may affect the granularity of the profiling result. If you need a profiling result for every operator, please set <code>MXNET_EXEC_BULK_EXEC_INFERENCE</code>, <code>MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN</code> and <code>MXNET_EXEC_BULK_EXEC_TRAIN</code> to 0.</p>
<ul>
<li><p>MXNET_PROFILER_AUTOSTART</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>If set to 1, MXNet starts the profiler automatically. The profiling result is stored in profile.json in the working directory.</li>
</ul></li>
<li><p>MXNET_PROFILER_MODE</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>If set to &#39;0&#39;, the profiler records the events of the symbolic operators.</li>
<li>If set to &#39;1&#39;, the profiler records the events of all operators.</li>
</ul></li>
</ul>
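<p>Putting these together, a per-operator profile can be captured without touching the training script itself; a sketch:</p>
<pre><code>import os

# Disable bulk execution so every operator shows up individually.
os.environ['MXNET_EXEC_BULK_EXEC_INFERENCE'] = '0'
os.environ['MXNET_EXEC_BULK_EXEC_TRAIN'] = '0'
# Start the profiler automatically and record all operators.
os.environ['MXNET_PROFILER_AUTOSTART'] = '1'
os.environ['MXNET_PROFILER_MODE'] = '1'

import mxnet as mx
# ... run the workload; the result lands in ./profile.json,
# which can be inspected e.g. with chrome://tracing.
</code></pre>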
<h2 id="interface-between-python-and-the-c-api">Interface between Python and the C API</h2>
<ul>
<li><p>MXNET_ENABLE_CYTHON</p>
<ul>
<li>Values: 0(false), 1(true) <code>(default=1)</code></li>
<li>If set to 0, MXNet uses ctypes to interface with the C API.</li>
<li>If set to 1, MXNet tries to use the Cython modules for ndarray and symbol. If this fails, ctypes is used or an error is raised, depending on MXNET_ENFORCE_CYTHON.</li>
</ul></li>
<li><p>MXNET_ENFORCE_CYTHON</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>This has an effect only if MXNET_ENABLE_CYTHON is 1.</li>
<li>If set to 0, MXNet falls back to ctypes if importing the Cython modules fails.</li>
<li>If set to 1, MXNet raises an error if importing the Cython modules fails.</li>
</ul></li>
</ul>
<p>If the Cython modules are used, <code>mx.nd._internal.NDArrayBase</code> must be <code>mxnet._cy3.ndarray.NDArrayBase</code> for Python 3 or <code>mxnet._cy2.ndarray.NDArrayBase</code> for Python 2.
If ctypes is used, it must be <code>mxnet._ctypes.ndarray.NDArrayBase</code>.</p>
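<p>Based on the module names above, a quick sanity check of which binding is active might look like this sketch:</p>
<pre><code>import mxnet as mx

# Prints 'mxnet._cy3.ndarray' when the Cython modules are in use,
# or 'mxnet._ctypes.ndarray' when ctypes is used.
print(mx.nd._internal.NDArrayBase.__module__)
</code></pre>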
<h2 id="logging">Logging</h2>
<ul>
<li>DMLC_LOG_STACK_TRACE_DEPTH
<ul>
<li>Values: Int <code>(default=0)</code></li>
<li>The depth of stack trace information to log when an exception occurs.</li>
</ul></li>
</ul>
<h2 id="other-environment-variables">Other Environment Variables</h2>
<ul>
<li><p>MXNET_GPU_WORKER_NSTREAMS</p>
<ul>
<li>Values: 1, or 2 <code>(default=1)</code></li>
<li>Determines the number of GPU streams available to operators for their functions.</li>
<li>Setting this to 2 may yield a modest performance increase, since ops like the cuDNN convolution op can then calculate their data- and weight-gradients in parallel.</li>
<li>Setting this to 2 may also increase a model&#39;s demand for GPU global memory.</li>
</ul></li>
<li><p>MXNET_CUDNN_AUTOTUNE_DEFAULT</p>
<ul>
<li>Values: 0, 1, or 2 <code>(default=1)</code></li>
<li>The default value of cuDNN auto-tuning for convolution layers.</li>
<li>A value of 0 means there is no auto-tuning to pick the convolution algo.</li>
<li>Performance tests are run to pick the convolution algo when the value is 1 or 2.</li>
<li>A value of 1 chooses the best algo within a limited workspace.</li>
<li>A value of 2 chooses the fastest algo, whose memory requirements may be larger than the default workspace threshold.</li>
</ul></li>
<li><p>MXNET_CUDA_ALLOW_TENSOR_CORE</p>
<ul>
<li>0(false) or 1(true) <code>(default=1)</code></li>
<li>If set to &#39;0&#39;, disallows Tensor Core use in CUDA ops.</li>
<li>If set to &#39;1&#39;, allows Tensor Core use in CUDA ops.</li>
<li>This variable can only be set once in a session.</li>
</ul></li>
<li><p>MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION</p>
<ul>
<li>0(false) or 1(true) <code>(default=0)</code></li>
<li>If set to &#39;0&#39;, disallows implicit type conversions to Float16 to use Tensor Cores.</li>
<li>If set to &#39;1&#39;, allows CUDA ops like RNN and Convolution to use TensorCores even with Float32 input data by using implicit type casting to Float16. Only has an effect if <code>MXNET_CUDA_ALLOW_TENSOR_CORE</code> is <code>1</code>.</li>
</ul></li>
<li><p>MXNET_CUDA_LIB_CHECKING</p>
<ul>
<li>0(false) or 1(true) <code>(default=1)</code></li>
<li>If set to &#39;0&#39;, disallows various runtime checks of the CUDA library version and the associated warning messages.</li>
<li>If set to &#39;1&#39;, permits these checks (e.g. compile vs. link mismatch, old version no longer CI-tested).</li>
</ul></li>
<li><p>MXNET_CUDNN_LIB_CHECKING</p>
<ul>
<li>0(false) or 1(true) <code>(default=1)</code></li>
<li>If set to &#39;0&#39;, disallows various runtime checks of the cuDNN library version and the associated warning messages.</li>
<li>If set to &#39;1&#39;, permits these checks (e.g. compile vs. link mismatch, old version no longer CI-tested).</li>
</ul></li>
<li><p>MXNET_GLUON_REPO</p>
<ul>
<li>Values: String <code>(default=&#39;https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/&#39;)</code></li>
<li>The repository url to be used for Gluon datasets and pre-trained models.</li>
</ul></li>
<li><p>MXNET_HOME</p>
<ul>
<li>Data directory in the filesystem for storage, for example when downloading Gluon models.</li>
<li>The default is <code>~/.mxnet</code> on *nix and <code>APPDATA/mxnet</code> on Windows.</li>
</ul></li>
<li><p>MXNET_MKLDNN_ENABLED</p>
<ul>
<li>Values: 0, 1 <code>(default=1)</code></li>
<li>Flag to enable or disable the MKLDNN accelerator. On by default.</li>
<li>Only applies to MXNet that has been compiled with MKLDNN (<code>pip install mxnet-mkl</code> or built from source with <code>USE_MKLDNN=1</code>).</li>
</ul></li>
<li><p>MXNET_MKLDNN_CACHE_NUM</p>
<ul>
<li>Values: Int <code>(default=-1)</code></li>
<li>Sets the number of elements that the MKLDNN cache can hold. The default of -1 means the cache size is unbounded. This should only be set if your model has variable input shapes, as the cache may otherwise grow without bound. The number represents the number of items in the cache and is proportional to the number of layers that use MKLDNN and the number of different input shapes.</li>
</ul></li>
<li><p>MXNET_ENFORCE_DETERMINISM</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>If set to true, MXNet will only use deterministic algorithms in forward and backward computation.
If no such algorithm exists given other constraints, MXNet will error out. This variable affects the choice
of CUDNN convolution algorithms. Please see <a href="https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html">CUDNN developer guide</a> for more details.</li>
</ul></li>
<li><p>MXNET_MKLDNN_FORCE_FC_AB_FORMAT</p>
<ul>
<li>Values: 0, 1 <code>(default=0)</code></li>
<li>If set to true, FullyConnected will use only the AB format for weights; thus MXNet won&#39;t use the BRGEMM implementation of FC on machines with AVX512-VNNI support, which requires a special weights format.</li>
</ul></li>
<li><p>MXNET_CPU_PARALLEL_SIZE</p>
<ul>
<li>Values: Int <code>(default=200000)</code></li>
<li>The minimum array size for calling parallel operations via OpenMP in the CPU context.</li>
<li>When the array size is bigger than or equal to this threshold, the operation implemented with OpenMP is executed with the recommended OMP thread count.</li>
<li>When the array size is less than this threshold, the operation is implemented naively in a single thread.</li>
</ul></li>
<li><p>MXNET_OPTIMIZER_AGGREGATION_SIZE</p>
<ul>
<li>Values: Int <code>(default=4)</code></li>
<li>Maximum value is 60.</li>
<li>This variable controls how many weights will be updated in a single call to the optimizer (for optimizers that support aggregation, currently limited to SGD).</li>
</ul></li>
<li><p>MXNET_CPU_TEMP_COPY</p>
<ul>
<li>Values: Int <code>(default=4)</code></li>
<li>This variable controls how many temporary memory resources to create for the CPU context for use in operators.</li>
</ul></li>
<li><p>MXNET_GPU_TEMP_COPY</p>
<ul>
<li>Values: Int <code>(default=1)</code></li>
<li>This variable controls how many temporary memory resources to create for each GPU context for use in operators.</li>
</ul></li>
<li><p>MXNET_CPU_PARALLEL_RAND_COPY</p>
<ul>
<li>Values: Int <code>(default=1)</code></li>
<li>This variable controls how many parallel random number generator resources to create for the CPU context for use in operators.</li>
</ul></li>
<li><p>MXNET_GPU_PARALLEL_RAND_COPY</p>
<ul>
<li>Values: Int <code>(default=4)</code></li>
<li>This variable controls how many parallel random number generator resources to create for each GPU context for use in operators.</li>
</ul></li>
<li><p>MXNET_GPU_CUDNN_DROPOUT_STATE_COPY</p>
<ul>
<li>Values: Int <code>(default=4)</code></li>
<li>This variable controls how many cuDNN dropout state resources to create for each GPU context for use in operators.</li>
</ul></li>
<li><p>MXNET_SUBGRAPH_BACKEND</p>
<ul>
<li>Values: String <code>(default=&quot;MKLDNN&quot;)</code> if MKLDNN is available, otherwise <code>(default=&quot;&quot;)</code></li>
<li>This variable controls the subgraph partitioning in MXNet.</li>
<li>This variable is used to perform MKL-DNN FP32 operator fusion and quantization. Please refer to the <a href="https://github.com/apache/mxnet/blob/v1.5.x/docs/tutorials/mkldnn/operator_list.md">MKL-DNN operator list</a> for how this variable is used and the list of fusion passes.</li>
<li>Set <code>MXNET_SUBGRAPH_BACKEND=NONE</code> to disable subgraph backend.</li>
</ul></li>
<li><p>MXNET_SAFE_ACCUMULATION</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>If this variable is set, accumulation will enter safe mode, meaning accumulation is done in a data type of higher precision than
the input data type, leading to more accurate accumulation results with a possible performance loss and backward compatibility loss.
For example, when the variable is set to 1(true), if the input data type is float16, then the accumulation will be done
in float32.</li>
<li>Model accuracies do not necessarily improve with this environment variable turned on.</li>
</ul></li>
<li><p>MXNET_USE_FUSION</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=1)</code></li>
<li>If this variable is set, MXNet will try fusing some of the operations (pointwise operations only for now).</li>
<li>It works in Symbolic execution as well as in Gluon models hybridized with <code>static_alloc=True</code> option.</li>
<li>Only applies to MXNet that has been compiled with CUDA (<code>pip install mxnet-cuXX</code> or built from source with <code>USE_CUDA=1</code>) and running on GPU.</li>
</ul></li>
<li><p>MXNET_FUSION_VERBOSE</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>Only applies to MXNet that has been compiled with CUDA and when <code>MXNET_USE_FUSION</code> option is enabled.</li>
<li>If this variable is set, MXNet will print the code for fused operators that it generated.</li>
</ul></li>
<li><p>MXNET_ELIMINATE_COMMON_EXPR</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=1)</code></li>
<li>If this variable is set, MXNet will simplify the computation graph, eliminating duplicated operations on the same inputs.</li>
</ul></li>
<li><p>MXNET_USE_MKLDNN_RNN</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=1)</code></li>
<li>This variable controls whether to use the MKL-DNN backend in the fused RNN operator for the CPU context. There are two fused implementations of the RNN operator in MXNet. The MKL-DNN implementation has better performance than the naive one, but the latter is currently more stable in the backward operation.</li>
</ul></li>
<li><p>MXNET_FC_TRUE_FP16</p>
<ul>
<li>Values: 0(false) or 1(true) <code>(default=0)</code></li>
<li>If this variable is set to true, MXNet will perform fp16 accumulation when using cuBLAS and the input datatype is set to float16. This could increase the speed of the computation, but might result in a loss of accuracy. This makes the setting useful mainly for inference use cases.</li>
</ul></li>
</ul>
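<p>As an illustration of MXNET_SAFE_ACCUMULATION, the sketch below computes the L2 norm of a large float16 array: the result (about 316.2) fits comfortably in float16, but the intermediate sum of squares (100000) would overflow a float16 accumulator.</p>
<pre><code>import os
os.environ['MXNET_SAFE_ACCUMULATION'] = '1'

import mxnet as mx

# With safe accumulation the sum of squares is carried out in float32,
# so the norm comes out as ~316.2 instead of inf.
x = mx.nd.ones((100000,), dtype='float16')
print(x.norm())
</code></pre>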
<h2 id="settings-for-minimum-memory-usage">Settings for Minimum Memory Usage</h2>
<ul>
<li>Make sure <code>min(MXNET_EXEC_NUM_TEMP, MXNET_GPU_WORKER_NTHREADS) = 1</code>
<ul>
<li>The default setting satisfies this.</li>
</ul></li>
</ul>
<h2 id="settings-for-more-gpu-parallelism">Settings for More GPU Parallelism</h2>
<ul>
<li>Set <code>MXNET_GPU_WORKER_NTHREADS</code> to a larger number (e.g., 2)
<ul>
<li>To reduce memory usage, consider setting <code>MXNET_EXEC_NUM_TEMP</code>.</li>
<li>This might not speed things up, especially for image applications, because the GPU is usually fully utilized even with serialized jobs.</li>
</ul></li>
</ul>
<h2 id="settings-for-controlling-omp-tuning">Settings for controlling OMP tuning</h2>
<ul>
<li><p>Set <code>MXNET_USE_OPERATOR_TUNING=0</code> to disable the operator tuning code, which decides whether to use OMP for an operator (see the sketch after this list).</p>
<ul>
<li>Values: String representation of the MXNET_USE_OPERATOR_TUNING environment variable</li>
<li> 0=disable all</li>
<li> 1=enable all</li>
<li> a comma-separated list of types (e.g. float32,float16) enables tuning for the listed types and disables it for those not listed</li>
<li>Refer to: <a href="https://github.com/apache/mxnet/blob/master/src/operator/operator_tune-inl.h#L444">https://github.com/apache/mxnet/blob/master/src/operator/operator_tune-inl.h#L444</a></li>
</ul></li>
</ul></li>
<li><p>Set <code>MXNET_USE_NUM_CORES_OPERATOR_TUNING</code> to define the number of cores to be used by the operator tuning code.</p>
<ul>
<li>This reduces operator tuning overhead when there are multiple instances of MXNet running in the system and each instance
should use only part of the cores available on the system.</li>
<li>Refer to: <a href="https://github.com/apache/mxnet/pull/13602">https://github.com/apache/mxnet/pull/13602</a></li>
</ul></li>
</ul>
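<p>A sketch of configuring the tuning behaviour from Python, per the two settings above:</p>
<pre><code>import os

# Disable the OMP-vs-serial operator tuning pass entirely ...
os.environ['MXNET_USE_OPERATOR_TUNING'] = '0'
# ... or, alternatively, tell the tuner that this process only
# owns four of the machine&#39;s cores:
# os.environ['MXNET_USE_NUM_CORES_OPERATOR_TUNING'] = '4'

import mxnet as mx
</code></pre>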
</div>
</div>
</div>
</div>
</article>
</main><footer class="site-footer h-card">
<div class="wrapper">
<div class="row">
<div class="col-4">
<h4 class="footer-category-title">Resources</h4>
<ul class="contact-list">
<li><a href="/versions/1.9.1/community/contribute#mxnet-dev-communications">Mailing lists</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home">Developer Wiki</a></li>
<li><a href="https://issues.apache.org/jira/projects/MXNET/issues">Jira Tracker</a></li>
<li><a href="https://github.com/apache/mxnet/labels/Roadmap">Github Roadmap</a></li>
<li><a href="https://medium.com/apache-mxnet">Blog</a></li>
<li><a href="https://discuss.mxnet.io">Forum</a></li>
<li><a href="/versions/1.9.1/community/contribute">Contribute</a></li>
</ul>
</div>
<div class="col-4"><ul class="social-media-list"><li><a href="https://github.com/apache/mxnet"><svg class="svg-icon"><use xlink:href="/versions/1.9.1/assets/minima-social-icons.svg#github"></use></svg> <span class="username">apache/mxnet</span></a></li><li><a href="https://www.twitter.com/apachemxnet"><svg class="svg-icon"><use xlink:href="/versions/1.9.1/assets/minima-social-icons.svg#twitter"></use></svg> <span class="username">apachemxnet</span></a></li><li><a href="https://youtube.com/apachemxnet"><svg class="svg-icon"><use xlink:href="/versions/1.9.1/assets/minima-social-icons.svg#youtube"></use></svg> <span class="username">apachemxnet</span></a></li></ul>
</div>
<div class="col-4 footer-text">
<p>A flexible and efficient library for deep learning.</p>
</div>
</div>
</div>
</footer>
<footer class="site-footer2">
<div class="wrapper">
<div class="row">
<div class="col-3">
<img src="/versions/1.9.1/assets/img/asf_logo.svg" class="footer-logo col-2">
</div>
<div class="footer-bottom-warning col-9">
</p><p>"Copyright © 2017-2022, The Apache Software Foundation. Licensed under the Apache License, Version 2.0. Apache MXNet, MXNet, Apache, the Apache
feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the
Apache Software Foundation."</p>
</div>
</div>
</div>
</footer>
</body>
</html>