blob: bd1d0bf2c32bc2e01957c4a5bcabf565006c1310 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en"><head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="" rel="icon" type="image/png"><!-- Begin Jekyll SEO tag v2.6.1 -->
<title>Convert from Caffe to MXNet | Apache MXNet</title>
<meta name="generator" content="Jekyll v3.8.6" />
<meta property="og:title" content="Convert from Caffe to MXNet" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="A flexible and efficient library for deep learning." />
<meta property="og:description" content="A flexible and efficient library for deep learning." />
<link rel="canonical" href="" />
<meta property="og:url" content="" />
<meta property="og:site_name" content="Apache MXNet" />
<script type="application/ld+json">
{"url":"","@type":"WebPage","description":"A flexible and efficient library for deep learning.","headline":"Convert from Caffe to MXNet","@context":""}</script>
<!-- End Jekyll SEO tag -->
<script src=""></script>
<link rel="stylesheet" href="" />
<link rel="stylesheet" href="/versions/1.8.0/assets/main.css"><link type="application/atom+xml" rel="alternate" href="" title="Apache MXNet" /><script>
if(!(window.doNotTrack === "1" || navigator.doNotTrack === "1" || navigator.doNotTrack === "yes" || navigator.msDoNotTrack === "1")) {
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
ga('create', 'UA-96378503-1', 'auto');
ga('send', 'pageview');
<script src="/versions/1.8.0/assets/js/jquery-3.3.1.min.js"></script><script src="" defer></script>
<script src="/versions/1.8.0/assets/js/globalSearch.js" defer></script>
<script src="/versions/1.8.0/assets/js/clipboard.js" defer></script>
<script src="/versions/1.8.0/assets/js/copycode.js" defer></script></head>
<body><header class="site-header" role="banner">
$(document).ready(function () {
function opacity_header() {
var value = "rgba(4,140,204," + ($(window).scrollTop() / 300 + 0.4) + ")"
$('.site-header').css("background-color", value)
$(window).scroll(function () {
$('.page-link').each( function () {
if (window.location.href.includes(this.href)) {
<div class="wrapper">
<a class="site-title" rel="author" href="/versions/1.8.0/"><img
src="/versions/1.8.0/assets/img/mxnet_logo.png" class="site-header-logo" alt="Apache MXNet home"></a>
<nav class="site-nav">
<input type="checkbox" id="nav-trigger" class="nav-trigger"/>
<label for="nav-trigger">
<span class="menu-icon">
<svg viewBox="0 0 18 15" width="18px" height="15px">
<path d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.032C17.335,0,18,0.665,18,1.484L18,1.484z M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.032C17.335,6.031,18,6.696,18,7.516L18,7.516z M18,13.516C18,14.335,17.335,15,16.516,15H1.484 C0.665,15,0,14.335,0,13.516l0,0c0-0.82,0.665-1.483,1.484-1.483h15.032C17.335,12.031,18,12.695,18,13.516L18,13.516z"/>
<div class="gs-search-border">
<div id="gs-search-icon"></div>
<form id="global-search-form">
<input id="global-search" type="text" title="Search" placeholder="Search" />
<div id="global-search-dropdown-container">
<button class="gs-current-version btn" type="button" data-toggle="dropdown">
<span id="gs-current-version-label">1.8.0</span>
<svg class="gs-dropdown-caret" viewBox="0 0 32 32" aria-hidden="true">
<path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path>
<ul class="gs-opt-group gs-version-dropdown">
<li class="gs-opt gs-versions">master</li>
<li class="gs-opt gs-versions active">1.8.0</li>
<li class="gs-opt gs-versions">1.7.0</li>
<li class="gs-opt gs-versions">1.6.0</li>
<li class="gs-opt gs-versions">1.5.0</li>
<li class="gs-opt gs-versions">1.4.1</li>
<li class="gs-opt gs-versions">1.3.1</li>
<li class="gs-opt gs-versions">1.2.1</li>
<li class="gs-opt gs-versions">1.1.0</li>
<li class="gs-opt gs-versions">1.0.0</li>
<li class="gs-opt gs-versions">0.12.1</li>
<li class="gs-opt gs-versions">0.11.0</li>
<span id="global-search-close">x</span>
<div class="trigger">
<div id="global-search-mobile-border">
<div id="gs-search-icon-mobile"></div>
<input id="global-search-mobile" placeholder="Search..." type="text"/>
<div id="global-search-dropdown-container-mobile">
<button class="gs-current-version-mobile btn" type="button" data-toggle="dropdown">
<svg class="gs-dropdown-caret" viewBox="0 0 32 32" aria-hidden="true">
<path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path>
<ul class="gs-opt-group gs-version-dropdown-mobile">
<li class="gs-opt gs-versions">master</li>
<li class="gs-opt gs-versions active">1.8.0</li>
<li class="gs-opt gs-versions">1.7.0</li>
<li class="gs-opt gs-versions">1.6.0</li>
<li class="gs-opt gs-versions">1.5.0</li>
<li class="gs-opt gs-versions">1.4.1</li>
<li class="gs-opt gs-versions">1.3.1</li>
<li class="gs-opt gs-versions">1.2.1</li>
<li class="gs-opt gs-versions">1.1.0</li>
<li class="gs-opt gs-versions">1.0.0</li>
<li class="gs-opt gs-versions">0.12.1</li>
<li class="gs-opt gs-versions">0.11.0</li>
<a class="page-link" href="/versions/1.8.0/get_started">Get Started</a>
<a class="page-link" href="/versions/1.8.0/blog">Blog</a>
<a class="page-link" href="/versions/1.8.0/features">Features</a>
<a class="page-link" href="/versions/1.8.0/ecosystem">Ecosystem</a>
<a class="page-link" href="/versions/1.8.0/api">Docs &amp; Tutorials</a>
<a class="page-link" href="">GitHub</a>
<div class="dropdown">
<span class="dropdown-header">1.8.0
<svg class="dropdown-caret" viewBox="0 0 32 32" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
<div class="dropdown-content">
<a href="/">master</a>
<a class="dropdown-option-active" href="/versions/1.8.0/">1.8.0</a>
<a href="/versions/1.7.0/">1.7.0</a>
<a href="/versions/1.6.0/">1.6.0</a>
<a href="/versions/1.5.0/">1.5.0</a>
<a href="/versions/1.4.1/">1.4.1</a>
<a href="/versions/1.3.1/">1.3.1</a>
<a href="/versions/1.2.1/">1.2.1</a>
<a href="/versions/1.1.0/">1.1.0</a>
<a href="/versions/1.0.0/">1.0.0</a>
<a href="/versions/0.12.1/">0.12.1</a>
<a href="/versions/0.11.0/">0.11.0</a>
<main class="page-content" aria-label="Content">
<article class="post">
<header class="post-header wrapper">
<h1 class="post-title">Convert from Caffe to MXNet</h1>
<div class="post-content">
<div class="wrapper">
<div class="row">
<div class="col-3 docs-side-bar">
<h3 style="text-transform: capitalize; padding-left:10px">faq</h3>
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/add_op_in_backend">A Beginner's Guide to Implementing Operators in MXNet Backend</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/caffe">Convert from Caffe to MXNet</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/cloud">MXNet on the Cloud</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/distributed_training">Distributed Training in MXNet</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/env_var">Environment Variables</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/float16">Float16</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/large_tensor_support">Using MXNet with Large Tensor Support</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/model_parallel_lstm">Model Parallel</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/multi_device">Data Parallelism with Multiple CPU/GPUs on MXNet</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/new_op">Create New Operators</a></li>
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/nnpack">NNPACK for Multi-Core CPU Support in MXNet</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/perf">Some Tips for Improving MXNet Performance</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/recordio">Create a Dataset Using RecordIO</a></li>
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/s3_integration">Use data from S3 for training</a></li>
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/security">MXNet Security Best Practices</a></li>
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/smart_device">Deep Learning at the Edge</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/visualize_graph">Visualize Neural Networks</a></li>
<!-- page-category -->
<li><a href="/versions/1.8.0/api/faq/why_mxnet">Why MXNet came to be?</a></li>
<!-- page-category -->
<!-- page-category -->
<!-- page-category -->
<!-- resource-p -->
<div class="col-9">
<!--- Licensed to the Apache Software Foundation (ASF) under one -->
<!--- or more contributor license agreements. See the NOTICE file -->
<!--- distributed with this work for additional information -->
<!--- regarding copyright ownership. The ASF licenses this file -->
<!--- to you under the Apache License, Version 2.0 (the -->
<!--- "License"); you may not use this file except in compliance -->
<!--- with the License. You may obtain a copy of the License at -->
<!--- -->
<!--- Unless required by applicable law or agreed to in writing, -->
<!--- software distributed under the License is distributed on an -->
<!--- KIND, either express or implied. See the License for the -->
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->
<h1 id="how-to-convert-from-caffe-to-mxnet">How to | Convert from Caffe to MXNet</h1>
<p>Key topics covered include the following:</p>
<li><a href="#converting-caffe-trained-models-to-mxnet">Converting Caffe trained models to MXNet</a></li>
<li><a href="#calling-caffe-operators-in-mxnet">Calling Caffe operators in MXNet</a></li>
<h2 id="converting-caffe-trained-models-to-mxnet">Converting Caffe trained models to MXNet</h2>
<p>The converting tool is available at
<a href="">tools/caffe_converter</a>. In
the remainder of this section, we assume we are in the <code>tools/caffe_converter</code>
directory.</p>
<h3 id="how-to-build">How to build</h3>
<p>If Caffe&#39;s Python package is installed, that is, if we can run <code>import caffe</code> in
Python, then we are ready to go.</p>
<p>For example, we can use
<a href="">AWS Deep Learning AMI</a> with
both Caffe and MXNet installed.</p>
<p>Otherwise we can install the
<a href="">Google protobuf</a>
compiler and its Python binding. It is easier to install, but may be slower
at runtime.</p>
<li><p>Install the compiler:</p>
<li>Linux: install <code>protobuf-compiler</code>, e.g. <code>sudo apt-get install
protobuf-compiler</code> for Ubuntu and <code>sudo yum install protobuf-compiler</code> for
Redhat/Fedora.</li>
<li>Windows: Download the win32 build of
<a href="">protobuf</a>. Make sure to
download the version that corresponds to the version of the python binding
on the next step. Extract to any location, then add that location to your
<code>PATH</code> environment variable.</li>
<li>Mac OS X: <code>brew install protobuf</code></li>
<li><p>Install the python binding by either <code>conda install -c conda-forge protobuf</code>
or <code>pip install protobuf</code>.</p></li>
<li><p>Compile Caffe proto definition. Run <code>make</code> in Linux or Mac OS X, or
<code>make_win32.bat</code> in Windows</p></li>
<h3 id="how-to-use">How to use</h3>
<p>There are three tools:</p>
<li><code>convert_symbol.py</code> : convert Caffe model definition in protobuf into MXNet&#39;s
Symbol in JSON format.</li>
<li><code>convert_model.py</code> : convert Caffe model parameters into MXNet&#39;s NDArray format</li>
<li><code>convert_mean.py</code> : convert Caffe input mean file into MXNet&#39;s NDArray format</li>
<p>In addition, there are two tools:
- <code>convert_caffe_modelzoo.py</code> : download and convert models from Caffe model
zoo.
- <code>test_converter.py</code> : test the converted models by checking the prediction
accuracy.</p>
<h2 id="calling-caffe-operators-in-mxnet">Calling Caffe operators in MXNet</h2>
<p>Besides converting Caffe models, MXNet supports calling most Caffe operators,
including network layer, data layer, and loss function, directly. It is
particularly useful if there are customized operators implemented in Caffe, then
we do not need to re-implement them in MXNet.</p>
<h3 id="how-to-install">How to install</h3>
<p>This feature requires Caffe. In particular, we need to re-compile Caffe before
<a href="">PR #4527</a> is merged into Caffe. Here
are the steps to rebuild Caffe:</p>
<li>Download <a href="">Caffe</a>. E.g. <code>git clone https://github.com/BVLC/caffe</code></li>
<li>Download the
<a href="">patch for the MXNet interface</a>
and apply to Caffe. E.g.
cd caffe &amp;&amp; wget https://github.com/BVLC/caffe/pull/4527.patch &amp;&amp; git apply 4527.patch
<li>Build and install Caffe by following the
<a href="">official guide</a>.</li>
<p>Next we need to compile MXNet with Caffe support:</p>
<li>Copy <code>make/config.mk</code> (for Linux) or <code>make/osx.mk</code>
(for Mac) into the MXNet root folder as <code>config.mk</code> if you have not done it yet</li>
<li>Open the copied <code>config.mk</code> and uncomment these two lines
CAFFE_PATH = $(HOME)/caffe
MXNET_PLUGINS += plugin/caffe/
Modify <code>CAFFE_PATH</code> to your Caffe installation, if necessary.</li>
<li>Then build with 8 threads <code>make clean &amp;&amp; make -j8</code>.</li>
<h3 id="how-to-use-1">How to use</h3>
<p>This Caffe plugin adds three components into MXNet:</p>
<li><code>sym.CaffeOp</code> : Caffe neural network layer</li>
<li><code>sym.CaffeLoss</code> : Caffe loss functions</li>
<li><code>io.CaffeDataIter</code> : Caffe data layer</li>
<h4 id="use-sym-caffeop">Use <code>sym.CaffeOp</code></h4>
<p>The following example shows the definition of a 10-class multi-layer perceptron:</p>
<div class="highlight"><pre><code class="language-Python" data-lang="Python">data = mx.sym.Variable('data')
fc1 = mx.sym.CaffeOp(data_0=data, num_weight=2, name='fc1', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 128} }")
act1 = mx.sym.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
fc2 = mx.sym.CaffeOp(data_0=act1, num_weight=2, name='fc2', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 64} }")
act2 = mx.sym.CaffeOp(data_0=fc2, prototxt="layer{type:\"TanH\"}")
fc3 = mx.sym.CaffeOp(data_0=act2, num_weight=2, name='fc3', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 10}}")
mlp = mx.sym.SoftmaxOutput(data=fc3, name='softmax')</code></pre></div>
<p>Let&#39;s break it down. First, <code>data = mx.sym.Variable(&#39;data&#39;)</code> defines a variable
as a placeholder for input. Then, it&#39;s fed through Caffe operators with <code>fc1 =
mx.sym.CaffeOp(...)</code>. <code>CaffeOp</code> accepts several arguments:</p>
<li>The inputs to Caffe operators are named as <code>data_i</code> for <em>i=0, ..., num_data-1</em></li>
<li><code>num_data</code> is the number of inputs. By default it is 1, and therefore
skipped in the above example.</li>
<li><code>num_out</code> is the number of outputs. By default it is 1 and also skipped.</li>
<li><code>num_weight</code> is the number of weights (<code>blobs_</code>). Its default value is 0. We
need to explicitly specify it for a non-zero value.</li>
<li><code>prototxt</code> is the protobuf configuration string.</li>
<h4 id="use-sym-caffeloss">Use <code>sym.CaffeLoss</code></h4>
<p>Using Caffe loss is similar:
we can replace the MXNet loss with Caffe loss.</p>
<p>Replacing the last line of the above example with the following two lines, we can
call Caffe loss instead of MXNet loss.</p>
<div class="highlight"><pre><code class="language-Python" data-lang="Python">label = mx.sym.Variable('softmax_label')
mlp = mx.sym.CaffeLoss(data=fc3, label=label, grad_scale=1, name='softmax', prototxt="layer{type:\"SoftmaxWithLoss\"}")</code></pre></div>
<p>Similar to <code>CaffeOp</code>, <code>CaffeLoss</code> has arguments <code>num_data</code> (2 by default) and
<code>num_out</code> (1 by default). But there are two differences:</p>
<li>Inputs are <code>data</code> and <code>label</code>. And we need to explicitly create a variable
placeholder for label, which is implicitly done in MXNet loss.</li>
<li><code>grad_scale</code> is the weight of this loss.</li>
<h4 id="use-io-caffedataiter">Use <code>io.CaffeDataIter</code></h4>
<p>We can also wrap a Caffe data layer into MXNet&#39;s data iterator. Below is an
example of creating a data iterator for MNIST:</p>
<div class="highlight"><pre><code class="language-python" data-lang="python"><span class="n">train</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">CaffeDataIter</span><span class="p">(</span>
<span class="n">prototxt</span> <span class="o">=</span>
<span class="s">'layer { </span><span class="se">\
</span><span class="s"> name: "mnist" </span><span class="se">\
</span><span class="s"> type: "Data" </span><span class="se">\
</span><span class="s"> top: "data" </span><span class="se">\
</span><span class="s"> top: "label" </span><span class="se">\
</span><span class="s"> include { </span><span class="se">\
</span><span class="s"> phase: TEST </span><span class="se">\
</span><span class="s"> } </span><span class="se">\
</span><span class="s"> transform_param { </span><span class="se">\
</span><span class="s"> scale: 0.00390625 </span><span class="se">\
</span><span class="s"> } </span><span class="se">\
</span><span class="s"> data_param { </span><span class="se">\
</span><span class="s"> source: "caffe/examples/mnist/mnist_test_lmdb" </span><span class="se">\
</span><span class="s"> batch_size: 100 </span><span class="se">\
</span><span class="s"> backend: LMDB </span><span class="se">\
</span><span class="s"> } </span><span class="se">\
</span><span class="s"> }'</span><span class="p">,</span>
<span class="n">flat</span> <span class="o">=</span> <span class="n">flat</span><span class="p">,</span>
<span class="n">num_examples</span> <span class="o">=</span> <span class="mi">60000</span><span class="p">,</span>
<span class="p">)</span></code></pre></div>
<h3 id="put-it-all-together">Put it all together</h3>
<p>The complete example is available at
<a href="">example/caffe</a></p>
</main><footer class="site-footer h-card">
<div class="wrapper">
<div class="row">
<div class="col-4">
<h4 class="footer-category-title">Resources</h4>
<ul class="contact-list">
<li><a href="/versions/1.8.0/community/contribute#mxnet-dev-communications">Mailing lists</a></li>
<li><a href="">Developer Wiki</a></li>
<li><a href="">Jira Tracker</a></li>
<li><a href="">Github Roadmap</a></li>
<li><a href="">MXNet Discuss forum</a></li>
<li><a href="/versions/1.8.0/community/contribute">Contribute To MXNet</a></li>
<div class="col-4"><ul class="social-media-list"><li><a href=""><svg class="svg-icon"><use xlink:href="/versions/1.8.0/assets/minima-social-icons.svg#github"></use></svg> <span class="username">apache/incubator-mxnet</span></a></li><li><a href=""><svg class="svg-icon"><use xlink:href="/versions/1.8.0/assets/minima-social-icons.svg#twitter"></use></svg> <span class="username">apachemxnet</span></a></li><li><a href=""><svg class="svg-icon"><use xlink:href="/versions/1.8.0/assets/minima-social-icons.svg#youtube"></use></svg> <span class="username">apachemxnet</span></a></li></ul>
<div class="col-4 footer-text">
<p>A flexible and efficient library for deep learning.</p>
<footer class="site-footer2">
<div class="wrapper">
<div class="row">
<div class="col-3">
<img src="/versions/1.8.0/assets/img/apache_incubator_logo.png" class="footer-logo col-2" alt="Apache Incubator logo">
<div class="footer-bottom-warning col-9">
<p>Apache MXNet is an effort undergoing incubation at The Apache Software Foundation (ASF), <span
style="font-weight:bold">sponsored by the <i>Apache Incubator</i></span>. Incubation is required
of all newly accepted projects until a further review indicates that the infrastructure,
communications, and decision making process have stabilized in a manner consistent with other
successful ASF projects. While incubation status is not necessarily a reflection of the completeness
or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
</p><p>"Copyright © 2017-2018, The Apache Software Foundation Apache MXNet, MXNet, Apache, the Apache
feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the
Apache Software Foundation."</p>