blob: e43312bae1f7e577f7cc7d5cf03744b7616e1a8b [file] [log] [blame]
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8" />
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta http-equiv="x-ua-compatible" content="ie=edge">
<style>
.dropdown {
position: relative;
display: inline-block;
}
.dropdown-content {
display: none;
position: absolute;
background-color: #f9f9f9;
min-width: 160px;
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
padding: 12px 16px;
z-index: 1;
text-align: left;
}
.dropdown:hover .dropdown-content {
display: block;
}
.dropdown-option:hover {
color: #FF4500;
}
.dropdown-option-active {
color: #FF4500;
font-weight: lighter;
}
.dropdown-option {
color: #000000;
font-weight: lighter;
}
.dropdown-header {
color: #FFFFFF;
display: inline-flex;
}
.dropdown-caret {
width: 18px;
}
.dropdown-caret-path {
fill: #FFFFFF;
}
</style>
<title>Custom Loss Blocks &#8212; Apache MXNet documentation</title>
<link rel="stylesheet" href="../../../../_static/basic.css" type="text/css" />
<link rel="stylesheet" href="../../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/mxnet.css" />
<link rel="stylesheet" href="../../../../_static/material-design-lite-1.3.0/material.blue-deep_orange.min.css" type="text/css" />
<link rel="stylesheet" href="../../../../_static/sphinx_materialdesign_theme.css" type="text/css" />
<link rel="stylesheet" href="../../../../_static/fontawesome/all.css" type="text/css" />
<link rel="stylesheet" href="../../../../_static/fonts.css" type="text/css" />
<link rel="stylesheet" href="../../../../_static/feedback.css" type="text/css" />
<script id="documentation_options" data-url_root="../../../../" src="../../../../_static/documentation_options.js"></script>
<script src="../../../../_static/jquery.js"></script>
<script src="../../../../_static/underscore.js"></script>
<script src="../../../../_static/doctools.js"></script>
<script src="../../../../_static/language_data.js"></script>
<script src="../../../../_static/matomo_analytics.js"></script>
<script src="../../../../_static/autodoc.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="shortcut icon" href="../../../../_static/mxnet-icon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<link rel="next" title="Kullback-Leibler (KL) Divergence" href="kl_divergence.html" />
<link rel="prev" title="Losses" href="index.html" />
</head>
<body><header class="site-header" role="banner">
<div class="wrapper">
<a class="site-title" rel="author" href="/versions/1.9.1/"><img
src="../../../../_static/mxnet_logo.png" class="site-header-logo"></a>
<nav class="site-nav">
<input type="checkbox" id="nav-trigger" class="nav-trigger"/>
<label for="nav-trigger">
<span class="menu-icon">
<svg viewBox="0 0 18 15" width="18px" height="15px">
<path d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.032C17.335,0,18,0.665,18,1.484L18,1.484z M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.032C17.335,6.031,18,6.696,18,7.516L18,7.516z M18,13.516C18,14.335,17.335,15,16.516,15H1.484 C0.665,15,0,14.335,0,13.516l0,0c0-0.82,0.665-1.483,1.484-1.483h15.032C17.335,12.031,18,12.695,18,13.516L18,13.516z"/>
</svg>
</span>
</label>
<div class="trigger">
<a class="page-link" href="/versions/1.9.1/get_started">Get Started</a>
<a class="page-link" href="/versions/1.9.1/features">Features</a>
<a class="page-link" href="/versions/1.9.1/ecosystem">Ecosystem</a>
<a class="page-link page-current" href="/versions/1.9.1/api">Docs & Tutorials</a>
<a class="page-link" href="/versions/1.9.1/trusted_by">Trusted By</a>
<a class="page-link" href="https://github.com/apache/mxnet">GitHub</a>
<div class="dropdown" style="min-width:100px">
<span class="dropdown-header">Apache
<svg class="dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content" style="min-width:250px">
<a href="https://www.apache.org/foundation/">Apache Software Foundation</a>
<a href="https://incubator.apache.org/">Apache Incubator</a>
<a href="https://www.apache.org/licenses/">License</a>
<a href="/versions/1.9.1/api/faq/security.html">Security</a>
<a href="https://privacy.apache.org/policies/privacy-policy-public.html">Privacy</a>
<a href="https://www.apache.org/events/current-event">Events</a>
<a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
<a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
</div>
</div>
<div class="dropdown">
<span class="dropdown-header">1.9.1
<svg class="dropdown-caret" viewBox="0 0 32 32" class="icon icon-caret-bottom" aria-hidden="true"><path class="dropdown-caret-path" d="M24 11.305l-7.997 11.39L8 11.305z"></path></svg>
</span>
<div class="dropdown-content">
<a class="dropdown-option" href="/">master</a><br>
<a class="dropdown-option-active" href="/versions/1.9.1/">1.9.1</a><br>
<a class="dropdown-option" href="/versions/1.8.0/">1.8.0</a><br>
<a class="dropdown-option" href="/versions/1.7.0/">1.7.0</a><br>
<a class="dropdown-option" href="/versions/1.6.0/">1.6.0</a><br>
<a class="dropdown-option" href="/versions/1.5.0/">1.5.0</a><br>
<a class="dropdown-option" href="/versions/1.4.1/">1.4.1</a><br>
<a class="dropdown-option" href="/versions/1.3.1/">1.3.1</a><br>
<a class="dropdown-option" href="/versions/1.2.1/">1.2.1</a><br>
<a class="dropdown-option" href="/versions/1.1.0/">1.1.0</a><br>
<a class="dropdown-option" href="/versions/1.0.0/">1.0.0</a><br>
<a class="dropdown-option" href="/versions/0.12.1/">0.12.1</a><br>
<a class="dropdown-option" href="/versions/0.11.0/">0.11.0</a>
</div>
</div>
</div>
</nav>
</div>
</header>
<div class="mdl-layout mdl-js-layout mdl-layout--fixed-header mdl-layout--fixed-drawer"><header class="mdl-layout__header mdl-layout__header--waterfall ">
<div class="mdl-layout__header-row">
<nav class="mdl-navigation breadcrumb">
<a class="mdl-navigation__link" href="../../../index.html">Python Tutorials</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link" href="../../index.html">Packages</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link" href="../index.html">Gluon</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link" href="index.html">Losses</a><i class="material-icons">navigate_next</i>
<a class="mdl-navigation__link is-active">Custom Loss Blocks</a>
</nav>
<div class="mdl-layout-spacer"></div>
<nav class="mdl-navigation">
<form class="form-inline pull-sm-right" action="../../../../search.html" method="get">
<div class="mdl-textfield mdl-js-textfield mdl-textfield--expandable mdl-textfield--floating-label mdl-textfield--align-right">
<label id="quick-search-icon" class="mdl-button mdl-js-button mdl-button--icon" for="waterfall-exp">
<i class="material-icons">search</i>
</label>
<div class="mdl-textfield__expandable-holder">
<input class="mdl-textfield__input" type="text" name="q" id="waterfall-exp" placeholder="Search" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</div>
</div>
<div class="mdl-tooltip" data-mdl-for="quick-search-icon">
Quick search
</div>
</form>
<a id="button-show-source"
class="mdl-button mdl-js-button mdl-button--icon"
href="../../../../_sources/tutorials/packages/gluon/loss/custom-loss.ipynb" rel="nofollow">
<i class="material-icons">code</i>
</a>
<div class="mdl-tooltip" data-mdl-for="button-show-source">
Show Source
</div>
</nav>
</div>
<div class="mdl-layout__header-row header-links">
<div class="mdl-layout-spacer"></div>
<nav class="mdl-navigation">
</nav>
</div>
</header><header class="mdl-layout__drawer">
<div class="globaltoc">
<span class="mdl-layout-title toc">Table Of Contents</span>
<nav class="mdl-navigation">
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="../../../index.html">Python Tutorials</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../../../getting-started/index.html">Getting Started</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../getting-started/crash-course/index.html">Crash Course</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/1-ndarray.html">Manipulate data with <code class="docutils literal notranslate"><span class="pre">ndarray</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/2-nn.html">Create a neural network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/3-autograd.html">Automatic differentiation with <code class="docutils literal notranslate"><span class="pre">autograd</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/4-train.html">Train the neural network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/5-predict.html">Predict with a pre-trained model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/6-use_gpus.html">Use GPUs</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../getting-started/to-mxnet/index.html">Moving to MXNet from Other Frameworks</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/to-mxnet/pytorch.html">PyTorch vs Apache MXNet</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../getting-started/gluon_from_experiment_to_deployment.html">Gluon: from experiment to deployment</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../getting-started/logistic_regression_explained.html">Logistic regression explained</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/image/mnist.html">MNIST</a></li>
</ul>
</li>
<li class="toctree-l2 current"><a class="reference internal" href="../../index.html">Packages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="../../autograd/index.html">Automatic Differentiation</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="../index.html">Gluon</a><ul class="current">
<li class="toctree-l4"><a class="reference internal" href="../blocks/index.html">Blocks</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../blocks/custom-layer.html">Custom Layers</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/custom_layer_beginners.html">Customer Layers (Beginners)</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/hybridize.html">Hybridize</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/init.html">Initialization</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/naming.html">Parameter and Block Naming</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/nn.html">Layers and Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/parameters.html">Parameter Management</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/save_load_params.html">Saving and Loading Gluon Models</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/activations/activations.html">Activation Blocks</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../data/index.html">Data Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../data/data_augmentation.html">Image Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/data_augmentation.html#Spatial-Augmentation">Spatial Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/data_augmentation.html#Color-Augmentation">Color Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/data_augmentation.html#Composed-Augmentations">Composed Augmentations</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/datasets.html">Gluon <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s and <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/datasets.html#Using-own-data-with-included-Datasets">Using own data with included <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/datasets.html#Using-own-data-with-custom-Datasets">Using own data with custom <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/datasets.html#Appendix:-Upgrading-from-Module-DataIter-to-Gluon-DataLoader">Appendix: Upgrading from Module <code class="docutils literal notranslate"><span class="pre">DataIter</span></code> to Gluon <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../image/index.html">Image Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../image/image-augmentation.html">Image Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../image/info_gan.html">Image similarity search with InfoGAN</a></li>
<li class="toctree-l5"><a class="reference internal" href="../image/mnist.html">Handwritten Digit Recognition</a></li>
<li class="toctree-l5"><a class="reference internal" href="../image/pretrained_models.html">Using pre-trained models in MXNet</a></li>
</ul>
</li>
<li class="toctree-l4 current"><a class="reference internal" href="index.html">Losses</a><ul class="current">
<li class="toctree-l5 current"><a class="current reference internal" href="#">Custom Loss Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="kl_divergence.html">Kullback-Leibler (KL) Divergence</a></li>
<li class="toctree-l5"><a class="reference internal" href="loss.html">Loss functions</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../text/index.html">Text Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../text/gnmt.html">Google Neural Machine Translation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../text/transformer.html">Machine Translation with Transformer</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../training/index.html">Training</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../training/fit_api_tutorial.html">MXNet Gluon Fit API</a></li>
<li class="toctree-l5"><a class="reference internal" href="../training/trainer.html">Trainer</a></li>
<li class="toctree-l5"><a class="reference internal" href="../training/learning_rates/index.html">Learning Rates</a><ul>
<li class="toctree-l6"><a class="reference internal" href="../training/learning_rates/learning_rate_finder.html">Learning Rate Finder</a></li>
<li class="toctree-l6"><a class="reference internal" href="../training/learning_rates/learning_rate_schedules.html">Learning Rate Schedules</a></li>
<li class="toctree-l6"><a class="reference internal" href="../training/learning_rates/learning_rate_schedules_advanced.html">Advanced Learning Rate Schedules</a></li>
</ul>
</li>
<li class="toctree-l5"><a class="reference internal" href="../training/normalization/index.html">Normalization Blocks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../kvstore/index.html">KVStore</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../kvstore/kvstore.html">Distributed Key-Value Store</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../ndarray/index.html">NDArray</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../ndarray/01-ndarray-intro.html">An Intro: Manipulate Data the MXNet Way with NDArray</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../ndarray/02-ndarray-operations.html">NDArray Operations</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../ndarray/03-ndarray-contexts.html">NDArray Contexts</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../ndarray/gotchas_numpy_in_mxnet.html">Gotchas using NumPy in Apache MXNet</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../ndarray/sparse/index.html">Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../ndarray/sparse/csr.html">CSRNDArray - NDArray in Compressed Sparse Row Storage Format</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../ndarray/sparse/row_sparse.html">RowSparseNDArray - NDArray for Sparse Gradient Updates</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../ndarray/sparse/train.html">Train a Linear Regression Model with Sparse Symbols</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../ndarray/sparse/train_gluon.html">Sparse NDArrays with Gluon</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../onnx/index.html">ONNX</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../onnx/fine_tuning_gluon.html">Fine-tuning an ONNX model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../onnx/inference_on_onnx_model.html">Running inference on MXNet/Gluon from an ONNX model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../onnx/super_resolution.html">Importing an ONNX model into MXNet</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/deploy/export/onnx.html">Export ONNX Models</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../optimizer/index.html">Optimizers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../viz/index.html">Visualization</a><ul>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/visualize_graph">Visualize networks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../performance/index.html">Performance</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../performance/compression/index.html">Compression</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/compression/int8.html">Deploy with int-8</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/float16">Float16</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/gradient_compression">Gradient Compression</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/int8_inference.html">GluonCV with Quantized Models</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../performance/backend/index.html">Accelerated Backend Tools</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/backend/mkldnn/index.html">Intel MKL-DNN</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../performance/backend/mkldnn/mkldnn_quantization.html">Quantize with MKL-DNN backend</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../performance/backend/mkldnn/mkldnn_quantization.html#Improving-accuracy-with-Intel®-Neural-Compressor">Improving accuracy with Intel® Neural Compressor</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../performance/backend/mkldnn/mkldnn_readme.html">Install MXNet with MKL-DNN</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/backend/tensorrt/index.html">TensorRT</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../performance/backend/tensorrt/tensorrt.html">Optimizing Deep Learning Computation Graphs with TensorRT</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/backend/tvm.html">Use TVM</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/backend/profiler.html">Profiling MXNet Models</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/backend/amp.html">Using AMP: Automatic Mixed Precision</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../deploy/index.html">Deployment</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../deploy/export/index.html">Export</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/export/onnx.html">Exporting to ONNX format</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/export_network.html">Export Gluon CV Models</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/save_load_params.html">Save / Load Parameters</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../deploy/inference/index.html">Inference</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/inference/cpp.html">Deploy into C++</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/inference/image_classification_jetson.html">Image Classication using pretrained ResNet-50 model on Jetson module</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/inference/scala.html">Deploy into a Java or Scala Environment</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/inference/wine_detector.html">Real-time Object Detection with MXNet On The Raspberry Pi</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../deploy/run-on-aws/index.html">Run on AWS</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/run-on-aws/use_ec2.html">Run on an EC2 Instance</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/run-on-aws/use_sagemaker.html">Run on Amazon SageMaker</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/run-on-aws/cloud.html">MXNet on the Cloud</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../extend/index.html">Extend</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../extend/custom_layer.html">Custom Layers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../extend/customop.html">Custom Numpy Operators</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/new_op">New Operator Creation</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/add_op_in_backend">New Operator in MXNet Backend</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/index.html">Python API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/ndarray/index.html">mxnet.ndarray</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/ndarray.html">ndarray</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/contrib/index.html">ndarray.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/image/index.html">ndarray.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/linalg/index.html">ndarray.linalg</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/op/index.html">ndarray.op</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/random/index.html">ndarray.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/register/index.html">ndarray.register</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/sparse/index.html">ndarray.sparse</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/utils/index.html">ndarray.utils</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/gluon/index.html">mxnet.gluon</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/block.html">gluon.Block</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/hybrid_block.html">gluon.HybridBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/symbol_block.html">gluon.SymbolBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/constant.html">gluon.Constant</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/parameter.html">gluon.Parameter</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/parameter_dict.html">gluon.ParameterDict</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/trainer.html">gluon.Trainer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/contrib/index.html">gluon.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/data/index.html">gluon.data</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../api/gluon/data/vision/index.html">data.vision</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../../api/gluon/data/vision/datasets/index.html">vision.datasets</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../../api/gluon/data/vision/transforms/index.html">vision.transforms</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/loss/index.html">gluon.loss</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/model_zoo/index.html">gluon.model_zoo.vision</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/nn/index.html">gluon.nn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/rnn/index.html">gluon.rnn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/utils/index.html">gluon.utils</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/autograd/index.html">mxnet.autograd</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/initializer/index.html">mxnet.initializer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/optimizer/index.html">mxnet.optimizer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/lr_scheduler/index.html">mxnet.lr_scheduler</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/metric/index.html">mxnet.metric</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/kvstore/index.html">mxnet.kvstore</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/symbol/index.html">mxnet.symbol</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/symbol.html">symbol</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/contrib/index.html">symbol.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/image/index.html">symbol.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/linalg/index.html">symbol.linalg</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/op/index.html">symbol.op</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/random/index.html">symbol.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/register/index.html">symbol.register</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/sparse/index.html">symbol.sparse</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/module/index.html">mxnet.module</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/contrib/index.html">mxnet.contrib</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/autograd/index.html">contrib.autograd</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/io/index.html">contrib.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/ndarray/index.html">contrib.ndarray</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/onnx/index.html">contrib.onnx</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/quantization/index.html">contrib.quantization</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/symbol/index.html">contrib.symbol</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/tensorboard/index.html">contrib.tensorboard</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/tensorrt/index.html">contrib.tensorrt</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/text/index.html">contrib.text</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/mxnet/index.html">mxnet</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/attribute/index.html">mxnet.attribute</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/base/index.html">mxnet.base</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/callback/index.html">mxnet.callback</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/context/index.html">mxnet.context</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/engine/index.html">mxnet.engine</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/executor/index.html">mxnet.executor</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/executor_manager/index.html">mxnet.executor_manager</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/image/index.html">mxnet.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/io/index.html">mxnet.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/kvstore_server/index.html">mxnet.kvstore_server</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/libinfo/index.html">mxnet.libinfo</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/log/index.html">mxnet.log</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/model/index.html">mxnet.model</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/monitor/index.html">mxnet.monitor</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/name/index.html">mxnet.name</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/notebook/index.html">mxnet.notebook</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/operator/index.html">mxnet.operator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/profiler/index.html">mxnet.profiler</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/random/index.html">mxnet.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/recordio/index.html">mxnet.recordio</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/registry/index.html">mxnet.registry</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/rtc/index.html">mxnet.rtc</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/runtime/index.html">mxnet.runtime</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/test_utils/index.html">mxnet.test_utils</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/torch/index.html">mxnet.torch</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/util/index.html">mxnet.util</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/visualization/index.html">mxnet.visualization</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
</div>
</header>
<main class="mdl-layout__content" tabIndex="0">
<script type="text/javascript" src="../../../../_static/sphinx_materialdesign_theme.js "></script>
<script type="text/javascript" src="../../../../_static/feedback.js"></script>
<header class="mdl-layout__drawer">
<div class="globaltoc">
<span class="mdl-layout-title toc">Table Of Contents</span>
<nav class="mdl-navigation">
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="../../../index.html">Python Tutorials</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../../../getting-started/index.html">Getting Started</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../getting-started/crash-course/index.html">Crash Course</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/1-ndarray.html">Manipulate data with <code class="docutils literal notranslate"><span class="pre">ndarray</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/2-nn.html">Create a neural network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/3-autograd.html">Automatic differentiation with <code class="docutils literal notranslate"><span class="pre">autograd</span></code></a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/4-train.html">Train the neural network</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/5-predict.html">Predict with a pre-trained model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/crash-course/6-use_gpus.html">Use GPUs</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../getting-started/to-mxnet/index.html">Moving to MXNet from Other Frameworks</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../getting-started/to-mxnet/pytorch.html">PyTorch vs Apache MXNet</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../getting-started/gluon_from_experiment_to_deployment.html">Gluon: from experiment to deployment</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../getting-started/logistic_regression_explained.html">Logistic regression explained</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/image/mnist.html">MNIST</a></li>
</ul>
</li>
<li class="toctree-l2 current"><a class="reference internal" href="../../index.html">Packages</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="../../autograd/index.html">Automatic Differentiation</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="../index.html">Gluon</a><ul class="current">
<li class="toctree-l4"><a class="reference internal" href="../blocks/index.html">Blocks</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../blocks/custom-layer.html">Custom Layers</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/custom_layer_beginners.html">Customer Layers (Beginners)</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/hybridize.html">Hybridize</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/init.html">Initialization</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/naming.html">Parameter and Block Naming</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/nn.html">Layers and Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/parameters.html">Parameter Management</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/save_load_params.html">Saving and Loading Gluon Models</a></li>
<li class="toctree-l5"><a class="reference internal" href="../blocks/activations/activations.html">Activation Blocks</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../data/index.html">Data Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../data/data_augmentation.html">Image Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/data_augmentation.html#Spatial-Augmentation">Spatial Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/data_augmentation.html#Color-Augmentation">Color Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/data_augmentation.html#Composed-Augmentations">Composed Augmentations</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/datasets.html">Gluon <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s and <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/datasets.html#Using-own-data-with-included-Datasets">Using own data with included <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/datasets.html#Using-own-data-with-custom-Datasets">Using own data with custom <code class="docutils literal notranslate"><span class="pre">Dataset</span></code>s</a></li>
<li class="toctree-l5"><a class="reference internal" href="../data/datasets.html#Appendix:-Upgrading-from-Module-DataIter-to-Gluon-DataLoader">Appendix: Upgrading from Module <code class="docutils literal notranslate"><span class="pre">DataIter</span></code> to Gluon <code class="docutils literal notranslate"><span class="pre">DataLoader</span></code></a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../image/index.html">Image Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../image/image-augmentation.html">Image Augmentation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../image/info_gan.html">Image similarity search with InfoGAN</a></li>
<li class="toctree-l5"><a class="reference internal" href="../image/mnist.html">Handwritten Digit Recognition</a></li>
<li class="toctree-l5"><a class="reference internal" href="../image/pretrained_models.html">Using pre-trained models in MXNet</a></li>
</ul>
</li>
<li class="toctree-l4 current"><a class="reference internal" href="index.html">Losses</a><ul class="current">
<li class="toctree-l5 current"><a class="current reference internal" href="#">Custom Loss Blocks</a></li>
<li class="toctree-l5"><a class="reference internal" href="kl_divergence.html">Kullback-Leibler (KL) Divergence</a></li>
<li class="toctree-l5"><a class="reference internal" href="loss.html">Loss functions</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../text/index.html">Text Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../text/gnmt.html">Google Neural Machine Translation</a></li>
<li class="toctree-l5"><a class="reference internal" href="../text/transformer.html">Machine Translation with Transformer</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../training/index.html">Training</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../training/fit_api_tutorial.html">MXNet Gluon Fit API</a></li>
<li class="toctree-l5"><a class="reference internal" href="../training/trainer.html">Trainer</a></li>
<li class="toctree-l5"><a class="reference internal" href="../training/learning_rates/index.html">Learning Rates</a><ul>
<li class="toctree-l6"><a class="reference internal" href="../training/learning_rates/learning_rate_finder.html">Learning Rate Finder</a></li>
<li class="toctree-l6"><a class="reference internal" href="../training/learning_rates/learning_rate_schedules.html">Learning Rate Schedules</a></li>
<li class="toctree-l6"><a class="reference internal" href="../training/learning_rates/learning_rate_schedules_advanced.html">Advanced Learning Rate Schedules</a></li>
</ul>
</li>
<li class="toctree-l5"><a class="reference internal" href="../training/normalization/index.html">Normalization Blocks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../kvstore/index.html">KVStore</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../kvstore/kvstore.html">Distributed Key-Value Store</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../ndarray/index.html">NDArray</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../ndarray/01-ndarray-intro.html">An Intro: Manipulate Data the MXNet Way with NDArray</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../ndarray/02-ndarray-operations.html">NDArray Operations</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../ndarray/03-ndarray-contexts.html">NDArray Contexts</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../ndarray/gotchas_numpy_in_mxnet.html">Gotchas using NumPy in Apache MXNet</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../ndarray/sparse/index.html">Tutorials</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../ndarray/sparse/csr.html">CSRNDArray - NDArray in Compressed Sparse Row Storage Format</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../ndarray/sparse/row_sparse.html">RowSparseNDArray - NDArray for Sparse Gradient Updates</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../ndarray/sparse/train.html">Train a Linear Regression Model with Sparse Symbols</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../ndarray/sparse/train_gluon.html">Sparse NDArrays with Gluon</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../onnx/index.html">ONNX</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../onnx/fine_tuning_gluon.html">Fine-tuning an ONNX model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../onnx/inference_on_onnx_model.html">Running inference on MXNet/Gluon from an ONNX model</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../onnx/super_resolution.html">Importing an ONNX model into MXNet</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/deploy/export/onnx.html">Export ONNX Models</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../optimizer/index.html">Optimizers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../viz/index.html">Visualization</a><ul>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/visualize_graph">Visualize networks</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../performance/index.html">Performance</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../performance/compression/index.html">Compression</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/compression/int8.html">Deploy with int-8</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/float16">Float16</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/faq/gradient_compression">Gradient Compression</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/int8_inference.html">GluonCV with Quantized Models</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../performance/backend/index.html">Accelerated Backend Tools</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/backend/mkldnn/index.html">Intel MKL-DNN</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../performance/backend/mkldnn/mkldnn_quantization.html">Quantize with MKL-DNN backend</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../performance/backend/mkldnn/mkldnn_quantization.html#Improving-accuracy-with-Intel®-Neural-Compressor">Improving accuracy with Intel® Neural Compressor</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../performance/backend/mkldnn/mkldnn_readme.html">Install MXNet with MKL-DNN</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/backend/tensorrt/index.html">TensorRT</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../performance/backend/tensorrt/tensorrt.html">Optimizing Deep Learning Computation Graphs with TensorRT</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/backend/tvm.html">Use TVM</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/backend/profiler.html">Profiling MXNet Models</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../performance/backend/amp.html">Using AMP: Automatic Mixed Precision</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../deploy/index.html">Deployment</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../deploy/export/index.html">Export</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/export/onnx.html">Exporting to ONNX format</a></li>
<li class="toctree-l4"><a class="reference external" href="https://gluon-cv.mxnet.io/build/examples_deployment/export_network.html">Export Gluon CV Models</a></li>
<li class="toctree-l4"><a class="reference external" href="https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/save_load_params.html">Save / Load Parameters</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../deploy/inference/index.html">Inference</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/inference/cpp.html">Deploy into C++</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/inference/image_classification_jetson.html">Image Classication using pretrained ResNet-50 model on Jetson module</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/inference/scala.html">Deploy into a Java or Scala Environment</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/inference/wine_detector.html">Real-time Object Detection with MXNet On The Raspberry Pi</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../deploy/run-on-aws/index.html">Run on AWS</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/run-on-aws/use_ec2.html">Run on an EC2 Instance</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/run-on-aws/use_sagemaker.html">Run on Amazon SageMaker</a></li>
<li class="toctree-l4"><a class="reference internal" href="../../../deploy/run-on-aws/cloud.html">MXNet on the Cloud</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../extend/index.html">Extend</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../extend/custom_layer.html">Custom Layers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../extend/customop.html">Custom Numpy Operators</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/new_op">New Operator Creation</a></li>
<li class="toctree-l3"><a class="reference external" href="https://mxnet.apache.org/api/faq/add_op_in_backend">New Operator in MXNet Backend</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/index.html">Python API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/ndarray/index.html">mxnet.ndarray</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/ndarray.html">ndarray</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/contrib/index.html">ndarray.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/image/index.html">ndarray.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/linalg/index.html">ndarray.linalg</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/op/index.html">ndarray.op</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/random/index.html">ndarray.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/register/index.html">ndarray.register</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/sparse/index.html">ndarray.sparse</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/ndarray/utils/index.html">ndarray.utils</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/gluon/index.html">mxnet.gluon</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/block.html">gluon.Block</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/hybrid_block.html">gluon.HybridBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/symbol_block.html">gluon.SymbolBlock</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/constant.html">gluon.Constant</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/parameter.html">gluon.Parameter</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/parameter_dict.html">gluon.ParameterDict</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/trainer.html">gluon.Trainer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/contrib/index.html">gluon.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/data/index.html">gluon.data</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../../../../api/gluon/data/vision/index.html">data.vision</a><ul>
<li class="toctree-l5"><a class="reference internal" href="../../../../api/gluon/data/vision/datasets/index.html">vision.datasets</a></li>
<li class="toctree-l5"><a class="reference internal" href="../../../../api/gluon/data/vision/transforms/index.html">vision.transforms</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/loss/index.html">gluon.loss</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/model_zoo/index.html">gluon.model_zoo.vision</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/nn/index.html">gluon.nn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/rnn/index.html">gluon.rnn</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/gluon/utils/index.html">gluon.utils</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/autograd/index.html">mxnet.autograd</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/initializer/index.html">mxnet.initializer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/optimizer/index.html">mxnet.optimizer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/lr_scheduler/index.html">mxnet.lr_scheduler</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/metric/index.html">mxnet.metric</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/kvstore/index.html">mxnet.kvstore</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/symbol/index.html">mxnet.symbol</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/symbol.html">symbol</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/contrib/index.html">symbol.contrib</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/image/index.html">symbol.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/linalg/index.html">symbol.linalg</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/op/index.html">symbol.op</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/random/index.html">symbol.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/register/index.html">symbol.register</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/symbol/sparse/index.html">symbol.sparse</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/module/index.html">mxnet.module</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/contrib/index.html">mxnet.contrib</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/autograd/index.html">contrib.autograd</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/io/index.html">contrib.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/ndarray/index.html">contrib.ndarray</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/onnx/index.html">contrib.onnx</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/quantization/index.html">contrib.quantization</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/symbol/index.html">contrib.symbol</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/tensorboard/index.html">contrib.tensorboard</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/tensorrt/index.html">contrib.tensorrt</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/contrib/text/index.html">contrib.text</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/mxnet/index.html">mxnet</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/attribute/index.html">mxnet.attribute</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/base/index.html">mxnet.base</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/callback/index.html">mxnet.callback</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/context/index.html">mxnet.context</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/engine/index.html">mxnet.engine</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/executor/index.html">mxnet.executor</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/executor_manager/index.html">mxnet.executor_manager</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/image/index.html">mxnet.image</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/io/index.html">mxnet.io</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/kvstore_server/index.html">mxnet.kvstore_server</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/libinfo/index.html">mxnet.libinfo</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/log/index.html">mxnet.log</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/model/index.html">mxnet.model</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/monitor/index.html">mxnet.monitor</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/name/index.html">mxnet.name</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/notebook/index.html">mxnet.notebook</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/operator/index.html">mxnet.operator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/profiler/index.html">mxnet.profiler</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/random/index.html">mxnet.random</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/recordio/index.html">mxnet.recordio</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/registry/index.html">mxnet.registry</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/rtc/index.html">mxnet.rtc</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/runtime/index.html">mxnet.runtime</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/test_utils/index.html">mxnet.test_utils</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/torch/index.html">mxnet.torch</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/util/index.html">mxnet.util</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../../../api/mxnet/visualization/index.html">mxnet.visualization</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
</div>
</header>
<div class="document">
<div class="page-content" role="main">
<!--- Licensed to the Apache Software Foundation (ASF) under one --><!--- or more contributor license agreements. See the NOTICE file --><!--- distributed with this work for additional information --><!--- regarding copyright ownership. The ASF licenses this file --><!--- to you under the Apache License, Version 2.0 (the --><!--- "License"); you may not use this file except in compliance --><!--- with the License. You may obtain a copy of the License at --><!--- http://www.apache.org/licenses/LICENSE-2.0 --><!--- Unless required by applicable law or agreed to in writing, --><!--- software distributed under the License is distributed on an --><!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --><!--- KIND, either express or implied. See the License for the --><!--- specific language governing permissions and limitations --><!--- under the License. --><div class="section" id="Custom-Loss-Blocks">
<h1>Custom Loss Blocks<a class="headerlink" href="#Custom-Loss-Blocks" title="Permalink to this headline"></a></h1>
<p>All neural networks need a loss function for training. A loss function is a quantitive measure of how bad the predictions of the network are when compared to ground truth labels. Given this score, a network can improve by iteratively updating its weights to minimise this loss. Some tasks use a combination of multiple loss functions, but often you’ll just use one. MXNet Gluon provides a number of the most commonly used loss functions, and you’ll choose certain functions depending on your network
and task. Some common task and loss function pairs include:</p>
<ul class="simple">
<li><p>Regression: <a class="reference external" href="/api/python/docs/api/gluon/loss/index.html#mxnet.gluon.loss.L1Loss">L1Loss</a>, <a class="reference external" href="/api/python/docs/api/gluon/loss/index.html#mxnet.gluon.loss.L2Loss">L2Loss</a></p></li>
<li><p>Classification: <a class="reference external" href="/api/python/docs/api/gluon/loss/index.html#mxnet.gluon.loss.SigmoidBinaryCrossEntropyLoss">SigmoidBinaryCrossEntropyLoss</a>, <a class="reference external" href="/api/python/docs/api/gluon/loss/index.html#mxnet.gluon.loss.SoftmaxBinaryCrossEntropyLoss">SoftmaxBinaryCrossEntropyLoss</a></p></li>
<li><p>Embeddings: <a class="reference external" href="/api/python/docs/api/gluon/loss/index.html#mxnet.gluon.loss.HingeLoss">HingeLoss</a></p></li>
</ul>
<p>However, we may sometimes want to solve problems that require customized loss functions; this tutorial shows how we can do that in Gluon. We will implement contrastive loss which is typically used in Siamese networks.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
<span class="kn">import</span> <span class="nn">mxnet</span> <span class="k">as</span> <span class="nn">mx</span>
<span class="kn">from</span> <span class="nn">mxnet</span> <span class="kn">import</span> <span class="n">autograd</span><span class="p">,</span> <span class="n">gluon</span><span class="p">,</span> <span class="n">nd</span>
<span class="kn">from</span> <span class="nn">mxnet.gluon.loss</span> <span class="kn">import</span> <span class="n">Loss</span>
<span class="kn">import</span> <span class="nn">random</span>
</pre></div>
</div>
<div class="section" id="What-is-Contrastive-Loss">
<h2>What is Contrastive Loss<a class="headerlink" href="#What-is-Contrastive-Loss" title="Permalink to this headline"></a></h2>
<p><a class="reference external" href="http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf">Contrastive loss</a> is a distance-based loss function. During training, pairs of images are fed into a model. If the images are similar, the loss function will return 0, otherwise 1.</p>
<p><em>Y</em> is a binary label indicating similarity between training images. Contrastive loss uses the Euclidean distance <em>D</em> between images and is the sum of 2 terms: - the loss for a pair of similar points - the loss for a pair of dissimilar points</p>
<p>The loss function uses a margin <em>m</em> which is has the effect that dissimlar pairs only contribute if their loss is within a certain margin.</p>
<p>In order to implement such a customized loss function in Gluon, we only need to define a new class that is inheriting from the <a class="reference external" href="/api/python/docs/api/gluon/loss/index.html#mxnet.gluon.loss.Loss">Loss</a> base class. We then define the contrastive loss logic in the <a class="reference external" href="/api/python/docs/api/gluon/hybrid_block.html#mxnet.gluon.HybridBlock.hybrid_forward">hybrid_forward</a> method. This method takes the images <code class="docutils literal notranslate"><span class="pre">image1</span></code>, <code class="docutils literal notranslate"><span class="pre">image2</span></code> and the label which defines whether <code class="docutils literal notranslate"><span class="pre">image1</span></code> and <code class="docutils literal notranslate"><span class="pre">image2</span></code> are
similar (=0) or dissimilar (=1). The input F is an <code class="docutils literal notranslate"><span class="pre">mxnet.ndarry</span></code> or an <code class="docutils literal notranslate"><span class="pre">mxnet.symbol</span></code> if we hybridize the network. Gluon’s <code class="docutils literal notranslate"><span class="pre">Loss</span></code> base class is in fact a <a class="reference external" href="/api/python/docs/api/gluon/hybrid_block.html">HybridBlock</a>. This means we can either run imperatively or symbolically. When we hybridize our custom loss function, we can get performance speedups.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">ContrastiveLoss</span><span class="p">(</span><span class="n">Loss</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">margin</span><span class="o">=</span><span class="mf">6.</span><span class="p">,</span> <span class="n">weight</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">batch_axis</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">ContrastiveLoss</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">weight</span><span class="p">,</span> <span class="n">batch_axis</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">margin</span> <span class="o">=</span> <span class="n">margin</span>
<span class="k">def</span> <span class="nf">hybrid_forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">F</span><span class="p">,</span> <span class="n">image1</span><span class="p">,</span> <span class="n">image2</span><span class="p">,</span> <span class="n">label</span><span class="p">):</span>
<span class="n">distances</span> <span class="o">=</span> <span class="n">image1</span> <span class="o">-</span> <span class="n">image2</span>
<span class="n">distances_squared</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">square</span><span class="p">(</span><span class="n">distances</span><span class="p">),</span> <span class="mi">1</span><span class="p">,</span> <span class="n">keepdims</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">euclidean_distances</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">distances_squared</span> <span class="o">+</span> <span class="mf">0.0001</span><span class="p">)</span>
<span class="n">d</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">margin</span> <span class="o">-</span> <span class="n">euclidean_distances</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">margin</span><span class="p">)</span>
<span class="n">loss</span> <span class="o">=</span> <span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">label</span><span class="p">)</span> <span class="o">*</span> <span class="n">distances_squared</span> <span class="o">+</span> <span class="n">label</span> <span class="o">*</span> <span class="n">F</span><span class="o">.</span><span class="n">square</span><span class="p">(</span><span class="n">d</span><span class="p">)</span>
<span class="n">loss</span> <span class="o">=</span> <span class="mf">0.5</span><span class="o">*</span><span class="n">loss</span>
<span class="k">return</span> <span class="n">loss</span>
<span class="n">loss</span> <span class="o">=</span> <span class="n">ContrastiveLoss</span><span class="p">(</span><span class="n">margin</span><span class="o">=</span><span class="mf">6.0</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="Define-the-Siamese-network">
<h2>Define the Siamese network<a class="headerlink" href="#Define-the-Siamese-network" title="Permalink to this headline"></a></h2>
<p>A <a class="reference external" href="https://papers.nips.cc/paper/769-signature-verification-using-a-siamese-time-delay-neural-network.pdf">Siamese network</a> consists of 2 identical networks, that share the same weights. They are trained on pairs of images and each network processes one image. The label defines whether the pair of images is similar or not. The Siamese network learns to differentiate between two input images.</p>
<p>Our network consists of 2 convolutional and max pooling layers that downsample the input image. The output is then fed through a fully connected layer with 256 hidden units and another fully connected layer with 2 hidden units.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Siamese</span><span class="p">(</span><span class="n">gluon</span><span class="o">.</span><span class="n">HybridBlock</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">Siamese</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cnn</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">HybridSequential</span><span class="p">()</span>
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">cnn</span><span class="o">.</span><span class="n">name_scope</span><span class="p">():</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cnn</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">gluon</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2D</span><span class="p">(</span><span class="mi">64</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">&#39;relu&#39;</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cnn</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">gluon</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">MaxPool2D</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cnn</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">gluon</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Conv2D</span><span class="p">(</span><span class="mi">64</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">&#39;relu&#39;</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cnn</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">gluon</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">MaxPool2D</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cnn</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">gluon</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">&#39;relu&#39;</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cnn</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">gluon</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Dense</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="n">activation</span><span class="o">=</span><span class="s1">&#39;softrelu&#39;</span><span class="p">))</span>
<span class="k">def</span> <span class="nf">hybrid_forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">F</span><span class="p">,</span> <span class="n">input0</span><span class="p">,</span> <span class="n">input1</span><span class="p">):</span>
<span class="n">out0</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cnn</span><span class="p">(</span><span class="n">input0</span><span class="p">)</span>
<span class="n">out1</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cnn</span><span class="p">(</span><span class="n">input1</span><span class="p">)</span>
<span class="k">return</span> <span class="n">out0</span><span class="p">,</span> <span class="n">out1</span>
</pre></div>
</div>
</div>
<div class="section" id="Prepare-the-training-data">
<h2>Prepare the training data<a class="headerlink" href="#Prepare-the-training-data" title="Permalink to this headline"></a></h2>
<p>We train our network on the <a class="reference external" href="http://www.omniglot.com/">Ominglot</a> dataset which is a collection of 1623 hand drawn characters from 50 alphabets. You can download it from <a class="reference external" href="https://github.com/brendenlake/omniglot/tree/master/python">here</a>. We need to create a dataset that contains a random set of similar and dissimilar images. We use Gluon’s <code class="docutils literal notranslate"><span class="pre">ImageFolderDataset</span></code> where we overwrite <code class="docutils literal notranslate"><span class="pre">__getitem__</span></code> and randomly return similar and dissimilar pairs of images.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">GetImagePairs</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">gluon</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">vision</span><span class="o">.</span><span class="n">ImageFolderDataset</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">root</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">GetImagePairs</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">root</span><span class="p">,</span> <span class="n">flag</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">root</span> <span class="o">=</span> <span class="n">root</span>
<span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
<span class="n">items_with_index</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">items</span><span class="p">))</span>
<span class="n">image0_index</span><span class="p">,</span> <span class="n">image0_tuple</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">items_with_index</span><span class="p">)</span>
<span class="n">should_get_same_class</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="n">should_get_same_class</span><span class="p">:</span>
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="n">image1_index</span><span class="p">,</span> <span class="n">image1_tuple</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">items_with_index</span><span class="p">)</span>
<span class="k">if</span> <span class="n">image0_tuple</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="n">image1_tuple</span><span class="p">[</span><span class="mi">1</span><span class="p">]:</span>
<span class="k">break</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">image1_index</span><span class="p">,</span> <span class="n">image1_tuple</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">items_with_index</span><span class="p">)</span>
<span class="n">image0</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__getitem__</span><span class="p">(</span><span class="n">image0_index</span><span class="p">)</span>
<span class="n">image1</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__getitem__</span><span class="p">(</span><span class="n">image1_index</span><span class="p">)</span>
<span class="n">label</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="nb">int</span><span class="p">(</span><span class="n">image1_tuple</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">!=</span> <span class="n">image0_tuple</span><span class="p">[</span><span class="mi">1</span><span class="p">])])</span>
<span class="k">return</span> <span class="n">image0</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">image1</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">label</span>
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__len__</span><span class="p">()</span>
</pre></div>
</div>
<p>We train the network on a subset of the data, the <a class="reference external" href="https://www.omniglot.com/writing/tifinagh.htm">Tifinagh</a> alphabet. Once the model is trained we test it on the <a class="reference external" href="https://www.omniglot.com/writing/inuktitut.htm">Inuktitut</a> alphabet.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="n">img0</span><span class="p">,</span> <span class="n">img1</span><span class="p">,</span> <span class="n">label</span><span class="p">):</span>
<span class="n">normalized_img0</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="n">img0</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">&#39;float32&#39;</span><span class="p">),</span> <span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span><span class="o">/</span><span class="mf">255.0</span>
<span class="n">normalized_img1</span> <span class="o">=</span> <span class="n">nd</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="n">img1</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">&#39;float32&#39;</span><span class="p">),</span> <span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span><span class="o">/</span><span class="mf">255.0</span>
<span class="k">return</span> <span class="n">normalized_img0</span><span class="p">,</span> <span class="n">normalized_img1</span><span class="p">,</span> <span class="n">label</span>
<span class="n">training_dir</span> <span class="o">=</span> <span class="s2">&quot;images_background/Tifinagh&quot;</span>
<span class="n">testing_dir</span> <span class="o">=</span> <span class="s2">&quot;images_background/Inuktitut_(Canadian_Aboriginal_Syllabics)&quot;</span>
<span class="n">train</span> <span class="o">=</span> <span class="n">GetImagePairs</span><span class="p">(</span><span class="n">training_dir</span><span class="p">)</span>
<span class="n">test</span> <span class="o">=</span> <span class="n">GetImagePairs</span><span class="p">(</span><span class="n">testing_dir</span><span class="p">)</span>
<span class="n">train_dataloader</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">DataLoader</span><span class="p">(</span><span class="n">train</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">transform</span><span class="p">),</span>
<span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="mi">16</span><span class="p">)</span>
<span class="n">test_dataloader</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">DataLoader</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">transform</span><span class="p">),</span>
<span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
</pre></div>
</div>
<p>Following code plots some examples from the test dataset.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">img1</span><span class="p">,</span> <span class="n">img2</span><span class="p">,</span> <span class="n">label</span> <span class="o">=</span> <span class="n">test</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Same: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">label</span><span class="o">.</span><span class="n">asscalar</span><span class="p">())</span> <span class="o">==</span> <span class="mi">0</span><span class="p">))</span>
<span class="n">fig</span><span class="p">,</span> <span class="p">(</span><span class="n">ax0</span><span class="p">,</span> <span class="n">ax1</span><span class="p">)</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="n">ncols</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">5</span><span class="p">))</span>
<span class="n">ax0</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">img1</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">()[:,:,</span><span class="mi">0</span><span class="p">],</span> <span class="n">cmap</span><span class="o">=</span><span class="s1">&#39;gray&#39;</span><span class="p">)</span>
<span class="n">ax0</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s1">&#39;off&#39;</span><span class="p">)</span>
<span class="n">ax1</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">img2</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">()[:,:,</span><span class="mi">0</span><span class="p">],</span> <span class="n">cmap</span><span class="o">=</span><span class="s1">&#39;gray&#39;</span><span class="p">)</span>
<span class="n">ax1</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s2">&quot;off&quot;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
</pre></div>
</div>
<img alt="example1" src="tutorials/packages/gluon/loss/images/inuktitut_1.png" />
</div>
<div class="section" id="Train-the-Siamese-network">
<h2>Train the Siamese network<a class="headerlink" href="#Train-the-Siamese-network" title="Permalink to this headline"></a></h2>
<p>Before we can start training, we need to instantiate the custom constrastive loss function and initialize the model.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">model</span> <span class="o">=</span> <span class="n">Siamese</span><span class="p">()</span>
<span class="n">model</span><span class="o">.</span><span class="n">initialize</span><span class="p">(</span><span class="n">init</span><span class="o">=</span><span class="n">mx</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">Xavier</span><span class="p">())</span>
<span class="n">trainer</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">Trainer</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">collect_params</span><span class="p">(),</span> <span class="s1">&#39;adam&#39;</span><span class="p">,</span> <span class="p">{</span><span class="s1">&#39;learning_rate&#39;</span><span class="p">:</span> <span class="mf">0.001</span><span class="p">})</span>
<span class="n">loss</span> <span class="o">=</span> <span class="n">ContrastiveLoss</span><span class="p">(</span><span class="n">margin</span><span class="o">=</span><span class="mf">6.0</span><span class="p">)</span>
</pre></div>
</div>
<p>Start the training loop:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">data</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">train_dataloader</span><span class="p">):</span>
<span class="n">image1</span><span class="p">,</span> <span class="n">image2</span><span class="p">,</span> <span class="n">label</span> <span class="o">=</span> <span class="n">data</span>
<span class="k">with</span> <span class="n">autograd</span><span class="o">.</span><span class="n">record</span><span class="p">():</span>
<span class="n">output1</span><span class="p">,</span> <span class="n">output2</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="n">image1</span><span class="p">,</span> <span class="n">image2</span><span class="p">)</span>
<span class="n">loss_contrastive</span> <span class="o">=</span> <span class="n">loss</span><span class="p">(</span><span class="n">output1</span><span class="p">,</span> <span class="n">output2</span><span class="p">,</span> <span class="n">label</span><span class="p">)</span>
<span class="n">loss_contrastive</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="n">trainer</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">image1</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="n">loss_mean</span> <span class="o">=</span> <span class="n">loss_contrastive</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span><span class="o">.</span><span class="n">asscalar</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Epoch number </span><span class="si">{}</span><span class="se">\n</span><span class="s2"> Current loss </span><span class="si">{}</span><span class="se">\n</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">epoch</span><span class="p">,</span> <span class="n">loss_mean</span><span class="p">))</span>
</pre></div>
</div>
</div>
<div class="section" id="Test-the-trained-Siamese-network">
<h2>Test the trained Siamese network<a class="headerlink" href="#Test-the-trained-Siamese-network" title="Permalink to this headline"></a></h2>
<p>During inference we compute the Euclidean distance between the output vectors of the Siamese network. High distance indicates dissimilarity, low values indicate similarity.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">data</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">test_dataloader</span><span class="p">):</span>
<span class="n">img1</span><span class="p">,</span> <span class="n">img2</span><span class="p">,</span> <span class="n">label</span> <span class="o">=</span> <span class="n">data</span>
<span class="n">output1</span><span class="p">,</span> <span class="n">output2</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="n">img1</span><span class="p">,</span> <span class="n">img2</span><span class="p">)</span>
<span class="n">dist_sq</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">ndarray</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">mx</span><span class="o">.</span><span class="n">ndarray</span><span class="o">.</span><span class="n">square</span><span class="p">(</span><span class="n">output1</span> <span class="o">-</span> <span class="n">output2</span><span class="p">))</span>
<span class="n">dist</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">ndarray</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">dist_sq</span><span class="p">)</span><span class="o">.</span><span class="n">asscalar</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Euclidean Distance:&quot;</span><span class="p">,</span> <span class="n">dist</span><span class="p">,</span> <span class="s2">&quot;Test label&quot;</span><span class="p">,</span> <span class="n">label</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">asscalar</span><span class="p">())</span>
<span class="n">fig</span><span class="p">,</span> <span class="p">(</span><span class="n">ax0</span><span class="p">,</span> <span class="n">ax1</span><span class="p">)</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="n">ncols</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">5</span><span class="p">))</span>
<span class="n">ax0</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">img1</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">()[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="p">:,</span> <span class="p">:],</span> <span class="n">cmap</span><span class="o">=</span><span class="s1">&#39;gray&#39;</span><span class="p">)</span>
<span class="n">ax0</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s1">&#39;off&#39;</span><span class="p">)</span>
<span class="n">ax1</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">img2</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">()[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="p">:,</span> <span class="p">:],</span> <span class="n">cmap</span><span class="o">=</span><span class="s1">&#39;gray&#39;</span><span class="p">)</span>
<span class="n">ax1</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s2">&quot;off&quot;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
</pre></div>
</div>
<img alt="example2" src="tutorials/packages/gluon/loss/images/inuktitut_2.png" />
</div>
<div class="section" id="Common-pitfalls-with-custom-loss-functions">
<h2>Common pitfalls with custom loss functions<a class="headerlink" href="#Common-pitfalls-with-custom-loss-functions" title="Permalink to this headline"></a></h2>
<p>When customizing loss functions, we may encounter certain pitfalls. If the loss is not decreasing as expected or if forward/backward pass is crashing, then one should check the following:</p>
<div class="section" id="Activation-function-in-the-last-layer">
<h3>Activation function in the last layer<a class="headerlink" href="#Activation-function-in-the-last-layer" title="Permalink to this headline"></a></h3>
<p>Verify whether the last network layer uses the correct activation function: for instance in binary classification tasks we need to apply a sigmoid on the output data. If we use this activation in the last layer and define a loss function like Gluon’s SigmoidBinaryCrossEntropy, we would basically apply sigmoid twice and the loss would not converge as expected. If we don’t define any activation function, Gluon will per default apply a linear activation.</p>
</div>
<div class="section" id="Intermediate-loss-values">
<h3>Intermediate loss values<a class="headerlink" href="#Intermediate-loss-values" title="Permalink to this headline"></a></h3>
<p>In our example, we computed the square root of squared distances between 2 images: <code class="docutils literal notranslate"><span class="pre">F.sqrt(distances_squared)</span></code>. If images are very similar we take the sqare root of a value close to 0, which can lead to <em>NaN</em> values. Adding a small epsilon to <code class="docutils literal notranslate"><span class="pre">distances_squared</span></code> avoids this problem.</p>
</div>
<div class="section" id="Shape-of-intermediate-loss-vectors">
<h3>Shape of intermediate loss vectors<a class="headerlink" href="#Shape-of-intermediate-loss-vectors" title="Permalink to this headline"></a></h3>
<p>In most cases having the wrong tensor shape will lead to an error, as soon as we compare data with labels. But in some cases, we may be able to normally run the training, but it does not converge. For instance, if we don’t set <code class="docutils literal notranslate"><span class="pre">keepdims=True</span></code> in our customized loss function, the shape of the tensor changes. The example still runs fine but does not converge.</p>
<p>If you encounter a similar problem, then it is useful to check the tensor shape after each computation step in the loss function.</p>
</div>
<div class="section" id="Differentiable">
<h3>Differentiable<a class="headerlink" href="#Differentiable" title="Permalink to this headline"></a></h3>
<p>Backprogration requires the loss function to be differentiable. If the customized loss function cannot be differentiated the backward pass will crash.</p>
</div>
</div>
</div>
<hr class="feedback-hr-top" />
<div class="feedback-container">
<div class="feedback-question">Did this page help you?</div>
<div class="feedback-answer-container">
<div class="feedback-answer yes-link" data-response="yes">Yes</div>
<div class="feedback-answer no-link" data-response="no">No</div>
</div>
<div class="feedback-thank-you">Thanks for your feedback!</div>
</div>
<hr class="feedback-hr-bottom" />
</div>
<div class="side-doc-outline">
<div class="side-doc-outline--content">
<div class="localtoc">
<p class="caption">
<span class="caption-text">Table Of Contents</span>
</p>
<ul>
<li><a class="reference internal" href="#">Custom Loss Blocks</a><ul>
<li><a class="reference internal" href="#What-is-Contrastive-Loss">What is Contrastive Loss</a></li>
<li><a class="reference internal" href="#Define-the-Siamese-network">Define the Siamese network</a></li>
<li><a class="reference internal" href="#Prepare-the-training-data">Prepare the training data</a></li>
<li><a class="reference internal" href="#Train-the-Siamese-network">Train the Siamese network</a></li>
<li><a class="reference internal" href="#Test-the-trained-Siamese-network">Test the trained Siamese network</a></li>
<li><a class="reference internal" href="#Common-pitfalls-with-custom-loss-functions">Common pitfalls with custom loss functions</a><ul>
<li><a class="reference internal" href="#Activation-function-in-the-last-layer">Activation function in the last layer</a></li>
<li><a class="reference internal" href="#Intermediate-loss-values">Intermediate loss values</a></li>
<li><a class="reference internal" href="#Shape-of-intermediate-loss-vectors">Shape of intermediate loss vectors</a></li>
<li><a class="reference internal" href="#Differentiable">Differentiable</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
</div>
<div class="clearer"></div>
</div><div class="pagenation">
<a id="button-prev" href="index.html" class="mdl-button mdl-js-button mdl-js-ripple-effect mdl-button--colored" role="botton" accesskey="P">
<i class="pagenation-arrow-L fas fa-arrow-left fa-lg"></i>
<div class="pagenation-text">
<span class="pagenation-direction">Previous</span>
<div>Losses</div>
</div>
</a>
<a id="button-next" href="kl_divergence.html" class="mdl-button mdl-js-button mdl-js-ripple-effect mdl-button--colored" role="botton" accesskey="N">
<i class="pagenation-arrow-R fas fa-arrow-right fa-lg"></i>
<div class="pagenation-text">
<span class="pagenation-direction">Next</span>
<div>Kullback-Leibler (KL) Divergence</div>
</div>
</a>
</div>
<footer class="site-footer h-card">
<div class="wrapper">
<div class="row">
<div class="col-4">
<h4 class="footer-category-title">Resources</h4>
<ul class="contact-list">
<li><a class="u-email" href="mailto:dev@mxnet.apache.org">Dev list</a></li>
<li><a class="u-email" href="mailto:user@mxnet.apache.org">User mailing list</a></li>
<li><a href="https://cwiki.apache.org/confluence/display/MXNET/Apache+MXNet+Home">Developer Wiki</a></li>
<li><a href="https://issues.apache.org/jira/projects/MXNET/issues">Jira Tracker</a></li>
<li><a href="https://github.com/apache/mxnet/labels/Roadmap">Github Roadmap</a></li>
<li><a href="https://medium.com/apache-mxnet">Blog</a></li>
<li><a href="https://discuss.mxnet.io">Forum</a></li>
<li><a href="/community/contribute">Contribute</a></li>
</ul>
</div>
<div class="col-4"><ul class="social-media-list"><li><a href="https://github.com/apache/mxnet"><svg class="svg-icon"><use xlink:href="../../../../_static/minima-social-icons.svg#github"></use></svg> <span class="username">apache/mxnet</span></a></li><li><a href="https://www.twitter.com/apachemxnet"><svg class="svg-icon"><use xlink:href="../../../../_static/minima-social-icons.svg#twitter"></use></svg> <span class="username">apachemxnet</span></a></li><li><a href="https://youtube.com/apachemxnet"><svg class="svg-icon"><use xlink:href="../../../../_static/minima-social-icons.svg#youtube"></use></svg> <span class="username">apachemxnet</span></a></li></ul>
</div>
<div class="col-4 footer-text">
<p>A flexible and efficient library for deep learning.</p>
</div>
</div>
</div>
</footer>
<footer class="site-footer2">
<div class="wrapper">
<div class="row">
<div class="col-3">
<img src="../../../../_static/apache_incubator_logo.png" class="footer-logo col-2">
</div>
<div class="footer-bottom-warning col-9">
<p>Apache MXNet is an effort undergoing incubation at <a href="http://www.apache.org/">The Apache Software Foundation</a> (ASF), <span style="font-weight:bold">sponsored by the <i>Apache Incubator</i></span>. Incubation is required
of all newly accepted projects until a further review indicates that the infrastructure,
communications, and decision making process have stabilized in a manner consistent with other
successful ASF projects. While incubation status is not necessarily a reflection of the completeness
or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
</p><p>"Copyright © 2017-2018, The Apache Software Foundation Apache MXNet, MXNet, Apache, the Apache
feather, and the Apache MXNet project logo are either registered trademarks or trademarks of the
Apache Software Foundation."</p>
</div>
</div>
</div>
</footer>
</body>
</html>