src/operator/batch_norm.cc - mxnet-test - Git at Google

 /*!
  * Copyright (c) 2015 by Contributors
  * \file batch_norm.cc
  * \brief CPU operator creation and registration for the BatchNorm operator
  * \author Bing Xu
 */

 #include "./batch_norm-inl.h"
 #include <nnvm/op_attr_types.h>
 #if MXNET_USE_MKL2017 == 1
 #include <mkl_memory.h>
 #include "./mkl/mkl_memory-inl.h"
 #include "./mkl/mkl_batch_norm-inl.h"
 #endif  // MXNET_USE_MKL2017

 namespace mxnet {
 namespace op {
 /*!
  * \brief Create the CPU implementation of the BatchNorm operator.
  *
  * When MXNET_USE_MKL2017 is 1 the MKL-backed operator is returned, otherwise
  * the generic BatchNormOp is returned. The two returns are mutually exclusive
  * at preprocessing time so no build contains an unreachable statement.
  *
  * \param param operator parameters, forwarded to the operator constructor
  * \param dtype data type requested by the caller; not consulted by either
  *        implementation here
  * \return operator allocated with new (presumably owned by the caller)
  */
 template<>
 Operator *CreateOp<cpu>(BatchNormParam param, int dtype) {
 #if MXNET_USE_MKL2017 == 1
   // NOTE(review): instantiated for float regardless of dtype - confirm intended
   return new MKLBatchNormOp<cpu, float>(param);
 #else
   return new BatchNormOp<cpu>(param);
 #endif
 }

 /*!
  * \brief Create the operator after checking type and shape inference.
  *
  * Runs InferType and then InferShape on the supplied inputs, wrapping each
  * call in CHECK, and finally dispatches to CreateOp with the stored
  * parameters and the type of the first input.
  *
  * \param ctx context supplied by the caller; not referenced by name in this
  *        body (NOTE(review): presumably consumed by DO_BIND_DISPATCH - confirm)
  * \param in_shape input shapes, passed to InferShape
  * \param in_type input types, passed to InferType; element 0 is forwarded
  *        to CreateOp as dtype
  * \return no explicit return statement appears here; presumably the
  *         DO_BIND_DISPATCH expansion returns the created operator
  */
 // DO_BIND_DISPATCH comes from operator_common.h
 Operator *BatchNormProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
     std::vector<int> *in_type) const {
     // Scratch outputs required by the inference calls; not used afterwards.
     std::vector<TShape> out_shape, aux_shape;
     std::vector<int> out_type, aux_type;
     CHECK(InferType(in_type, &out_type, &aux_type));
     CHECK(InferShape(in_shape, &out_shape, &aux_shape));
     DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
 }

 // Register the BatchNormParam parameter structure.
 DMLC_REGISTER_PARAMETER(BatchNormParam);

 // Register the BatchNorm operator property. The raw string below is the
 // user-facing documentation (reStructuredText); it is followed by the three
 // named inputs (data, gamma, beta) and the fields declared by BatchNormParam.
 MXNET_REGISTER_OP_PROPERTY(BatchNorm, BatchNormProp)
 .describe(R"code(Batch normalization.

 Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
 well as offset ``beta``.

 Assume the input has more than one dimension and we normalize along axis 1.
 We first compute the mean and variance along this axis:

 .. math::

   data\_mean[i] = mean(data[:,i,:,...]) \\
   data\_var[i] = var(data[:,i,:,...])

 Then compute the normalized output, which has the same shape as input, as following:

 .. math::

   out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

 Both *mean* and *var* returns a scalar by treating the input as a vector.

 Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
 have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
 ``data_var`` as well, which are needed for the backward pass.

 Besides the inputs and the outputs, this operator accepts two auxiliary
 states, ``moving_mean`` and ``moving_var``, which are *k*-length
 vectors. They are global statistics for the whole dataset, which are updated
 by::

   moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
   moving_var = moving_var * momentum + data_var * (1 - momentum)

 If ``use_global_stats`` is set to be true, then ``moving_mean`` and
 ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
 the output. It is often used during inference.

 Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is true,
 then set ``gamma`` to 1 and its gradient to 0.

 )code" ADD_FILELINE)
 .add_argument("data", "ndarray-or-symbol", "Input data to batch normalization")
 .add_argument("gamma", "ndarray-or-symbol", "gamma array")
 .add_argument("beta", "ndarray-or-symbol", "beta array")
 .add_arguments(BatchNormParam::__FIELDS__());

 // Supplies a default "__init__" attribute for input variables created while
 // composing a BatchNorm node. An "__init__" already present on the variable
 // is left untouched. Only input positions 3 and 4 receive a default
 // (presumably moving_mean and moving_var - confirm against the input order).
 NNVM_REGISTER_OP(BatchNorm)
 .set_attr<nnvm::FSetInputVarAttrOnCompose>("FSetInputVarAttrOnCompose",
     [](const nnvm::NodeAttrs& attrs, nnvm::NodePtr var, const int index) {
       auto& var_dict = var->attrs.dict;
       // Respect an initializer that was set explicitly.
       if (var_dict.count("__init__") != 0) return;
       switch (index) {
         case 3:
           var_dict["__init__"] = "[\"zero\", {}]";
           break;
         case 4:
           var_dict["__init__"] = "[\"one\", {}]";
           break;
         default:
           break;
       }
     });

 }  // namespace op
 }  // namespace mxnet
	/*!
	* Copyright (c) 2015 by Contributors
	* \file batch_norm.cc
	* \brief CPU operator creation and registration for the BatchNorm operator
	* \author Bing Xu
	*/

	#include "./batch_norm-inl.h"
	#include <nnvm/op_attr_types.h>
	#if MXNET_USE_MKL2017 == 1
	#include <mkl_memory.h>
	#include "./mkl/mkl_memory-inl.h"
	#include "./mkl/mkl_batch_norm-inl.h"
	#endif // MXNET_USE_MKL2017

	namespace mxnet {
	namespace op {
	/*!
	 * \brief Create the CPU implementation of the BatchNorm operator.
	 *
	 * When MXNET_USE_MKL2017 is 1 the MKL-backed operator is returned, otherwise
	 * the generic BatchNormOp is returned. The two returns are mutually exclusive
	 * at preprocessing time so no build contains an unreachable statement.
	 *
	 * \param param operator parameters, forwarded to the operator constructor
	 * \param dtype data type requested by the caller; not consulted by either
	 *        implementation here
	 * \return operator allocated with new (presumably owned by the caller)
	 */
	template<>
	Operator *CreateOp<cpu>(BatchNormParam param, int dtype) {
	#if MXNET_USE_MKL2017 == 1
	  // NOTE(review): instantiated for float regardless of dtype - confirm intended
	  return new MKLBatchNormOp<cpu, float>(param);
	#else
	  return new BatchNormOp<cpu>(param);
	#endif
	}

	/*!
	 * \brief Create the operator after checking type and shape inference.
	 *
	 * Runs InferType and then InferShape on the supplied inputs, wrapping each
	 * call in CHECK, and finally dispatches to CreateOp with the stored
	 * parameters and the type of the first input.
	 *
	 * The return type and in_shape are pointers: in_shape is handed straight to
	 * InferShape alongside the addresses of the local output vectors, and
	 * CreateOp produces an Operator pointer.
	 *
	 * \param ctx context supplied by the caller; not referenced by name in this
	 *        body (NOTE(review): presumably consumed by DO_BIND_DISPATCH - confirm)
	 * \param in_shape input shapes, passed to InferShape
	 * \param in_type input types, passed to InferType; element 0 is forwarded
	 *        to CreateOp as dtype
	 * \return no explicit return statement appears here; presumably the
	 *         DO_BIND_DISPATCH expansion returns the created operator
	 */
	// DO_BIND_DISPATCH comes from operator_common.h
	Operator *BatchNormProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
	    std::vector<int> *in_type) const {
	  // Scratch outputs required by the inference calls; not used afterwards.
	  std::vector<TShape> out_shape, aux_shape;
	  std::vector<int> out_type, aux_type;
	  CHECK(InferType(in_type, &out_type, &aux_type));
	  CHECK(InferShape(in_shape, &out_shape, &aux_shape));
	  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
	}

	// Register the BatchNormParam parameter structure.
	DMLC_REGISTER_PARAMETER(BatchNormParam);

	// Register the BatchNorm operator property. The raw string below is the
	// user-facing documentation (reStructuredText); it is followed by the three
	// named inputs (data, gamma, beta) and the fields declared by BatchNormParam.
	// The bodies of the ".. math::" and "::" blocks are indented relative to
	// their markers and emphasis is written with *...*, as reST requires.
	MXNET_REGISTER_OP_PROPERTY(BatchNorm, BatchNormProp)
	.describe(R"code(Batch normalization.

	Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
	well as offset ``beta``.

	Assume the input has more than one dimension and we normalize along axis 1.
	We first compute the mean and variance along this axis:

	.. math::

	  data\_mean[i] = mean(data[:,i,:,...]) \\
	  data\_var[i] = var(data[:,i,:,...])

	Then compute the normalized output, which has the same shape as input, as following:

	.. math::

	  out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

	Both *mean* and *var* returns a scalar by treating the input as a vector.

	Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
	have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
	``data_var`` as well, which are needed for the backward pass.

	Besides the inputs and the outputs, this operator accepts two auxiliary
	states, ``moving_mean`` and ``moving_var``, which are *k*-length
	vectors. They are global statistics for the whole dataset, which are updated
	by::

	  moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
	  moving_var = moving_var * momentum + data_var * (1 - momentum)

	If ``use_global_stats`` is set to be true, then ``moving_mean`` and
	``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
	the output. It is often used during inference.

	Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is true,
	then set ``gamma`` to 1 and its gradient to 0.

	)code" ADD_FILELINE)
	.add_argument("data", "ndarray-or-symbol", "Input data to batch normalization")
	.add_argument("gamma", "ndarray-or-symbol", "gamma array")
	.add_argument("beta", "ndarray-or-symbol", "beta array")
	.add_arguments(BatchNormParam::__FIELDS__());

	// Supplies a default "__init__" attribute for input variables created while
	// composing a BatchNorm node. An "__init__" already present on the variable
	// is left untouched. Only input positions 3 and 4 receive a default
	// (presumably moving_mean and moving_var - confirm against the input order).
	NNVM_REGISTER_OP(BatchNorm)
	.set_attr<nnvm::FSetInputVarAttrOnCompose>("FSetInputVarAttrOnCompose",
	    [](const nnvm::NodeAttrs& attrs, nnvm::NodePtr var, const int index) {
	      auto& var_dict = var->attrs.dict;
	      // Respect an initializer that was set explicitly.
	      if (var_dict.count("__init__") != 0) return;
	      switch (index) {
	        case 3:
	          var_dict["__init__"] = "[\"zero\", {}]";
	          break;
	        case 4:
	          var_dict["__init__"] = "[\"one\", {}]";
	          break;
	        default:
	          break;
	      }
	    });

	} // namespace op
	} // namespace mxnet