/*!
* Copyright (c) 2015 by Contributors
* \file softmax_output.cc
 * \brief Softmax output operator, CPU implementation
* \author Bing Xu
*/
#include "./softmax_output-inl.h"
namespace mxnet {
namespace op {
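// CreateOp instantiates the CPU implementation for the requested dtype.
// MSHADOW_REAL_TYPE_SWITCH expands to a switch over the supported real
// dtypes (e.g. float32/float64/float16), binding DType to the matching
// C++ type in each branch.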
template<>
Operator *CreateOp<cpu>(SoftmaxOutputParam param, int dtype) {
  Operator *op = NULL;
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    op = new SoftmaxOutputOp<cpu, DType>(param);
  })
  return op;
}
// DO_BIND_DISPATCH comes from operator_common.h
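// CreateOperatorEx first runs type and shape inference, then dispatches to
// the device-specific CreateOp using the inferred input dtype.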
Operator *SoftmaxOutputProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
                                              std::vector<int> *in_type) const {
  std::vector<TShape> out_shape, aux_shape;
  std::vector<int> out_type, aux_type;
  CHECK(InferType(in_type, &out_type, &aux_type));
  CHECK(InferShape(in_shape, &out_shape, &aux_shape));
  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
}
DMLC_REGISTER_PARAMETER(SoftmaxOutputParam);
MXNET_REGISTER_OP_PROPERTY(SoftmaxOutput, SoftmaxOutputProp)
.describe(R"code(Softmax with logit loss.
In the forward pass, the softmax output is returned. Assume the input data has
shape *(n,k)*, then the output will have the same shape as the input, which is computed by
.. math::
out[i,:] = softmax(data[i,:])
for :math:`i=0,...,n-1`, where
.. math::
softmax(x) = \left[..., \frac{exp(x[j])}{exp(x[0])+...+exp(x[k-1])}, ...\right]
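
As an illustrative sketch, the forward output for a *(2,3)* input is the
row-wise softmax (the label only affects the backward pass; values rounded)::

  data = [[1, 2, 3],
          [3, 2, 1]]

  softmax(data) = [[0.09003057, 0.24472847, 0.66524096],
                   [0.66524096, 0.24472847, 0.09003057]]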

For a general *N*-D input array with shape :math:`(d_1, ..., d_n)`, denote its
size by :math:`s=d_1 d_2 \cdots d_n`. The way softmax is computed varies with the
following options (a concrete shape example follows the list):

- ``preserve_shape`` is false (default): reshape the input into a 2-D array with
  shape :math:`(d_1, s/d_1)` before computing the softmax, and then reshape the
  result back to the original shape.

- ``preserve_shape`` is true: for all :math:`i_1, ..., i_{n-1}`, compute

  .. math::
     out[i_1, ..., i_{n-1}, :] = softmax(data[i_1, ..., i_{n-1}, :])

- ``multi_output`` is true: for all :math:`i_1, ..., i_{n-1}`, compute

  .. math::
     out[i_1, :, ..., i_{n-1}] = softmax(data[i_1, :, ..., i_{n-1}])
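
For example, with ``data`` of shape :math:`(2, 3, 4)` (so :math:`s = 24`):

- default: the input is reshaped to :math:`(2, 12)` and softmax is computed over
  each length-12 row;
- ``preserve_shape`` is true: softmax is computed over the trailing axis, i.e.
  over :math:`2 \times 3 = 6` vectors of length 4;
- ``multi_output`` is true: softmax is computed along axis 1, i.e. over
  :math:`2 \times 4 = 8` vectors of length 3.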

In the backward pass, the logit loss, also called cross-entropy loss, is
added. The provided label can be an *(N-1)*-D array of label indices or an
*N*-D array of label probabilities. For example, for 2-D data of shape *(n, k)*,
the label can have shape *(n,)* with class indices in :math:`[0, k)`, or shape
*(n, k)* with per-class probabilities.

Examples with a particular label can be ignored during the backward pass by
specifying ``ignore_label`` (which also requires ``use_ignore`` to be true).

The gradient can be scaled by ``grad_scale``, which is often used in multi-loss
objective functions to give each loss a different weight. The gradient can also
be normalized in several ways via ``normalization``:

- **null**: do nothing.
- **batch**: divide by the batch size (the number of examples).
- **valid**: divide by the number of examples that are not ignored.
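
A minimal usage sketch through the Python symbol API is shown below; the
variable names are illustrative, and ``mx`` is assumed to be the imported
``mxnet`` package::

  import mxnet as mx

  data = mx.sym.Variable('data')
  label = mx.sym.Variable('label')
  # Ignore examples labeled 0 and normalize the gradient over the
  # remaining (valid) examples.
  out = mx.sym.SoftmaxOutput(data=data, label=label,
                             use_ignore=True, ignore_label=0,
                             normalization='valid')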
)code" ADD_FILELINE)
.add_argument("data", "ndarray-or-symbol", "Input data.")
.add_argument("label", "ndarray-or-symbol", "Ground truth label.")
.add_arguments(SoftmaxOutputParam::__FIELDS__());
MXNET_REGISTER_OP_PROPERTY(Softmax, DeprecatedSoftmaxProp)
.describe("DEPRECATED: Perform a softmax transformation on input. Please use SoftmaxOutput")
.add_argument("data", "ndarray-or-symbol", "Input data to softmax.")
.add_arguments(SoftmaxOutputParam::__FIELDS__());
} // namespace op
} // namespace mxnet