/*!
* Copyright (c) 2015 by Contributors
* \file make_loss-inl.h
* \brief special layer for propagating loss
*/
#ifndef MXNET_OPERATOR_MAKE_LOSS_INL_H_
#define MXNET_OPERATOR_MAKE_LOSS_INL_H_
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <mxnet/operator.h>
#include <cstring>
#include <map>
#include <string>
#include <vector>
#include <utility>
#include "./mshadow_op.h"
#include "./operator_common.h"
namespace mxnet {
namespace op {
namespace make_loss_enum {
enum MakeLossOpInputs {kData};
enum MakeLossOpOutputs {kOut};
enum MakeLossOpType {kNull, kBatch, kValid};
enum MakeLossOpResource {kTempSpace};
} // namespace make_loss_enum
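
// Index constants: kData/kOut address the input and output blob vectors,
// kTempSpace addresses the requested resources, and MakeLossOpType
// enumerates the supported gradient normalization modes.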
struct MakeLossParam : public dmlc::Parameter<MakeLossParam> {
  float grad_scale;
  int normalization;
  float valid_thresh;
  DMLC_DECLARE_PARAMETER(MakeLossParam) {
    DMLC_DECLARE_FIELD(grad_scale).set_default(1.0f)
    .describe("Gradient scale as a supplement to unary and binary operators");
    DMLC_DECLARE_FIELD(valid_thresh).set_default(0.0f)
    .describe("Regard an element as valid when it is greater than valid_thresh. "
              "This is used only in the 'valid' normalization mode.");
    DMLC_DECLARE_FIELD(normalization)
    .add_enum("null", make_loss_enum::kNull)
    .add_enum("batch", make_loss_enum::kBatch)
    .add_enum("valid", make_loss_enum::kValid)
    .set_default(make_loss_enum::kNull)
    .describe("If set to 'null', the output gradient is not normalized. "
              "If set to 'batch', the output gradient is divided by the batch size. "
              "If set to 'valid', the output gradient is divided by the number of "
              "elements greater than valid_thresh.");
  }
};
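
// Illustrative front-end usage (not part of this header): from the Python
// symbol API the fields above map directly onto keyword arguments, e.g.
//   loss = mx.sym.abs(pred - label)   # 'pred'/'label' are placeholder symbols
//   out  = mx.sym.MakeLoss(loss, grad_scale=1.0,
//                          normalization='valid', valid_thresh=0.0)
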
template<typename xpu, typename DType>
class MakeLossOp : public Operator {
 public:
  explicit MakeLossOp(MakeLossParam param) : param_(param) {}
  virtual void Forward(const OpContext &ctx,
                       const std::vector<TBlob> &in_data,
                       const std::vector<OpReqType> &req,
                       const std::vector<TBlob> &out_data,
                       const std::vector<TBlob> &aux_args) {
    using namespace mshadow;
    using namespace mshadow::expr;
    CHECK_EQ(in_data.size(), 1U) << "MakeLoss can only be used with one input";
    CHECK_EQ(out_data.size(), 1U);
    if (req[make_loss_enum::kOut] != kWriteInplace) {
      // Forward is an identity mapping; copy only when the output does not
      // share memory with the input.
      Stream<xpu> *s = ctx.get_stream<xpu>();
      Tensor<xpu, 2, DType> data = in_data[make_loss_enum::kData].FlatTo2D<xpu, DType>(s);
      Tensor<xpu, 2, DType> out = out_data[make_loss_enum::kOut].FlatTo2D<xpu, DType>(s);
      Assign(out, req[make_loss_enum::kOut], F<mshadow_op::identity>(data));
    }
  }
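
  // Backward ignores out_grad and fills in_grad with a constant whose value
  // depends on param_.normalization:
  //   'null'  -> grad_scale
  //   'batch' -> grad_scale / batch_size
  //   'valid' -> grad_scale / count of elements greater than valid_thresh,
  //              with the count clamped to at least one to avoid division by
  //              zero; it is accumulated in a one-element temp-space tensor
  //              and broadcast back to the gradient shape.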
  virtual void Backward(const OpContext &ctx,
                        const std::vector<TBlob> &out_grad,
                        const std::vector<TBlob> &in_data,
                        const std::vector<TBlob> &out_data,
                        const std::vector<OpReqType> &req,
                        const std::vector<TBlob> &in_grad,
                        const std::vector<TBlob> &aux_args) {
    using namespace mshadow;
    using namespace mshadow::expr;
    Stream<xpu> *s = ctx.get_stream<xpu>();
    Tensor<xpu, 2, DType> grad = in_grad[make_loss_enum::kData].FlatTo2D<xpu, DType>(s);
    if (param_.normalization == make_loss_enum::kValid) {
      Tensor<xpu, 2, DType> data = in_data[make_loss_enum::kData].FlatTo2D<xpu, DType>(s);
      Tensor<xpu, 1, DType> temp = ctx.requested[make_loss_enum::kTempSpace]
        .get_space_typed<xpu, 1, DType>(mshadow::Shape1(1), s);
      temp = sumall_except_dim<0>(reduce_keepdim<red::sum, false>(
        F<mshadow_op::threshold>(ScalarExp<DType>(param_.valid_thresh), data), 0));
      temp = F<mshadow_op::maximum>(ScalarExp<DType>(1.f), temp);  // avoid zero
      Assign(grad, req[make_loss_enum::kData],
             ScalarExp<DType>(param_.grad_scale) / broadcast<0>(
               broadcast_keepdim(temp, 0, grad.shape_[0]), grad.shape_));
    } else if (param_.normalization == make_loss_enum::kBatch) {
      Assign(grad, req[make_loss_enum::kData],
             ScalarExp<DType>(param_.grad_scale / grad.shape_[0]));
    } else {
      Assign(grad, req[make_loss_enum::kData], ScalarExp<DType>(param_.grad_scale));
    }
  }
 private:
  MakeLossParam param_;
};  // class MakeLossOp
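
// CreateOp is declared below and presumably defined in the corresponding
// .cc/.cu translation units, where MakeLossOp<xpu, DType> is instantiated for
// the requested device and data type (assumption based on the usual MXNet
// operator layout; not shown in this header).
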
template <typename xpu>
Operator *CreateOp(MakeLossParam param, int dtype);
#if DMLC_USE_CXX11
class MakeLossProp : public OperatorProperty {
 public:
  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
    param_.Init(kwargs);
  }

  std::map<std::string, std::string> GetParams() const override {
    return param_.__DICT__();
  }
  bool InferShape(std::vector<TShape> *in_shape,
                  std::vector<TShape> *out_shape,
                  std::vector<TShape> *aux_shape) const override {
    using namespace mshadow;
    CHECK_EQ(in_shape->size(), 1U);
    const TShape &dshape = in_shape->at(make_loss_enum::kData);
    if (dshape.ndim() == 0) return false;
    out_shape->clear();
    out_shape->push_back(dshape);
    return true;
  }
  bool InferType(std::vector<int> *in_type,
                 std::vector<int> *out_type,
                 std::vector<int> *aux_type) const override {
    CHECK_EQ(in_type->size(), 1U);
    int dtype = (*in_type)[0];
    CHECK_NE(dtype, -1) << "Input must have specified type";
    out_type->clear();
    out_type->push_back(dtype);
    return true;
  }
  OperatorProperty* Copy() const override {
    auto ptr = new MakeLossProp();
    ptr->param_ = param_;
    return ptr;
  }

  std::string TypeString() const override {
    return "MakeLoss";
  }
  std::vector<int> DeclareBackwardDependency(
    const std::vector<int> &out_grad,
    const std::vector<int> &in_data,
    const std::vector<int> &out_data) const override {
    if (param_.normalization == make_loss_enum::kValid) {
      // The input data is needed in backward only to count the valid elements.
      return {in_data[make_loss_enum::kData]};
    }
    return {};
  }

  std::vector<ResourceRequest> BackwardResource(
      const std::vector<TShape> &in_shape) const override {
    if (param_.normalization == make_loss_enum::kValid) {
      return {ResourceRequest::kTempSpace};
    }
    return {};
  }
  std::vector<std::pair<int, void*> > ForwardInplaceOption(
    const std::vector<int> &in_data,
    const std::vector<void*> &out_data) const override {
    return {{in_data[make_loss_enum::kData], out_data[make_loss_enum::kOut]}};
  }

  Operator* CreateOperator(Context ctx) const override {
    LOG(FATAL) << "Not Implemented";
    return NULL;
  }
  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
                             std::vector<int> *in_type) const override;

 private:
  MakeLossParam param_;
};  // class MakeLossProp
#endif // DMLC_USE_CXX11
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_MAKE_LOSS_INL_H_