/*!
* Copyright (c) 2015 by Contributors
* \file softmax_activation-inl.h
* \brief SoftmaxActivation operator
* \author Junyuan Xie
*/
#ifndef MXNET_OPERATOR_SOFTMAX_ACTIVATION_INL_H_
#define MXNET_OPERATOR_SOFTMAX_ACTIVATION_INL_H_
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <mxnet/operator.h>
#include <cstring>
#include <map>
#include <string>
#include <vector>
#include <utility>
#include "./operator_common.h"
namespace mxnet {
namespace op {
// Declare enumeration of input order to make code more intuitive.
// These enums are only visible within this header.
namespace softmax_activation {
enum SoftmaxActivationOpInputs {kData};
enum SoftmaxActivationOpOutputs {kOut};
enum SoftmaxActivationOpType {kInstance, kChannel};
enum SoftmaxActivationOpResource {kTempSpace};
} // namespace softmax_activation
struct SoftmaxActivationParam : public dmlc::Parameter<SoftmaxActivationParam> {
// use int for enumeration
int mode;
DMLC_DECLARE_PARAMETER(SoftmaxActivationParam) {
DMLC_DECLARE_FIELD(mode)
.add_enum("instance", softmax_activation::kInstance)
.add_enum("channel", softmax_activation::kChannel)
.set_default(softmax_activation::kInstance)
.describe("Specifies how to compute the softmax. If set to ``instance``, "
"it computes softmax for each instance. If set to ``channel``, "
"It computes cross channel softmax for each position of each instance.");
}
};
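// A brief sketch of the math behind the two modes (both implemented in Forward below):
// - instance: the input is flattened to (batch, features) and a softmax is taken over
//   the feature axis of every instance, i.e. out[i][j] = exp(x[i][j]) / sum_k exp(x[i][k]).
// - channel:  the input is viewed as (batch, channel, rest) and the same softmax is
//   taken over the channel axis independently at every remaining position.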
/**
* \brief This is the implementation of the softmax_activation operator.
* \tparam xpu The device that the op will be executed on.
*/
template<typename xpu>
class SoftmaxActivationOp : public Operator {
public:
explicit SoftmaxActivationOp(SoftmaxActivationParam p) {
this->param_ = p;
}
virtual void Forward(const OpContext &ctx,
const std::vector<TBlob> &in_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &out_data,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(in_data.size(), 1U);
CHECK_EQ(out_data.size(), 1U);
Stream<xpu> *s = ctx.get_stream<xpu>();
if (param_.mode == softmax_activation::kInstance) {
Tensor<xpu, 2> data = in_data[softmax_activation::kData].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> out = out_data[softmax_activation::kOut].FlatTo2D<xpu, real_t>(s);
Softmax(out, data);
} else {
CHECK_GE(in_data[softmax_activation::kData].ndim(), 3)
<< "Input need to have a least 3 dimensions when mode=channel";
int n = in_data[softmax_activation::kData].size(0);
int k = in_data[softmax_activation::kData].size(1);
Shape<3> s3 = Shape3(n, k, static_cast<int>(in_data[softmax_activation::kData].Size()/n/k));
Tensor<xpu, 3, real_t> data =
in_data[softmax_activation::kData].get_with_shape<xpu, 3, real_t>(s3, s);
Tensor<xpu, 3, real_t> out =
out_data[softmax_activation::kOut].get_with_shape<xpu, 3, real_t>(s3, s);
Softmax(out, data);
}
}
virtual void Backward(const OpContext &ctx,
const std::vector<TBlob> &out_grad,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &in_grad,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(out_grad.size(), 1U);
CHECK(in_data.size() == 1 && in_grad.size() == 1);
CHECK_EQ(req.size(), 1U);
// Use a 3D tensor for both modes (instance, channel). Get shapes
int total_size = in_grad[softmax_activation::kData].Size();
int batch_size = in_grad[softmax_activation::kData].shape_[0];
int channel_num = in_grad[softmax_activation::kData].shape_[1];
int rest_size = total_size / (batch_size * channel_num);
const Shape<3> data_shape = Shape3(batch_size, channel_num, rest_size);
// Get tensors
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 3> m_out_grad =
out_grad[softmax_activation::kOut].get_with_shape<xpu, 3, real_t>(data_shape, s);
Tensor<xpu, 3> m_out_data =
out_data[softmax_activation::kOut].get_with_shape<xpu, 3, real_t>(data_shape, s);
Tensor<xpu, 3> m_in_grad =
in_grad[softmax_activation::kData].get_with_shape<xpu, 3, real_t>(data_shape, s);
// Get requested temp space
Tensor<xpu, 2> workspace = ctx.requested[softmax_activation::kTempSpace].get_space<xpu>(
Shape2(batch_size, rest_size), s);
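// A sketch of the math implemented below: for y = softmax(x) taken along the channel
// axis, the chain rule gives dL/dx_c = y_c * (dL/dy_c - sum_k dL/dy_k * y_k). The
// per-position sum_k dL/dy_k * y_k is reduced into `workspace` first, then broadcast
// back over the channel axis in the Assign expression.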
workspace = reduce_with_axis<red::sum, false>(m_out_grad * m_out_data, 1);
Assign(m_in_grad, req[softmax_activation::kData],
m_out_data * (m_out_grad - broadcast_with_axis(workspace, 0, channel_num)));
}
private:
SoftmaxActivationParam param_;
}; // class SoftmaxActivationOp
// Declare factory function, used for dispatch specialization
template<typename xpu>
Operator* CreateOp(SoftmaxActivationParam type);
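// The definition is provided per device type (typically in the corresponding
// .cc/.cu source files), so this header only carries the declaration.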
#if DMLC_USE_CXX11
class SoftmaxActivationProp : public OperatorProperty {
public:
void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
param_.Init(kwargs);
}
std::map<std::string, std::string> GetParams() const override {
return param_.__DICT__();
}
bool InferShape(std::vector<TShape> *in_shape,
std::vector<TShape> *out_shape,
std::vector<TShape> *aux_shape) const override {
using namespace mshadow;
CHECK_EQ(in_shape->size(), 1U) << "Input:[data]";
const TShape &dshape = in_shape->at(softmax_activation::kData);
if (dshape.ndim() == 0) return false;
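// The output keeps the exact shape of the input; only the values are normalized.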
out_shape->clear();
out_shape->push_back(dshape);
return true;
}
OperatorProperty* Copy() const override {
auto ptr = new SoftmaxActivationProp();
ptr->param_ = param_;
return ptr;
}
std::string TypeString() const override {
return "SoftmaxActivation";
}
// Declare dependency and in-place optimization options
std::vector<int> DeclareBackwardDependency(
const std::vector<int> &out_grad,
const std::vector<int> &in_data,
const std::vector<int> &out_data) const override {
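// The softmax gradient can be expressed purely in terms of the output and the
// output gradient (see Backward above), so the input data is not required here.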
return {out_grad[softmax_activation::kOut], out_data[softmax_activation::kOut]};
}
std::vector<ResourceRequest> BackwardResource(
const std::vector<TShape> &in_shape) const override {
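// Backward requests temporary space for the per-position reduction stored in `workspace`.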
return {ResourceRequest::kTempSpace};
}
std::vector<std::pair<int, void*> > BackwardInplaceOption(
const std::vector<int> &out_grad,
const std::vector<int> &in_data,
const std::vector<int> &out_data,
const std::vector<void*> &in_grad) const override {
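// in_grad may reuse the out_grad buffer: the reduction over out_grad is materialized
// into the temp workspace before the element-wise write to in_grad.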
return {{out_grad[softmax_activation::kOut], in_grad[softmax_activation::kData]}};
}
std::vector<std::pair<int, void*> > ForwardInplaceOption(
const std::vector<int> &in_data,
const std::vector<void*> &out_data) const override {
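// The softmax can be computed in place, so the output may share the input buffer.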
return {{in_data[softmax_activation::kData], out_data[softmax_activation::kOut]}};
}
Operator* CreateOperator(Context ctx) const override;
private:
SoftmaxActivationParam param_;
};
#endif // DMLC_USE_CXX11
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_SOFTMAX_ACTIVATION_INL_H_