/*!
* Copyright (c) 2015 by Contributors
* \file torch_criterion-inl.h
* \brief torch criterion operator
* \author Min Lin
*/
#ifndef PLUGIN_TORCH_TORCH_CRITERION_INL_H_
#define PLUGIN_TORCH_TORCH_CRITERION_INL_H_
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <mxnet/operator.h>
#include <stdio.h>
#include <cstring>
#include <map>
#include <string>
#include <vector>
#include <utility>
#include "../../src/operator/operator_common.h"
#include "./torch_base.h"
namespace mxnet {
namespace op {
struct TorchCriterionParam : public dmlc::Parameter<TorchCriterionParam> {
std::string lua_string;
TShape label_shape;
float grad_scale;
DMLC_DECLARE_PARAMETER(TorchCriterionParam) {
DMLC_DECLARE_FIELD(lua_string)
.describe("lua string that is called to generate the torch criterion object");
DMLC_DECLARE_FIELD(label_shape)
.set_default(TShape())
.enforce_nonzero()
.describe("Shape of label (without batch size).");
DMLC_DECLARE_FIELD(grad_scale)
.set_default(1.0f)
.describe("Scale the gradient by a float factor (a.k.a weight of this loss).");
}
};
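// Illustrative settings (a sketch, not taken from this file): lua_string holds a
// Lua expression that constructs a torch criterion, e.g. "nn.MSECriterion()";
// label_shape is the per-example label shape, e.g. (1,) for scalar targets; and
// grad_scale weights this loss relative to other losses in the network.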
/**
* \brief Implementation of the Torch criterion operator.
* \tparam xpu The device that the op will be executed on.
*/
template<typename xpu>
class TorchCriterionOp : public Operator {
private:
TorchCriterionParam param_;
TorchState* torchState_;
int lua_reference_;
public:
explicit TorchCriterionOp(TorchCriterionParam p) {
this->param_ = p;
this->torchState_ = new TorchState();
lua_State *L = torchState_->L;
CHECK_EQ(lua_gettop(L), 0);
std::string exec = std::string("return ") + p.lua_string
+ TorchTensor::ModuleType(xpu::kDevMask);
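// Illustrative example: with lua_string = "nn.MSECriterion()", exec becomes
// "return nn.MSECriterion()" plus the device conversion suffix supplied by
// TorchTensor::ModuleType (e.g. ":cuda()" when running on GPU), so the chunk
// loaded below yields a criterion object on the right device.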
CHECK_EQ(luaL_loadstring(L, exec.c_str()), 0);
int err = lua_pcall(L, 0, 1, 0);
CHECK_EQ(err, 0) << lua_tostring(L, -1);
// keep a reference to the criterion object in the Lua registry for reuse in Forward/Backward
this->lua_reference_ = lua_ref(L, LUA_REGISTRYINDEX);
}
~TorchCriterionOp() {
delete this->torchState_;
}
virtual void Forward(const OpContext &ctx,
const std::vector<TBlob> &in_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &out_data,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
lua_State *L = torchState_->L;
CHECK_EQ(lua_gettop(L), 0);
CHECK_EQ(in_data.size(), 2);
CHECK_EQ(out_data.size(), 1);
Stream<xpu> *s = ctx.get_stream<xpu>();
torchState_->SetStream(s);
lua_rawgeti(L, LUA_REGISTRYINDEX, lua_reference_);
// call forward
// | self
lua_getfield(L, -1, "forward");
// | self | forward
lua_pushvalue(L, -2);
// | self | forward | self
for (index_t i = 0; i < in_data.size(); ++i) {
THGeneralTensor th = TorchTensor::TBlobToTHTensor(torchState_, in_data[i]);
luaT_pushudata(L, th, TorchTensor::TensorType(in_data[i]));
}
// | self | forward | self | pred | label
int err = lua_pcall(L, 3, 1, 0);
CHECK_EQ(err, 0) << lua_tostring(L, -1);
CHECK(lua_isnumber(L, -1)) << "Criterion must return a number";
real_t loss = static_cast<real_t>(lua_tonumber(L, -1));
lua_pop(L, 1);
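// Broadcast the scalar loss, scaled by grad_scale, into the per-example output tensor.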
Tensor<xpu, 2> out = out_data[0].FlatTo2D<xpu, real_t>(s);
Assign(out, req[0], loss*param_.grad_scale);
lua_pop(L, 1);
CHECK_EQ(lua_gettop(L), 0);
}
virtual void Backward(const OpContext &ctx,
const std::vector<TBlob> &out_grad,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &in_grad,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
lua_State *L = torchState_->L;
CHECK_EQ(lua_gettop(L), 0);
CHECK_EQ(in_data.size(), 2);
CHECK_EQ(out_data.size(), 1);
CHECK_EQ(req[0], kWriteTo) << "Torch Criterion only supports write to in_grad";
CHECK_EQ(req[1], kNullOp) << "Torch Criterion cannot back prop to label";
Stream<xpu> *s = ctx.get_stream<xpu>();
torchState_->SetStream(s);
lua_rawgeti(L, LUA_REGISTRYINDEX, lua_reference_);
THGeneralTensor th = TorchTensor::TBlobToTHTensor(torchState_, in_grad[0]);
luaT_pushudata(L, th, TorchTensor::TensorType(in_grad[0]));
lua_setfield(L, -2, "gradInput");
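// The TH tensor above shares memory with in_grad[0]; presetting it as
// self.gradInput lets the criterion's backward write directly into MXNet's
// gradient buffer (assuming the criterion reuses self.gradInput, as torch nn
// criteria typically do).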
lua_getfield(L, -1, "backward");
// | self | backward
lua_pushvalue(L, -2);
// | self | backward | self
for (index_t i = 0; i < in_data.size(); ++i) {
th = TorchTensor::TBlobToTHTensor(torchState_, in_data[i]);
luaT_pushudata(L, th, TorchTensor::TensorType(in_data[i]));
}
// | self | backward | self | pred | label
int err = lua_pcall(L, 3, 0, 0);
CHECK_EQ(err, 0) << lua_tostring(L, -1);
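// Torch criteria typically average over the batch (sizeAverage), so the gradient
// is rescaled by the batch size here in addition to grad_scale.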
Tensor<xpu, 2> grad = in_grad[0].FlatTo2D<xpu, real_t>(s);
grad *= param_.grad_scale * in_grad[0].shape_[0];
lua_pop(L, 1);
CHECK_EQ(lua_gettop(L), 0);
}
}; // class TorchCriterionOp
// Declare factory function, used for dispatch specialization
template<typename xpu>
Operator* CreateOp(TorchCriterionParam type);
#if DMLC_USE_CXX11
class TorchCriterionProp : public OperatorProperty {
public:
std::vector<std::string> ListArguments() const override {
return {"data", "label"};
}
std::vector<std::string> ListOutputs() const override {
return {"output"};
}
void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
param_.Init(kwargs);
}
std::map<std::string, std::string> GetParams() const override {
return param_.__DICT__();
}
bool InferShape(std::vector<TShape> *in_shape,
std::vector<TShape> *out_shape,
std::vector<TShape> *aux_shape) const override {
using namespace mshadow;
CHECK_EQ(in_shape->size(), 2);
const TShape &dshape = in_shape->at(0);
if (dshape.ndim() == 0) return false;
std::vector<index_t> lshape;
lshape.push_back(dshape[0]);
lshape.insert(lshape.end(), param_.label_shape.data(),
param_.label_shape.data() + param_.label_shape.ndim());
TShape shape(lshape.begin(), lshape.end());
SHAPE_ASSIGN_CHECK(*in_shape, 1, shape);
out_shape->clear();
out_shape->push_back(Shape1(dshape[0]));
return true;
}
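// Shape example (illustrative): with data of shape (batch, n) and
// label_shape = (k,), the label is inferred as (batch, k) and the output,
// which Forward fills with the scalar loss, as (batch,).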
OperatorProperty* Copy() const override {
auto ptr = new TorchCriterionProp();
ptr->param_ = param_;
return ptr;
}
std::string TypeString() const override {
return "TorchCriterion";
}
// declare dependency and inplace optimization options
std::vector<int> DeclareBackwardDependency(
const std::vector<int> &out_grad,
const std::vector<int> &in_data,
const std::vector<int> &out_data) const override {
std::vector<int> dep;
dep.insert(dep.end(), in_data.begin(), in_data.end());
// Ensure that the backward and forward cannot be called at the same time
dep.insert(dep.end(), out_data.begin(), out_data.end());
return dep;
}
Operator* CreateOperator(Context ctx) const override;
private:
TorchCriterionParam param_;
};
#endif // DMLC_USE_CXX11
} // namespace op
} // namespace mxnet
#endif // PLUGIN_TORCH_TORCH_CRITERION_INL_H_