/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* Copyright (c) 2015 by Contributors
* \file loss_binary_op-inl.h
* \brief Loss functions
*/
#ifndef MXNET_OPERATOR_LOSS_BINARY_OP_INL_H_
#define MXNET_OPERATOR_LOSS_BINARY_OP_INL_H_
#include <mxnet/operator_util.h>
#include <vector>
#include "./mshadow_op.h"
#include "./elemwise_op_common.h"
namespace mxnet {
namespace op {
// shape inference: 2-D data and 1-D label in, a scalar (1-element) loss shape out
inline bool SoftmaxCrossEntropyShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector *in_attrs,
mxnet::ShapeVector *out_attrs) {
  CHECK_EQ((*in_attrs)[0].ndim(), 2U)
      << "SoftmaxCrossEntropy only accepts 2D data";
  CHECK_EQ((*in_attrs)[1].ndim(), 1U)
      << "SoftmaxCrossEntropy only accepts 1D label";
  CHECK_EQ((*in_attrs)[0][0], (*in_attrs)[1][0])
      << "SoftmaxCrossEntropy: data and label batch sizes must match";
SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, 1));
return true;
}
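// Forward: out = sum_i -log(max(softmax(data_i)[label_i], 1e-8)).
// inputs[0] is the 2-D data (batch x num_classes), inputs[1] the 1-D label;
// the per-example losses are summed over the batch into the scalar output.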
template<typename xpu>
void SoftmaxCrossEntropyForward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mshadow::expr;
mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
  CHECK_EQ(outputs[0].type_flag_, inputs[0].type_flag_)
      << "Binary function only supports input/output with the same type";
  CHECK_EQ(outputs[0].type_flag_, inputs[1].type_flag_)
      << "Binary function only supports input/output with the same type";
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
mshadow::Tensor<xpu, 1, DType> out = outputs[0].get<xpu, 1, DType>(s);
mshadow::Tensor<xpu, 1, DType> mlabel = inputs[1].get<xpu, 1, DType>(s);
mshadow::Tensor<xpu, 2, DType> mdata = inputs[0].get<xpu, 2, DType>(s);
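    // one workspace request split in two: the first mdata.shape_.Size()
    // elements hold the softmax probabilities (temp1), the next
    // mlabel.size(0) elements the per-example losses (temp2)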
mshadow::Tensor<xpu, 1, DType> workspace = ctx.requested[0].get_space_typed<xpu, 1, DType>(
mshadow::Shape1(mdata.shape_.Size() + mlabel.size(0)), s);
mshadow::Tensor<xpu, 2, DType> temp1(workspace.dptr_, mdata.shape_, s);
mshadow::Tensor<xpu, 2, DType> temp2(workspace.dptr_ + mdata.shape_.Size(),
mshadow::Shape2(1, mlabel.size(0)), s);
    // compute softmax(data) into temp1
    // TODO(tqchen): change to SoftmaxLog later
    mshadow::Softmax(temp1, mdata);
    // pick the probability of the true class in each row, clamp it away
    // from zero to avoid log(0), and negate the log
mshadow::Tensor<xpu, 1, DType> tdst = temp2[0];
tdst = F<mshadow_op::negation>(
F<mshadow_op::log>(
F<mshadow_op::maximum>(mat_choose_row_element(temp1, mlabel),
scalar<DType>(1e-8f))));
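    // reduce the per-example losses over the batch into the scalar output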
ASSIGN_DISPATCH(out, req[0], sumall_except_dim<0>(temp2));
});
}
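// Backward: d(loss)/d(data_i) = scale * (softmax(data_i) - one_hot(label_i)).
// inputs[0] is the incoming output gradient (the scalar scale), inputs[1]
// the data, inputs[2] the label; only the data gradient is produced, since
// the loss is not differentiable w.r.t. the integer label.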
template<typename xpu>
void SoftmaxCrossEntropyBackward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mshadow::expr;
mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
CHECK_EQ(req[1], kNullOp)
<< "SoftmaxCrossEntropy: Cannot take gradient wrt label";
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
mshadow::Tensor<xpu, 1, DType> mlabel = inputs[2].get<xpu, 1, DType>(s);
mshadow::Tensor<xpu, 2, DType> mdata = inputs[1].get<xpu, 2, DType>(s);
mshadow::Tensor<xpu, 2, DType> mdata_grad = outputs[0].get<xpu, 2, DType>(s);
mshadow::Tensor<xpu, 1, DType> mscale = inputs[0].get<xpu, 1, DType>(s);
mshadow::Tensor<xpu, 2, DType> temp = ctx.requested[0].get_space_typed<xpu, 2, DType>(
mdata.shape_, s);
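    // recompute softmax(data); the forward pass does not retain it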
mshadow::Softmax(temp, mdata);
mshadow::SoftmaxGrad(temp, temp, mlabel);
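    // SoftmaxGrad leaves softmax(data) - one_hot(label) in temp; scale it by
    // the incoming scalar gradient, broadcast to the data shape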
ASSIGN_DISPATCH(mdata_grad, req[0], broadcast_scalar(mscale, temp.shape_) * temp);
});
}
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_LOSS_BINARY_OP_INL_H_