// src/operator/numpy/np_true_divide-inl.h - mxnet - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 /*!
  * \file np_true_divide-inl.h
  * \brief Function definitions of true_divide operator
  */

 #ifndef MXNET_OPERATOR_NUMPY_NP_TRUE_DIVIDE_INL_H_
 #define MXNET_OPERATOR_NUMPY_NP_TRUE_DIVIDE_INL_H_

 #include <vector>
 #include "../../common/utils.h"
 #include "../tensor/elemwise_binary_broadcast_op.h"
 #include "../numpy/np_elemwise_broadcast_op.h"

 namespace mxnet {
 namespace op {

 /*!
  * \brief Compute function for true division between a tensor and a scalar.
  *
  * The scalar is read from the node's NumpyBinaryScalarParam. When input and
  * output share a dtype the kernel runs directly on that (real) type; otherwise
  * the input is dispatched as an integer type and the output must be float32
  * or float64.
  *
  * \tparam xpu device type used to select the stream and the kernel launcher
  * \tparam OP  element-wise operator, wrapped in op_with_req and launched with
  *             the input data and the scalar
  * \param attrs   node attributes; attrs.parsed holds a NumpyBinaryScalarParam
  * \param ctx     operator context providing the stream
  * \param inputs  exactly one input tensor
  * \param req     write request for the output; kNullOp makes this a no-op
  * \param outputs exactly one output tensor
  */
 template <typename xpu, typename OP>
 void TrueDivideScalarCompute(const nnvm::NodeAttrs& attrs,
                              const OpContext& ctx,
                              const std::vector<TBlob>& inputs,
                              const std::vector<OpReqType>& req,
                              const std::vector<TBlob>& outputs) {
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 1U);
   // Nothing to write: no output requested, or the output has no elements.
   if (req[0] == kNullOp || outputs[0].Size() == 0U) {
     return;
   }
   using namespace mshadow;
   using namespace mxnet_op;
   using namespace mshadow::expr;
   Stream<xpu>* stream = ctx.get_stream<xpu>();
   const double scalar = nnvm::get<NumpyBinaryScalarParam>(attrs.parsed).scalar;
   const TBlob& src    = inputs[0];
   const TBlob& dst    = outputs[0];

   if (dst.type_flag_ != src.type_flag_) {
     // Mixed case: integer input promoted to a floating-point output.
     CHECK(dst.type_flag_ == mshadow::kFloat32 || dst.type_flag_ == mshadow::kFloat64)
         << "true_divide only supports float32 and float64"
            " output when input's dtype is "
         << type_string(src.type_flag_);
     MSHADOW_REAL_TYPE_SWITCH(dst.type_flag_, OutT, {
       MXNET_INT_TYPE_SWITCH(src.type_flag_, InT, {
         MXNET_ASSIGN_REQ_SWITCH(req[0], ReqKind, {
           Kernel<op_with_req<OP, ReqKind>, xpu>::Launch(
               stream, src.Size(), dst.dptr<OutT>(), src.dptr<InT>(), static_cast<OutT>(scalar));
         });
       });
     });
     return;
   }

   // Input and output share one real dtype: run the kernel on that type.
   MSHADOW_REAL_TYPE_SWITCH(dst.type_flag_, ValT, {
     MXNET_ASSIGN_REQ_SWITCH(req[0], ReqKind, {
       Kernel<op_with_req<OP, ReqKind>, xpu>::Launch(
           stream, src.Size(), dst.dptr<ValT>(), src.dptr<ValT>(), ValT(scalar));
     });
   });
 }

 /*!
  * \brief Element-wise true division of two tensors; no broadcasting is done here.
  *
  * Dispatches on the input dtypes:
  *  - same float type: divides directly, output has that type;
  *  - same integer type: output is written as float (see NOTE in the body);
  *  - two different float types: the input whose type differs from the output is
  *    first cast into temporary space taken from ctx.requested[0], then divided;
  *  - one float and one integer: output has the float input's type;
  *  - two different integer types: not implemented, aborts via LOG(FATAL).
  *
  * \tparam xpu device type used to select the stream and the kernel launcher
  * \param attrs   node attributes, forwarded to CastCompute
  * \param ctx     operator context providing the stream and requested resources
  * \param inputs  exactly two tensors: lhs (dividend) and rhs (divisor)
  * \param req     write request for the output; kNullOp makes this a no-op
  * \param outputs exactly one tensor receiving lhs / rhs
  */
 template <typename xpu>
 void TrueDivideElemwiseCompute(const nnvm::NodeAttrs& attrs,
                                const OpContext& ctx,
                                const std::vector<TBlob>& inputs,
                                const std::vector<OpReqType>& req,
                                const std::vector<TBlob>& outputs) {
   using namespace mxnet_op;
   // Validate arity before any element of inputs/outputs is indexed.
   CHECK_EQ(inputs.size(), 2U);
   CHECK_EQ(outputs.size(), 1U);
   if (req[0] == kNullOp || outputs[0].Size() == 0U)
     return;
   Stream<xpu>* s = ctx.get_stream<xpu>();

   const TBlob& lhs = inputs[0];
   const TBlob& rhs = inputs[1];
   const TBlob& out = outputs[0];
   if (lhs.type_flag_ == rhs.type_flag_) {
     // Case when types of the 2 input tensors are the same
     if (common::is_float(lhs.type_flag_)) {
       // If both are the same floats, normal launch
       MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
         MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, DType, {
           Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
               s, out.Size(), out.dptr<DType>(), lhs.dptr<DType>(), rhs.dptr<DType>());
         });
       });
     } else {
       // If both are the same integers, output is float32 or float64
       CHECK_EQ(out.type_flag_, mxnet::common::GetDefaultDtype())
           << "true_divide only supports float32 and float64"
              " output when input's dtype is "
           << type_string(lhs.type_flag_);
       // NOTE(review): the output is accessed as float regardless of the dtype checked
       // above; if GetDefaultDtype() can be float64 this needs an output-type dispatch
       // (and matching kernel overloads) -- confirm.
       MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
         MXNET_INT_TYPE_SWITCH(lhs.type_flag_, DType, {
           Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
               s, out.Size(), out.dptr<float>(), lhs.dptr<DType>(), rhs.dptr<DType>());
         });
       });
     }
   } else {
     // Case when types of the 2 input tensors are different
     if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) {
       // both lhs and rhs are float types, output type is the more precise one
       TBlob temp_tblob;
       if (lhs.type_flag_ == out.type_flag_) {
         // lhs already has the output type: cast rhs into temporary space.
         MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
           Tensor<xpu, 1, LType> temp_tensor =
               ctx.requested[0].get_space_typed<xpu, 1, LType>(Shape1(rhs.Size()), s);
           temp_tblob = TBlob(temp_tensor);
         });
         CastCompute<xpu>(attrs, ctx, {rhs}, {kWriteTo}, {temp_tblob});
         MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
           MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, DType, {
             Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
                 s, out.Size(), out.dptr<DType>(), lhs.dptr<DType>(), temp_tblob.dptr<DType>());
           });
         });
       } else {
         // Otherwise rhs is taken to have the output type: cast lhs into temporary space.
         MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
           Tensor<xpu, 1, RType> temp_tensor =
               ctx.requested[0].get_space_typed<xpu, 1, RType>(Shape1(lhs.Size()), s);
           temp_tblob = TBlob(temp_tensor);
         });
         CastCompute<xpu>(attrs, ctx, {lhs}, {kWriteTo}, {temp_tblob});
         MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
           MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, DType, {
             Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
                 s, out.Size(), out.dptr<DType>(), temp_tblob.dptr<DType>(), rhs.dptr<DType>());
           });
         });
       }
     } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) {
       // one is float type, the other is integer type, the output type should be the same as float
       CHECK_EQ(out.type_flag_, common::is_float(lhs.type_flag_) ? lhs.type_flag_ : rhs.type_flag_)
           << "This case out type should be same as the float type";
       if (common::is_float(lhs.type_flag_)) {
         // lhs is the float one
         // Operands are passed swapped (rhs, lhs) together with rtrue_divide; presumably
         // rtrue_divide divides its second argument by its first so the result is still
         // lhs / rhs -- confirm in mshadow_op.
         MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
           MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
             MXNET_INT_TYPE_SWITCH(rhs.type_flag_, RType, {
               Kernel<op_with_req<mshadow_op::rtrue_divide, Req>, xpu>::Launch(
                   s, out.Size(), out.dptr<LType>(), rhs.dptr<RType>(), lhs.dptr<LType>());
             });
           });
         });
       } else {
         // rhs is the float one
         MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
           MXNET_INT_TYPE_SWITCH(lhs.type_flag_, LType, {
             MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
               Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
                   s, out.Size(), out.dptr<RType>(), lhs.dptr<LType>(), rhs.dptr<RType>());
             });
           });
         });
       }
     } else {
       // lhs is integer type, rhs is integer type, output type should be float
       LOG(FATAL) << "not implemented yet...";
     }
   }
 }

 /*!
  * \brief True division of two tensors with broadcasting.
  *
  * Compacts the input/output shapes with BinaryBroadcastShapeCompact. When that
  * returns 0 (presumably meaning no broadcasting is required -- confirm) the work
  * is delegated to TrueDivideElemwiseCompute; otherwise a binary_broadcast_kernel
  * is launched, dispatching on the input dtypes in the same way as the
  * element-wise variant:
  *  - same float type: output has that type;
  *  - same integer type: output is written as float (see NOTE in the body);
  *  - two different float types: the input whose type differs from the output is
  *    first cast into temporary space taken from ctx.requested[0];
  *  - one float and one integer: output has the float input's type;
  *  - two different integer types: not implemented, aborts via LOG(FATAL).
  *
  * \tparam xpu device type used to select the stream and the kernel launcher
  * \param attrs   node attributes, forwarded to CastCompute / the element-wise path
  * \param ctx     operator context providing the stream and requested resources
  * \param inputs  exactly two tensors: lhs (dividend) and rhs (divisor)
  * \param req     write request for the output; kNullOp makes this a no-op
  * \param outputs exactly one tensor receiving the broadcast lhs / rhs
  */
 template <typename xpu>
 void TrueDivideBroadcastCompute(const nnvm::NodeAttrs& attrs,
                                 const OpContext& ctx,
                                 const std::vector<TBlob>& inputs,
                                 const std::vector<OpReqType>& req,
                                 const std::vector<TBlob>& outputs) {
   using namespace mxnet_op;
   // Validate arity before any element of inputs/outputs is indexed.
   CHECK_EQ(inputs.size(), 2U);
   CHECK_EQ(outputs.size(), 1U);
   if (outputs[0].shape_.Size() == 0U)
     return;
   mxnet::TShape new_lshape, new_rshape, new_oshape;
   int ndim = BinaryBroadcastShapeCompact(
       inputs[0].shape_, inputs[1].shape_, outputs[0].shape_, &new_lshape, &new_rshape, &new_oshape);
   if (!ndim) {
     TrueDivideElemwiseCompute<xpu>(attrs, ctx, inputs, req, outputs);
   } else {
     if (req[0] == kNullOp)
       return;
     mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
     const TBlob& lhs        = inputs[0];
     const TBlob& rhs        = inputs[1];
     const TBlob& out        = outputs[0];
     BROADCAST_NDIM_SWITCH(ndim, NDim, {
       mshadow::Shape<NDim> oshape  = new_oshape.get<NDim>();
       mshadow::Shape<NDim> lstride = calc_stride(new_lshape.get<NDim>());
       mshadow::Shape<NDim> rstride = calc_stride(new_rshape.get<NDim>());
       if (lhs.type_flag_ == rhs.type_flag_) {
         // When the both inputs have the same data types
         if (common::is_float(lhs.type_flag_)) {
           // If both inputs are the same float types, output is the same float type
           MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, DType, {
             Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>, xpu>::template LaunchEx(
                 s,
                 new_oshape.Size(),
                 req[0],
                 lstride,
                 rstride,
                 oshape,
                 lhs.dptr<DType>(),
                 rhs.dptr<DType>(),
                 out.dptr<DType>());
           });
         } else {
           CHECK_EQ(out.type_flag_, mxnet::common::GetDefaultDtype())
               << "true_divide only supports float32 and float64 output when input's dtype is "
               << type_string(lhs.type_flag_);
           // NOTE(review): the output is accessed as float regardless of the dtype checked
           // above; if GetDefaultDtype() can be float64 this needs an output-type dispatch
           // (and matching kernel overloads) -- confirm.
           MXNET_INT_TYPE_SWITCH(lhs.type_flag_, DType, {
             // If both inputs are the same integer types, output is float type
             Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>, xpu>::template LaunchEx(
                 s,
                 new_oshape.Size(),
                 req[0],
                 lstride,
                 rstride,
                 oshape,
                 lhs.dptr<DType>(),
                 rhs.dptr<DType>(),
                 out.dptr<float>());
           });
         }
       } else {
         if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) {
           // lhs and rhs have different float types, the output is the more precise one
           TBlob temp_tblob;
           if (lhs.type_flag_ == out.type_flag_) {
             // lhs already has the output type: cast rhs into temporary space.
             MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
               Tensor<xpu, 1, LType> temp_tensor =
                   ctx.requested[0].get_space_typed<xpu, 1, LType>(Shape1(rhs.Size()), s);
               temp_tblob = TBlob(temp_tensor);
             });
             CastCompute<xpu>(attrs, ctx, {rhs}, {kWriteTo}, {temp_tblob});
             MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, DType, {
               Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>,
                      xpu>::template LaunchEx(s,
                                              new_oshape.Size(),
                                              req[0],
                                              lstride,
                                              rstride,
                                              oshape,
                                              lhs.dptr<DType>(),
                                              temp_tblob.dptr<DType>(),
                                              out.dptr<DType>());
             });
           } else {
             // Otherwise rhs is taken to have the output type: cast lhs into temporary space.
             MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
               Tensor<xpu, 1, RType> temp_tensor =
                   ctx.requested[0].get_space_typed<xpu, 1, RType>(Shape1(lhs.Size()), s);
               temp_tblob = TBlob(temp_tensor);
             });
             CastCompute<xpu>(attrs, ctx, {lhs}, {kWriteTo}, {temp_tblob});
             MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, DType, {
               Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>,
                      xpu>::template LaunchEx(s,
                                              new_oshape.Size(),
                                              req[0],
                                              lstride,
                                              rstride,
                                              oshape,
                                              temp_tblob.dptr<DType>(),
                                              rhs.dptr<DType>(),
                                              out.dptr<DType>());
             });
           }
         } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) {
           // one of lhs and rhs is float, the output is the same type as the float one
           if (common::is_float(lhs.type_flag_)) {
             // lhs is float type, output will be the same float type
             CHECK_EQ(lhs.type_flag_, out.type_flag_)
                 << "lhs should have the same type as out, infer type broken?";
             // Operands and strides are passed swapped (rhs first) together with
             // rtrue_divide; presumably rtrue_divide divides its second argument by its
             // first so the result is still lhs / rhs -- confirm in mshadow_op.
             MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
               MXNET_INT_TYPE_SWITCH(rhs.type_flag_, RType, {
                 Kernel<binary_broadcast_kernel<NDim, mshadow_op::rtrue_divide>,
                        xpu>::template LaunchEx(s,
                                                new_oshape.Size(),
                                                req[0],
                                                rstride,
                                                lstride,
                                                oshape,
                                                rhs.dptr<RType>(),
                                                lhs.dptr<LType>(),
                                                out.dptr<LType>());
               });
             });
           } else {
             // rhs is float type, output will be the same float type
             CHECK_EQ(rhs.type_flag_, out.type_flag_)
                 << "rhs should have the same type as out, infer type broken?";
             MXNET_INT_TYPE_SWITCH(lhs.type_flag_, LType, {
               MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
                 Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>,
                        xpu>::template LaunchEx(s,
                                                new_oshape.Size(),
                                                req[0],
                                                lstride,
                                                rstride,
                                                oshape,
                                                lhs.dptr<LType>(),
                                                rhs.dptr<RType>(),
                                                out.dptr<RType>());
               });
             });
           }
         } else {
           // lhs and rhs have different integer types, the output is float type
           LOG(FATAL) << "not implemented yet...";
         }
       }
     });
   }
 }

 }  // namespace op
 }  // namespace mxnet

 #endif  // MXNET_OPERATOR_NUMPY_NP_TRUE_DIVIDE_INL_H_
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	/*!
	* \file np_true_divide-inl.h
	* \brief Function definitions of true_divide operator
	*/

	#ifndef MXNET_OPERATOR_NUMPY_NP_TRUE_DIVIDE_INL_H_
	#define MXNET_OPERATOR_NUMPY_NP_TRUE_DIVIDE_INL_H_

	#include <vector>
	#include "../../common/utils.h"
	#include "../tensor/elemwise_binary_broadcast_op.h"
	#include "../numpy/np_elemwise_broadcast_op.h"

	namespace mxnet {
	namespace op {

	/*!
	 * \brief Compute function for true division between a tensor and a scalar.
	 *
	 * The scalar is read from the node's NumpyBinaryScalarParam. When input and
	 * output share a dtype the kernel runs directly on that (real) type; otherwise
	 * the input is dispatched as an integer type and the output must be float32
	 * or float64.
	 *
	 * \tparam xpu device type used to select the stream and the kernel launcher
	 * \tparam OP  element-wise operator, wrapped in op_with_req and launched with
	 *             the input data and the scalar
	 * \param attrs   node attributes; attrs.parsed holds a NumpyBinaryScalarParam
	 * \param ctx     operator context providing the stream
	 * \param inputs  exactly one input tensor
	 * \param req     write request for the output; kNullOp makes this a no-op
	 * \param outputs exactly one output tensor
	 */
	template <typename xpu, typename OP>
	void TrueDivideScalarCompute(const nnvm::NodeAttrs& attrs,
	                             const OpContext& ctx,
	                             const std::vector<TBlob>& inputs,
	                             const std::vector<OpReqType>& req,
	                             const std::vector<TBlob>& outputs) {
	  CHECK_EQ(inputs.size(), 1U);
	  CHECK_EQ(outputs.size(), 1U);
	  // Nothing to write: no output requested, or the output has no elements.
	  if (req[0] == kNullOp || outputs[0].Size() == 0U)
	    return;
	  using namespace mshadow;
	  using namespace mxnet_op;
	  using namespace mshadow::expr;
	  Stream<xpu>* s                      = ctx.get_stream<xpu>();
	  const NumpyBinaryScalarParam& param = nnvm::get<NumpyBinaryScalarParam>(attrs.parsed);
	  const double alpha                  = param.scalar;
	  const TBlob& data                   = inputs[0];
	  const TBlob& out                    = outputs[0];
	  if (out.type_flag_ == data.type_flag_) {
	    // Input and output share one real dtype: run the kernel on that type.
	    MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
	      MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
	        Kernel<op_with_req<OP, Req>, xpu>::Launch(
	            s, data.Size(), out.dptr<DType>(), data.dptr<DType>(), DType(alpha));
	      });
	    });
	  } else {
	    // Mixed case: integer input promoted to a floating-point output.
	    CHECK(out.type_flag_ == mshadow::kFloat32 || out.type_flag_ == mshadow::kFloat64)
	        << "true_divide only supports float32 and float64"
	           " output when input's dtype is "
	        << type_string(inputs[0].type_flag_);
	    MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, ODType, {
	      MXNET_INT_TYPE_SWITCH(inputs[0].type_flag_, DType, {
	        MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
	          Kernel<op_with_req<OP, Req>, xpu>::Launch(
	              s, data.Size(), out.dptr<ODType>(), data.dptr<DType>(), static_cast<ODType>(alpha));
	        });
	      });
	    });
	  }
	}

	/*!
	 * \brief Element-wise true division of two tensors; no broadcasting is done here.
	 *
	 * Dispatches on the input dtypes:
	 *  - same float type: divides directly, output has that type;
	 *  - same integer type: output is written as float (see NOTE in the body);
	 *  - two different float types: the input whose type differs from the output is
	 *    first cast into temporary space taken from ctx.requested[0], then divided;
	 *  - one float and one integer: output has the float input's type;
	 *  - two different integer types: not implemented, aborts via LOG(FATAL).
	 *
	 * \tparam xpu device type used to select the stream and the kernel launcher
	 * \param attrs   node attributes, forwarded to CastCompute
	 * \param ctx     operator context providing the stream and requested resources
	 * \param inputs  exactly two tensors: lhs (dividend) and rhs (divisor)
	 * \param req     write request for the output; kNullOp makes this a no-op
	 * \param outputs exactly one tensor receiving lhs / rhs
	 */
	template <typename xpu>
	void TrueDivideElemwiseCompute(const nnvm::NodeAttrs& attrs,
	                               const OpContext& ctx,
	                               const std::vector<TBlob>& inputs,
	                               const std::vector<OpReqType>& req,
	                               const std::vector<TBlob>& outputs) {
	  using namespace mxnet_op;
	  // Validate arity before any element of inputs/outputs is indexed.
	  CHECK_EQ(inputs.size(), 2U);
	  CHECK_EQ(outputs.size(), 1U);
	  if (req[0] == kNullOp || outputs[0].Size() == 0U)
	    return;
	  Stream<xpu>* s = ctx.get_stream<xpu>();

	  const TBlob& lhs = inputs[0];
	  const TBlob& rhs = inputs[1];
	  const TBlob& out = outputs[0];
	  if (lhs.type_flag_ == rhs.type_flag_) {
	    // Case when types of the 2 input tensors are the same
	    if (common::is_float(lhs.type_flag_)) {
	      // If both are the same floats, normal launch
	      MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
	        MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, DType, {
	          Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
	              s, out.Size(), out.dptr<DType>(), lhs.dptr<DType>(), rhs.dptr<DType>());
	        });
	      });
	    } else {
	      // If both are the same integers, output is float32 or float64
	      CHECK_EQ(out.type_flag_, mxnet::common::GetDefaultDtype())
	          << "true_divide only supports float32 and float64"
	             " output when input's dtype is "
	          << type_string(lhs.type_flag_);
	      // NOTE(review): the output is accessed as float regardless of the dtype checked
	      // above; if GetDefaultDtype() can be float64 this needs an output-type dispatch
	      // (and matching kernel overloads) -- confirm.
	      MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
	        MXNET_INT_TYPE_SWITCH(lhs.type_flag_, DType, {
	          Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
	              s, out.Size(), out.dptr<float>(), lhs.dptr<DType>(), rhs.dptr<DType>());
	        });
	      });
	    }
	  } else {
	    // Case when types of the 2 input tensors are different
	    if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) {
	      // both lhs and rhs are float types, output type is the more precise one
	      TBlob temp_tblob;
	      if (lhs.type_flag_ == out.type_flag_) {
	        // lhs already has the output type: cast rhs into temporary space.
	        MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
	          Tensor<xpu, 1, LType> temp_tensor =
	              ctx.requested[0].get_space_typed<xpu, 1, LType>(Shape1(rhs.Size()), s);
	          temp_tblob = TBlob(temp_tensor);
	        });
	        CastCompute<xpu>(attrs, ctx, {rhs}, {kWriteTo}, {temp_tblob});
	        MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
	          MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, DType, {
	            Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
	                s, out.Size(), out.dptr<DType>(), lhs.dptr<DType>(), temp_tblob.dptr<DType>());
	          });
	        });
	      } else {
	        // Otherwise rhs is taken to have the output type: cast lhs into temporary space.
	        MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
	          Tensor<xpu, 1, RType> temp_tensor =
	              ctx.requested[0].get_space_typed<xpu, 1, RType>(Shape1(lhs.Size()), s);
	          temp_tblob = TBlob(temp_tensor);
	        });
	        CastCompute<xpu>(attrs, ctx, {lhs}, {kWriteTo}, {temp_tblob});
	        MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
	          MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, DType, {
	            Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
	                s, out.Size(), out.dptr<DType>(), temp_tblob.dptr<DType>(), rhs.dptr<DType>());
	          });
	        });
	      }
	    } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) {
	      // one is float type, the other is integer type, the output type should be the same as float
	      CHECK_EQ(out.type_flag_, common::is_float(lhs.type_flag_) ? lhs.type_flag_ : rhs.type_flag_)
	          << "This case out type should be same as the float type";
	      if (common::is_float(lhs.type_flag_)) {
	        // lhs is the float one
	        // Operands are passed swapped (rhs, lhs) together with rtrue_divide; presumably
	        // rtrue_divide divides its second argument by its first so the result is still
	        // lhs / rhs -- confirm in mshadow_op.
	        MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
	          MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
	            MXNET_INT_TYPE_SWITCH(rhs.type_flag_, RType, {
	              Kernel<op_with_req<mshadow_op::rtrue_divide, Req>, xpu>::Launch(
	                  s, out.Size(), out.dptr<LType>(), rhs.dptr<RType>(), lhs.dptr<LType>());
	            });
	          });
	        });
	      } else {
	        // rhs is the float one
	        MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
	          MXNET_INT_TYPE_SWITCH(lhs.type_flag_, LType, {
	            MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
	              Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
	                  s, out.Size(), out.dptr<RType>(), lhs.dptr<LType>(), rhs.dptr<RType>());
	            });
	          });
	        });
	      }
	    } else {
	      // lhs is integer type, rhs is integer type, output type should be float
	      LOG(FATAL) << "not implemented yet...";
	    }
	  }
	}

	/*!
	 * \brief True division of two tensors with broadcasting.
	 *
	 * Compacts the input/output shapes with BinaryBroadcastShapeCompact. When that
	 * returns 0 (presumably meaning no broadcasting is required -- confirm) the work
	 * is delegated to TrueDivideElemwiseCompute; otherwise a binary_broadcast_kernel
	 * is launched, dispatching on the input dtypes in the same way as the
	 * element-wise variant:
	 *  - same float type: output has that type;
	 *  - same integer type: output is written as float (see NOTE in the body);
	 *  - two different float types: the input whose type differs from the output is
	 *    first cast into temporary space taken from ctx.requested[0];
	 *  - one float and one integer: output has the float input's type;
	 *  - two different integer types: not implemented, aborts via LOG(FATAL).
	 *
	 * \tparam xpu device type used to select the stream and the kernel launcher
	 * \param attrs   node attributes, forwarded to CastCompute / the element-wise path
	 * \param ctx     operator context providing the stream and requested resources
	 * \param inputs  exactly two tensors: lhs (dividend) and rhs (divisor)
	 * \param req     write request for the output; kNullOp makes this a no-op
	 * \param outputs exactly one tensor receiving the broadcast lhs / rhs
	 */
	template <typename xpu>
	void TrueDivideBroadcastCompute(const nnvm::NodeAttrs& attrs,
	                                const OpContext& ctx,
	                                const std::vector<TBlob>& inputs,
	                                const std::vector<OpReqType>& req,
	                                const std::vector<TBlob>& outputs) {
	  using namespace mxnet_op;
	  // Validate arity before any element of inputs/outputs is indexed.
	  CHECK_EQ(inputs.size(), 2U);
	  CHECK_EQ(outputs.size(), 1U);
	  if (outputs[0].shape_.Size() == 0U)
	    return;
	  mxnet::TShape new_lshape, new_rshape, new_oshape;
	  int ndim = BinaryBroadcastShapeCompact(
	      inputs[0].shape_, inputs[1].shape_, outputs[0].shape_, &new_lshape, &new_rshape, &new_oshape);
	  if (!ndim) {
	    TrueDivideElemwiseCompute<xpu>(attrs, ctx, inputs, req, outputs);
	  } else {
	    if (req[0] == kNullOp)
	      return;
	    mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
	    const TBlob& lhs        = inputs[0];
	    const TBlob& rhs        = inputs[1];
	    const TBlob& out        = outputs[0];
	    BROADCAST_NDIM_SWITCH(ndim, NDim, {
	      mshadow::Shape<NDim> oshape  = new_oshape.get<NDim>();
	      mshadow::Shape<NDim> lstride = calc_stride(new_lshape.get<NDim>());
	      mshadow::Shape<NDim> rstride = calc_stride(new_rshape.get<NDim>());
	      if (lhs.type_flag_ == rhs.type_flag_) {
	        // When the both inputs have the same data types
	        if (common::is_float(lhs.type_flag_)) {
	          // If both inputs are the same float types, output is the same float type
	          MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, DType, {
	            Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>, xpu>::template LaunchEx(
	                s,
	                new_oshape.Size(),
	                req[0],
	                lstride,
	                rstride,
	                oshape,
	                lhs.dptr<DType>(),
	                rhs.dptr<DType>(),
	                out.dptr<DType>());
	          });
	        } else {
	          CHECK_EQ(out.type_flag_, mxnet::common::GetDefaultDtype())
	              << "true_divide only supports float32 and float64 output when input's dtype is "
	              << type_string(lhs.type_flag_);
	          // NOTE(review): the output is accessed as float regardless of the dtype checked
	          // above; if GetDefaultDtype() can be float64 this needs an output-type dispatch
	          // (and matching kernel overloads) -- confirm.
	          MXNET_INT_TYPE_SWITCH(lhs.type_flag_, DType, {
	            // If both inputs are the same integer types, output is float type
	            Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>, xpu>::template LaunchEx(
	                s,
	                new_oshape.Size(),
	                req[0],
	                lstride,
	                rstride,
	                oshape,
	                lhs.dptr<DType>(),
	                rhs.dptr<DType>(),
	                out.dptr<float>());
	          });
	        }
	      } else {
	        if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) {
	          // lhs and rhs have different float types, the output is the more precise one
	          TBlob temp_tblob;
	          if (lhs.type_flag_ == out.type_flag_) {
	            // lhs already has the output type: cast rhs into temporary space.
	            MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
	              Tensor<xpu, 1, LType> temp_tensor =
	                  ctx.requested[0].get_space_typed<xpu, 1, LType>(Shape1(rhs.Size()), s);
	              temp_tblob = TBlob(temp_tensor);
	            });
	            CastCompute<xpu>(attrs, ctx, {rhs}, {kWriteTo}, {temp_tblob});
	            MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, DType, {
	              Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>,
	                     xpu>::template LaunchEx(s,
	                                             new_oshape.Size(),
	                                             req[0],
	                                             lstride,
	                                             rstride,
	                                             oshape,
	                                             lhs.dptr<DType>(),
	                                             temp_tblob.dptr<DType>(),
	                                             out.dptr<DType>());
	            });
	          } else {
	            // Otherwise rhs is taken to have the output type: cast lhs into temporary space.
	            MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
	              Tensor<xpu, 1, RType> temp_tensor =
	                  ctx.requested[0].get_space_typed<xpu, 1, RType>(Shape1(lhs.Size()), s);
	              temp_tblob = TBlob(temp_tensor);
	            });
	            CastCompute<xpu>(attrs, ctx, {lhs}, {kWriteTo}, {temp_tblob});
	            MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, DType, {
	              Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>,
	                     xpu>::template LaunchEx(s,
	                                             new_oshape.Size(),
	                                             req[0],
	                                             lstride,
	                                             rstride,
	                                             oshape,
	                                             temp_tblob.dptr<DType>(),
	                                             rhs.dptr<DType>(),
	                                             out.dptr<DType>());
	            });
	          }
	        } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) {
	          // one of lhs and rhs is float, the output is the same type as the float one
	          if (common::is_float(lhs.type_flag_)) {
	            // lhs is float type, output will be the same float type
	            CHECK_EQ(lhs.type_flag_, out.type_flag_)
	                << "lhs should have the same type as out, infer type broken?";
	            // Operands and strides are passed swapped (rhs first) together with
	            // rtrue_divide; presumably rtrue_divide divides its second argument by its
	            // first so the result is still lhs / rhs -- confirm in mshadow_op.
	            MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
	              MXNET_INT_TYPE_SWITCH(rhs.type_flag_, RType, {
	                Kernel<binary_broadcast_kernel<NDim, mshadow_op::rtrue_divide>,
	                       xpu>::template LaunchEx(s,
	                                               new_oshape.Size(),
	                                               req[0],
	                                               rstride,
	                                               lstride,
	                                               oshape,
	                                               rhs.dptr<RType>(),
	                                               lhs.dptr<LType>(),
	                                               out.dptr<LType>());
	              });
	            });
	          } else {
	            // rhs is float type, output will be the same float type
	            CHECK_EQ(rhs.type_flag_, out.type_flag_)
	                << "rhs should have the same type as out, infer type broken?";
	            MXNET_INT_TYPE_SWITCH(lhs.type_flag_, LType, {
	              MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
	                Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>,
	                       xpu>::template LaunchEx(s,
	                                               new_oshape.Size(),
	                                               req[0],
	                                               lstride,
	                                               rstride,
	                                               oshape,
	                                               lhs.dptr<LType>(),
	                                               rhs.dptr<RType>(),
	                                               out.dptr<RType>());
	              });
	            });
	          }
	        } else {
	          // lhs and rhs have different integer types, the output is float type
	          LOG(FATAL) << "not implemented yet...";
	        }
	      }
	    });
	  }
	}

	} // namespace op
	} // namespace mxnet

	#endif // MXNET_OPERATOR_NUMPY_NP_TRUE_DIVIDE_INL_H_