| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * \file np_polynomial_op.cu |
| */ |
| |
| #include "np_polynomial_op-inl.h" |
| #include "../../common/cuda/utils.h" |
| |
| namespace mxnet { |
| namespace op { |
| |
/*!
 * \brief Per-element backward kernel of polyval for the GPU.
 *
 * Map(i, ...) handles element i of x / ograd. Coefficients are stored highest
 * degree first, i.e. p[k] multiplies x^(p_size - 1 - k). For element i it
 *   - adds ograd[i] * x[i]^(p_size - 1 - j) into igrad_p[j] for every j, and
 *   - assigns ograd[i] * d(polyval(p, x[i]))/dx to igrad_x[i] through KERNEL_ASSIGN.
 *
 * igrad_p is only ever accumulated into (atomicAdd); the template request type
 * is applied to igrad_x alone, so the caller is responsible for the initial
 * contents of igrad_p.
 *
 * \tparam req request type forwarded to KERNEL_ASSIGN for igrad_x.
 */
template <int req>
struct polyval_backward_gpu {
  /*!
   * \param i            index of the x / ograd element processed by this call.
   * \param p_dptr       polynomial coefficients, p_size entries.
   * \param x_dptr       evaluation points.
   * \param igrad_x_dptr output gradient w.r.t. x, written at index i.
   * \param igrad_p_dptr output gradient w.r.t. p, accumulated atomically.
   * \param ograd_dptr   incoming gradient, read at index i.
   * \param p_size       number of coefficients in p.
   */
  template <typename DType>
  MSHADOW_XINLINE static void Map(int i,
                                  const DType* p_dptr,
                                  const DType* x_dptr,
                                  DType* igrad_x_dptr,
                                  DType* igrad_p_dptr,
                                  const DType* ograd_dptr,
                                  const index_t p_size) {
    DType igrad_p = 1;  // running power of x[i]; starts at x^0
    DType igrad_x = 0;  // Horner-style accumulation of d(polyval)/dx
    index_t j     = p_size - 1;
    for (; j > 0; --j) {
      // atomic add since different threads could update same variable
      atomicAdd(&igrad_p_dptr[j], igrad_p * ograd_dptr[i]);
      igrad_p *= x_dptr[i];
      igrad_x = igrad_x * x_dptr[i] + p_dptr[p_size - j - 1] * j;
    }
    // With an empty coefficient array j is -1 here; skipping the accumulation
    // avoids an out-of-bounds atomic write to igrad_p_dptr[-1].
    if (j >= 0) {
      atomicAdd(&igrad_p_dptr[j], igrad_p * ograd_dptr[i]);
    }
    KERNEL_ASSIGN(igrad_x_dptr[i], req, igrad_x * ograd_dptr[i]);
  }
};
| |
/*!
 * \brief GPU backward pass of polyval.
 *
 * Launches polyval_backward_gpu once per element of ograd to produce the
 * gradients with respect to the coefficients p and the evaluation points x.
 *
 * \param attrs   node attributes (unused here).
 * \param ctx     operator context providing the GPU stream.
 * \param inputs  {ograd, p, x}.
 * \param req     write requests; req[0] selects the request type used for the
 *                whole computation and must not be kWriteInplace.
 * \param outputs {igrad_p, igrad_x}.
 *
 * If p and x differ in type, or are not floating point, the function returns
 * without touching the outputs.
 */
void NumpyPolyvalBackwardGPU(const nnvm::NodeAttrs& attrs,
                             const OpContext& ctx,
                             const std::vector<TBlob>& inputs,
                             const std::vector<OpReqType>& req,
                             const std::vector<TBlob>& outputs) {
  CHECK_EQ(inputs.size(), 3U);
  CHECK_EQ(outputs.size(), 2U);
  CHECK_NE(req[0], kWriteInplace);

  if (inputs[1].type_flag_ != inputs[2].type_flag_ || !common::is_float(inputs[1].type_flag_) ||
      !common::is_float(inputs[2].type_flag_)) {
    return;
  }

  mshadow::Stream<gpu>* s = ctx.get_stream<gpu>();
  const TBlob& ograd      = inputs[0];
  const TBlob& p          = inputs[1];
  const TBlob& x          = inputs[2];
  const TBlob& igrad_p    = outputs[0];
  const TBlob& igrad_x    = outputs[1];
  const size_t p_size     = p.Size();

  using namespace mxnet_op;
  MSHADOW_REAL_TYPE_SWITCH(ograd.type_flag_, DType, {
    MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
      // The kernel only accumulates into igrad_p with atomicAdd. For a write
      // request the buffer must therefore start from zero, otherwise the
      // gradient is added on top of whatever the output buffer held before.
      // An all-zero byte pattern is 0 for every real type handled here.
      if (req_type == kWriteTo && igrad_p.Size() > 0) {
        CUDA_CALL(cudaMemsetAsync(igrad_p.dptr<DType>(),
                                  0,
                                  igrad_p.Size() * sizeof(DType),
                                  mshadow::Stream<gpu>::GetStream(s)));
      }
      Kernel<polyval_backward_gpu<req_type>, gpu>::Launch(s,
                                                          ograd.Size(),
                                                          p.dptr<DType>(),
                                                          x.dptr<DType>(),
                                                          igrad_x.dptr<DType>(),
                                                          igrad_p.dptr<DType>(),
                                                          ograd.dptr<DType>(),
                                                          p_size);
    });
  });
}
| |
// Bind the GPU FCompute of the polyval forward op to the gpu instantiation of
// the NumpyPolyvalForward template.
NNVM_REGISTER_OP(_npi_polyval)
    .set_attr<mxnet::FCompute>("FCompute<gpu>", NumpyPolyvalForward<gpu>);
| |
// Bind the GPU FCompute of the polyval backward op to NumpyPolyvalBackwardGPU.
NNVM_REGISTER_OP(_npi_backward_polyval).set_attr<mxnet::FCompute>("FCompute<gpu>",
                                                                  NumpyPolyvalBackwardGPU);
| |
| } // namespace op |
| } // namespace mxnet |