| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * Copyright (c) 2015 by Contributors |
| * \file matrix_op-inl.h |
| * \brief Function definition of matrix related operators |
| */ |
| #ifndef MXNET_OPERATOR_TENSOR_MATRIX_OP_INL_H_ |
| #define MXNET_OPERATOR_TENSOR_MATRIX_OP_INL_H_ |
| |
| #include <mxnet/operator_util.h> |
| #include <vector> |
| #include <algorithm> |
| #include <utility> |
| #include <type_traits> |
| #include "../mshadow_op.h" |
| #include "../elemwise_op_common.h" |
| #include "../channel_op_common.h" |
| #include "../mxnet_op.h" |
| #include "broadcast_reduce_op.h" |
| #include "./init_op.h" |
| #include "../../common/static_array.h" |
| #include "./slice-inl.h" |
| |
| #if MXNET_USE_CUDA |
| #include <thrust/device_vector.h> |
| #endif |
| |
| namespace mxnet { |
| namespace op { |
| |
| struct ReshapeParam : public dmlc::Parameter<ReshapeParam> { |
| mxnet::TShape target_shape; |
| bool keep_highest; |
| nnvm::Tuple<int> shape; |
| bool reverse; |
| DMLC_DECLARE_PARAMETER(ReshapeParam) { |
| DMLC_DECLARE_FIELD(shape) |
| .set_default(nnvm::Tuple<int>()) |
| .describe("The target shape"); |
| DMLC_DECLARE_FIELD(reverse) |
| .set_default(false) |
|       .describe("If true, then the special values are inferred from right to left."); |
| DMLC_DECLARE_FIELD(target_shape) |
| .set_default(mxnet::TShape()) |
| .describe("(Deprecated! Use ``shape`` instead.) " |
| "Target new shape. One and only one dim can be 0, " |
| "in which case it will be inferred from the rest of dims"); |
| DMLC_DECLARE_FIELD(keep_highest).set_default(false) |
|     .describe("(Deprecated! Use ``shape`` instead.) Whether to keep the highest dim unchanged. " |
|               "If set to true, the first dim in target_shape is ignored, " |
|               "and always fixed to the first dim of the input."); |
| } |
| }; |
| |
| template<typename IType> |
| inline mxnet::TShape InferReshapeShape(const nnvm::Tuple<IType>& shape, |
| const mxnet::TShape& dshape, bool reverse) { |
| std::vector<IType> dshape_vec; |
| std::vector<IType> param_shape_vec(shape.begin(), shape.end()); |
| for (index_t i = 0; i < dshape.ndim(); ++i) { |
| dshape_vec.push_back(dshape[i]); |
| } |
| std::vector<IType> tmp; |
| size_t src_idx = 0; |
| int inf_idx = -1; |
| if (reverse) { |
| std::reverse(dshape_vec.begin(), dshape_vec.end()); |
| std::reverse(param_shape_vec.begin(), param_shape_vec.end()); |
| } |
| auto dshape_len = dshape_vec.size(); |
| auto params_len = param_shape_vec.size(); |
| for (size_t i = 0; i < params_len; ++i) { |
| IType proposed_dim = param_shape_vec[i]; |
| if (proposed_dim == 0) { |
| // keep same |
| CHECK_LT(src_idx, dshape_len); |
| tmp.push_back(dshape_vec[src_idx++]); |
| } else if (proposed_dim == -1) { |
| // infer |
| CHECK_LT(inf_idx, 0) << "One and only one dim can be inferred"; |
| inf_idx = i; |
| tmp.push_back(1); |
| src_idx++; |
| } else if (proposed_dim == -2) { |
| // copy all remaining dims from source |
| while (src_idx < dshape_len) { |
| size_t dn = dshape_vec[src_idx++]; |
| tmp.push_back(dn); |
| } |
| } else if (proposed_dim == -3) { |
| // merge two dims from source |
| CHECK_LT(src_idx, dshape_len-1); |
| size_t d1 = dshape_vec[src_idx++]; |
| size_t d2 = dshape_vec[src_idx++]; |
| size_t dn = d1 * d2; |
| tmp.push_back(dn); |
| } else if (proposed_dim == -4) { |
| // split the source dim s into two dims |
| // read the left dim and then the right dim (either can be -1) |
| CHECK_LT(i + 2, params_len); |
| CHECK_LT(src_idx, dshape_len); |
| size_t d0 = dshape_vec[src_idx++]; |
| IType d1 = param_shape_vec[++i]; |
| IType d2 = param_shape_vec[++i]; |
| CHECK(d1 != -1 || d2 != -1) << "Split dims cannot both be -1."; |
| if (d1 == -1) d1 = d0 / d2; |
| if (d2 == -1) d2 = d0 / d1; |
| CHECK(d1 * d2 == static_cast<IType>(d0) || static_cast<IType>(d0) == IType(0)) << |
| "Split dims " << d1 << ", " << d2 << " do not divide original dim " << d0; |
| tmp.push_back(d1); |
| tmp.push_back(d2); |
| } else { |
| // greater than 0, new shape |
| tmp.push_back(proposed_dim); |
| src_idx++; |
| } |
| } |
| |
| if (inf_idx >= 0) { |
| if (dshape.Size() > 0) { |
| IType new_size = 1; |
| for (IType x : tmp) new_size *= x; |
| tmp[inf_idx] = dshape.Size() / new_size; |
| } else { |
| tmp[inf_idx] = 0; |
| } |
| } |
| if (reverse) { |
| std::reverse(param_shape_vec.begin(), param_shape_vec.end()); |
| std::reverse(dshape_vec.begin(), dshape_vec.end()); |
| std::reverse(tmp.begin(), tmp.end()); |
| } |
| mxnet::TShape oshape(tmp.begin(), tmp.end()); |
| return oshape; |
| } |
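| |
| // Illustrative examples of InferReshapeShape (comment-only sketch; shapes |
| // chosen for exposition). For dshape = (2, 3, 4): |
| //   shape = (6, 1, -1)     -> (6, 1, 4)     (-1 infers the remaining size) |
| //   shape = (0, -1)        -> (2, 12)       (0 copies the matching input dim) |
| //   shape = (-3, 4)        -> (6, 4)        (-3 merges two consecutive dims) |
| //   shape = (-4, 1, 2, -2) -> (1, 2, 3, 4)  (-4 splits a dim; -2 copies the rest) |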
| |
| inline bool ReverseReshapeInferShape(mxnet::TShape *in, const mxnet::TShape& out) { |
| if (in->Size() && out.Size()) { |
| return true; |
| } else if (!out.Size()) { |
| return false; |
| } else { |
| int zero_axis = -1; |
| int non_zero_prod = 1; |
| for (index_t i = 0; i < in->ndim(); i++) { |
| if ((*in)[i] == 0) { |
| if (zero_axis != -1) |
| return false; // more than 1 zero found. |
| else |
| zero_axis = i; |
| } else { |
| non_zero_prod *= (*in)[i]; |
| } |
| } |
| (*in)[zero_axis] = out.Size() / non_zero_prod; |
| return true; |
| } |
| } |
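| |
| // Example (comment-only sketch): in = (2, 0, 4) with out.Size() = 24 fills |
| // the single zero axis with 24 / (2 * 4) = 3, giving in = (2, 3, 4). |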
| |
| inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| const ReshapeParam& param_ = nnvm::get<ReshapeParam>(attrs.parsed); |
| CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; |
| CHECK_EQ(out_attrs->size(), 1U); |
| mxnet::TShape &dshape = (*in_attrs)[0]; |
| if (dshape.ndim() == 0) return false; |
| mxnet::TShape oshape; |
| if (param_.shape.ndim() != 0) { |
| oshape = InferReshapeShape(param_.shape, dshape, param_.reverse); |
| } else if (param_.target_shape.ndim()) { |
|     LOG(INFO) << "The target_shape parameter is deprecated; use shape instead."; |
| oshape = param_.target_shape; |
| int neg_count = 0; |
| index_t inf_idx = 0; |
| index_t start_idx = param_.keep_highest ? 1 : 0; |
| if (param_.keep_highest) { |
| oshape[0] = dshape[0]; |
| } |
| for (index_t i = start_idx; i < oshape.ndim(); ++i) { |
| if (oshape[i] == 0) { |
| neg_count++; |
| inf_idx = i; |
| } |
| } |
| if (neg_count == 1) { |
| oshape[inf_idx] = 1; |
| oshape[inf_idx] = dshape.Size() / oshape.Size(); |
| } |
| } else { |
| return (*out_attrs)[0].ndim() && ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]); |
| } |
| ReverseReshapeInferShape(&dshape, oshape); |
| CHECK_EQ(oshape.Size(), dshape.Size()) |
|       << "Target shape size is different from the source. " |
| << "Target: " << oshape |
| << "\nSource: " << dshape; |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); |
| return ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]); |
| } |
| |
| inline bool FlattenShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; |
| CHECK_EQ(out_attrs->size(), 1U); |
| const mxnet::TShape &dshape = (*in_attrs)[0]; |
| if (dshape.ndim() == 0) return false; |
| uint32_t target_dim = 1; |
| for (uint32_t i = 1; i < dshape.ndim(); ++i) { |
| target_dim *= dshape[i]; |
| } |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::Shape2(dshape[0], target_dim)); |
| return true; |
| } |
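| |
| // Example (comment-only sketch): Flatten keeps the leading dim and collapses |
| // the rest, so an input of shape (2, 3, 4) produces an output of shape (2, 12). |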
| |
| struct TransposeParam : public dmlc::Parameter<TransposeParam> { |
| mxnet::TShape axes; |
| DMLC_DECLARE_PARAMETER(TransposeParam) { |
| DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape()) |
| .describe("Target axis order. By default the axes will be inverted."); |
| } |
| }; |
| |
| template<typename xpu> |
| void TransposeImpl(RunContext ctx, |
| const TBlob& src, |
| const TBlob& ret, |
| const mxnet::TShape& axes) { |
| using namespace mshadow; |
| using namespace mshadow::expr; |
| CHECK_EQ(src.type_flag_, ret.type_flag_); |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| MSHADOW_TYPE_SWITCH(ret.type_flag_, DType, { |
| switch (axes.ndim()) { |
| case 0: |
| break; |
| case 1: { |
| Tensor<xpu, 1, DType> in = src.get<xpu, 1, DType>(s); |
| Tensor<xpu, 1, DType> out = ret.get<xpu, 1, DType>(s); |
| Copy(out, in, s); |
| break; |
| } |
| case 2: { |
| mshadow::Tensor<xpu, 2, DType> in = src.FlatTo2D<xpu, DType>(s); |
| mshadow::Tensor<xpu, 2, DType> out = ret.FlatTo2D<xpu, DType>(s); |
| if (axes[0] == 1 && axes[1] == 0) { |
| out = in.T(); |
| } else { |
| Copy(out, in, s); |
| } |
| break; |
| } |
| case 3: { |
| Tensor<xpu, 3, DType> in = src.get<xpu, 3, DType>(s); |
| Tensor<xpu, 3, DType> out = ret.get<xpu, 3, DType>(s); |
| out = transpose(in, axes.get<3>()); |
| break; |
| } |
| case 4: { |
| Tensor<xpu, 4, DType> in = src.get<xpu, 4, DType>(s); |
| Tensor<xpu, 4, DType> out = ret.get<xpu, 4, DType>(s); |
| out = transpose(in, axes.get<4>()); |
| break; |
| } |
| case 5: { |
| Tensor<xpu, 5, DType> in = src.get<xpu, 5, DType>(s); |
| Tensor<xpu, 5, DType> out = ret.get<xpu, 5, DType>(s); |
| out = transpose(in, axes.get<5>()); |
| break; |
| } |
| case 6: { |
| Tensor<xpu, 6, DType> in = src.get<xpu, 6, DType>(s); |
| Tensor<xpu, 6, DType> out = ret.get<xpu, 6, DType>(s); |
| out = transpose(in, axes.get<6>()); |
| break; |
| } |
| default: |
|        LOG(FATAL) << "Transpose supports at most 6 dimensions"; |
| break; |
| } |
| }); |
| } |
| |
| // matrix transpose |
| template<typename xpu> |
| void Transpose(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed); |
| CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace"; |
| if (param.axes.ndim() == 0) { |
| mxnet::TShape axes = mxnet::TShape(inputs[0].ndim()); |
| for (index_t i = 0; i < axes.ndim(); ++i) { |
| axes[i] = axes.ndim() - 1 - i; |
| } |
| TransposeImpl<xpu>(ctx.run_ctx, inputs[0], outputs[0], axes); |
| } else { |
| TransposeImpl<xpu>(ctx.run_ctx, inputs[0], outputs[0], param.axes); |
| } |
| } |
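| |
| // Example (comment-only sketch): with no axes given, a (2, 3, 4) input is |
| // transposed with axes = (2, 1, 0), producing a (4, 3, 2) output. |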
| |
| inline bool TransposeShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed); |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| mxnet::TShape& shp = (*in_attrs)[0]; |
|   CHECK_LE(shp.ndim(), 6U) << "Transpose supports at most 6 dimensions"; |
| mxnet::TShape ret(shp.ndim()); |
| if (param.axes.ndim() == 0) { |
| for (index_t i = 0; i < shp.ndim(); ++i) { |
| ret[i] = shp[shp.ndim()-1-i]; |
| } |
| } else { |
| CHECK_EQ(shp.ndim(), param.axes.ndim()); |
| for (size_t i = 0; i < shp.ndim(); ++i) { |
| CHECK(param.axes[i] < static_cast<int64_t>(shp.ndim())); |
| ret[i] = shp[param.axes[i]]; |
| } |
| } |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret); |
| return true; |
| } |
| |
| |
| struct ExpandDimParam : public dmlc::Parameter<ExpandDimParam> { |
| int axis; |
| DMLC_DECLARE_PARAMETER(ExpandDimParam) { |
| DMLC_DECLARE_FIELD(axis) |
|     .describe("Position where the new axis is to be inserted. Suppose that " |
|               "the input `NDArray`'s dimension is `ndim`; then the valid range of " |
|               "the inserted axis is `[-ndim, ndim]`."); |
| } |
| }; |
| |
| |
| inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| const ExpandDimParam& param = nnvm::get<ExpandDimParam>(attrs.parsed); |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| if (in_attrs->at(0).ndim() == 0U && out_attrs->at(0).ndim() == 0U) { |
| return false; |
| } |
| |
| mxnet::TShape& ishape = (*in_attrs)[0]; |
| mxnet::TShape& oshape = (*out_attrs)[0]; |
| int indim = ishape.ndim(); |
| bool unknown_ishape = false; |
| if (0 == indim) { |
| indim = oshape.ndim() - 1; |
| unknown_ishape = true; |
| } |
| |
| int axis = param.axis; |
| if (axis < 0) { |
| axis += indim + 1; |
| } |
| CHECK(axis >= 0 && axis <= indim) |
| << "axis must be in the range [" << -indim << ", " << indim << "] (" |
| << param.axis << " provided)"; |
| mxnet::TShape ret(indim + 1); |
| for (int i = 0; i < axis; ++i) { |
| ret[i] = (unknown_ishape? 0 : ishape[i]); |
| } |
| ret[axis] = 1; |
| for (int i = axis+1; i < indim+1; ++i) { |
| ret[i] = (unknown_ishape? 0 : ishape[i-1]); |
| } |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret); |
| |
| ret = mxnet::TShape(indim); |
| for (int i = 0; i < axis; ++i) ret[i] = oshape[i]; |
| for (int i = axis+1; i < indim+1; ++i) ret[i-1] = oshape[i]; |
| SHAPE_ASSIGN_CHECK(*in_attrs, 0, ret); |
| return true; |
| } |
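| |
| // Example (comment-only sketch): expand_dims on an input of shape (2, 3) with |
| // axis = 1 (equivalently axis = -2) yields the output shape (2, 1, 3). |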
| |
| // Currently MKLDNN supports the slice op only when every step is 1 or has no value |
| inline bool SupportMKLDNNSlice(const SliceParam& param) { |
| if (param.step.ndim() == 0U) return true; |
| for (uint32_t i = 0; i < param.step.ndim(); ++i) { |
| if (param.step[i].has_value() && param.step[i].value() != 1) |
| return false; |
| } |
| return true; |
| } |
| |
| inline bool SliceForwardInferStorageType(const nnvm::NodeAttrs& attrs, |
| const int dev_mask, |
| DispatchMode* dispatch_mode, |
| std::vector<int>* in_attrs, |
| std::vector<int>* out_attrs) { |
| CHECK_EQ(in_attrs->size(), 1); |
| CHECK_EQ(out_attrs->size(), 1); |
| const SliceParam& param = nnvm::get<SliceParam>(attrs.parsed); |
| const auto& in_stype = in_attrs->at(0); |
| auto& out_stype = out_attrs->at(0); |
| bool dispatched = false; |
| const auto dispatch_ex = DispatchMode::kFComputeEx; |
|   // If step = 1, there is no need to fall back; otherwise fall back to dense |
| bool trivial_step = false; |
| if (param.step.ndim() == 0U) { |
| trivial_step = true; |
| } else if (param.step.ndim() == 1U |
| && (!param.step[0].has_value() || param.step[0].value() == 1)) { |
| trivial_step = true; |
| } |
| |
| if (in_stype == kDefaultStorage) { |
| #if MXNET_USE_MKLDNN == 1 |
| if (dev_mask == Context::kCPU && MKLDNNEnvSet() |
| && SupportMKLDNNSlice(param)) { |
| dispatched = storage_type_assign(&out_stype, kDefaultStorage, |
| dispatch_mode, dispatch_ex); |
| } |
| #endif |
| if (!dispatched) { |
| dispatched = storage_type_assign(&out_stype, kDefaultStorage, |
| dispatch_mode, DispatchMode::kFCompute); |
| } |
| } |
| |
| if (!dispatched && in_stype == kCSRStorage && trivial_step) { |
| dispatched = storage_type_assign(&out_stype, kCSRStorage, |
| dispatch_mode, dispatch_ex); |
| } |
| |
| if (!dispatched) { |
| dispatched = dispatch_fallback(out_attrs, dispatch_mode); |
| } |
| |
| return dispatched; |
| } |
| |
| // slice the indptr of a csr |
| struct SliceCsrIndPtr { |
| template<typename IType> |
| MSHADOW_XINLINE static void Map(int i, IType* out, const IType* in, const IType* base) { |
| KERNEL_ASSIGN(out[i], kWriteTo, in[i] - *base); |
| } |
| }; |
| |
| /* |
|  * A wrapper to launch the SliceCsrIndPtr kernel. |
|  * Copies the inclusive range src[begin] .. src[end] (end - begin + 1 entries) |
|  * into dst[0] .. dst[end - begin], rebasing each entry by src[begin]. |
|  */ |
| template<typename xpu, typename IType> |
| void SliceCsrIndPtrImpl(const int begin, const int end, RunContext ctx, |
| const IType* src, IType* dst) { |
| using namespace mshadow; |
| using namespace mxnet_op; |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| int indptr_len = end - begin + 1; |
| Kernel<SliceCsrIndPtr, xpu>::Launch(s, indptr_len, dst, src + begin, src + begin); |
| } |
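| |
| // Example (comment-only sketch; values assumed for exposition): with |
| //   src = {0, 2, 5, 7}  (indptr of a 3-row CSR matrix) |
| // slicing rows [1, 3) launches end - begin + 1 = 3 threads and writes |
| //   dst = {2-2, 5-2, 7-2} = {0, 3, 5}, |
| // i.e. the sliced indptr is rebased so that dst[0] == 0. |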
| |
| /* |
| * Slice a CSR NDArray for first dimension |
| */ |
| template<typename xpu> |
| void SliceDimOneCsrImpl(const mxnet::TShape &begin, const mxnet::TShape &end, const OpContext& ctx, |
| const NDArray &in, const NDArray &out) { |
| using namespace mshadow; |
| using namespace mxnet_op; |
| using namespace csr; |
| nnvm::dim_t begin_row = begin[0]; |
| nnvm::dim_t end_row = end[0]; |
| nnvm::dim_t indptr_len = end_row - begin_row + 1; |
| out.CheckAndAllocAuxData(kIndPtr, Shape1(indptr_len)); |
|   // assume idx and indptr share the same type |
| MSHADOW_IDX_TYPE_SWITCH(in.aux_type(kIndPtr), RType, { |
| MSHADOW_IDX_TYPE_SWITCH(in.aux_type(kIdx), IType, { |
| MSHADOW_TYPE_SWITCH(in.dtype(), DType, { |
| RType* in_indptr = in.aux_data(kIndPtr).dptr<RType>(); |
| RType* out_indptr = out.aux_data(kIndPtr).dptr<RType>(); |
| SliceCsrIndPtrImpl<xpu, RType>(begin_row, end_row, ctx.run_ctx, in_indptr, out_indptr); |
| |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| |
| RType nnz = 0; |
| mshadow::Copy(Tensor<cpu, 1, RType>(&nnz, Shape1(1)), |
| Tensor<xpu, 1, RType>(out_indptr + indptr_len - 1, Shape1(1), s)); |
| // return csr zeros if nnz = 0 |
| if (nnz == 0) { |
| out.set_aux_shape(kIdx, Shape1(0)); |
| return; |
| } |
| // copy indices and values |
| out.CheckAndAllocAuxData(kIdx, Shape1(nnz)); |
| out.CheckAndAllocData(Shape1(nnz)); |
| IType* in_idx = in.aux_data(kIdx).dptr<IType>(); |
| IType* out_idx = out.aux_data(kIdx).dptr<IType>(); |
| DType* in_data = in.data().dptr<DType>(); |
| DType* out_data = out.data().dptr<DType>(); |
| |
| RType offset = 0; |
| mshadow::Copy(Tensor<cpu, 1, RType>(&offset, Shape1(1)), |
| Tensor<xpu, 1, RType>(in_indptr + begin_row, Shape1(1), s)); |
| |
| mshadow::Copy(Tensor<xpu, 1, IType>(out_idx, Shape1(nnz), s), |
| Tensor<xpu, 1, IType>(in_idx + offset, Shape1(nnz), s), s); |
| mshadow::Copy(Tensor<xpu, 1, DType>(out_data, Shape1(nnz), s), |
| Tensor<xpu, 1, DType>(in_data + offset, Shape1(nnz), s), s); |
| }); |
| }); |
| }); |
| } |
| |
| /*! |
| * \brief slice a CSRNDArray for two dimensions |
| */ |
| struct SliceDimTwoCsrAssign { |
| /*! |
| * \brief This function slices a CSRNDArray on axis one between begin_col and end_col |
| * \param i loop index |
| * \param out_idx output csr ndarray column indices |
| * \param out_data output csr ndarray data |
| * \param out_indptr output csr ndarray row index pointer |
| * \param in_idx input csr ndarray column indices |
| * \param in_data input csr ndarray data |
| * \param in_indptr input csr ndarray row index pointer |
|    * \param begin_col begin column index |
|    * \param end_col end column index |
| */ |
| template<typename IType, typename RType, typename DType> |
| MSHADOW_XINLINE static void Map(int i, |
| IType* out_idx, DType* out_data, |
| const RType* out_indptr, |
| const IType* in_idx, const DType* in_data, |
| const RType* in_indptr, |
| const int begin_col, const int end_col) { |
| RType ind = out_indptr[i]; |
| for (RType j = in_indptr[i]; j < in_indptr[i+1]; j++) { |
| // indices of CSRNDArray are in ascending order per row |
| if (in_idx[j] >= end_col) { |
| break; |
| } else if (in_idx[j] >= begin_col) { |
| out_idx[ind] = in_idx[j] - begin_col; |
| out_data[ind] = in_data[j]; |
| ind++; |
| } |
| } |
| } |
| }; |
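| |
| // Example (comment-only sketch): a row with column indices {0, 2, 5, 7}, |
| // begin_col = 2 and end_col = 6 keeps the entries at columns {2, 5}, |
| // remapped to output columns {0, 3}. |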
| |
| /* |
| * Slice a CSR NDArray for two dimensions |
| */ |
| template<typename xpu> |
| void SliceDimTwoCsrImpl(const mxnet::TShape &begin, const mxnet::TShape &end, const OpContext& ctx, |
| const NDArray &in, const NDArray &out); |
| |
| |
| template<typename xpu> |
| void SliceCsrImpl(const SliceParam ¶m, const OpContext& ctx, |
| const NDArray &in, OpReqType req, const NDArray &out) { |
| if (req == kNullOp) return; |
| CHECK_NE(req, kAddTo) << "kAddTo for Slice on CSR input is not supported"; |
| CHECK_NE(req, kWriteInplace) << "kWriteInplace for Slice on CSR input is not supported"; |
| |
| const mxnet::TShape ishape = in.shape(); |
| const mxnet::TShape oshape = out.shape(); |
| |
| uint32_t N = ishape.ndim(); |
| mxnet::TShape begin(N), end(N); |
| for (uint32_t i = 0; i < N; ++i) { |
| int s = 0; |
| if (param.begin[i]) { |
| s = *param.begin[i]; |
| if (s < 0) s += ishape[i]; |
| } |
| begin[i] = s; |
| end[i] = s + oshape[i]; |
| } |
| switch (N) { |
| case 1: { |
| SliceDimOneCsrImpl<xpu>(begin, end, ctx, in, out); |
| break; |
| } |
| case 2: { |
| SliceDimTwoCsrImpl<xpu>(begin, end, ctx, in, out); |
| break; |
| } |
| default: |
|       LOG(FATAL) << "Slice on CSR input is only supported for 2-D shapes"; |
| break; |
| } |
| } |
| |
| template<typename xpu> |
| void SliceEx(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<NDArray>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<NDArray>& outputs) { |
| CHECK_EQ(inputs.size(), 1); |
| CHECK_EQ(outputs.size(), 1); |
| const SliceParam& param = nnvm::get<SliceParam>(attrs.parsed); |
| auto in_stype = inputs[0].storage_type(); |
| if (in_stype == kCSRStorage) { |
| SliceCsrImpl<xpu>(param, ctx, inputs[0], req[0], outputs[0]); |
| } else { |
|     LOG(FATAL) << "Slice not implemented for storage type " << in_stype; |
| } |
| } |
| |
| template<int ndim> |
| inline void GetIndexRange(const mxnet::TShape& dshape, |
| const nnvm::Tuple<dmlc::optional<int>>& param_begin, |
| const nnvm::Tuple<dmlc::optional<int>>& param_end, |
| const nnvm::Tuple<dmlc::optional<int>>& param_step, |
| common::StaticArray<index_t, ndim>* begin, |
| common::StaticArray<index_t, ndim>* end, |
| common::StaticArray<index_t, ndim>* step) { |
| CHECK_NE(dshape.ndim(), 0U); |
| CHECK_LE(param_begin.ndim(), dshape.ndim()) |
| << "Slicing axis exceeds data dimensions"; |
| CHECK_LE(param_end.ndim(), dshape.ndim()) |
| << "Slicing axis exceeds data dimensions"; |
| CHECK_EQ(param_begin.ndim(), param_end.ndim()) |
| << "begin and end must have the same length"; |
| CHECK_EQ(ndim, dshape.ndim()) |
| << "Static array size=" << ndim |
| << " is not equal to data shape ndim=" << dshape.ndim(); |
| |
| if (param_step.ndim() != 0U) { |
| CHECK_EQ(param_step.ndim(), param_begin.ndim()) |
| << "step and begin must have the same length"; |
| } |
| |
| for (index_t i = 0; i < param_begin.ndim(); ++i) { |
| index_t s = param_step.ndim() != 0U && param_step[i].has_value() ? param_step[i].value() : 1; |
| CHECK_NE(s, 0) << "slice op step[" << i << "] cannot be 0"; |
| |
| index_t b = 0, e = 0; |
| const index_t len = dshape[i]; |
| if (len > 0) { |
| b = param_begin[i].has_value() ? param_begin[i].value() : (s < 0 ? len - 1 : 0); |
| e = param_end[i].has_value() ? param_end[i].value() : (s < 0 ? -1 : len); |
| |
| // checking upper and lower bounds for begin |
| if (b < 0) { |
| b += len; |
| CHECK_GE(b, 0) << "slicing with begin[" << i << "]=" << b - len |
| << " exceeds limit of input dimension[" << i << "]=" << len; |
| } |
| CHECK_LT(b, len) << "slicing with begin[" << i << "]=" << b |
| << " exceeds limit of input dimension[" << i << "]=" << len; |
| |
| // checking upper and lower bounds for end |
| if (e < 0 && param_end[i].has_value()) { |
| e += len; |
| CHECK_GE(e, 0) << "slicing with end[" << i << "]=" << e - len |
| << " exceeds limit of input dimension[" << i << "]=" << len; |
| } |
| CHECK_LE(e, len) << "slicing with end[" << i << "]=" << e |
| << " exceeds limit of input dimension[" << i << "]=" << len; |
| |
| // checking begin==end case which is not supported |
| CHECK_NE(b, e) << "slicing with begin[" << i << "]=end[" << i << "]=" |
| << e << " results in an empty tensor and is not supported"; |
| } |
| |
| (*begin)[i] = b; |
| (*end)[i] = e; |
| (*step)[i] = s; |
| } |
| |
| for (index_t i = param_begin.ndim(); i < dshape.ndim(); ++i) { |
| (*begin)[i] = 0; |
| (*end)[i] = dshape[i]; |
| (*step)[i] = 1; |
| } |
| } |
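| |
| // Example (comment-only sketch): for dshape = (10, 10) with |
| // param_begin = (None, 2), param_end = (None, 8) and param_step = (-1, 2), |
| // the resolved ranges are begin = (9, 2), end = (-1, 8), step = (-1, 2): |
| // a missing begin/end defaults to the far end of the axis when the step is |
| // negative, and to 0/len otherwise. |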
| |
| inline void SetSliceOpOutputDimSize(const index_t i, const int b, |
| const int e, const int s, |
| mxnet::TShape* oshape) { |
| if (e != b) { |
| if (s > 0) { |
|       CHECK_LT(b, e) << "slicing with begin[" << i << "]=" << b << ", end[" << i << "]=" |
| << e << ", and step[" << i << "]=" << s << " is invalid"; |
| (*oshape)[i] = (e - b - 1) / s + 1; |
| } else { |
|       CHECK_LT(e, b) << "slicing with begin[" << i << "]=" << b << ", end[" << i << "]=" |
| << e << ", and step[" << i << "]=" << s << " is invalid"; |
| (*oshape)[i] = (b - e - 1) / (-s) + 1; |
| } |
| } // else leave oshape[i] as 0 for partial infer |
| } |
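| |
| // Example (comment-only sketch): begin = 1, end = 7, step = 2 selects indices |
| // {1, 3, 5}, so the output dim is (7 - 1 - 1) / 2 + 1 = 3; begin = 7, end = 1, |
| // step = -2 selects {7, 5, 3} and likewise yields 3. |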
| |
| inline bool SliceOpShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector* in_attrs, |
| mxnet::ShapeVector* out_attrs) { |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| const mxnet::TShape& dshape = (*in_attrs)[0]; |
| if (dshape.ndim() == 0) return false; |
| const SliceParam& param = nnvm::get<SliceParam>(attrs.parsed); |
| mxnet::TShape oshape = dshape; |
| |
| MXNET_NDIM_SWITCH(dshape.ndim(), ndim, { |
| common::StaticArray<index_t, ndim> begin, end, step; |
| GetIndexRange(dshape, param.begin, param.end, param.step, &begin, &end, &step); |
| for (index_t i = 0; i < param.begin.ndim(); ++i) { |
| const int b = begin[i], e = end[i], s = step[i]; |
| SetSliceOpOutputDimSize(i, b, e, s, &oshape); |
| } |
| }); |
| |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); |
| return !shape_is_none(dshape) && !shape_is_none(oshape); |
| } |
| |
| template<int ndim, int req, typename xpu> |
| struct slice_forward; |
| |
| template<int ndim, int req> |
| struct slice_forward<ndim, req, gpu> { |
| // i is the i-th row after flattening out into 2D tensor |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* data, |
| const mshadow::Shape<ndim> dshape, |
| const mshadow::Shape<ndim> oshape, |
| const common::StaticArray<index_t, ndim> begin, |
| const common::StaticArray<index_t, ndim> step) { |
| const index_t data_last_dim_size = dshape[ndim-1]; |
| const index_t out_last_dim_size = oshape[ndim-1]; |
| const index_t step_last_dim = step[ndim-1]; |
| const index_t begin_last_dim = begin[ndim-1]; |
| const index_t j = i % out_last_dim_size; |
|     index_t irow = 0;  // row id of flattened 2D data |
| index_t stride = 1; |
| index_t idx = i / out_last_dim_size; |
| #pragma unroll |
| for (int k = ndim - 2; k >= 0; --k) { |
| irow += stride * ((idx % oshape[k]) * step[k] + begin[k]); |
| idx /= oshape[k]; |
| stride *= dshape[k]; |
| } |
| KERNEL_ASSIGN(out[i], req, |
| data[irow * data_last_dim_size + j * step_last_dim + begin_last_dim]); |
| } |
| }; |
| |
| template<int ndim, int req> |
| struct slice_forward<ndim, req, cpu> { |
| // i is the i-th row after flattening out into 2D tensor |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* data, |
| const mshadow::Shape<ndim> dshape, |
| const mshadow::Shape<ndim> oshape, |
| const common::StaticArray<index_t, ndim> begin, |
| const common::StaticArray<index_t, ndim> step) { |
| const index_t data_last_dim_size = dshape[ndim-1]; |
| const index_t out_last_dim_size = oshape[ndim-1]; |
| const index_t step_last_dim = step[ndim-1]; |
| const index_t begin_last_dim = begin[ndim-1]; |
| index_t out_offset = i * out_last_dim_size; |
| for (index_t j = 0; j < out_last_dim_size; ++j) { |
|       index_t irow = 0;  // row id of flattened 2D data |
| index_t stride = 1; |
| index_t idx = i; |
| #pragma unroll |
| for (int k = ndim - 2; k >= 0; --k) { |
| irow += stride * ((idx % oshape[k]) * step[k] + begin[k]); |
| idx /= oshape[k]; |
| stride *= dshape[k]; |
| } |
| KERNEL_ASSIGN(out[out_offset++], req, |
| data[irow * data_last_dim_size + j * step_last_dim + begin_last_dim]); |
| } |
| } |
| }; |
| |
| template<typename xpu> |
| void SliceOpForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| CHECK_EQ(inputs.size(), 1U); |
| CHECK_EQ(outputs.size(), 1U); |
| CHECK_EQ(req.size(), 1U); |
| if (req[0] == kNullOp) return; |
| using namespace mshadow; |
| Stream<xpu>* s = ctx.get_stream<xpu>(); |
| const TBlob& data = inputs[0]; |
| const TBlob& out = outputs[0]; |
| const SliceParam& param = nnvm::get<SliceParam>(attrs.parsed); |
| MXNET_NDIM_SWITCH(data.ndim(), ndim, { |
| common::StaticArray<index_t, ndim> begin, end, step; |
| GetIndexRange(data.shape_, param.begin, param.end, param.step, &begin, &end, &step); |
| MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { |
| MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { |
| size_t num_threads = out.shape_.FlatTo2D()[0]; |
| if (std::is_same<xpu, gpu>::value) { |
| num_threads *= out.shape_.get<ndim>()[ndim - 1]; |
| } |
| mxnet_op::Kernel<slice_forward<ndim, Req, xpu>, xpu>::Launch(s, num_threads, |
| out.dptr<DType>(), data.dptr<DType>(), |
| data.shape_.get<ndim>(), out.shape_.get<ndim>(), begin, step); |
| }) |
| }) |
| }) |
| } |
| |
| template<int ndim, int req, typename xpu> |
| struct slice_assign; |
| |
| template<int ndim, int req> |
| struct slice_assign<ndim, req, cpu> { |
| // i is the i-th row after flattening out into 2D tensor |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* val, |
| const mshadow::Shape<ndim> oshape, |
| const mshadow::Shape<ndim> vshape, |
| const common::StaticArray<index_t, ndim> begin, |
| const common::StaticArray<index_t, ndim> step) { |
| const index_t data_last_dim_size = oshape[ndim-1]; |
| const index_t out_last_dim_size = vshape[ndim-1]; |
| const index_t step_last_dim = step[ndim-1]; |
| const index_t begin_last_dim = begin[ndim-1]; |
| index_t offset = i * out_last_dim_size; |
| for (index_t j = 0; j < out_last_dim_size; ++j) { |
|       index_t irow = 0;  // row id of flattened 2D out |
| index_t stride = 1; |
| index_t idx = i; |
| #pragma unroll |
| for (int k = ndim - 2; k >= 0; --k) { |
| irow += stride * ((idx % vshape[k]) * step[k] + begin[k]); |
| idx /= vshape[k]; |
| stride *= oshape[k]; |
| } |
| KERNEL_ASSIGN(out[irow * data_last_dim_size + j * step_last_dim + begin_last_dim], |
| req, val[offset++]); |
| } |
| } |
| }; |
| |
| template<int ndim, int req> |
| struct slice_assign<ndim, req, gpu> { |
| // i is the i-th row after flattening out into 2D tensor |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* val, |
| const mshadow::Shape<ndim> oshape, |
| const mshadow::Shape<ndim> vshape, |
| const common::StaticArray<index_t, ndim> begin, |
| const common::StaticArray<index_t, ndim> step) { |
| const index_t data_last_dim_size = oshape[ndim-1]; |
| const index_t out_last_dim_size = vshape[ndim-1]; |
| const index_t step_last_dim = step[ndim-1]; |
| const index_t begin_last_dim = begin[ndim-1]; |
| const index_t j = i % out_last_dim_size; |
|     index_t irow = 0;  // row id of flattened 2D out |
| index_t stride = 1; |
| index_t idx = i / out_last_dim_size; |
| #pragma unroll |
| for (int k = ndim - 2; k >= 0; --k) { |
| irow += stride * ((idx % vshape[k]) * step[k] + begin[k]); |
| idx /= vshape[k]; |
| stride *= oshape[k]; |
| } |
| KERNEL_ASSIGN(out[irow * data_last_dim_size + j * step_last_dim + begin_last_dim], |
| req, val[i]); |
| } |
| }; |
| |
| template<typename xpu> |
| void SliceOpBackward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| CHECK_EQ(inputs.size(), 1U); |
| CHECK_EQ(outputs.size(), 1U); |
| CHECK_EQ(req.size(), 1U); |
| if (req[0] == kNullOp) return; |
| using namespace mshadow; |
| Stream<xpu>* s = ctx.get_stream<xpu>(); |
| const TBlob& ograd = inputs[0]; |
| const TBlob& igrad = outputs[0]; |
| const SliceParam& param = nnvm::get<SliceParam>(attrs.parsed); |
| if (req[0] == kWriteTo) { |
| Fill(s, igrad, req[0], 0); |
| } else if (req[0] == kWriteInplace) { |
| LOG(FATAL) << "_slice_backward does not support kWriteInplace"; |
| } |
| MXNET_NDIM_SWITCH(ograd.ndim(), ndim, { |
| common::StaticArray<index_t, ndim> begin, end, step; |
| GetIndexRange(igrad.shape_, param.begin, param.end, param.step, &begin, &end, &step); |
| MSHADOW_TYPE_SWITCH(ograd.type_flag_, DType, { |
| MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { |
| int num_threads = ograd.shape_.FlatTo2D()[0]; |
| if (std::is_same<xpu, gpu>::value) { |
| num_threads *= ograd.shape_.get<ndim>()[ndim - 1]; |
| } |
| mxnet_op::Kernel<slice_assign<ndim, Req, xpu>, xpu>::Launch(s, num_threads, |
| igrad.dptr<DType>(), ograd.dptr<DType>(), |
| igrad.shape_.get<ndim>(), ograd.shape_.get<ndim>(), begin, step); |
| }) |
| }) |
| }) |
| } |
| |
| inline bool SliceAssignOpShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| CHECK_EQ(in_attrs->size(), 2U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| const mxnet::TShape& dshape = (*in_attrs)[0]; |
| if (dshape.ndim() == 0U || dshape.Size() == 0U) return false; |
| mxnet::TShape vshape = dshape; // vshape is the value shape on the right hand side |
| const SliceParam& param = nnvm::get<SliceParam>(attrs.parsed); |
| MXNET_NDIM_SWITCH(dshape.ndim(), ndim, { |
| common::StaticArray<index_t, ndim> begin, end, step; |
| GetIndexRange(dshape, param.begin, param.end, param.step, &begin, &end, &step); |
| for (index_t i = 0; i < param.begin.ndim(); ++i) { |
| const int b = begin[i], e = end[i], s = step[i]; |
| SetSliceOpOutputDimSize(i, b, e, s, &vshape); |
| } |
| }); |
| SHAPE_ASSIGN_CHECK(*in_attrs, 1, vshape); |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape); |
| return true; |
| } |
| |
| template<typename xpu> |
| void SliceAssignOpForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| using namespace mshadow; |
| CHECK_EQ(inputs.size(), 2U); // data[index] = val, data and val are two inputs |
| CHECK_EQ(outputs.size(), 1U); |
| if (req[0] == kNullOp) return; |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| const TBlob& data = inputs[0]; |
| const TBlob& val = inputs[1]; |
| const TBlob& out = outputs[0]; |
| if (req[0] == kWriteTo) { |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| Tensor<xpu, 1, DType> in = inputs[0].FlatTo1D<xpu, DType>(s); |
| Tensor<xpu, 1, DType> out = outputs[0].FlatTo1D<xpu, DType>(s); |
| Copy(out, in, s); |
| }); |
| } else if (req[0] != kWriteInplace) { |
| LOG(FATAL) << "_slice_assign only supports kWriteTo and kWriteInplace"; |
| } |
| |
| const SliceParam& param = nnvm::get<SliceParam>(attrs.parsed); |
| MXNET_NDIM_SWITCH(data.ndim(), ndim, { |
| common::StaticArray<index_t, ndim> begin, end, step; |
| GetIndexRange(data.shape_, param.begin, param.end, param.step, &begin, &end, &step); |
| MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { |
| MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { |
| int num_threads = val.shape_.FlatTo2D()[0]; |
| if (std::is_same<xpu, gpu>::value) { |
| num_threads *= val.shape_.get<ndim>()[ndim - 1]; |
| } |
| mxnet_op::Kernel<slice_assign<ndim, Req, xpu>, xpu>::Launch(s, num_threads, |
| out.dptr<DType>(), val.dptr<DType>(), |
| out.shape_.get<ndim>(), val.shape_.get<ndim>(), begin, step); |
| }) |
| }) |
| }) |
| } |
| |
| struct SliceAssignScalarParam : public dmlc::Parameter<SliceAssignScalarParam> { |
| double scalar; |
| nnvm::Tuple<dmlc::optional<int>> begin, end; |
| nnvm::Tuple<dmlc::optional<int>> step; |
| DMLC_DECLARE_PARAMETER(SliceAssignScalarParam) { |
| DMLC_DECLARE_FIELD(scalar) |
| .set_default(0) |
| .describe("The scalar value for assignment."); |
| DMLC_DECLARE_FIELD(begin) |
| .describe("starting indices for the slice operation, supports negative indices."); |
| DMLC_DECLARE_FIELD(end) |
| .describe("ending indices for the slice operation, supports negative indices."); |
| DMLC_DECLARE_FIELD(step) |
| .set_default(nnvm::Tuple<dmlc::optional<int>>()) |
| .describe("step for the slice operation, supports negative values."); |
| } |
| }; |
| |
| inline bool SliceAssignScalarOpShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| const mxnet::TShape& dshape = (*in_attrs)[0]; |
| if (dshape.ndim() == 0U || dshape.Size() == 0U) return false; |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape); |
| return true; |
| } |
| |
| template<int ndim> |
| struct slice_assign_scalar { |
| // i is the i-th row after flattening out into 2D tensor |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType val, |
| const OpReqType req, |
| const mshadow::Shape<ndim> oshape, |
| const mshadow::Shape<ndim> vshape, |
| const common::StaticArray<index_t, ndim> begin, |
| const common::StaticArray<index_t, ndim> step) { |
| const index_t data_last_dim_size = oshape[ndim-1]; |
| const index_t out_last_dim_size = vshape[ndim-1]; |
| const index_t step_last_dim = step[ndim-1]; |
| const index_t begin_last_dim = begin[ndim-1]; |
| for (index_t j = 0; j < out_last_dim_size; ++j) { |
|       index_t irow = 0;  // row id of flattened 2D out |
| index_t stride = 1; |
| index_t idx = i; |
| #pragma unroll |
| for (int k = ndim - 2; k >= 0; --k) { |
| irow += stride * ((idx % vshape[k]) * step[k] + begin[k]); |
| idx /= vshape[k]; |
| stride *= oshape[k]; |
| } |
| KERNEL_ASSIGN(out[irow * data_last_dim_size + j * step_last_dim + begin_last_dim], req, val); |
| } |
| } |
| }; |
| |
| template<typename xpu> |
| void SliceAssignScalarOpForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| CHECK_EQ(inputs.size(), 1U); |
| CHECK_EQ(outputs.size(), 1U); |
| CHECK_EQ(req.size(), 1U); |
| using namespace mshadow; |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| |
| const TBlob& data = inputs[0]; |
| const TBlob& out = outputs[0]; |
| if (req[0] == kWriteTo) { |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| Tensor<xpu, 1, DType> in = inputs[0].FlatTo1D<xpu, DType>(s); |
| Tensor<xpu, 1, DType> out = outputs[0].FlatTo1D<xpu, DType>(s); |
| Copy(out, in, s); |
| }); |
| } else if (req[0] != kWriteInplace) { |
| LOG(FATAL) << "_crop_assign_scalar only supports kWriteTo and kWriteInplace"; |
| } |
| |
| mxnet::TShape vshape = data.shape_; |
| const SliceAssignScalarParam& param = nnvm::get<SliceAssignScalarParam>(attrs.parsed); |
| MXNET_NDIM_SWITCH(data.ndim(), ndim, { |
| common::StaticArray<index_t, ndim> begin, end, step; |
| GetIndexRange(data.shape_, param.begin, param.end, param.step, &begin, &end, &step); |
| for (index_t i = 0; i < param.begin.ndim(); ++i) { |
| const int b = begin[i], e = end[i], s = step[i]; |
| SetSliceOpOutputDimSize(i, b, e, s, &vshape); |
| } |
| MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { |
| mxnet_op::Kernel<slice_assign_scalar<ndim>, xpu>::Launch(s, vshape.FlatTo2D()[0], |
| out.dptr<DType>(), static_cast<DType>(param.scalar), req[0], |
| out.shape_.get<ndim>(), vshape.get<ndim>(), begin, step); |
| }) |
| }) |
| } |
| |
| struct SliceAxisParam : public dmlc::Parameter<SliceAxisParam> { |
| int axis; |
| int begin; |
| dmlc::optional<int> end; |
| DMLC_DECLARE_PARAMETER(SliceAxisParam) { |
| DMLC_DECLARE_FIELD(axis) |
|       .describe("Axis along which to be sliced, supports negative indices."); |
|     DMLC_DECLARE_FIELD(begin) |
|       .describe("The beginning index along the axis to be sliced, " |
|                 "supports negative indices."); |
|     DMLC_DECLARE_FIELD(end) |
|       .describe("The ending index along the axis to be sliced, " |
|                 "supports negative indices."); |
| } |
| }; |
| |
| inline void GetSliceAxisParams(const SliceAxisParam& param, const mxnet::TShape& ishape, |
| int* axis, index_t* begin, index_t* end) { |
| *axis = param.axis; |
| if (*axis < 0) { |
| *axis += static_cast<int>(ishape.ndim()); |
| } |
| CHECK(*axis < static_cast<int>(ishape.ndim()) && *axis >= 0) << |
|     "Transformed axis must be smaller than the source ndim and non-negative! Received axis=" << |
| param.axis << ", src_ndim=" << ishape.ndim() << ", transformed axis=" << *axis; |
| index_t axis_size = static_cast<index_t>(ishape[*axis]); |
| *begin = param.begin; |
| *end = -1; |
| if (*begin < 0) { |
| *begin += axis_size; |
| } |
| if (axis_size) { |
| if (!static_cast<bool>(param.end)) { |
| *end = axis_size; |
| } else { |
| *end = param.end.value(); |
| if (*end < 0) { |
| *end += axis_size; |
| } |
| } |
|     CHECK(*end <= axis_size) << "Invalid end: end=" << *end << " exceeds axis_size=" << axis_size; |
|     CHECK(*begin < *end) |
|       << "Invalid begin and end: got begin=" << param.begin << ", end=" << param.end; |
| } else { |
| *begin = 0; |
| *end = 0; |
| } |
|   CHECK(*end >= 0) |
|     << "Invalid begin and end: got begin=" << param.begin << ", end=" << param.end; |
|   CHECK(*begin >= 0) << "Invalid begin: begin=" << param.begin; |
| } |
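| |
| // Example (comment-only sketch): for ishape = (2, 3, 4), axis = -1, |
| // begin = -3 and an unset end, the params resolve to axis = 2, begin = 1, |
| // end = 4. |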
| |
| inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| const SliceAxisParam& param = nnvm::get<SliceAxisParam>(attrs.parsed); |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| mxnet::TShape& ishape = (*in_attrs)[0]; |
| int axis; |
| index_t begin, end; |
| GetSliceAxisParams(param, ishape, &axis, &begin, &end); |
| mxnet::TShape shape(ishape.ndim()); |
| for (index_t i = 0; i < ishape.ndim(); ++i) { |
| if (static_cast<int>(i) == axis) { |
| shape[i] = static_cast<index_t>(end - begin); |
| } else { |
| shape[i] = ishape[i]; |
| } |
| } |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape); |
| return true; |
| } |
| |
| |
| template<typename xpu> |
| void SliceAxis(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| using namespace mshadow::expr; |
| const SliceAxisParam& param = nnvm::get<SliceAxisParam>(attrs.parsed); |
| mshadow::Stream<xpu> *s = ctx.get_stream<xpu>(); |
| int axis; |
| index_t begin, end; |
| GetSliceAxisParams(param, inputs[0].shape_, &axis, &begin, &end); |
| int ndim = static_cast<int>(outputs[0].ndim()); |
| |
| if (axis + 1 == ndim) { |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| mshadow::Tensor<xpu, 2, DType> in = |
| inputs[0].FlatTo2D<xpu, DType>(s); |
| mshadow::Tensor<xpu, 2, DType> out = |
| outputs[0].FlatTo2D<xpu, DType>(s); |
| ASSIGN_DISPATCH(out, req[0], slice<1>(in, begin, end)); |
| }); |
| } else { |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| mshadow::Tensor<xpu, 3, DType> in = |
| inputs[0].FlatTo3D<xpu, DType>(axis, s); |
| mshadow::Tensor<xpu, 3, DType> out = |
| outputs[0].FlatTo3D<xpu, DType>(axis, s); |
| ASSIGN_DISPATCH(out, req[0], slice<1>(in, begin, end)); |
| }); |
| } |
| } |
| |
| // Backward pass of broadcast over the given axis |
| template<typename xpu> |
| void SliceAxisGrad_(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| const SliceAxisParam& param = nnvm::get<SliceAxisParam>(attrs.parsed); |
| using namespace mshadow::op; |
| using namespace mshadow::expr; |
| mshadow::Stream<xpu> *s = ctx.get_stream<xpu>(); |
| int axis; |
| index_t begin, end; |
| GetSliceAxisParams(param, outputs[0].shape_, &axis, &begin, &end); |
| int ndim = static_cast<int>(outputs[0].shape_.ndim()); |
| |
| if (axis + 1 == ndim) { |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| mshadow::Tensor<xpu, 2, DType> ograd = |
| inputs[0].FlatTo2D<xpu, DType>(s); |
| mshadow::Tensor<xpu, 2, DType> igrad = |
| outputs[0].FlatTo2D<xpu, DType>(s); |
| if (req[0] == kAddTo) { |
| slice<1>(igrad, begin, end) += F<identity>(ograd); |
| } else if (req[0] == kWriteTo) { |
| igrad = 0.0f; |
| slice<1>(igrad, begin, end) = F<identity>(ograd); |
| } else { |
| CHECK_EQ(req[0], kNullOp); |
| } |
| }); |
| } else { |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| mshadow::Tensor<xpu, 3, DType> ograd = |
| inputs[0].FlatTo3D<xpu, DType>(axis, s); |
| mshadow::Tensor<xpu, 3, DType> igrad = |
| outputs[0].FlatTo3D<xpu, DType>(axis, s); |
| if (req[0] == kAddTo) { |
| slice<1>(igrad, begin, end) += F<identity>(ograd); |
| } else if (req[0] == kWriteTo) { |
| igrad = 0.0f; |
| slice<1>(igrad, begin, end) = F<identity>(ograd); |
| } else { |
| CHECK_EQ(req[0], kNullOp); |
| } |
| }); |
| } |
| } |
| |
| struct SliceLikeParam : public dmlc::Parameter<SliceLikeParam> { |
| mxnet::TShape axes; |
| DMLC_DECLARE_PARAMETER(SliceLikeParam) { |
| DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape()) |
| .describe("List of axes on which input data will be sliced according to the " |
| "corresponding size of the second input. By default will slice on " |
| "all axes. Negative axes are supported."); |
| } |
| }; |
| |
| inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| const SliceLikeParam& param = nnvm::get<SliceLikeParam>(attrs.parsed); |
| CHECK_EQ(in_attrs->size(), 2U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| mxnet::TShape& ishape = (*in_attrs)[0]; |
| mxnet::TShape& from_shape = (*in_attrs)[1]; |
| if (param.axes.ndim() == 0) { |
| CHECK_EQ(ishape.ndim(), from_shape.ndim()) |
|       << "By default slice_like performs slice on all axes, but ndim mismatch " |
| "for inputs: " << ishape.ndim() << " vs. " << from_shape.ndim(); |
| for (index_t i = 0; i < ishape.ndim(); ++i) { |
| CHECK_GE(ishape[i], from_shape[i]) |
| << "Slice axis " << i << " with size " << from_shape[i] |
|         << " exceeds limit of input with size " << ishape[i]; |
| } |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, from_shape); |
| } else { |
| mxnet::TShape shape(ishape); |
| for (index_t i = 0; i < param.axes.ndim(); ++i) { |
| int axis = static_cast<int>(param.axes[i]); |
| if (axis < 0) { |
| axis += static_cast<int>(ishape.ndim()); |
| } |
| CHECK_GE(axis, 0) |
| << "Slice axis: " << static_cast<int>(param.axes[i]) << " too small"; |
| CHECK_GT(ishape.ndim(), axis) |
| << "Slice axis: " << axis << " exceeds first input: " << ishape.ndim(); |
| CHECK_GT(from_shape.ndim(), axis) |
| << "Slice axis: " << axis << " exceeds second input: " << from_shape.ndim(); |
| shape[axis] = from_shape[axis]; |
| CHECK_GE(ishape[axis], from_shape[axis]) |
| << "Slice axis " << axis << " with size " << from_shape[axis] |
|         << " exceeds limit of input with size " << ishape[axis]; |
| } |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape); |
| } |
| return true; |
| } |
| |
| inline void SliceLikeInferRanges(const mxnet::TShape& dshape, |
| const mxnet::TShape& fshape, |
| const mxnet::TShape& axes, |
| nnvm::Tuple<dmlc::optional<int>>* param_begin, |
| nnvm::Tuple<dmlc::optional<int>>* param_end, |
| nnvm::Tuple<dmlc::optional<int>>* param_step) { |
| std::vector<dmlc::optional<int>> pb(dshape.ndim()); |
| std::vector<dmlc::optional<int>> pe(dshape.ndim()); |
| std::vector<dmlc::optional<int>> ps(dshape.ndim()); |
| if (axes.ndim() == 0) { |
| for (index_t i = 0; i < dshape.ndim(); ++i) { |
| pb[i] = 0; |
| pe[i] = fshape[i]; |
| ps[i] = 1; |
| } |
| } else { |
| for (index_t i = 0; i < axes.ndim(); ++i) { |
| int axis = static_cast<int>(axes[i]); |
| if (axis < 0) { |
| axis += static_cast<int>(dshape.ndim()); |
| } |
| CHECK_GE(axis, 0) |
| << "Slice axis: " << static_cast<int>(axes[i]) << " too small"; |
| CHECK_LT(axis, static_cast<int>(dshape.ndim())) |
| << "Slice axis: " << axis << " exceeds first input: " << dshape.ndim(); |
| CHECK_LT(axis, static_cast<int>(fshape.ndim())) |
|       << "Slice axis: " << axis << " exceeds second input: " << fshape.ndim(); |
| pb[axis] = 0; |
| pe[axis] = fshape[axis]; |
| ps[axis] = 1; |
| } |
| } |
| *param_begin = nnvm::Tuple<dmlc::optional<int>>(pb.begin(), pb.end()); |
| *param_end = nnvm::Tuple<dmlc::optional<int>>(pe.begin(), pe.end()); |
| *param_step = nnvm::Tuple<dmlc::optional<int>>(ps.begin(), ps.end()); |
| } |
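| |
| // Example (comment-only sketch): dshape = (4, 5), fshape = (2, 3), |
| // axes = (1,) produces begin = (None, 0), end = (None, 3), step = (None, 1); |
| // axis 0 is left unconstrained, so GetIndexRange keeps its full range. |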
| |
| template<typename xpu> |
| void SliceLikeForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| CHECK_EQ(inputs.size(), 2U); |
| CHECK_EQ(outputs.size(), 1U); |
| CHECK_EQ(req.size(), 1U); |
| using namespace mshadow::expr; |
| const SliceLikeParam& param = nnvm::get<SliceLikeParam>(attrs.parsed); |
| mshadow::Stream<xpu> *s = ctx.get_stream<xpu>(); |
| const TBlob& data = inputs[0]; |
| const TBlob& out = outputs[0]; |
| const mxnet::TShape& ishape = data.shape_; |
| const mxnet::TShape& from_shape = inputs[1].shape_; |
| nnvm::Tuple<dmlc::optional<int>> param_begin; |
| nnvm::Tuple<dmlc::optional<int>> param_end; |
| nnvm::Tuple<dmlc::optional<int>> param_step; |
| SliceLikeInferRanges(ishape, from_shape, param.axes, ¶m_begin, ¶m_end, ¶m_step); |
| |
| MXNET_NDIM_SWITCH(data.ndim(), ndim, { |
| common::StaticArray<index_t, ndim> begin, end, step; |
| GetIndexRange(data.shape_, param_begin, param_end, param_step, &begin, &end, &step); |
| MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { |
| MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { |
| int num_threads = out.shape_.FlatTo2D()[0]; |
| if (std::is_same<xpu, gpu>::value) { |
| num_threads *= out.shape_.get<ndim>()[ndim - 1]; |
| } |
| mxnet_op::Kernel<slice_forward<ndim, Req, xpu>, xpu>::Launch(s, |
| num_threads, out.dptr<DType>(), data.dptr<DType>(), |
| data.shape_.get<ndim>(), out.shape_.get<ndim>(), begin, step); |
| }) |
| }) |
| }) |
| } |
| |
| template<typename xpu> |
| void SliceLikeBackward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| CHECK_EQ(inputs.size(), 1U); |
| CHECK_EQ(outputs.size(), 2U); |
| CHECK_EQ(req.size(), 2U); |
| using namespace mshadow; |
| Stream<xpu>* s = ctx.get_stream<xpu>(); |
| if (req[1] != kNullOp && req[1] != kAddTo) { |
|     Fill(s, outputs[1], req[1], 0);  // Second input is not relevant to gradients. |
| } |
| if (req[0] == kNullOp) return; |
| const TBlob& ograd = inputs[0]; |
| const TBlob& igrad = outputs[0]; |
| const SliceLikeParam& param = nnvm::get<SliceLikeParam>(attrs.parsed); |
| if (req[0] == kWriteTo) { |
| Fill(s, igrad, req[0], 0); |
| } else if (req[0] == kWriteInplace) { |
| LOG(FATAL) << "_slice_like_backward does not support kWriteInplace"; |
| } |
| |
| const mxnet::TShape& ishape = ograd.shape_; |
| const mxnet::TShape& from_shape = outputs[1].shape_; |
| nnvm::Tuple<dmlc::optional<int>> param_begin; |
| nnvm::Tuple<dmlc::optional<int>> param_end; |
| nnvm::Tuple<dmlc::optional<int>> param_step; |
| SliceLikeInferRanges(ishape, from_shape, param.axes, ¶m_begin, ¶m_end, ¶m_step); |
| |
| MXNET_NDIM_SWITCH(ograd.ndim(), ndim, { |
| common::StaticArray<index_t, ndim> begin, end, step; |
| GetIndexRange(ograd.shape_, param_begin, param_end, param_step, &begin, &end, &step); |
| MSHADOW_TYPE_SWITCH(ograd.type_flag_, DType, { |
| MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { |
| int num_threads = ograd.shape_.FlatTo2D()[0]; |
| if (std::is_same<xpu, gpu>::value) { |
| num_threads *= ograd.shape_.get<ndim>()[ndim - 1]; |
| } |
| mxnet_op::Kernel<slice_assign<ndim, Req, xpu>, xpu>::Launch(s, num_threads, |
| igrad.dptr<DType>(), ograd.dptr<DType>(), |
| igrad.shape_.get<ndim>(), ograd.shape_.get<ndim>(), begin, step); |
| }) |
| }) |
| }) |
| } |
| |
| struct ClipParam : public dmlc::Parameter<ClipParam> { |
| real_t a_min, a_max; |
| DMLC_DECLARE_PARAMETER(ClipParam) { |
| DMLC_DECLARE_FIELD(a_min) |
| .describe("Minimum value"); |
| DMLC_DECLARE_FIELD(a_max) |
| .describe("Maximum value"); |
| } |
| }; |
| |
| |
| struct clip { |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* datas, |
| DType a_min, DType a_max) { |
| DType data = datas[i]; |
| if (data > a_max) { |
| out[i] = a_max; |
| } else if (data < a_min) { |
| out[i] = a_min; |
| } else { |
| out[i] = data; |
| } |
| } |
| }; |
| |
| |
| struct clip_grad { |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* grad, const DType* datas, |
| DType a_min, DType a_max) { |
| DType data = datas[i]; |
| if (data > a_max) { |
| out[i] = 0; |
| } else if (data < a_min) { |
| out[i] = 0; |
| } else { |
| out[i] = grad[i]; |
| } |
| } |
| }; |
| |
| |
| template<typename xpu> |
| void Clip(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| using namespace mshadow; |
| const ClipParam& param = nnvm::get<ClipParam>(attrs.parsed); |
| CHECK_EQ(inputs[0].type_flag_, outputs[0].type_flag_); |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| mxnet_op::Kernel<mxnet::op::clip, xpu>::Launch(s, outputs[0].Size(), outputs[0].dptr<DType>(), |
| inputs[0].dptr<DType>(), |
| DType(param.a_min), DType(param.a_max)); |
| }); |
| } |
| |
| template<typename xpu> |
| void ClipEx(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<NDArray>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<NDArray>& outputs) { |
| CHECK_EQ(inputs[0].dtype(), outputs[0].dtype()); |
| CHECK_EQ(inputs[0].storage_type(), outputs[0].storage_type()); |
| CHECK_NE(inputs[0].storage_type(), kDefaultStorage); |
| UnaryOp::MapToFCompute<xpu>(attrs, ctx, inputs, req, outputs, Clip<xpu>); |
| } |
| |
| template<typename xpu> |
| void ClipGrad_(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| using namespace mshadow; |
| using namespace mxnet_op; |
| const ClipParam& param = nnvm::get<ClipParam>(attrs.parsed); |
| CHECK_EQ(inputs[0].type_flag_, outputs[0].type_flag_); |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| Kernel<clip_grad, xpu>::Launch(s, outputs[0].Size(), outputs[0].dptr<DType>(), |
| inputs[0].dptr<DType>(), inputs[1].dptr<DType>(), DType(param.a_min), DType(param.a_max)); |
| }); |
| } |
| |
| /*! |
|  * \brief The parameters of the repeat operator include |
|  * the number of repetitions and an optional axis. |
|  * The parameters are later used to deduce the |
|  * output ndarray shape in the RepeatOpShape() function. |
| */ |
| struct RepeatParam : public dmlc::Parameter<RepeatParam> { |
| int repeats = 1; |
| dmlc::optional<int> axis; |
| DMLC_DECLARE_PARAMETER(RepeatParam) { |
| DMLC_DECLARE_FIELD(repeats) |
| .describe("The number of repetitions for each element."); |
| DMLC_DECLARE_FIELD(axis) |
| .set_default(dmlc::optional<int>()) |
| .describe("The axis along which to repeat values." |
|                 " Negative numbers are interpreted as counting from the end." |
| " By default, use the flattened input array," |
| " and return a flat output array."); |
| } |
| }; |
| |
| /*! |
| * \brief Helper function for getting user input params for the operator repeat. |
| * Sanity check the user input values. |
| */ |
| inline void GetRepeatParams(const RepeatParam& param, const mxnet::TShape& ishape, |
| int* repeats, dmlc::optional<int>* axisOpt) { |
| *repeats = param.repeats; |
| CHECK_GE(*repeats, 0) << "repeats cannot be a negative number"; |
| *axisOpt = param.axis; |
| if (static_cast<bool>(*axisOpt)) { |
| int ndims = static_cast<int>(ishape.ndim()); |
| int axis = axisOpt->value(); |
| if (axis < 0) { |
| axis += ndims; |
| } |
| CHECK(axis >= 0 && axis < ndims) << "axis = " << axisOpt->value() << " out of bounds"; |
| } |
| } |
| |
| inline bool RepeatOpShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| const RepeatParam& param = nnvm::get<RepeatParam>(attrs.parsed); |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| const mxnet::TShape& ishape = (*in_attrs)[0]; |
| int repeats = 0; |
| dmlc::optional<int> axisOpt; |
| GetRepeatParams(param, ishape, &repeats, &axisOpt); |
| // If 0 repeats, return an empty 0 dim array |
| if (0 == repeats) { |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape()); |
| return true; |
| } |
| |
| // If repeats > 0, multiply the size of the corresponding axis by repeats |
| if (static_cast<bool>(axisOpt)) { |
| int ndims = static_cast<int>(ishape.ndim()); |
| int axis = axisOpt.value(); |
| if (axis < 0) { |
| axis += ndims; |
| } |
| mxnet::TShape shape(ishape.ndim()); |
| for (index_t i = 0; i < ishape.ndim(); ++i) { |
| if (static_cast<int>(i) == axis) { |
| shape[i] = static_cast<index_t>(repeats) * ishape[i]; |
| } else { |
| shape[i] = ishape[i]; |
| } |
| } |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape); |
| } else { // If axis is not input by user, return a flat 1D array of size = in.size*repeats |
| mxnet::TShape shape(1); |
| shape[0] = ishape.Size() * static_cast<index_t>(repeats); |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape); |
| } |
| return true; |
| } |
| |
| inline bool RepeatOpType(const nnvm::NodeAttrs& attrs, |
| std::vector<int>* in_attrs, |
| std::vector<int>* out_attrs) { |
| CHECK_EQ(in_attrs->size(), 1U); |
| if ((*in_attrs)[0] != -1) { |
| TYPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]); |
| } else if ((*out_attrs)[0] != -1) { |
| TYPE_ASSIGN_CHECK(*in_attrs, 0, (*out_attrs)[0]); |
| } |
| return true; |
| } |
| |
| /*! |
| * \brief Reshape the input and output tensors for |
 * using broadcast_to to achieve the functionality
 * of the repeat operator.
| * \return a pair of mxnet::TShape's, first is the reshaped |
| * input shape, second is the reshaped output shape. |
| */ |
| inline std::pair<mxnet::TShape, mxnet::TShape> ReshapeInputOutputForRepeatOp( |
| const mxnet::TShape& ishape, |
| const dmlc::optional<int>& axisOpt, |
| const int repeats) { |
| if (static_cast<bool>(axisOpt)) { |
| int axis = axisOpt.value(); |
| int ndim = static_cast<int>(ishape.ndim()); |
| if (axis < 0) { |
| axis += ndim; |
| } |
  CHECK(axis >= 0 && axis < static_cast<int>(ishape.ndim()))
    << "axis = " << axisOpt.value() << " out of bounds";
| |
| // reshape the input tensor by adding a dim at the (axis+1)-th dim |
| mxnet::TShape rshape(ishape.ndim()+1); |
| // the shape we want to broadcast to |
| mxnet::TShape bshape(rshape.ndim()); |
| int i = 0; |
| while (i <= axis) { |
| rshape[i] = bshape[i] = ishape[i]; |
| ++i; |
| } |
| rshape[i] = 1; |
| bshape[i] = repeats; |
| while (i < static_cast<int>(ishape.ndim())) { |
| rshape[i+1] = ishape[i]; |
| bshape[i+1] = ishape[i]; |
| ++i; |
| } |
| return std::make_pair(rshape, bshape); |
| } else { |
| // axis is not input by user |
| // reshape the tensor into shape (ishape.Size(), 1) |
| // then add one dim at axis = 1 and broadcast to |
| // shape (ishape.Size(), repeats) |
| mxnet::TShape rshape(2); |
| rshape[0] = ishape.Size(); |
| rshape[1] = 1; |
| |
| mxnet::TShape bshape(2); |
| bshape[0] = rshape[0]; |
| bshape[1] = repeats; |
| return std::make_pair(rshape, bshape); |
| } |
| } |
| |
| template<typename xpu> |
| void RepeatOpForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| const TBlob& iTBlob = inputs[0]; |
| const mxnet::TShape& ishape = iTBlob.shape_; |
| if (ishape.ndim() == 0) return; |
| |
| int repeats = 0; |
| dmlc::optional<int> axisOpt; |
| const RepeatParam& param = nnvm::get<RepeatParam>(attrs.parsed); |
| GetRepeatParams(param, ishape, &repeats, &axisOpt); |
| if (0 == repeats) return; |
| |
| std::pair<mxnet::TShape, mxnet::TShape> rshapes = \ |
| ReshapeInputOutputForRepeatOp(ishape, axisOpt, repeats); |
| |
| // reshaped input tblob |
| TBlob iblob(inputs[0].dptr_, rshapes.first, inputs[0].dev_mask(), |
| inputs[0].type_flag_, inputs[0].dev_id()); |
| std::vector<TBlob> newInputs = {iblob}; |
| |
| // reshaped output tblob |
| TBlob oblob(outputs[0].dptr_, rshapes.second, outputs[0].dev_mask(), |
| outputs[0].type_flag_, outputs[0].dev_id()); |
| std::vector<TBlob> newOutputs = {oblob}; |
| |
| BroadcastCompute<xpu>(attrs, ctx, newInputs, req, newOutputs); |
| } |
| |
| /*! |
| * \brief Compute the gradient of the loss function |
| * with respect to the input of the operator. |
| * Backpropagation is employed to implement the |
| * chain rule. |
| * \param inputs the gradient of the loss function |
| * with respect to the outputs of the operator |
| * \param outputs the gradient of the loss function |
| * with respect to the inputs of the operator |
| */ |
| template<typename xpu> |
| void RepeatOpBackward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| CHECK_EQ(inputs.size(), 1U); |
| CHECK_EQ(outputs.size(), 1U); |
| |
| const mxnet::TShape& oshape = outputs[0].shape_; |
| if (oshape.ndim() == 0) return; |
| |
| int repeats = 0; |
| dmlc::optional<int> axisOpt; |
| const RepeatParam& param = nnvm::get<RepeatParam>(attrs.parsed); |
| GetRepeatParams(param, oshape, &repeats, &axisOpt); |
| if (0 == repeats) return; |
| |
| std::pair<mxnet::TShape, mxnet::TShape> rshapes = |
| ReshapeInputOutputForRepeatOp(oshape, axisOpt, repeats); |
| |
| // reshaped output grad tblob |
| TBlob oblob(outputs[0].dptr_, rshapes.first, outputs[0].dev_mask(), |
| outputs[0].type_flag_, outputs[0].dev_id()); |
| std::vector<TBlob> newOutputs = {oblob}; |
| |
| // reshaped input grad tblob |
| TBlob iblob(inputs[0].dptr_, rshapes.second, inputs[0].dev_mask(), |
| inputs[0].type_flag_, inputs[0].dev_id()); |
| std::vector<TBlob> newInputs = {iblob}; |
| |
| ReduceAxesComputeImpl<xpu, mshadow::red::sum, false>( |
| ctx, newInputs, req, newOutputs, rshapes.first); |
| } |
| |
| struct TileParam : public dmlc::Parameter<TileParam> { |
| mxnet::TShape reps; |
| DMLC_DECLARE_PARAMETER(TileParam) { |
| DMLC_DECLARE_FIELD(reps) |
| .describe("The number of times for repeating the tensor a. Each dim size of reps" |
| " must be a positive integer." |
| " If reps has length d, the result will have dimension of max(d, a.ndim);" |
| " If a.ndim < d, a is promoted to be d-dimensional by prepending new axes." |
| " If a.ndim > d, reps is promoted to a.ndim by pre-pending 1's to it."); |
| } |
| }; |
| |
| inline bool TileOpShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| const TileParam& param = nnvm::get<TileParam>(attrs.parsed); |
| const mxnet::TShape& ishape = (*in_attrs)[0]; |
| const mxnet::TShape& reps = param.reps; |
  // If reps is empty, the output shape is identical to the input shape
| if (reps.ndim() == 0 || ishape.ndim() == 0) { |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, ishape); |
| return true; |
| } |
  for (size_t i = 0; i < reps.ndim(); ++i) {
    CHECK_GT(reps[i], 0) << "invalid reps[" << i << "]=" << reps[i]
                         << ", dim size must be greater than zero";
| } |
| mxnet::TShape oshape(std::max(ishape.ndim(), reps.ndim())); |
| int i1 = static_cast<int>(ishape.ndim()) - 1; |
| int i2 = static_cast<int>(reps.ndim()) - 1; |
| for (int i = static_cast<int>(oshape.ndim()) - 1; i >= 0; --i) { |
| if (i1 >= 0 && i2 >= 0) { |
| oshape[i] = ishape[i1--] * reps[i2--]; |
| } else if (i1 >= 0) { |
| oshape[i] = ishape[i1--]; |
| } else if (i2 >= 0) { |
| oshape[i] = reps[i2--]; |
| } |
| } |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); |
| return true; |
| } |
| |
| inline bool TileOpType(const nnvm::NodeAttrs& attrs, |
| std::vector<int>* in_attrs, |
| std::vector<int>* out_attrs) { |
| CHECK_EQ(in_attrs->size(), 1U); |
| if ((*in_attrs)[0] != -1) { |
| TYPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]); |
| } else if ((*out_attrs)[0] != -1) { |
| TYPE_ASSIGN_CHECK(*in_attrs, 0, (*out_attrs)[0]); |
| } |
| return true; |
| } |
| |
| /*! |
| * \brief Reshape the input and output tensors for |
 * using broadcast_to to achieve the functionality
 * of the tile operator.
| * \return a pair of mxnet::TShape's, first is the reshaped |
| * input shape, second is the reshaped output shape. |
| */ |
| inline std::pair<mxnet::TShape, mxnet::TShape> ReshapeInputOutputForTileOp( |
| const mxnet::TShape& ishape, |
| const mxnet::TShape& reps) { |
| if (ishape.ndim() == 0 || reps.ndim() == 0) { |
| return std::make_pair(ishape, ishape); |
| } |
| |
| // The shape we want to broadcast to |
| mxnet::TShape bshape(std::max(ishape.ndim(), reps.ndim()) * 2); |
| |
| // The shape of the input tensor after adding new axes before each dim |
| mxnet::TShape rshape(bshape.ndim()); |
| |
| int i1 = static_cast<int>(ishape.ndim()) - 1; |
| int i2 = static_cast<int>(reps.ndim()) - 1; |
| for (int i = static_cast<int>(bshape.ndim()) - 1; i >= 0; --i) { |
| if (0 == (i & 1)) { |
| bshape[i] = (i2 >= 0? reps[i2--] : 1); |
| rshape[i] = 1; |
| } else { |
| rshape[i] = bshape[i] = (i1 >= 0? ishape[i1--] : 1); |
| } |
| } |
| |
| return std::make_pair(rshape, bshape); |
| } |
| |
| /*! |
| * \brief Implementation of tiling the input tensor a based |
| * on the user-input shape, reps. |
 * If a.ndim < reps.ndim, new axes are prepended to a. For example,
 * if the input tensor has shape (3,) and reps is (2, 4), the input
 * tensor is reshaped to (1, 3).
 * If a.ndim > reps.ndim, 1's are prepended to reps. For example,
 * if the input tensor has shape (2, 3, 4, 5) and reps is (2, 2),
 * reps is changed to (1, 1, 2, 2).
| * Suppose we have a.ndim = reps.ndim now. To achieve tiling, |
| * we utilize the operator broadcast_to. For example, for a tensor |
| * of shape (2, 3, 4, 5) and reps (2, 8, 9, 3), we first reshape |
| * the tensor to the shape (1, 2, 1, 3, 1, 4, 1, 5) by adding |
| * one axis before each dimension. Then, we want to broadcast |
| * the new tensor to shape (2, 2, 8, 3, 9, 4, 3, 5). The final |
| * output tensor would have shape (2*2, 8*3, 9*4, 3*5). |
| */ |
| template<typename xpu> |
| void TileOpForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| CHECK_EQ(inputs.size(), 1U); |
| CHECK_EQ(outputs.size(), 1U); |
| |
| if (inputs[0].Size() == 0) return; |
| const mxnet::TShape& ishape = inputs[0].shape_; |
| const mxnet::TShape& reps = nnvm::get<TileParam>(attrs.parsed).reps; |
| |
  // If any entry of reps is zero, return immediately
| for (index_t i = 0; i < reps.ndim(); ++i) { |
| if (0 == reps[i]) return; |
| } |
| |
| std::pair<mxnet::TShape, mxnet::TShape> rshapes = ReshapeInputOutputForTileOp(ishape, reps); |
| |
| // reshaped input tblob |
| TBlob iblob(inputs[0].dptr_, rshapes.first, inputs[0].dev_mask(), |
| inputs[0].type_flag_, inputs[0].dev_id()); |
| std::vector<TBlob> newInputs = {iblob}; |
| // reshaped output tblob |
| TBlob oblob(outputs[0].dptr_, rshapes.second, outputs[0].dev_mask(), |
| outputs[0].type_flag_, outputs[0].dev_id()); |
| std::vector<TBlob> newOutputs = {oblob}; |
| |
| BroadcastCompute<xpu>(attrs, ctx, newInputs, req, newOutputs); |
| } |
| |
| /*! |
| * \brief Compute the gradient of the loss function |
| * with respect to the input of the operator. |
| * Backpropagation is employed to implement the |
| * chain rule. |
| * \param inputs the gradient of the loss function |
| * with respect to the outputs of the operator |
| * \param outputs the gradient of the loss function |
| * with respect to the inputs of the operator |
| */ |
| template<typename xpu> |
| void TileOpBackward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| CHECK_EQ(inputs.size(), 1U); |
| CHECK_EQ(outputs.size(), 1U); |
| |
| if (inputs[0].Size() == 0) return; |
| const mxnet::TShape& oshape = outputs[0].shape_; |
| const mxnet::TShape& reps = nnvm::get<TileParam>(attrs.parsed).reps; |
| |
  // If any entry of reps is zero, return immediately
| for (index_t i = 0; i < reps.ndim(); ++i) { |
| if (0 == reps[i]) return; |
| } |
| |
| std::pair<mxnet::TShape, mxnet::TShape> rshapes = ReshapeInputOutputForTileOp(oshape, reps); |
| |
| // reshaped output grad tblob |
| TBlob oblob(outputs[0].dptr_, rshapes.first, outputs[0].dev_mask(), |
| outputs[0].type_flag_, outputs[0].dev_id()); |
| std::vector<TBlob> newOutputs = {oblob}; |
| // reshaped input grad tblob |
| TBlob iblob(inputs[0].dptr_, rshapes.second, inputs[0].dev_mask(), |
| inputs[0].type_flag_, inputs[0].dev_id()); |
| std::vector<TBlob> newInputs = {iblob}; |
| |
| ReduceAxesComputeImpl<xpu, mshadow::red::sum, false>( |
| ctx, newInputs, req, newOutputs, rshapes.first); |
| } |
| |
| struct ReverseParam : public dmlc::Parameter<ReverseParam> { |
| nnvm::Tuple<int> axis; |
| DMLC_DECLARE_PARAMETER(ReverseParam) { |
| DMLC_DECLARE_FIELD(axis) |
| .describe("The axis which to reverse elements."); |
| } |
| }; |
| |
| |
| #define REVERSE_MAX_DIM 10U |
| |
| struct reverse { |
| MSHADOW_XINLINE static int ReverseIndex(index_t idx, |
| index_t nreversedim, |
| const index_t * stride_, |
| const index_t * trailing_) { |
| index_t outputIndex = idx; |
| for (index_t i = 0; i < nreversedim; ++i) { |
| const index_t low = outputIndex % trailing_[i]; |
| index_t high = outputIndex / trailing_[i]; |
| const index_t x = high%stride_[i]; |
| high /= stride_[i]; |
| outputIndex = (high*stride_[i] + stride_[i] - 1 - x)*trailing_[i] + low; |
| } |
| return outputIndex; |
| } |
| #ifdef __CUDACC__ |
| template<typename DType> |
| __device__ static void Map(index_t index, index_t nreversedim, const DType *src, DType *dst, |
| const index_t * stride_, |
| const index_t * trailing_) { |
| __shared__ index_t stride_share[REVERSE_MAX_DIM]; |
| __shared__ index_t trailing_share[REVERSE_MAX_DIM]; |
| if (threadIdx.x < REVERSE_MAX_DIM) { |
| stride_share[threadIdx.x] = stride_[threadIdx.x]; |
| trailing_share[threadIdx.x] = trailing_[threadIdx.x]; |
| } |
| __syncthreads(); |
| index_t new_idx = ReverseIndex(index, nreversedim, stride_share, trailing_share); |
| dst[new_idx] = src[index]; |
| } |
| #else |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t index, index_t nreversedim, const DType *src, DType *dst, |
| const index_t * stride_, |
| const index_t * trailing_) { |
| index_t new_idx = ReverseIndex(index, nreversedim, stride_, trailing_); |
| dst[new_idx] = src[index]; |
| } |
| #endif |
| }; |
| |
| |
| template<typename xpu> |
| void ReverseOpForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| using namespace mshadow; |
| using namespace mxnet_op; |
| const ReverseParam& param = nnvm::get<ReverseParam>(attrs.parsed); |
| CHECK_EQ(inputs[0].type_flag_, outputs[0].type_flag_); |
| CHECK_LT(param.axis.ndim(), REVERSE_MAX_DIM); |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| |
| const mxnet::TShape& ishape = inputs[0].shape_; |
| |
| std::vector<index_t> stride_(param.axis.ndim()); |
| std::vector<index_t> trailing_(param.axis.ndim()); |
| index_t reverse_index = 0; |
| for (int axis : param.axis) { |
| CHECK_LT(axis, static_cast<int>(ishape.ndim())); |
| stride_[reverse_index] = ishape[axis]; |
| trailing_[reverse_index] = 1; |
| for (index_t i2 = axis + 1; i2 < ishape.ndim(); ++i2) { |
| trailing_[reverse_index] *= ishape[i2]; |
| } |
| reverse_index++; |
| } |
| |
| #ifdef __CUDACC__ |
| mshadow::Tensor<xpu, 1, uint8_t> workspace = |
| ctx.requested[0].get_space_typed<xpu, 1, uint8_t>( |
| mshadow::Shape1(reverse_index * sizeof(index_t) * 2), s); |
| |
| auto stride_workspace = workspace.dptr_; |
| auto trailing_workspace = workspace.dptr_ + reverse_index * sizeof(index_t); |
| |
| cudaMemcpyAsync(stride_workspace, thrust::raw_pointer_cast(stride_.data()), |
| stride_.size() * sizeof(index_t), |
| cudaMemcpyHostToDevice, mshadow::Stream<gpu>::GetStream(s)); |
| cudaMemcpyAsync(trailing_workspace, thrust::raw_pointer_cast(trailing_.data()), |
| trailing_.size() * sizeof(index_t), |
| cudaMemcpyHostToDevice, mshadow::Stream<gpu>::GetStream(s)); |
| |
| #endif |
| |
| #ifdef __CUDACC__ |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| Kernel<reverse, xpu>::Launch(s, inputs[0].Size(), reverse_index, |
| inputs[0].dptr<DType>(), outputs[0].dptr<DType>(), |
| reinterpret_cast<index_t*>(stride_workspace), reinterpret_cast<index_t*>(trailing_workspace)); |
| }); |
| #else |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| Kernel<reverse, xpu>::Launch(s, inputs[0].Size(), reverse_index, |
| inputs[0].dptr<DType>(), outputs[0].dptr<DType>(), |
| stride_.data(), trailing_.data()); |
| }); |
| #endif |
| } |
| |
| |
| struct StackParam : public dmlc::Parameter<StackParam> { |
| int axis; |
| int num_args; |
| DMLC_DECLARE_PARAMETER(StackParam) { |
| DMLC_DECLARE_FIELD(axis) |
| .set_default(0) |
| .describe("The axis in the result array along which the input arrays are stacked."); |
| DMLC_DECLARE_FIELD(num_args).set_lower_bound(1) |
| .describe("Number of inputs to be stacked."); |
| } |
| }; |
| |
| |
| inline bool StackOpShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| const StackParam& param = dmlc::get<StackParam>(attrs.parsed); |
| |
| mxnet::TShape dshape; |
| for (const mxnet::TShape& i : (*in_attrs)) { |
| shape_assign(&dshape, i); |
| } |
| if (dshape.ndim() == 0) return false; |
| |
| mxnet::TShape oshape(dshape.ndim() + 1); |
| int axis = CheckAxis(param.axis, oshape.ndim()); |
| for (int i = 0; i < axis; ++i) { |
| oshape[i] = dshape[i]; |
| } |
| oshape[axis] = param.num_args; |
| for (index_t i = axis + 1; i < oshape.ndim(); ++i) { |
| oshape[i] = dshape[i-1]; |
| } |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); |
| |
| return true; |
| } |
| |
| |
| template<typename xpu> |
| void StackOpForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| using namespace mshadow; |
| using namespace mshadow::expr; |
| const StackParam& param = dmlc::get<StackParam>(attrs.parsed); |
| int axis = CheckAxis(param.axis, outputs[0].ndim()); |
| |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { |
| std::vector<Tensor<xpu, 3, DType> > data(inputs.size()); |
| Tensor<xpu, 3, DType> out; |
| size_t leading = 1, trailing = 1; |
| for (int i = 0; i < axis; ++i) { |
| leading *= outputs[0].shape_[i]; |
| } |
| for (int i = axis + 1; i < outputs[0].ndim(); ++i) { |
| trailing *= outputs[0].shape_[i]; |
| } |
| size_t mid = outputs[0].shape_[axis]; |
| Shape<3> oshape = Shape3(leading, mid, trailing); |
| out = outputs[0].get_with_shape<xpu, 3, DType>(oshape, s); |
| |
| for (size_t i = 0; i < inputs.size(); ++i) { |
| Shape<3> dshape = Shape3(leading, 1, trailing); |
| data[i] = inputs[i].get_with_shape<xpu, 3, DType>(dshape, s); |
| } |
| Concatenate(data, &out, 1, req[0]); |
| }) |
| } |
| |
| template<typename xpu> |
| void StackOpBackward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| using namespace mshadow; |
| using namespace mshadow::expr; |
| const StackParam& param = dmlc::get<StackParam>(attrs.parsed); |
| int axis = CheckAxis(param.axis, inputs[0].ndim()); |
| |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, { |
| std::vector<Tensor<xpu, 3, DType> > grad_in(outputs.size()); |
| Tensor<xpu, 3, DType> grad; |
| size_t leading = 1, trailing = 1; |
| for (int i = 0; i < axis; ++i) { |
| leading *= inputs[0].shape_[i]; |
| } |
| for (int i = axis + 1; i < inputs[0].ndim(); ++i) { |
| trailing *= inputs[0].shape_[i]; |
| } |
| size_t mid = inputs[0].shape_[axis]; |
| Shape<3> oshape = Shape3(leading, mid, trailing); |
| grad = inputs[0].get_with_shape<xpu, 3, DType>(oshape, s); |
| |
| for (size_t i = 0; i < outputs.size(); ++i) { |
| Shape<3> dshape = Shape3(leading, 1, trailing); |
| grad_in[i] = outputs[i].get_with_shape<xpu, 3, DType>(dshape, s); |
| } |
| Split(grad, &grad_in, 1, req); |
| }) |
| } |
| |
| struct SqueezeParam : public dmlc::Parameter<SqueezeParam> { |
| dmlc::optional<mxnet::TShape> axis; |
| DMLC_DECLARE_PARAMETER(SqueezeParam) { |
| DMLC_DECLARE_FIELD(axis) |
| .set_default(dmlc::optional<mxnet::TShape>()) |
| .describe("Selects a subset of the single-dimensional entries in the shape." |
| " If an axis is selected with shape entry greater than one, an error is raised."); |
| } |
| }; |
| |
// Given a shape that may have dim sizes equal to 0,
// move all the zeros to the end of the shape array
// and keep the relative order of the non-zero values.
// Returns the new shape size after moving all zeros to the end.
| inline size_t SqueezeShapeHelper(mxnet::TShape* shape) { |
| CHECK(shape != nullptr); |
| size_t count = 0; |
| for (size_t i = 0; i < shape->ndim(); ++i) { |
| if ((*shape)[i] == 0) { |
| ++count; |
| } else { |
| std::swap((*shape)[i], (*shape)[i-count]); |
| } |
| } |
| return shape->ndim() - count; |
| } |
| |
| inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector *in_attrs, |
| mxnet::ShapeVector *out_attrs) { |
| const SqueezeParam& param = nnvm::get<SqueezeParam>(attrs.parsed); |
| CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; |
| CHECK_EQ(out_attrs->size(), 1U); |
| const mxnet::TShape& dshape = in_attrs->at(0); |
| const int dndim = dshape.ndim(); |
| if (shape_is_none(dshape)) return false; |
| mxnet::TShape oshape = dshape; |
| if (param.axis.has_value()) { |
| // preprocess axis |
| mxnet::TShape axes = param.axis.value(); |
| for (size_t i = 0; i < axes.ndim(); ++i) { |
| if (axes[i] < 0) { |
| axes[i] += dndim; |
| CHECK_GE(axes[i], 0) |
| << "axis " << axes[i] - dndim << " is out of bounds for array of dimension " << dndim; |
| } |
| CHECK_LT(axes[i], dndim) |
| << "axis " << axes[i] << " is out of bounds for array of dimension " << dndim; |
| CHECK_EQ(dshape[axes[i]], 1) |
| << "cannot select an axis to squeeze out which has size=" |
| << dshape[axes[i]] << " not equal to one"; |
| CHECK_NE(oshape[axes[i]], 0) << "duplicate value in axis"; |
| oshape[axes[i]] = 0; |
| } |
| } else { |
| for (size_t i = 0; i < oshape.ndim(); ++i) { |
| if (oshape[i] == 1) oshape[i] = 0; |
| } |
| } |
| size_t oshape_size = SqueezeShapeHelper(&oshape); |
| if (oshape_size == 0) { // corner case when dshape is (1, 1, 1, 1) |
| oshape[0] = 1; |
| oshape_size = 1; |
| } |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(oshape.data(), oshape.data()+oshape_size)); |
| return true; |
| } |
| |
| struct DepthToSpaceParam : public dmlc::Parameter<DepthToSpaceParam> { |
| int block_size; |
| DMLC_DECLARE_PARAMETER(DepthToSpaceParam) { |
| DMLC_DECLARE_FIELD(block_size) |
| .describe("Blocks of [block_size. block_size] are moved"); |
| } |
| }; |
| |
| inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector* in_attrs, |
| mxnet::ShapeVector* out_attrs) { |
| const DepthToSpaceParam& param = nnvm::get<DepthToSpaceParam>(attrs.parsed); |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
  CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Depth To Space requires exactly a 4D tensor";
| |
| mxnet::TShape expected_out(4); |
| |
| mxnet::TShape& in_shape = in_attrs->at(0); |
| int block = param.block_size; |
  CHECK_GT(block, 0) << "block_size must be a positive integer value";
| CHECK_NE(in_shape[1], 0) << "Depth dimension:1 cannot be 0"; |
| CHECK_EQ(in_shape[1] % (block * block), 0) |
| << "Cannot perform Depth To Space operation on the specified tensor." |
| " Dimension:1(depth dimension) should be a multiple of 'block^2'"; |
| CHECK_NE(in_shape[0], 0) |
| << "Operation requires a 4D tensor. Size of dimension:0 cannot be 0"; |
| CHECK_NE(in_shape[2], 0) |
| << "Operation requires a 4D tensor. Size of dimension:2 cannot be 0"; |
| CHECK_NE(in_shape[3], 0) |
| << "Operation requires a 4D tensor. Size of dimension:3 cannot be 0"; |
| |
| expected_out[0] = in_shape[0]; |
| expected_out[1] = in_shape[1] / (block * block); |
| size_t i = 2; |
| while (i < expected_out.ndim()) { |
| expected_out[i] = in_shape[i] * block; |
| ++i; |
| } |
| |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out); |
| return true; |
| } |
| |
| inline bool DepthToSpaceOpType(const nnvm::NodeAttrs& attrs, |
| std::vector<int>* in_attrs, |
| std::vector<int>* out_attrs) { |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| |
| TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); |
| TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); |
| return out_attrs->at(0) != -1; |
| } |
| |
| /*! |
 * \brief This function updates the input-tensor index from which the data element
 * is fetched before being written to the ith location in the output tensor
| * \param index_position index within offset array to get offset of given dimension |
| * \param dim_size size of current dimension |
| * \param idx output tensor index |
| * \param inp_index index within input tensor from where value is retrieved |
| * \param offset_arr array containing the linear offset of input tensor |
| */ |
| MSHADOW_XINLINE void update_index(index_t index_position, index_t dim_size, index_t *idx, |
| index_t *inp_index, const index_t* offset_arr) { |
| index_t next_idx_val = *idx / dim_size; |
| *inp_index += (*idx - next_idx_val * dim_size) * offset_arr[index_position]; |
| *idx = next_idx_val; |
| } |
| |
| /*! |
| * \brief This function performs the tensor transpose (0, 1, 2, 3, 4, 5) -> |
| * (0, 3, 4, 1, 5, 2) by computing linear index within input tensor to be mapped |
| * to the ith index of output tensor |
| * \param i tensor index |
| * \param out_data output tensor |
| * \param in_data input tensor |
| * \param block size of chunks to be moved out of depth dimension |
| * \param size array containing the size of each dimension of input tensor |
| * \param offset_arr array containing the linear offset of input tensor |
| */ |
| template<int req> |
| struct depth_to_space_forward { |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* in_data, |
| const int block, const index_t* size, const index_t* offset_arr) { |
| index_t inp_index = 0, idx = i, dim_size; |
| dim_size = block; |
| update_index(2, dim_size, &idx, &inp_index, offset_arr); |
| dim_size = size[3]; |
| update_index(5, dim_size, &idx, &inp_index, offset_arr); |
| dim_size = block; |
| update_index(1, dim_size, &idx, &inp_index, offset_arr); |
| dim_size = size[2]; |
| update_index(4, dim_size, &idx, &inp_index, offset_arr); |
| dim_size = size[1] / (block * block); |
| update_index(3, dim_size, &idx, &inp_index, offset_arr); |
| dim_size = size[0]; |
| update_index(0, dim_size, &idx, &inp_index, offset_arr); |
| KERNEL_ASSIGN(out_data[i], req, in_data[inp_index]); |
| } |
| }; |
| |
| /*! |
| * \brief This function calculates the linear offset for each dimension of |
| * input tensor and stores them in an array, which is later used in |
| * performing depth_to_space operation |
| * \param i global thread id |
| * \param offset_arr array to be populated with offset values |
| * \param size array to be populated with size of each dimension of input tensor |
| * \param block size of chunks to be moved out of depth dimension |
| * \param size0 size of Dim 0 of input tensor |
| * \param size1 size of Dim 1 of input tensor |
| * \param size2 size of Dim 2 of input tensor |
| * \param size3 size of Dim 3 of input tensor |
| */ |
| template<int req> |
| struct compute_offset_for_depth_to_space { |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* offset_arr, DType* size, const int block, |
| const index_t size0, const index_t size1, const index_t size2, |
| const index_t size3) { |
| size[0] = size0; |
| size[1] = size1; |
| size[2] = size2; |
| size[3] = size3; |
| |
| offset_arr[5] = 1; |
| offset_arr[4] = offset_arr[5] * size[3]; |
| offset_arr[3] = offset_arr[4] * size[2]; |
| offset_arr[2] = offset_arr[3] * size[1] / (block * block); |
| offset_arr[1] = offset_arr[2] * block; |
| offset_arr[0] = offset_arr[1] * block; |
| } |
| }; |
| |
| template<typename xpu> |
| void DepthToSpaceOpForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| CHECK_EQ(inputs.size(), 1U); |
| CHECK_EQ(outputs.size(), 1U); |
| CHECK_EQ(req.size(), 1U); |
| mshadow::Stream<xpu> *s = ctx.get_stream<xpu>(); |
| const TBlob& in_data = inputs[0]; |
| const TBlob& out_data = outputs[0]; |
| const DepthToSpaceParam& param = nnvm::get<DepthToSpaceParam>(attrs.parsed); |
| using namespace mxnet_op; |
| int block = param.block_size; |
| |
| mshadow::Tensor<xpu, 1, char> workspace = |
| ctx.requested[0].get_space_typed<xpu, 1, char>(mshadow::Shape1(sizeof(index_t) * 10), s); |
| char* workspace_curr_ptr = workspace.dptr_; |
| index_t* offset_arr = reinterpret_cast<index_t*>(workspace_curr_ptr); |
| index_t* size = reinterpret_cast<index_t*>(workspace_curr_ptr + sizeof(index_t) * 6); |
| |
| MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { |
| MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { |
| Kernel<compute_offset_for_depth_to_space<req_type>, xpu>::Launch( |
| s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], |
| in_data.shape_[2], in_data.shape_[3]); |
| |
| Kernel<depth_to_space_forward<req_type>, xpu>::Launch( |
| s, out_data.Size(), out_data.dptr<DType>(), in_data.dptr<DType>(), |
| block, size, offset_arr); |
| }); |
| }); |
| } |
| |
| inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector* in_attrs, |
| mxnet::ShapeVector* out_attrs) { |
| const DepthToSpaceParam& param = nnvm::get<DepthToSpaceParam>(attrs.parsed); |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
  CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Space To Depth requires exactly a 4D tensor";
| |
| mxnet::TShape expected_out(in_attrs->at(0).ndim()); |
| |
| mxnet::TShape& in_shape = in_attrs->at(0); |
| int block = param.block_size; |
  CHECK_GT(block, 0) << "block_size must be a positive integer value";
| CHECK_NE(in_shape[0], 0) |
| << "Operation requires a 4D tensor. Size of dimension:0 cannot be 0"; |
| CHECK_NE(in_shape[1], 0) << "Depth dimension:1 cannot be 0"; |
| CHECK_NE(in_shape[2], 0) |
| << "Operation requires a 4D tensor. Size of dimension:2 cannot be 0"; |
  CHECK_EQ(in_shape[2] % block, 0)
    << "Cannot perform Space To Depth operation on the specified tensor."
       " Dimension:2(1st Space dimension) should be a multiple of 'block' ";
| CHECK_NE(in_shape[3], 0) |
| << "Operation requires a 4D tensor. Size of dimension:3 cannot be 0"; |
  CHECK_EQ(in_shape[3] % block, 0)
    << "Cannot perform Space To Depth operation on the specified tensor."
       " Dimension:3(2nd space dimension) should be a multiple of 'block' ";
| |
| expected_out[0] = in_shape[0]; |
| expected_out[1] = in_shape[1] * block * block; |
| uint32_t i = 2; |
| while (i < expected_out.ndim()) { |
| expected_out[i] = in_shape[i] / block; |
| ++i; |
| } |
| |
| SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out); |
| return true; |
| } |
| |
| inline bool SpaceToDepthOpType(const nnvm::NodeAttrs& attrs, |
| std::vector<int>* in_attrs, |
| std::vector<int>* out_attrs) { |
| CHECK_EQ(in_attrs->size(), 1U); |
| CHECK_EQ(out_attrs->size(), 1U); |
| |
| TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); |
| TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); |
| return out_attrs->at(0) != -1; |
| } |
| |
| /*! |
 * \brief This function performs the tensor transpose (0, 1, 2, 3, 4, 5) ->
| * (0, 3, 5, 1, 2, 4) by computing linear index within input tensor to be mapped |
| * to the ith index of output tensor |
| * \param i tensor index |
| * \param out_data output tensor |
| * \param in_data input tensor |
| * \param block size of chunks to be moved out of depth dimension |
| * \param size array containing the size of each dimension of input tensor |
| * \param offset_arr array containing the linear offset of input tensor |
| */ |
| template<int req> |
| struct space_to_depth_forward { |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* in_data, const int block, |
| const index_t* size, const index_t* offset_arr) { |
| index_t inp_index = 0, idx = i, dim_size; |
| dim_size = size[3] / block; |
| update_index(4, dim_size, &idx, &inp_index, offset_arr); |
| dim_size = size[2] / block; |
| update_index(2, dim_size, &idx, &inp_index, offset_arr); |
| dim_size = size[1]; |
| update_index(1, dim_size, &idx, &inp_index, offset_arr); |
| dim_size = block; |
| update_index(5, dim_size, &idx, &inp_index, offset_arr); |
| dim_size = block; |
| update_index(3, dim_size, &idx, &inp_index, offset_arr); |
| dim_size = size[0]; |
| update_index(0, dim_size, &idx, &inp_index, offset_arr); |
| KERNEL_ASSIGN(out_data[i], req, in_data[inp_index]); |
| } |
| }; |
| |
| /*! |
| * \brief This function calculates the linear offset for each dimension of |
| * input tensor and stores them in an array, which is later used in |
| * performing space_to_depth operation |
| * \param i global thread id |
| * \param offset_arr array to be populated with offset values |
| * \param size array to be populated with size of each dimension of input tensor |
| * \param block size of chunks to be moved out of depth dimension |
| * \param size0 size of Dim 0 of input tensor |
| * \param size1 size of Dim 1 of input tensor |
| * \param size2 size of Dim 2 of input tensor |
| * \param size3 size of Dim 3 of input tensor |
| */ |
| template<int req> |
| struct compute_offset_for_space_to_depth { |
| template<typename DType> |
| MSHADOW_XINLINE static void Map(index_t i, DType* offset_arr, DType* size, const int block, |
| const index_t size0, const index_t size1, |
| const index_t size2, const index_t size3) { |
| size[0] = size0; |
| size[1] = size1; |
| size[2] = size2; |
| size[3] = size3; |
| |
| offset_arr[5] = 1; |
| offset_arr[4] = offset_arr[5] * block; |
| offset_arr[3] = offset_arr[4] * size[3] / block; |
| offset_arr[2] = offset_arr[3] * block; |
| offset_arr[1] = offset_arr[2] * size[2] / block; |
| offset_arr[0] = offset_arr[1] * size[1]; |
| } |
| }; |
| |
| template<typename xpu> |
| void SpaceToDepthOpForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| CHECK_EQ(inputs.size(), 1U); |
| CHECK_EQ(outputs.size(), 1U); |
| CHECK_EQ(req.size(), 1U); |
| mshadow::Stream<xpu> *s = ctx.get_stream<xpu>(); |
| const TBlob& in_data = inputs[0]; |
| const TBlob& out_data = outputs[0]; |
| const DepthToSpaceParam& param = nnvm::get<DepthToSpaceParam>(attrs.parsed); |
| using namespace mxnet_op; |
| int block = param.block_size; |
| |
| mshadow::Tensor<xpu, 1, char> workspace = |
| ctx.requested[0].get_space_typed<xpu, 1, char>(mshadow::Shape1(sizeof(index_t) * 10), s); |
| char* workspace_curr_ptr = workspace.dptr_; |
| index_t* offset_arr = reinterpret_cast<index_t*>(workspace_curr_ptr); |
| index_t* size = reinterpret_cast<index_t*>(workspace_curr_ptr + sizeof(index_t) * 6); |
| |
| MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { |
| MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { |
| Kernel<compute_offset_for_space_to_depth<req_type>, xpu>::Launch( |
| s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], |
| in_data.shape_[2], in_data.shape_[3]); |
| Kernel<space_to_depth_forward<req_type>, xpu>::Launch( |
| s, out_data.Size(), out_data.dptr<DType>(), in_data.dptr<DType>(), |
| block, size, offset_arr); |
| }); |
| }); |
| } |
| |
| namespace split_enum { |
| enum SplitOpInputs {kData}; |
| } // namespace split_enum |
| |
| struct SplitParam : public dmlc::Parameter<SplitParam> { |
| mxnet::TShape indices; |
| int axis; |
| bool squeeze_axis; |
| int sections; |
| DMLC_DECLARE_PARAMETER(SplitParam) { |
| DMLC_DECLARE_FIELD(indices) |
| .describe("Indices of splits. The elements should denote the boundaries of at which split" |
| " is performed along the `axis`."); |
| DMLC_DECLARE_FIELD(axis).set_default(1) |
| .describe("Axis along which to split."); |
| DMLC_DECLARE_FIELD(squeeze_axis).set_default(0) |
| .describe("If true, Removes the axis with length 1 from the shapes of the output arrays." |
| " **Note** that setting `squeeze_axis` to ``true`` removes axis with length 1" |
| " only along the `axis` which it is split." |
| " Also `squeeze_axis` can be set to ``true``" |
| " only if ``input.shape[axis] == num_outputs``."); |
| DMLC_DECLARE_FIELD(sections).set_default(0) |
| .describe("Number of sections if equally splitted. Default to 0 which means split by indices."); |
| } |
| }; // struct SplitParam |
| |
| inline mxnet::TShape GetSplitIndices(const mxnet::TShape& ishape, int axis, int sections) { |
| mxnet::TShape indices(sections+1); |
| indices[0] = 0; |
| int64_t section_size = ishape[axis] / sections; |
| for (int i = 0; i < sections; ++i) { |
| indices[i+1] = section_size * (i + 1); |
| } |
| return indices; |
| } |
| |
| inline bool SplitOpType(const nnvm::NodeAttrs& attrs, |
| std::vector<int>* in_attrs, |
| std::vector<int>* out_attrs) { |
| CHECK_EQ(in_attrs->size(), 1U); |
| int dtype = (*in_attrs)[0]; |
| CHECK_NE(dtype, -1) << "First input must have specified type"; |
| const SplitParam& param = nnvm::get<SplitParam>(attrs.parsed); |
| out_attrs->clear(); |
| int num_outputs = (param.sections > 0) ? param.sections : param.indices.ndim(); |
| for (int i = 0; i < num_outputs; ++i) { |
| out_attrs->push_back(dtype); |
| } |
| return true; |
| } |
| |
| inline bool SplitOpShape(const nnvm::NodeAttrs& attrs, |
| mxnet::ShapeVector* in_attrs, |
| mxnet::ShapeVector* out_attrs) { |
| using namespace mshadow; |
| const SplitParam& param = nnvm::get<SplitParam>(attrs.parsed); |
| CHECK_EQ(in_attrs->size(), 1U); |
| mxnet::TShape dshape = in_attrs->at(split_enum::kData); |
| mxnet::TShape ishape = in_attrs->at(split_enum::kData); |
| if (dshape.ndim() == 0) return false; |
| if (param.axis >= 0) { |
| CHECK_LT(static_cast<size_t>(param.axis), dshape.ndim()); |
| } else { |
    CHECK_GE(param.axis + static_cast<int>(dshape.ndim()), 0);
| } |
| int real_axis = param.axis; |
| if (real_axis < 0) { |
| real_axis += dshape.ndim(); |
| } |
| const mxnet::TShape indices = |
| (param.sections > 0) ? GetSplitIndices(ishape, real_axis, param.sections) : param.indices; |
| int num_outputs = (param.sections > 0) ? indices.ndim() - 1 : indices.ndim(); |
| // Pre-compute squeezed output shape for future usage |
| mxnet::TShape squeezed_dshape = dshape; |
| for (int d = real_axis; d < static_cast<int>(squeezed_dshape.ndim()) - 1; ++d) { |
| squeezed_dshape[d] = squeezed_dshape[d+1]; |
| } |
| squeezed_dshape = mxnet::TShape(&squeezed_dshape[0], &squeezed_dshape[squeezed_dshape.ndim()-1]); |
| // Assign shape to every output |
| for (int i = 0; i < num_outputs; ++i) { |
| int start = indices[i]; |
| int end = (i < num_outputs - 1) ? indices[i + 1] : ishape[real_axis]; |
    CHECK(start < end)
      << "start " << start << " is not less than end " << end << " for subarray " << i;
    CHECK(end <= ishape[real_axis])
      << "end " << end << " exceeds the size of the axis " << ishape[real_axis];
| dshape[real_axis] = (end - start); |
| if (param.squeeze_axis) { |
| CHECK_EQ(end - start, 1U) << "expected axis size of 1 but got " << end - start; |
| SHAPE_ASSIGN_CHECK(*out_attrs, i, squeezed_dshape); |
| } else { |
| SHAPE_ASSIGN_CHECK(*out_attrs, i, dshape); |
| } |
| } |
| mxnet::TShape back_calculate_dshape = ishape; |
| back_calculate_dshape[real_axis] = 0; |
| for (int d = 0; d < real_axis; ++d) { |
| back_calculate_dshape[d] = (*out_attrs)[0][d]; |
| } |
| if (param.squeeze_axis) { |
| back_calculate_dshape[real_axis] = num_outputs; |
| } else { |
| for (int i = 0; i < num_outputs; ++i) { |
| back_calculate_dshape[real_axis] += (*out_attrs)[i][real_axis]; |
| } |
| } |
| for (int d = real_axis + 1; d < static_cast<int>(ishape.ndim()); ++d) { |
| if (param.squeeze_axis) { |
| back_calculate_dshape[d] = (*out_attrs)[0][d - 1]; |
| } else { |
| back_calculate_dshape[d] = (*out_attrs)[0][d]; |
| } |
| } |
| SHAPE_ASSIGN_CHECK(*in_attrs, split_enum::kData, back_calculate_dshape); |
| return true; |
| } |
| |
| struct SplitKernel { |
| /*! |
| * \brief Map function for forward split_v2 operator |
| * \param i global thread id |
| * \param in_data ptr to input buffer |
| * \param out_data ptr to ptr of outputs buffer |
| * \param indices ptr to indices buffer |
| * \param num_sections # of sections after split |
   * \param axis_size size of the axis to be split on
   * \param trailing_size step size within the data buffer of the axis to be split on
| */ |
| template<typename DType> |
| static MSHADOW_XINLINE void Map(size_t i, |
| const DType *in_data, DType** out_data, const size_t* indices, |
| const size_t num_sections, const size_t axis_size, |
| const size_t trailing_size) { |
| size_t idx = i / trailing_size % axis_size; |
| size_t target = 0; |
| for (size_t section = 0; |
| section < num_sections && indices[section] <= idx; |
| target = section++) {} |
| DType* target_data = out_data[target]; |
| const size_t mid_idx = idx - indices[target]; |
| const size_t head_idx = i / (trailing_size * axis_size); |
| const size_t tail_idx = i % trailing_size; |
| const size_t section_size = indices[target + 1] - indices[target]; |
| const size_t target_idx = |
| head_idx * trailing_size * section_size + mid_idx * trailing_size + tail_idx; |
| target_data[target_idx] = in_data[i]; |
| } |
| }; |
| |
| struct ConcatenateKernel { |
| /*! |
| * \brief Map function for backward split_v2 operator |
| * \param i global thread id |
| * \param out_grad ptr to ptr of out grads buffer |
| * \param in_grad ptr to input grad buffer |
| * \param indices ptr to indices buffer |
| * \param num_sections # of sections after split |
   * \param axis_size size of the axis to be split on
   * \param trailing_size step size within the data buffer of the axis to be split on
| */ |
| template<typename DType> |
| static MSHADOW_XINLINE void Map(size_t i, |
| DType** out_grad, DType* in_grad, const size_t* indices, |
| const size_t num_sections, const size_t axis_size, |
| const size_t trailing_size) { |
| size_t idx = i / trailing_size % axis_size; |
| size_t src = 0; |
| for (size_t section = 0; |
| section < num_sections && indices[section] <= idx; |
| src = section++) {} |
| DType* src_grad = out_grad[src]; |
| const size_t mid_idx = idx - indices[src]; |
| const size_t head_idx = i / (trailing_size * axis_size); |
| const size_t tail_idx = i % trailing_size; |
| const size_t section_size = indices[src + 1] - indices[src]; |
| const size_t src_idx = |
| head_idx * trailing_size * section_size + mid_idx * trailing_size + tail_idx; |
| in_grad[i] = src_grad[src_idx]; |
| } |
| }; |
| |
| template<typename xpu> |
| inline void SplitOpForward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| using namespace mshadow; |
| using namespace mshadow::expr; |
| using namespace mxnet_op; |
| const SplitParam& param = nnvm::get<SplitParam>(attrs.parsed); |
| CHECK_EQ(inputs.size(), 1U); |
| CHECK_EQ(outputs.size(), (param.sections > 0) ? param.sections : param.indices.ndim()); |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| const TBlob& input_data = inputs[split_enum::kData]; |
| size_t leading = 1, trailing = 1; |
| int real_axis = param.axis; |
| if (real_axis < 0) { |
| real_axis += input_data.ndim(); |
| } |
| CHECK_LT(real_axis, input_data.ndim()); |
| size_t mid = input_data.shape_[real_axis]; |
| for (int i = 0; i < real_axis; ++i) { |
| leading *= input_data.shape_[i]; |
| } |
| for (int i = real_axis + 1; i < input_data.ndim(); ++i) { |
| trailing *= input_data.shape_[i]; |
| } |
| |
| size_t workspace_size = 0; |
| const mxnet::TShape& ishape = input_data.shape_; |
| const mxnet::TShape split_pts = |
| (param.sections > 0) ? GetSplitIndices(ishape, real_axis, param.sections) : param.indices; |
| std::vector<size_t> indices; |
| for (const auto& section : split_pts) { |
| indices.push_back(section); |
| } |
| if (param.sections == 0) { |
| indices.push_back(ishape[real_axis]); |
| } |
| workspace_size += indices.size() * sizeof(size_t); |
| MSHADOW_TYPE_SWITCH(input_data.type_flag_, DType, { |
| std::vector<DType*> output_data; |
| for (const TBlob& data : outputs) { |
| output_data.push_back(data.dptr<DType>()); |
| } |
| workspace_size += output_data.size() * sizeof(DType*); |
| Tensor<xpu, 1, char> workspace = |
| ctx.requested[0].get_space_typed<xpu, 1, char>(Shape1(workspace_size), s); |
| Tensor<cpu, 1, size_t> indices_cpu_tensor(indices.data(), Shape1(indices.size())); |
| Tensor<xpu, 1, size_t> indices_xpu_tensor( |
| reinterpret_cast<size_t*>(workspace.dptr_), Shape1(indices.size())); |
| Tensor<cpu, 1, DType*> ptrs_cpu_tensor(output_data.data(), Shape1(output_data.size())); |
| Tensor<xpu, 1, DType*> ptrs_xpu_tensor( |
| reinterpret_cast<DType**>(workspace.dptr_ + indices.size() * sizeof(size_t)), |
| Shape1(output_data.size())); |
| mshadow::Copy(indices_xpu_tensor, indices_cpu_tensor, s); |
| mshadow::Copy(ptrs_xpu_tensor, ptrs_cpu_tensor, s); |
| Kernel<SplitKernel, xpu>::Launch( |
| s, input_data.Size(), input_data.dptr<DType>(), ptrs_xpu_tensor.dptr_, |
| indices_xpu_tensor.dptr_, indices.size() - 1, mid, trailing); |
| }); |
| } |
| |
| template<typename xpu> |
| inline void SplitOpBackward(const nnvm::NodeAttrs& attrs, |
| const OpContext& ctx, |
| const std::vector<TBlob>& inputs, |
| const std::vector<OpReqType>& req, |
| const std::vector<TBlob>& outputs) { |
| using namespace mshadow; |
| using namespace mshadow::expr; |
| using namespace mxnet_op; |
| const SplitParam& param = nnvm::get<SplitParam>(attrs.parsed); |
| CHECK_EQ(inputs.size(), (param.sections > 0) ? param.sections : param.indices.ndim()) |
| << "out grad vector size mush match the output size"; |
| CHECK_EQ(outputs.size(), 1U); |
| Stream<xpu> *s = ctx.get_stream<xpu>(); |
| TBlob input_grad = outputs[split_enum::kData]; |
| size_t leading = 1, trailing = 1; |
| int real_axis = param.axis; |
| if (real_axis < 0) { |
| real_axis += input_grad.ndim(); |
| } |
| CHECK_LT(real_axis, input_grad.ndim()); |
| size_t mid = input_grad.shape_[real_axis]; |
| for (int i = 0; i < real_axis; ++i) { |
| leading *= input_grad.shape_[i]; |
| } |
| for (int i = real_axis + 1; i < input_grad.ndim(); ++i) { |
| trailing *= input_grad.shape_[i]; |
| } |
| |
| size_t workspace_size = 0; |
| const mxnet::TShape& ishape = input_grad.shape_; |
| const mxnet::TShape split_pts = |
| (param.sections > 0) ? GetSplitIndices(ishape, real_axis, param.sections) : param.indices; |
| std::vector<size_t> indices; |
| for (const auto& section : split_pts) { |
| indices.push_back(section); |
| } |
| if (param.sections == 0) { |
| indices.push_back(ishape[real_axis]); |
| } |
| workspace_size += indices.size() * sizeof(size_t); |
| MSHADOW_TYPE_SWITCH(input_grad.type_flag_, DType, { |
| std::vector<DType*> out_grads; |
| for (const TBlob& output_grad : inputs) { |
| out_grads.push_back(output_grad.dptr<DType>()); |
| } |
| workspace_size += out_grads.size() * sizeof(DType*); |
| Tensor<xpu, 1, char> workspace = |
| ctx.requested[0].get_space_typed<xpu, 1, char>(Shape1(workspace_size), s); |
| Tensor<cpu, 1, size_t> indices_cpu_tensor(indices.data(), Shape1(indices.size())); |
| Tensor<xpu, 1, size_t> indices_xpu_tensor( |
| reinterpret_cast<size_t*>(workspace.dptr_), Shape1(indices.size())); |
| Tensor<cpu, 1, DType*> ptrs_cpu_tensor(out_grads.data(), Shape1(inputs.size())); |
| Tensor<xpu, 1, DType*> ptrs_xpu_tensor( |
| reinterpret_cast<DType**>(workspace.dptr_ + indices.size() * sizeof(size_t)), |
| Shape1(inputs.size())); |
| mshadow::Copy(indices_xpu_tensor, indices_cpu_tensor, s); |
| mshadow::Copy(ptrs_xpu_tensor, ptrs_cpu_tensor, s); |
| Kernel<ConcatenateKernel, xpu>::Launch( |
| s, input_grad.Size(), ptrs_xpu_tensor.dptr_, input_grad.dptr<DType>(), |
| indices_xpu_tensor.dptr_, indices.size() - 1, mid, trailing); |
| }); |
| } |
| |
| inline uint32_t SplitNumOutputs(const NodeAttrs& attrs) { |
| const SplitParam& param = nnvm::get<SplitParam>(attrs.parsed); |
| return (param.sections > 0) ? param.sections : param.indices.ndim(); |
| } |
| |
| } // namespace op |
| } // namespace mxnet |
| |
| #endif // MXNET_OPERATOR_TENSOR_MATRIX_OP_INL_H_ |