/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file init_op.h
* \brief Function definition of initialization op
*/
#ifndef MXNET_OPERATOR_TENSOR_INIT_OP_H_
#define MXNET_OPERATOR_TENSOR_INIT_OP_H_
#include <mxnet/base.h>
#include <mxnet/operator_util.h>
#include <mxnet/op_attr_types.h>
#include <mxnet/imperative.h>
#include <dmlc/parameter.h>
#include <dmlc/optional.h>
#include <vector>
#include <string>
#include <sstream>
#include <unordered_map>
#include <algorithm>
#include <limits>
#include "../../api/operator/op_utils.h"
#include "../../common/utils.h"
#include "../mshadow_op.h"
#include "../elemwise_op_common.h"
#include "../mxnet_op.h"
#include "../mshadow_op.h"
#include "../../api/operator/op_utils.h"
namespace mxnet {
namespace op {
struct InitOpParam : public dmlc::Parameter<InitOpParam> {
mxnet::TShape shape;
std::string ctx;
int dtype;
DMLC_DECLARE_PARAMETER(InitOpParam) {
DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape(0, 1)).describe("The shape of the output");
DMLC_DECLARE_FIELD(ctx).set_default("").describe(
"Context of output, in format [cpu|gpu|cpu_pinned](n)."
"Only used for imperative calls.");
DMLC_DECLARE_FIELD(dtype).set_default(-1).add_enum("None", -1)
MXNET_ADD_ALL_TYPES_EXT_WITH_BOOL.describe("Target data type.");
}
void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
std::ostringstream shape_s;
shape_s << shape;
(*dict)["shape"] = shape_s.str();
(*dict)["dtype"] = MXNetTypeWithBool2String(dtype);
// We do not set ctx here: it is stored directly in the dict rather than in
// InitOpParam, and setting it again would result in an error.
}
};
struct InitOpWithoutDTypeParam : public dmlc::Parameter<InitOpWithoutDTypeParam> {
mxnet::TShape shape;
std::string ctx;
int dtype;
DMLC_DECLARE_PARAMETER(InitOpWithoutDTypeParam) {
DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape()).describe("The shape of the output");
DMLC_DECLARE_FIELD(ctx).set_default("").describe(
"Context of output, in format [cpu|gpu|cpu_pinned](n)."
"Only used for imperative calls.");
DMLC_DECLARE_FIELD(dtype).set_default(-1).describe("Target data type.");
}
};
struct FullLikeOpParam : public dmlc::Parameter<FullLikeOpParam> {
double fill_value;
std::string ctx;
dmlc::optional<int> dtype;
DMLC_DECLARE_PARAMETER(FullLikeOpParam) {
DMLC_DECLARE_FIELD(fill_value).describe("Value with which to fill newly created tensor");
DMLC_DECLARE_FIELD(ctx).set_default("").describe(
"Context of output, in format [cpu|gpu|cpu_pinned](n)."
"Only used for imperative calls.");
DMLC_DECLARE_FIELD(dtype).set_default(dmlc::optional<int>())
MXNET_ADD_ALL_TYPES_EXT_WITH_BOOL.describe("Target data type.");
}
void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
std::ostringstream fill_value_s, dtype_s;
fill_value_s << fill_value;
dtype_s << dtype;
(*dict)["fill_value"] = fill_value_s.str();
if (dtype.has_value()) {
(*dict)["dtype"] = MXNetTypeWithBool2String(dtype.value());
} else {
(*dict)["dtype"] = dtype_s.str();
}
}
};
/*! \brief Infer type for FullLikeOpCompute */
template <typename ParamType>
inline bool FullLikeOpType(const nnvm::NodeAttrs& attrs,
std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
CHECK_EQ(in_attrs->size(), 1U);
CHECK_EQ(out_attrs->size(), 1U);
if (param.dtype.has_value()) {
TYPE_ASSIGN_CHECK(*out_attrs, 0, param.dtype.value());
} else {
TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
}
return out_attrs->at(0) != -1;
}
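// Example (illustrative): for full_like(x, 2.0) with x of type float32 and no
// explicit dtype, the output dtype is inferred from the input, i.e. float32;
// passing dtype='int32' would override it.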
struct EyeParam : public dmlc::Parameter<EyeParam> {
nnvm::dim_t N;
nnvm::dim_t M;
nnvm::dim_t k;
std::string ctx;
int dtype;
DMLC_DECLARE_PARAMETER(EyeParam) {
DMLC_DECLARE_FIELD(N).describe("Number of rows in the output.");
DMLC_DECLARE_FIELD(M).set_default(0).describe(
"Number of columns in the output. If 0, defaults to N");
DMLC_DECLARE_FIELD(k).set_default(0).describe(
"Index of the diagonal. 0 (the default) refers to the main diagonal."
"A positive value refers to an upper diagonal."
"A negative value to a lower diagonal.");
DMLC_DECLARE_FIELD(ctx).set_default("").describe(
"Context of output, in format [cpu|gpu|cpu_pinned](n)."
"Only used for imperative calls.");
DMLC_DECLARE_FIELD(dtype).set_default(-1).add_enum("None", -1)
MXNET_ADD_ALL_TYPES.describe("Target data type.");
}
};
template <typename ParamType>
inline bool InitEyeShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector* in_attrs,
mxnet::ShapeVector* out_attrs) {
const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
CHECK_EQ(in_attrs->size(), 0U);
CHECK_EQ(out_attrs->size(), 1U);
SHAPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::Shape2(param.N, param.M > 0 ? param.M : param.N));
return true;
}
template <int req>
struct eye_dns_fill {
template <typename DType>
MSHADOW_XINLINE static void Map(int i,
DType* out_data,
const nnvm::dim_t init_col,
const nnvm::dim_t k,
const nnvm::dim_t num_cols) {
KERNEL_ASSIGN(
out_data[(i + init_col - k) * num_cols + i + init_col], req, static_cast<DType>(1));
}
};
struct RangeParam : public dmlc::Parameter<RangeParam> {
double start;
dmlc::optional<double> stop;
double step;
int repeat;
bool infer_range;
std::string ctx;
int dtype;
DMLC_DECLARE_PARAMETER(RangeParam) {
DMLC_DECLARE_FIELD(start).describe(
"Start of interval. The interval includes this value. The default start value is 0.");
DMLC_DECLARE_FIELD(stop)
.set_default(dmlc::optional<double>())
.describe(
"End of interval. The interval does not include this value,"
" except in some cases where step is not an integer and"
" floating point round-off affects the length of out.");
DMLC_DECLARE_FIELD(step).set_default(1).describe("Spacing between values.");
DMLC_DECLARE_FIELD(repeat).set_default(1).describe(
"The repeating time of all elements."
" E.g repeat=3, the element a will be repeated three times --> a, a, a.");
DMLC_DECLARE_FIELD(infer_range)
.set_default(false)
.describe(
"When set to True, infer the stop position from the start, step, "
"repeat, and output tensor size.");
DMLC_DECLARE_FIELD(ctx).set_default("").describe(
"Context of output, in format [cpu|gpu|cpu_pinned](n)."
"Only used for imperative calls.");
DMLC_DECLARE_FIELD(dtype).set_default(-1).add_enum("None", -1)
MXNET_ADD_ALL_TYPES.describe("Target data type.");
}
void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
std::ostringstream start_s, stop_s, step_s, repeat_s, infer_range_s;
start_s << start;
stop_s << stop;
step_s << step;
repeat_s << repeat;
infer_range_s << infer_range;
(*dict)["start"] = start_s.str();
(*dict)["stop"] = stop_s.str();
(*dict)["step"] = step_s.str();
(*dict)["repeat"] = repeat_s.str();
(*dict)["infer_range"] = infer_range_s.str();
(*dict)["dtype"] = MXNetTypeWithBool2String(dtype);
}
};
struct RangeLikeParam : public dmlc::Parameter<RangeLikeParam> {
double start;
double step;
int repeat;
std::string ctx;
dmlc::optional<int> axis;
DMLC_DECLARE_PARAMETER(RangeLikeParam) {
DMLC_DECLARE_FIELD(start).set_default(0).describe(
"Start of interval. The interval includes this value. The default start value is 0.");
DMLC_DECLARE_FIELD(step).set_default(1).describe("Spacing between values.");
DMLC_DECLARE_FIELD(repeat).set_default(1).describe(
"The repeating time of all elements."
" E.g repeat=3, the element a will be repeated three times --> a, a, a.");
DMLC_DECLARE_FIELD(ctx).set_default("").describe(
"Context of output, in format [cpu|gpu|cpu_pinned](n)."
"Only used for imperative calls.");
DMLC_DECLARE_FIELD(axis)
.set_default(dmlc::optional<int>())
.describe(
"Arange elements according to the size of a certain axis of input array."
" The negative numbers are interpreted counting from the backward."
" If not provided, will arange elements according to the input shape.");
}
void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
std::ostringstream start_s, step_s, repeat_s, axis_s;
start_s << start;
step_s << step;
repeat_s << repeat;
axis_s << axis;
(*dict)["start"] = start_s.str();
(*dict)["step"] = step_s.str();
(*dict)["repeat"] = repeat_s.str();
(*dict)["axis"] = axis_s.str();
}
};
/*! \brief Initialize and fill output with an arbitrary value */
struct InitOpWithScalarParam : dmlc::Parameter<InitOpWithScalarParam> {
mxnet::TShape shape;
std::string ctx;
int dtype;
double value;
DMLC_DECLARE_PARAMETER(InitOpWithScalarParam) {
DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape()).describe("The shape of the output");
DMLC_DECLARE_FIELD(ctx).set_default("").describe(
"Context of output, in format [cpu|gpu|cpu_pinned](n)."
"Only used for imperative calls.");
DMLC_DECLARE_FIELD(dtype).set_default(-1).add_enum("None", -1)
MXNET_ADD_ALL_TYPES_EXT_WITH_BOOL.describe("Target data type.");
DMLC_DECLARE_FIELD(value).describe("Value with which to fill newly created tensor");
}
void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
std::ostringstream shape_s, value_s;
shape_s << shape;
value_s << value;
(*dict)["shape"] = shape_s.str();
(*dict)["dtype"] = MXNetTypeWithBool2String(dtype);
(*dict)["value"] = value_s.str();
// We do not set ctx here: it is stored directly in the dict rather than in
// InitOpWithScalarParam, and setting it again would result in an error.
}
};
/*! \brief Parse keyword arguments into a RangeParam and save to attrs->parsed */
inline void RangeParamParser(nnvm::NodeAttrs* attrs) {
RangeParam param;
param.Init(attrs->dict);
if (!param.infer_range && !param.stop.has_value()) {
// Only one bound was given (e.g. arange(5)): reinterpret start as stop.
param.stop = param.start;
param.start = 0;
}
attrs->parsed = std::move(param);
}
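// Example (illustrative): arange(5) reaches this parser as start=5 with no stop;
// it is rewritten to start=0, stop=5, matching NumPy's one-argument arange, so
// the op produces 0, 1, 2, 3, 4.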
struct LinspaceParam : public dmlc::Parameter<LinspaceParam> {
double start;
double stop;
index_t num;
bool endpoint;
std::string ctx;
int dtype;
DMLC_DECLARE_PARAMETER(LinspaceParam) {
DMLC_DECLARE_FIELD(start).describe("The starting value of the sequence.");
DMLC_DECLARE_FIELD(stop).describe("The ending value of the sequence.");
DMLC_DECLARE_FIELD(num).describe("Number of samples to generate. Must be non-negative.");
DMLC_DECLARE_FIELD(endpoint).set_default(true).describe(
"If True, stop is the last sample. Otherwise, it is not included.");
DMLC_DECLARE_FIELD(ctx).set_default("").describe(
"Context of output, in format [cpu|gpu|cpu_pinned](n)."
"Only used for imperative calls.");
DMLC_DECLARE_FIELD(dtype).set_default(-1).add_enum("None", -1)
MXNET_ADD_ALL_TYPES_EXT_WITH_BOOL.describe("Target data type.");
}
void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
std::ostringstream start_s, stop_s, num_s, endpoint_s;
start_s << start;
stop_s << stop;
num_s << num;
endpoint_s << endpoint;
(*dict)["start"] = start_s.str();
(*dict)["stop"] = stop_s.str();
(*dict)["num"] = num_s.str();
(*dict)["endpoint"] = endpoint_s.str();
(*dict)["dtype"] = MXNetTypeWithBool2String(dtype);
}
};
template <typename ParamType>
inline bool InitShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector* in_attrs,
mxnet::ShapeVector* out_attrs) {
const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
CHECK_EQ(in_attrs->size(), 0U);
CHECK_EQ(out_attrs->size(), 1U);
mxnet::TShape param_shape = param.shape;
if (shape_is_known(param_shape) && !features::is_enabled(features::INT64_TENSOR_SIZE)) {
CHECK_LT(param_shape.Size(), (int64_t{1} << 31) - 1)
<< "[InitShape-input] Size of tensor you are trying to allocate is larger than "
"2^31 elements. Please build with flag USE_INT64_TENSOR_SIZE=1";
}
if (!Imperative::Get()->is_np_shape()) {
common::ConvertToNumpyShape(&param_shape);
}
if (shape_is_known((*out_attrs)[0]) && !shape_is_known(param_shape)) {
if (!features::is_enabled(features::INT64_TENSOR_SIZE)) {
CHECK_LT(out_attrs->at(0).Size(), (int64_t{1} << 31) - 1)
<< "[InitShape-output] Size of tensor you are trying to allocate is larger than "
"2^31 elements. Please build with flag USE_INT64_TENSOR_SIZE=1";
}
return true;
}
SHAPE_ASSIGN_CHECK(*out_attrs, 0, param_shape);
return shape_is_known(out_attrs->at(0));
}
template <typename ParamType, int num_in = 0U>
inline bool InitType(const nnvm::NodeAttrs& attrs,
std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
CHECK_EQ(in_attrs->size(), num_in);
CHECK_EQ(out_attrs->size(), 1U);
TYPE_ASSIGN_CHECK(*out_attrs, 0, param.dtype);
return true;
}
template <typename ParamType, int num_in = 0U>
inline bool InitNumpyType(const nnvm::NodeAttrs& attrs,
std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
CHECK_EQ(in_attrs->size(), num_in);
CHECK_EQ(out_attrs->size(), 1U);
TYPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::common::GetDefaultDtype(param.dtype));
return true;
}
template <typename ParamType, bool rsp, bool csr>
inline bool InitStorageType(const nnvm::NodeAttrs& attrs,
const int dev_mask,
DispatchMode* dispatch_mode,
std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
CHECK_EQ(in_attrs->size(), 0U);
CHECK_EQ(out_attrs->size(), 1U);
auto& out_stype = out_attrs->at(0);
bool dispatched = false;
type_assign(&out_stype, kDefaultStorage);
if (!dispatched && out_stype == kDefaultStorage) {
// default
dispatched =
storage_type_assign(out_attrs, kDefaultStorage, dispatch_mode, DispatchMode::kFCompute);
}
if (!dispatched && rsp && out_stype == kRowSparseStorage) {
// rsp
dispatched =
storage_type_assign(out_attrs, kRowSparseStorage, dispatch_mode, DispatchMode::kFComputeEx);
}
if (!dispatched && csr && out_stype == kCSRStorage) {
// csr
dispatched =
storage_type_assign(out_attrs, kCSRStorage, dispatch_mode, DispatchMode::kFComputeEx);
}
if (!dispatched) {
dispatched = dispatch_fallback(out_attrs, dispatch_mode);
}
return dispatched;
}
/*!
* \brief General-purpose blob value-filling function
* \tparam xpu cpu or gpu
* \tparam ValueType Data type of supplied value
* \tparam is_integer Whether to optimize for an integer value
* \param s Stream
* \param b The blob to fill with a value
* \param req Request type (kNullOp, kWriteTo, etc.)
* \param val The value to use for the filling operation
*/
template <bool is_integer = false, typename ValueType, typename xpu>
void Fill(mshadow::Stream<xpu>* s, const TBlob& b, const OpReqType req, ValueType val) {
if (!features::is_enabled(features::INT64_TENSOR_SIZE)) {
CHECK_LT(b.Size(), (int64_t{1} << 31) - 1)
<< "[Fill] Size of the tensor you are trying to allocate is larger than "
"2^31 elements. Please build with flag USE_INT64_TENSOR_SIZE=1";
}
// If b is a zero-size tensor, do nothing.
if (b.Size() == 0)
return;
if (req != kNullOp) {
const size_t size = b.Size();
if (val == 0) {
if (req != kAddTo) {
if (b.dev_mask() == cpu::kDevMask && size < 50000) {
MSHADOW_TYPE_SWITCH_EXT_WITH_BOOL(
b.type_flag_, DType, { memset(b.dptr_, 0, size * sizeof(DType)); });
} else {
// Optimize the common use-case of filling with zeros
MSHADOW_TYPE_SWITCH_EXT_WITH_BOOL(b.type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<mxnet_op::set_to_int<0>, Req>, xpu>::Launch(
s, b.Size(), b.dptr<DType>());
});
});
}
}
} else if (is_integer && val == 1) {
// Optimize common use-case of filling with ones
MSHADOW_TYPE_SWITCH_EXT_WITH_BOOL(b.type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<mxnet_op::set_one, Req>, xpu>::Launch(
s, b.Size(), b.dptr<DType>());
});
});
} else {
// Generic fill kernel from variable
MSHADOW_TYPE_SWITCH_EXT_WITH_BOOL(b.type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<mshadow_op::identity, Req>, xpu>::Launch(
s, b.Size(), b.dptr<DType>(), static_cast<DType>(val));
});
});
}
}
}
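// Minimal usage sketch (illustrative, assuming a valid stream `s` and a float32
// TBlob `out`); not part of the build:
//   Fill<true>(s, out, kWriteTo, 0);    // memset / set_to_int<0> fast path
//   Fill<true>(s, out, kWriteTo, 1);    // set_one fast path
//   Fill<false>(s, out, kWriteTo, 0.5); // generic fill via mshadow_op::identity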
/*! \brief Fill output with a scalar integer value */
template <typename xpu, int value>
void FillCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
Fill<true>(ctx.get_stream<xpu>(), outputs[0], req[0], value);
}
/*! \brief Fill output with the fill_value from FullLikeOpParam */
template <typename xpu>
void FullLikeOpCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
CHECK_EQ(inputs.size(), 1U);
CHECK_EQ(outputs.size(), 1U);
const auto& param = nnvm::get<FullLikeOpParam>(attrs.parsed);
Fill<false>(ctx.get_stream<xpu>(), outputs[0], req[0], param.fill_value);
}
/*! \brief Fill output with an arbitrary value */
template <typename xpu>
void InitFillWithScalarCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
CHECK_EQ(inputs.size(), 0);
CHECK_EQ(outputs.size(), 1U);
const auto& param = nnvm::get<InitOpWithScalarParam>(attrs.parsed);
Fill<false>(ctx.get_stream<xpu>(), outputs[0], req[0], param.value);
}
struct PopulateFullIdxRspKernel : public mxnet_op::tunable {
template <typename IType>
MSHADOW_XINLINE static void Map(int i, IType* out) {
KERNEL_ASSIGN(out[i], kWriteTo, i);
}
};
// Fill in the indices and values of a RowSparse NDArray so that it explicitly
// represents a zeros NDArray, instead of the usual compact (empty) representation.
template <typename xpu>
inline void FillDnsZerosRspImpl(mshadow::Stream<xpu>* s, NDArray* dst) {
using namespace rowsparse;
using namespace mshadow::expr;
using namespace mshadow;
using namespace mxnet_op;
CHECK_EQ(dst->storage_type(), kRowSparseStorage);
MSHADOW_IDX_TYPE_SWITCH(dst->aux_type(kIdx), IType, {
const index_t num_rows = dst->shape()[0];
dst->CheckAndAlloc({Shape1(num_rows)});
Fill<true>(s, dst->data(), kWriteTo, 0);
auto idx = dst->aux_data(kIdx).FlatTo1D<xpu, IType>(s);
Kernel<PopulateFullIdxRspKernel, xpu>::Launch(s, num_rows, idx.dptr_);
});
}
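// Example (illustrative): for a 4-row row_sparse zeros array this produces
// idx = [0, 1, 2, 3] and an all-zero data blob, storing every row explicitly,
// unlike FillZerosRspImpl below, which leaves the idx array empty.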
/*!
* \brief Fill an rsp NDArray with zeros by updating the aux shape.
* \tparam xpu - cpu or gpu
* \param s - The device stream
* \param dst - NDArray which is to be set to "all zeroes"
*/
template <typename xpu>
void FillZerosRspImpl(mshadow::Stream<xpu>*, const NDArray& dst) {
CHECK_EQ(dst.storage_type(), kRowSparseStorage) << "dst should be an RSP NDArray";
if (dst.storage_initialized()) {
// Reset the shapes if the array is not already empty (set_aux_shape() sets storage_shape to zero as well).
dst.set_aux_shape(rowsparse::kIdx, mxnet::TShape(mshadow::Shape1(0)));
}
}
/*!
* \brief Fill a CSR NDArray with zeros by updating the aux shape
* \param s - The device stream
* \param dst - NDArray which is to be set to "all zeroes"
*/
inline void FillZerosCsrImpl(mshadow::Stream<mshadow::cpu>* s, const NDArray& dst) {
CHECK_EQ(dst.storage_type(), kCSRStorage) << "dst is not a CSR NDArray";
dst.set_aux_shape(csr::kIdx, mshadow::Shape1(0));
dst.CheckAndAllocAuxData(csr::kIndPtr, mshadow::Shape1(dst.shape()[0] + 1));
TBlob indptr_data = dst.aux_data(csr::kIndPtr);
Fill<true>(s, indptr_data, kWriteTo, 0);
}
void FillZerosCsrImpl(mshadow::Stream<mshadow::gpu>* s, const NDArray& dst);
/*!
* \brief Fill an NDArray with zeros
* \tparam xpu - cpu or gpu
* \param attrs - node attributes (unused)
* \param ctx - Device context
* \param inputs - NDArray inputs (unused)
* \param req - Request type (i.e. kWriteTo, kNullOp, etc.)
* \param outputs - Array which contains at position zero (0) the array to be set to zeros
*/
template <typename xpu>
void FillComputeZerosEx(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
using namespace mshadow;
using namespace mshadow::expr;
Stream<xpu>* s = ctx.get_stream<xpu>();
CHECK_EQ(outputs.size(), 1);
auto stype = outputs[0].storage_type();
// x + 0 == x
if (req[0] == kNullOp || req[0] == kAddTo)
return;
if (stype == kRowSparseStorage) {
FillZerosRspImpl(s, outputs[0]);
} else if (stype == kCSRStorage) {
FillZerosCsrImpl(s, outputs[0]);
} else {
LogUnimplementedOp(attrs, ctx, inputs, req, outputs);
}
}
template <typename xpu>
inline void EyeFillImpl(const TBlob& out_data,
const OpContext& ctx,
const std::vector<OpReqType>& req,
const nnvm::dim_t num_cols,
const nnvm::dim_t N,
const nnvm::dim_t k) {
using namespace mxnet_op;
const nnvm::dim_t cnnz = std::max(num_cols - std::abs(k), (nnvm::dim_t)0);
const nnvm::dim_t rnnz = std::max(N - std::abs(k), (nnvm::dim_t)0);
const nnvm::dim_t nnz = k > 0 ? std::min(cnnz, N) : std::min(rnnz, num_cols);
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
MSHADOW_TYPE_SWITCH_EXT(out_data.type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
Fill(s, out_data, req[0], static_cast<DType>(0));
if (nnz > 0) {
Kernel<eye_dns_fill<req_type>, xpu>::Launch(
s, nnz, out_data.dptr<DType>(), std::max(static_cast<nnvm::dim_t>(0), k), k, num_cols);
}
});
});
}
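// Worked example (illustrative): N = 3, num_cols = 4, k = 1 gives cnnz = 3,
// rnnz = 2, and nnz = min(cnnz, N) = 3; the kernel runs with init_col = 1 and
// sets out_data[i * 4 + i + 1] = 1 for i in {0, 1, 2}, i.e. elements (0,1),
// (1,2), (2,3): the first super-diagonal of a 3x4 matrix.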
template <typename xpu>
void EyeFill(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
CHECK_EQ(inputs.size(), 0U);
CHECK_EQ(outputs.size(), 1U);
CHECK_EQ(req.size(), 1U);
const EyeParam& param = nnvm::get<EyeParam>(attrs.parsed);
const TBlob& out_data = outputs[0];
const nnvm::dim_t num_cols = param.M > 0 ? param.M : param.N;
EyeFillImpl<xpu>(out_data, ctx, req, num_cols, param.N, param.k);
}
struct range_fwd {
template <typename DType>
MSHADOW_XINLINE static void Map(index_t i,
index_t repeat,
DType start,
DType step,
int req,
DType* out) {
KERNEL_ASSIGN(out[i], req, start + (i / repeat) * step);
}
};
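// Worked example (illustrative): start = 1, step = 3, repeat = 2 over 6 outputs
// yields out = {1, 1, 4, 4, 7, 7}, since out[i] = start + (i / repeat) * step.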
template <typename xpu, typename ParamType>
void RangeCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mxnet_op;
Stream<xpu>* s = ctx.get_stream<xpu>();
const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
MSHADOW_TYPE_SWITCH_EXT(outputs[0].type_flag_, DType, {
// Force unsigned params to take two's complement form on ARM to ensure consistency
// with x86 results. Casting negative floats to unsigned types is undefined in the
// C++ standard.
auto step = std::is_signed<DType>() ? param.step : static_cast<index_t>(param.step);
auto start = std::is_signed<DType>() ? param.start : static_cast<index_t>(param.start);
Kernel<range_fwd, xpu>::Launch(s,
outputs[0].Size(),
static_cast<int>(param.repeat),
static_cast<DType>(start),
static_cast<DType>(step),
req[0],
outputs[0].dptr<DType>());
});
}
inline bool RangeShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector* in_attrs,
mxnet::ShapeVector* out_attrs) {
const RangeParam& param = nnvm::get<RangeParam>(attrs.parsed);
CHECK_EQ(in_attrs->size(), 0U);
CHECK_EQ(out_attrs->size(), 1U);
CHECK_NE(param.step, 0) << "Range does not support step=0, received " << param.step;
CHECK(param.repeat > 0) << "Range only supports repeat > 0, received " << param.repeat;
if (param.infer_range && !param.stop.has_value()) {
return false;
}
if (param.step > 0) {
CHECK(param.start < param.stop.value())
<< "Invalid range (start, stop, step) = "
<< "(" << param.start << "," << param.stop.value() << "," << param.step << ")";
} else {
CHECK(param.start > param.stop.value())
<< "Invalid range (start, stop, step)= "
<< "(" << param.start << "," << param.stop.value() << "," << param.step << ")";
}
const double out_size = std::ceil((param.stop.value() - param.start) / param.step) * param.repeat;
mxnet::TShape output_shape = mxnet::TShape({static_cast<nnvm::dim_t>(out_size)});
if (!features::is_enabled(features::INT64_TENSOR_SIZE)) {
CHECK_LT(output_shape.Size(), (int64_t{1} << 31) - 1)
<< "[RangeShape] Size of tensor you are trying to allocate is larger than "
"2^31 elements. Please build with flag USE_INT64_TENSOR_SIZE=1";
}
SHAPE_ASSIGN_CHECK(*out_attrs, 0, output_shape);
return true;
}
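// Example (illustrative): start = 0, stop = 5, step = 2, repeat = 1 gives
// out_size = ceil((5 - 0) / 2) * 1 = 3, i.e. output shape (3,) holding 0, 2, 4.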
struct linspace_fwd {
template <typename DType>
MSHADOW_XINLINE static void Map(index_t i,
double start,
double stop,
double step,
int req,
DType* out) {
KERNEL_ASSIGN(out[i], req, static_cast<DType>(start + step * i));
}
};
template <typename xpu>
void LinspaceCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mxnet_op;
Stream<xpu>* s = ctx.get_stream<xpu>();
const LinspaceParam& param = nnvm::get<LinspaceParam>(attrs.parsed);
MSHADOW_TYPE_SWITCH_EXT_WITH_BOOL(outputs[0].type_flag_, DType, {
index_t step_num = param.endpoint ? param.num - 1 : param.num;
double step = step_num > 0 ? (param.stop - param.start) / step_num : 0.0;
Kernel<linspace_fwd, xpu>::Launch(
s, outputs[0].Size(), param.start, param.stop, step, req[0], outputs[0].dptr<DType>());
});
}
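// Worked example (illustrative): start = 0, stop = 10, num = 5, endpoint = true
// gives step = (10 - 0) / 4 = 2.5 and output {0, 2.5, 5, 7.5, 10}; with
// endpoint = false the step would be 2 and the output {0, 2, 4, 6, 8}.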
inline bool LinspaceShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector* in_attrs,
mxnet::ShapeVector* out_attrs) {
const LinspaceParam& param = nnvm::get<LinspaceParam>(attrs.parsed);
CHECK_EQ(in_attrs->size(), 0U);
CHECK_EQ(out_attrs->size(), 1U);
CHECK_GE(param.num, 0) << "Number of samples must be non-negative, received " << param.num;
mxnet::TShape shape = mxnet::TShape({static_cast<nnvm::dim_t>(param.num)});
SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape);
return true;
}
inline bool RangeLikeShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector* in_attrs,
mxnet::ShapeVector* out_attrs) {
const RangeLikeParam& param = nnvm::get<RangeLikeParam>(attrs.parsed);
CHECK_EQ(in_attrs->size(), 1U);
CHECK_EQ(out_attrs->size(), 1U);
int real_axis = -1;
if (param.axis.has_value()) {
real_axis =
param.axis.value() < 0 ? (param.axis.value() + (*in_attrs)[0].ndim()) : param.axis.value();
CHECK(real_axis >= 0 && real_axis < (*in_attrs)[0].ndim())
<< "cannot handle param.axis " << param.axis.value() << ".";
}
if (real_axis == -1) {
SHAPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]);
} else {
const index_t out_size = (*in_attrs)[0][real_axis];
SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({static_cast<nnvm::dim_t>(out_size)}));
}
return true;
}
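// Example (illustrative): for an input of shape (2, 3), axis = -1 resolves to
// real_axis = 1 and the output shape is (3,); with no axis given, the output
// keeps the full input shape (2, 3).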
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_TENSOR_INIT_OP_H_