/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file test_util.h
* \brief unit test utility and debug-output functions
* \author Chris Olivier
*/
#ifndef TEST_UTIL_H_
#define TEST_UTIL_H_
#include <gtest/gtest.h>
#include <mxnet/storage.h>
#include <mxnet/ndarray.h>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <vector>
#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION)
#include <cxxabi.h>
#endif
#include "../../../src/ndarray/ndarray_function.h"
#if MXNET_USE_VTUNE
#include <ittnotify.h>
#endif
namespace mxnet {
namespace test {
extern bool unitTestsWithCuda;
extern bool debug_output;
extern bool quick_test;
extern bool performance_run;
extern bool csv;
extern bool thread_safety_force_cpu;
template <typename DType>
inline size_t shapeMemorySize(const mxnet::TShape& shape) {
return shape.Size() * sizeof(DType);
}
class BlobMemory {
public:
explicit inline BlobMemory(const bool isGPU) : isGPU_(isGPU) {
this->handle_.dptr = nullptr;
}
inline ~BlobMemory() {
Free();
}
void* Alloc(const size_t size) {
CHECK_GT(size, 0U); // You've probably made a mistake
mxnet::Context context = isGPU_ ? mxnet::Context::GPU(0) : mxnet::Context{};
Storage* storage = mxnet::Storage::Get();
handle_ = storage->Alloc(size, context);
return handle_.dptr;
}
void Free() {
mxnet::Storage::Get()->DirectFree(handle_);
handle_.dptr = nullptr;
handle_.size = 0;
}
size_t Size() const {
return handle_.size;
}
private:
const bool isGPU_;
Storage::Handle handle_;
};
class StandaloneBlob : public TBlob {
public:
inline StandaloneBlob(const mxnet::TShape& shape, const bool isGPU, const int dtype)
: TBlob(nullptr, shape, isGPU ? gpu::kDevMask : cpu::kDevMask, dtype),
memory_(std::make_shared<BlobMemory>(isGPU)) {
MSHADOW_TYPE_SWITCH(
dtype, DType, { this->dptr_ = memory_->Alloc(shapeMemorySize<DType>(shape)); });
}
inline ~StandaloneBlob() {
this->dptr_ = nullptr;
}
inline size_t MemorySize() const {
return memory_->Size();
}
private:
/*! \brief Locally allocated memory block for this blob */
std::shared_ptr<BlobMemory> memory_;
};
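// Illustrative usage (the 2x3 float32 shape is hypothetical): allocate a CPU blob
// whose backing memory is owned by the blob itself:
//   test::StandaloneBlob blob(mxnet::TShape(mshadow::Shape2(2, 3)), false, mshadow::kFloat32);
//   CHECK_EQ(blob.MemorySize(), 2 * 3 * sizeof(float));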
/*!
* \brief Access a TBlob's data on the CPU within the scope of this object.
* The overloaded () operator returns the CPU-resident TBlob.
* On destruction, the data is copied back to the GPU (if the source was a
* GPU blob and copy_back_result is set).
*/
class CAccessAsCPU {
public:
CAccessAsCPU(const RunContext& run_ctx, const TBlob& src, bool copy_back_result = true)
: run_ctx_(run_ctx), src_(src), copy_back_result_(copy_back_result) {
#if MXNET_USE_CUDA
if (run_ctx.ctx.dev_type == Context::kCPU) {
blob_ = src;
} else {
Context cpu_ctx, gpu_ctx = run_ctx.ctx;
cpu_ctx.dev_type = Context::kCPU;
cpu_ctx.dev_id = 0;
NDArray on_cpu(src.shape_, cpu_ctx, false, src_.type_flag_);
on_cpu.CheckAndAlloc();
blob_ = on_cpu.data();
run_ctx.get_stream<gpu>()->Wait();
// Copy() arguments are (from, to, from_ctx, to_ctx, run_ctx)
mxnet::ndarray::Copy<gpu, cpu>(src, &blob_, gpu_ctx, cpu_ctx, run_ctx);
run_ctx.get_stream<gpu>()->Wait();
on_cpu_ = on_cpu;
}
#else
blob_ = src;
#endif
}
~CAccessAsCPU() {
#if MXNET_USE_CUDA
if (copy_back_result_) {
// Copy back from GPU to CPU
if (run_ctx_.ctx.dev_type == Context::kGPU) {
Context cpu_ctx, gpu_ctx = run_ctx_.ctx;
cpu_ctx.dev_type = Context::kCPU;
cpu_ctx.dev_id = 0;
run_ctx_.get_stream<gpu>()->Wait();
mxnet::ndarray::Copy<cpu, gpu>(blob_, &src_, cpu_ctx, gpu_ctx, run_ctx_);
run_ctx_.get_stream<gpu>()->Wait();
}
}
#endif
}
inline const TBlob& operator()() const {
return blob_;
}
private:
const RunContext run_ctx_;
TBlob src_;
const bool copy_back_result_;
NDArray on_cpu_;
TBlob blob_;
};
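// Illustrative usage (assuming `run_ctx` and a possibly-GPU `some_blob` are in scope):
//   {
//     test::CAccessAsCPU cpu_access(run_ctx, some_blob, /* copy_back_result = */ true);
//     const TBlob& cpu_blob = cpu_access();  // safe to read and write on the CPU here
//   }  // destructor copies any modifications back to the GPU blob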
/*!
* \brief Access an NDArray's data as if on the CPU via a callback
* \tparam CallbackFunction Type of the callback to invoke with the CPU-resident NDArray
* \param src Source NDArray (on GPU or CPU)
* \param run_ctx Run context
* \param cb Callback to invoke with the CPU-resident NDArray
*/
template <typename CallbackFunction>
inline void AccessAsCPU(const NDArray& src, const RunContext& run_ctx, CallbackFunction cb) {
#if MXNET_USE_CUDA
if (src.ctx().dev_type == Context::kCPU) {
cb(src);
} else {
Context cpu_ctx, gpu_ctx = src.ctx();
cpu_ctx.dev_type = Context::kCPU;
cpu_ctx.dev_id = 0;
NDArray on_cpu(src.shape(), cpu_ctx, false, src.dtype());
on_cpu.CheckAndAlloc();
TBlob tmp1 = on_cpu.data();
run_ctx.get_stream<gpu>()->Wait();
mxnet::ndarray::Copy<gpu, cpu>(src.data(), &tmp1, gpu_ctx, cpu_ctx, run_ctx);
run_ctx.get_stream<gpu>()->Wait();
cb(on_cpu);
TBlob tmp2 = src.data();
mxnet::ndarray::Copy<cpu, gpu>(on_cpu.data(), &tmp2, cpu_ctx, gpu_ctx, run_ctx);
run_ctx.get_stream<gpu>()->Wait();
}
#else
cb(src);
#endif
}
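// Illustrative usage (assumes `arr` is a float32 NDArray): zero it regardless of device:
//   test::AccessAsCPU(arr, run_ctx, [](const NDArray& on_cpu) {
//     float* p = on_cpu.data().dptr<float>();
//     for (size_t i = 0, n = on_cpu.data().Size(); i < n; ++i) p[i] = 0.0f;
//   });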
/*!
* \brief Access a TBlob's data as if on the CPU via a callback
* \tparam CallbackFunction Type of the callback to invoke with the CPU-resident TBlob
* \param src Source TBlob (on GPU or CPU)
* \param run_ctx Run context
* \param cb Callback to invoke with the CPU-resident TBlob
*/
template <typename CallbackFunction>
inline void AccessAsCPU(const TBlob& src, const RunContext& run_ctx, CallbackFunction cb) {
#if MXNET_USE_CUDA
if (run_ctx.ctx.dev_type == Context::kCPU) {
cb(src);
} else {
cb(CAccessAsCPU(run_ctx, src, true)());
}
#else
cb(src);
#endif
}
constexpr const size_t MPRINT_PRECISION = 5;
template <typename DType>
inline void fill(const RunContext& run_ctx, const TBlob& _blob, const DType val) {
AccessAsCPU(_blob, run_ctx, [val](const TBlob& blob) {
MSHADOW_TYPE_SWITCH(blob.type_flag_, DTypeX, {
DTypeX* p1 = blob.dptr<DTypeX>();
for (size_t i = 0, n = blob.Size(); i < n; ++i) {
*p1++ = val;
}
});
});
}
template <typename DType>
inline void try_fill(const RunContext& run_ctx, const TBlob* blob, const DType val) {
if (blob) {
fill(run_ctx, *blob, val);
}
}
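// Illustrative usage (`blob` and `maybe_blob` are hypothetical): fill with a constant,
// converting to the blob's dtype on any device, and tolerating a null output pointer:
//   test::fill(run_ctx, blob, 1.0f);
//   test::try_fill(run_ctx, maybe_blob, 0.0f);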
template <typename DType, typename Stream>
inline void dump(Stream* os, const TBlob& blob, const char* suffix = "f") {
DType* p1 = blob.dptr<DType>();
for (size_t i = 0, n = blob.Size(); i < n; ++i) {
if (i) {
*os << ", ";
}
const DType val = *p1++;
std::stringstream stream;
stream << val;
std::string ss = stream.str();
if (suffix && *suffix == 'f') {
if (std::find(ss.begin(), ss.end(), '.') == ss.end()) {
ss += ".0";
}
}
*os << ss << suffix;
}
}
/*! \brief Return the extent of the given axis, or 1 if the axis is out of range */
inline index_t getMult(const mxnet::TShape& shape, const index_t axis) {
return axis < shape.ndim() ? shape[axis] : 1;
}
/*! \brief Compute the flat row-major offset for the given indices, e.g. batch, channel, depth, row, column */
inline index_t offset(const mxnet::TShape& shape, const std::vector<size_t>& indices) {
const size_t dim = shape.ndim();
CHECK_LE(indices.size(), dim);
size_t offset = 0;
for (size_t i = 0; i < dim; ++i) {
offset *= shape[i];
if (indices.size() > i) {
CHECK_LT(indices[i], shape[i]);
offset += indices[i];
}
}
return offset;
}
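// Worked example: for shape (2, 3, 4), offset(shape, {1, 2, 3}) evaluates to
// ((1 * 3) + 2) * 4 + 3 = 23, the standard row-major linearization.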
/*! \brief Return reference to data at position indexes */
template <typename DType>
inline const DType& data_at(const TBlob* blob, const std::vector<size_t>& indices) {
return blob->dptr<DType>()[offset(blob->shape_, indices)];
}
/*! \brief Return mutable reference to data at position indexes */
template <typename DType>
inline DType& data_ref(const TBlob* blob, const std::vector<size_t>& indices) {
return blob->dptr<DType>()[offset(blob->shape_, indices)];
}
inline std::string repeatedStr(const char* s,
const signed int count,
const bool trailSpace = false) {
if (count <= 0) {
return std::string();
} else if (count == 1) {
std::stringstream str;
str << s << " ";
return str.str();
} else {
std::stringstream str;
for (int x = 0; x < count; ++x) {
str << s;
}
if (trailSpace) {
str << " ";
}
return str.str();
}
}
/*! \brief Pretty print a shape with optional label */
template <typename StreamType>
inline StreamType& print_shape(StreamType* _os,
const std::string& label,
const mxnet::TShape& shape,
const bool add_endl = true) {
if (!label.empty()) {
*_os << label << ": ";
}
*_os << "(";
for (size_t i = 0, n = shape.ndim(); i < n; ++i) {
if (i) {
*_os << ", ";
}
*_os << shape[i];
}
*_os << ")";
if (add_endl) {
*_os << std::endl;
} else {
*_os << " ";
}
return *_os << std::flush;
}
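// Example: print_shape(&std::cout, "x", mxnet::TShape(mshadow::Shape2(2, 3))) prints "x: (2, 3)".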
/*! \brief Pretty print a 1D, 2D, or 3D blob */
template <typename DType, typename StreamType>
inline StreamType& print_blob_(const RunContext& ctx,
StreamType* _os,
const TBlob& blob,
const bool doChannels = true,
const bool doBatches = true,
const bool add_endl = true) {
#if MXNET_USE_CUDA
if (blob.dev_mask() == gpu::kDevMask) {
return print_blob_<DType>(
ctx, _os, CAccessAsCPU(ctx, blob, false)(), doChannels, doBatches, add_endl);
}
#endif // MXNET_USE_CUDA
StreamType& os = *_os;
const size_t dim = static_cast<size_t>(blob.ndim());
if (dim == 1) {
// probably a 1d tensor (mshadow::Tensor is deprecated)
TBlob changed(blob.dptr<DType>(), mxnet::TShape(3, -1), blob.dev_mask(), blob.dev_id());
changed.shape_[0] = 1;
changed.shape_[1] = 1;
changed.shape_[2] = blob.shape_[0];
return print_blob_<DType>(ctx, &os, changed, false, false, add_endl);
} else if (dim == 2) {
// probably a 2d tensor (mshadow::Tensor is deprecated)
TBlob changed(blob.dptr<DType>(), mxnet::TShape(4, -1), blob.dev_mask(), blob.dev_id());
changed.shape_[0] = 1;
changed.shape_[1] = 1;
changed.shape_[2] = blob.shape_[0];
changed.shape_[3] = blob.shape_[1];
return print_blob_<DType>(ctx, &os, changed, false, false, add_endl);
}
CHECK_GE(dim, 3U) << "Invalid dimension zero (0)";
const size_t batchSize = blob.size(0);
size_t channels = 1;
size_t depth = 1;
size_t height = 1;
size_t width = 1;
if (dim > 1) {
channels = blob.size(1);
if (dim > 2) {
if (dim == 3) {
width = blob.size(2);
} else if (dim == 4) {
height = blob.size(2);
width = blob.size(3);
} else {
depth = blob.size(2);
if (dim > 3) {
height = blob.size(3);
if (dim > 4) {
width = blob.size(4);
}
}
}
}
}
for (size_t r = 0; r < height; ++r) {
for (size_t thisBatch = 0; thisBatch < batchSize; ++thisBatch) {
if (doBatches) {
std::stringstream ss;
if (!thisBatch) {
os << "|";
}
ss << "N" << thisBatch << "| ";
const std::string nns = ss.str();
if (!r) {
os << nns;
} else {
os << repeatedStr(" ", nns.size());
}
}
for (size_t thisChannel = 0; thisChannel < channels; ++thisChannel) {
os << "[";
for (size_t c = 0; c < width; ++c) {
if (c) {
os << ", ";
}
for (size_t dd = 0; dd < depth; ++dd) {
DType val;
switch (dim) {
case 3:
val = data_at<DType>(&blob, {thisBatch, thisChannel, c});
break;
case 4:
val = data_at<DType>(&blob, {thisBatch, thisChannel, r, c});
break;
case 5:
val = data_at<DType>(&blob, {thisBatch, thisChannel, dd, r, c});
break;
default:
LOG(FATAL) << "Unsupported blob dimension " << dim;
val = DType(0);
break;
}
os << repeatedStr("(", dd);
os << std::fixed << std::setw(7) << std::setprecision(MPRINT_PRECISION) << std::right
<< val << " ";
os << repeatedStr(")", dd, true);
}
}
os << "] ";
if (!doChannels) {
break;
}
}
if (!doBatches) {
break;
} else {
os << " |" << std::flush;
}
}
if (r < height - 1) {
os << std::endl;
}
}
if (!height) {
os << "[]";
if (add_endl) {
os << std::endl;
}
} else if (!add_endl) {
os << " ";
} else {
os << std::endl;
}
os << std::flush;
return os;
}
template <typename StreamType>
inline StreamType& print(const RunContext& ctx,
StreamType* _os,
const TBlob& blob,
const bool doChannels = true,
const bool doBatches = true,
const bool add_endl = true) {
MSHADOW_TYPE_SWITCH(blob.type_flag_, DType, {
print_blob_<DType>(ctx, _os, blob, doChannels, doBatches, add_endl);
});
return *_os;
}
template <typename StreamType>
inline StreamType& print(const RunContext& ctx,
StreamType* _os,
const std::string& label,
const TBlob& blob,
const bool doChannels = true,
bool doBatches = true,
const bool add_endl = true) {
if (!label.empty()) {
*_os << label << ": ";
}
return print(ctx, _os, blob, doChannels, doBatches, add_endl);
}
template <typename StreamType>
inline StreamType& print(const RunContext& ctx,
StreamType* _os,
const std::string& label,
const NDArray& arr) {
if (!label.empty()) {
*_os << label << ": ";
}
switch (arr.storage_type()) {
case kRowSparseStorage: {
// data
const mxnet::TShape& shape = arr.shape();
print_shape(_os, "[row_sparse] main shape", shape, false);
const mxnet::TShape& storage_shape = arr.storage_shape();
const bool is_one_row = storage_shape[0] < 2;
print_shape(_os, "storage shape", storage_shape, false);
print(ctx, _os, arr.data(), true, true, !is_one_row);
// indices
const mxnet::TShape& indices_shape = arr.aux_shape(rowsparse::kIdx);
print_shape(_os, "indices shape", indices_shape, false);
print(ctx, _os, arr.aux_data(rowsparse::kIdx), true, true, false) << std::endl;
break;
}
case kCSRStorage: {
// data
const mxnet::TShape& shape = arr.shape();
print_shape(_os, "[CSR] main shape", shape, false);
const mxnet::TShape& storage_shape = arr.storage_shape();
const bool is_one_row = storage_shape[0] < 2;
print_shape(_os, "storage shape", storage_shape, false);
print(ctx, _os, arr.data(), true, true, !is_one_row);
// row ptrs
const mxnet::TShape& ind_ptr_shape = arr.aux_shape(csr::kIndPtr);
print_shape(_os, "row ptrs shape", ind_ptr_shape, false);
print(ctx, _os, arr.aux_data(csr::kIndPtr), true, true, false) << std::endl;
// col indices
const mxnet::TShape& indices_shape = arr.aux_shape(csr::kIdx);
print_shape(_os, "col indices shape", indices_shape, false);
print(ctx, _os, arr.aux_data(csr::kIdx), true, true, false) << std::endl;
break;
}
case kDefaultStorage: {
// data
const mxnet::TShape& shape = arr.shape();
const bool is_one_row = shape[0] < 2;
print_shape(_os, "[dense] main shape", shape, !is_one_row);
print(ctx, _os, arr.data(), true, true, !is_one_row) << std::endl;
break;
}
default:
CHECK(false) << "Unsupported storage type: " << arr.storage_type();
break;
}
return *_os << std::flush;
}
inline void print(const RunContext& ctx,
const std::string& label,
const std::string& var,
const std::vector<NDArray>& arrays) {
std::cout << label << std::endl;
for (size_t x = 0, n = arrays.size(); x < n; ++x) {
std::stringstream ss;
ss << var << "[" << x << "]";
test::print(ctx, &std::cout, ss.str(), arrays[x]);
}
}
inline void print(const RunContext& ctx,
const std::string& label,
const std::string& var,
const std::vector<TBlob>& arrays) {
std::cout << label << std::endl;
for (size_t x = 0, n = arrays.size(); x < n; ++x) {
std::stringstream ss;
ss << var << "[" << x << "]";
test::print(ctx, &std::cout, ss.str(), arrays[x], true, true, false);
}
}
inline std::string demangle(const char* name) {
#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION)
int status = -4; // some arbitrary value to eliminate the compiler warning
std::unique_ptr<char, void (*)(void*)> res{abi::__cxa_demangle(name, nullptr, nullptr, &status),
&std::free};
return status ? name : res.get();
#else
return name;
#endif
}
template <typename T>
inline std::string type_name() {
return demangle(typeid(T).name());
}
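// Illustrative usage:
//   std::cout << test::type_name<std::vector<int>>() << std::endl;
//   // with libstdc++ this prints something like "std::vector<int, std::allocator<int> >"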
#define PRINT_NDARRAYS(__ctx$, __var) test::print(__ctx$, __FUNCTION__, #__var, __var)
#define PRINT_OP_AND_ARRAYS(__ctx$, __op, __var) \
test::print(__ctx$, \
__FUNCTION__, \
static_cast<std::stringstream*>( \
&(std::stringstream() << #__var << "<" << type_name<__op>() << ">")) \
->str(), \
__var)
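// Illustrative usage inside a test body (`run_ctx`, `outputs`, and `SomeOp` are hypothetical):
//   PRINT_NDARRAYS(run_ctx, outputs);               // labels entries as "outputs[i]"
//   PRINT_OP_AND_ARRAYS(run_ctx, SomeOp, outputs);  // appends the op's demangled type name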
#define PRINT_OP2_AND_ARRAYS(__ctx$, __op1, __op2, __var) \
test::print(__ctx$, \
__FUNCTION__, \
static_cast<std::stringstream*>( \
&(std::stringstream() << #__var << "<" << type_name<__op1>() << ", " \
<< type_name<__op2>() << ">")) \
->str(), \
__var)
/*! \brief Fill blob with some pattern defined by the getNextData() callback
* Pattern fill happens in this defined order (important for analysis):
* 5D: batch item -> channel -> depth -> row -> col
* 4D: batch item -> channel -> row -> col
* 3D: batch item -> channel -> col
*/
template <typename GetNextData>
static inline void patternFill(const RunContext& run_ctx,
const TBlob* _blob,
GetNextData getNextData) {
AccessAsCPU(*_blob, run_ctx, [getNextData](const TBlob& blob) {
const size_t dim = static_cast<size_t>(blob.ndim());
CHECK_LE(dim, 5U) << "Will need to handle above 5 dimensions (another for loop)";
const size_t num = blob.size(0);
const size_t channels = dim > 1 ? blob.size(1) : 1;
const size_t depth = dim > 2 ? blob.size(2) : 1;
const size_t height = dim > 3 ? blob.size(3) : 1;
const size_t width = dim > 4 ? blob.size(4) : 1;
const size_t numberOfIndexes = blob.shape_.Size();
for (size_t n = 0; n < num; ++n) {
if (dim > 1) {
for (size_t ch = 0; ch < channels; ++ch) {
if (dim > 2) {
for (size_t d = 0; d < depth; ++d) {
if (dim > 3) {
for (size_t row = 0; row < height; ++row) {
if (dim > 4) {
for (size_t col = 0; col < width; ++col) {
if (dim == 5) {
const size_t idx = test::offset(blob.shape_, {n, ch, d, row, col});
CHECK_LT(idx, numberOfIndexes);
MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, {
ThisDataType& f = blob.dptr<ThisDataType>()[idx];
f = getNextData();
});
} else {
CHECK(dim <= 5) << "Unimplemented dimension: " << dim;
}
}
} else {
const size_t idx = test::offset(blob.shape_, {n, ch, d, row});
CHECK_LT(idx, numberOfIndexes);
MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, {
ThisDataType& f = blob.dptr<ThisDataType>()[idx];
f = getNextData();
});
}
}
} else {
const size_t idx = test::offset(blob.shape_, {n, ch, d});
CHECK_LT(idx, numberOfIndexes);
MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, {
ThisDataType& f = blob.dptr<ThisDataType>()[idx];
f = getNextData();
});
}
}
} else {
const size_t idx = test::offset(blob.shape_, {n, ch});
CHECK_LT(idx, numberOfIndexes);
MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, {
ThisDataType& f = blob.dptr<ThisDataType>()[idx];
f = getNextData();
});
}
}
} else {
const size_t idx = test::offset(blob.shape_, {n});
CHECK_LT(idx, numberOfIndexes);
MSHADOW_TYPE_SWITCH(blob.type_flag_, ThisDataType, {
ThisDataType& f = blob.dptr<ThisDataType>()[idx];
f = getNextData();
});
}
}
});
}
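// Illustrative usage: fill a blob with the increasing sequence 0, 1, 2, ...:
//   int counter = 0;
//   test::patternFill(run_ctx, &blob, [&counter]() { return counter++; });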
/*! \brief Return a random number within a given range (inclusive of both endpoints) */
template <class ScalarType>
inline ScalarType rangedRand(const ScalarType min, const ScalarType max) {
// Rejection sampling to avoid the modulo bias of a plain rand() % n
const uint64_t num_bins = static_cast<uint64_t>(max - min) + 1;
const uint64_t num_rand = static_cast<uint64_t>(RAND_MAX);
const uint64_t bin_size = num_rand / num_bins;
const uint64_t defect = num_rand % num_bins;
uint64_t x;
do {
x = static_cast<uint64_t>(std::rand());
} while (num_rand - defect <= x);
return static_cast<ScalarType>(x / bin_size) + min;
}
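// Illustrative usage: simulate a die roll, uniform over {1, ..., 6}:
//   const int roll = test::rangedRand(1, 6);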
/*!
* \brief Deterministically compare mxnet::TShape objects as less-than,
* for use as an STL sorted-container key (e.g. map, set)
* \param s1 First shape
* \param s2 Second shape
* \return true if s1 is less than s2
*/
inline bool operator<(const mxnet::TShape& s1, const mxnet::TShape& s2) {
if (s1.Size() == s2.Size()) {
if (s1.ndim() == s2.ndim()) {
for (size_t i = 0, n = s1.ndim(); i < n; ++i) {
if (s1[i] == s2[i]) {
continue;
}
return s1[i] < s2[i];
}
return false;
}
return s1.ndim() < s2.ndim();
}
return s1.Size() < s2.Size();
}
/*!
* \brief Deterministically compare vectors of mxnet::TShape objects as less-than,
* for use as an STL sorted-container key (e.g. map, set)
* \param v1 First vector of shapes
* \param v2 Second vector of shapes
* \return true if v1 is less than v2
*/
inline bool operator<(const std::vector<mxnet::TShape>& v1, const std::vector<mxnet::TShape>& v2) {
if (v1.size() == v2.size()) {
for (size_t i = 0, n = v1.size(); i < n; ++i) {
if (v1[i] == v2[i]) {
continue;
}
return v1[i] < v2[i];
}
return false;
}
return v1.size() < v2.size();
}
/*!
* \brief std::less-style comparison functor for vectors of shapes, for use in STL sorted containers
*/
struct less_shapevect {
bool operator()(const std::vector<mxnet::TShape>& v1,
const std::vector<mxnet::TShape>& v2) const {
if (v1.size() == v2.size()) {
for (size_t i = 0, n = v1.size(); i < n; ++i) {
if (v1[i] == v2[i]) {
continue;
}
return v1[i] < v2[i];
}
return false;
}
return v1.size() < v2.size();
}
};
inline std::string pretty_num(uint64_t val) {
if (!test::csv) {
std::string res, s = std::to_string(val);
size_t ctr = 0;
for (int i = static_cast<int>(s.size()) - 1; i >= 0; --i, ++ctr) {
if (ctr && (ctr % 3) == 0) {
res += ",";
}
res.push_back(s[i]);
}
std::reverse(res.begin(), res.end());
return res;
} else {
return std::to_string(val);
}
}
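// Example: pretty_num(1234567) returns "1,234,567" (or "1234567" when test::csv is set).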
/*! \brief Change a value during the scope of this declaration */
template <typename T>
struct ScopeSet {
inline ScopeSet(T* var, const T tempValue) : var_(*var), saveValue_(*var) {
*var = tempValue;
}
inline ~ScopeSet() {
var_ = saveValue_;
}
T& var_;
T saveValue_;
};
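// Illustrative usage: force verbose output for the duration of one scope:
//   {
//     test::ScopeSet<bool> set_debug(&test::debug_output, true);
//     // ... debug_output is true here ...
//   }  // restored to its previous value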
static inline void AssertEqual(const std::vector<NDArray*>& in_arrs,
const std::vector<NDArray*>& out_arrs,
float rtol = 1e-5,
float atol = 1e-8,
bool test_first_only = false) {
for (size_t j = 0; j < in_arrs.size(); ++j) {
// When test_first_only is set, only compare the first pair of arrays
if (test_first_only && j == 1) {
return;
}
NDArray tmp1 = *in_arrs[j];
NDArray tmp2 = *out_arrs[j];
if (tmp1.ctx().dev_type == mxnet::Context::kGPU) {
tmp1 = tmp1.Copy(mxnet::Context::CPU(0));
tmp2 = tmp2.Copy(mxnet::Context::CPU(0));
tmp1.WaitToRead();
tmp2.WaitToRead();
}
#if MXNET_USE_ONEDNN == 1
tmp1 = tmp1.Reorder2Default();
tmp2 = tmp2.Reorder2Default();
#endif
EXPECT_EQ(tmp1.shape().Size(), tmp2.shape().Size());
TBlob blob1 = tmp1.data();
TBlob blob2 = tmp2.data();
// Note: the comparison assumes both arrays hold mshadow::default_real_t (float) data
mshadow::default_real_t* d1 = static_cast<mshadow::default_real_t*>(blob1.dptr_);
mshadow::default_real_t* d2 = static_cast<mshadow::default_real_t*>(blob2.dptr_);
for (size_t i = 0, n = tmp1.shape().Size(); i < n; ++i) {
const float abs_err = std::fabs(d1[i] - d2[i]);
ASSERT_LE(abs_err, atol + rtol * std::fabs(d2[i]))
<< "index: " << i << ", " << d1[i] << " vs " << d2[i];
}
}
}
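// Illustrative usage (assuming two std::vector<NDArray*> of matching outputs):
//   test::AssertEqual(cpu_outputs, gpu_outputs, /* rtol */ 1e-4f, /* atol */ 1e-6f);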
} // namespace test
} // namespace mxnet
#if defined(_MSC_VER)
#include <windows.h>  // CreateWaitableTimer / SetWaitableTimer / WaitForSingleObject
inline void usleep(__int64 usec) {
HANDLE timer;
LARGE_INTEGER ft;
// Convert to 100 nanosecond interval, negative value indicates relative time
ft.QuadPart = -(10 * usec);
timer = CreateWaitableTimer(NULL, TRUE, NULL);
SetWaitableTimer(timer, &ft, 0, NULL, NULL, 0);
WaitForSingleObject(timer, INFINITE);
CloseHandle(timer);
}
#endif // _MSC_VER
#endif // TEST_UTIL_H_