| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * \file mkldnn.cc |
 * \brief Test functions in MKLDNN.
| * \author Da Zheng |
| */ |
| |
| #if MXNET_USE_MKLDNN == 1 |
| |
| #include <mkldnn_types.h> |
| #include <cmath> |
| #include <climits> |
| #include <set> |
| #include "gtest/gtest.h" |
| #include "mxnet/imperative.h" |
| #include "../../src/operator/nn/mkldnn/mkldnn_base-inl.h" |
| #include "../../src/operator/nn/mkldnn/mkldnn_ops-inl.h" |
| #include "../../src/operator/nn/mkldnn/mkldnn_pooling-inl.h" |
| #include "../../src/operator/nn/pooling-inl.h" |
| |
| using namespace mxnet; |
| |
| #if __GNUC__ >= 5 |
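// Check that mxnet::AlignMem returns the same pointer and leftover space as
// std::align for the same alignment request.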
| bool test_mem_align(void *mem, size_t size, size_t alignment, size_t space) { |
| void *ret1, *ret2; |
| size_t space1, space2; |
| space1 = space; |
| space2 = space; |
| ret1 = mxnet::AlignMem(mem, size, alignment, &space1); |
| ret2 = std::align(alignment, size, mem, space2); |
| EXPECT_EQ(ret1, ret2); |
| EXPECT_EQ(space1, space2); |
| return ret1 == ret2; |
| } |
| #endif |
| |
| TEST(MKLDNN_UTIL_FUNC, AlignMem) { |
| #if __GNUC__ >= 5 |
| size_t alignment = 4096; |
| void *mem; |
| size_t size, space; |
  // When mem is already aligned.
| mem = reinterpret_cast<void *>(0x10000); |
| size = 1000; |
| space = 10000; |
| test_mem_align(mem, size, alignment, space); |
| |
| // When mem isn't aligned and we have enough space for alignment. |
| mem = reinterpret_cast<void *>(0x10010); |
| size = 1000; |
| space = 10000; |
| test_mem_align(mem, size, alignment, space); |
| |
  // When mem isn't aligned and we don't have enough space for alignment.
| mem = reinterpret_cast<void *>(0x10010); |
| size = 1000; |
| space = 1001; |
| test_mem_align(mem, size, alignment, space); |
| |
| for (size_t i = 0; i < 10000; i++) { |
| mem = reinterpret_cast<void *>(random()); |
| size = random() % 2000; |
| space = random() % 2000; |
| test_mem_align(mem, size, alignment, space); |
| } |
| #else |
  // std::align is not supported by GCC < 5.0; this test case will be checked
  // with a newer version.
| LOG(INFO) << "Skipped for GCC " << __GNUC__ << "." << __GNUC_MINOR__; |
| #endif |
| } |
| |
| TEST(MKLDNN_UTIL_FUNC, MemFormat) { |
  // Check whether the number of formats is correct. These hard-coded values
  // match the MKLDNN version in use; if upgrading MKLDNN changes the format
  // enum, these checks fail and the format handling code must be revisited.
| CHECK_EQ(mkldnn_format_last, 67); |
| CHECK_EQ(mkldnn_nchw, 5); |
| CHECK_EQ(mkldnn_oihw, 15); |
| } |
| |
| // Init arrays with the default layout. |
| static void InitDefaultArray(NDArray *arr, bool is_rand = false) { |
| const TBlob &blob = arr->data(); |
| mshadow::default_real_t *data = blob.dptr<mshadow::default_real_t>(); |
  size_t size = blob.Size();

  for (size_t i = 0; i < size; i++)
| if (is_rand) { |
| data[i] = (std::rand() % 100) - 50; |
| } else { |
| data[i] = i % 100 - 50; |
| } |
| } |
| |
| using VerifyFunc = std::function<void (const std::vector<NDArray *> &in_arrs, |
| const std::vector<NDArray *> &out_arrs)>; |
| |
| // Init arrays with the specified layout. |
| static void InitMKLDNNArray(NDArray *arr, const mkldnn::memory::primitive_desc &pd, |
| bool is_rand = false) { |
| InitDefaultArray(arr, is_rand); |
| arr->MKLDNNDataReorderAsync(pd); |
| arr->WaitToRead(); |
| } |
| |
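// Verify that the memory holds the deterministic pattern (i % 100 - 50)
// written by InitDefaultArray.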
| static void VerifyDefMem(const mkldnn::memory &mem) { |
| mkldnn::memory::primitive_desc pd = mem.get_primitive_desc(); |
| mshadow::default_real_t *data |
| = static_cast<mshadow::default_real_t *>(mem.get_data_handle()); |
| size_t size = pd.get_size() / sizeof(mshadow::default_real_t); |
| size_t num_same = 0; |
  for (size_t i = 0; i < size; i++)
| num_same += data[i] == static_cast<mshadow::default_real_t>(i % 100 - 50); |
| EXPECT_EQ(num_same, size); |
| } |
| |
| static void VerifyMem(const mkldnn::memory &mem) { |
| mkldnn::memory::primitive_desc pd = mem.get_primitive_desc(); |
| |
| if (pd.desc().data.format == GetDefaultFormat(pd.desc())) { |
| VerifyDefMem(mem); |
| } else { |
| mkldnn::memory::dims dims(pd.desc().data.ndims); |
| for (size_t i = 0; i < dims.size(); i++) |
| dims[i] = pd.desc().data.dims[i]; |
| mkldnn::memory::desc desc{dims, |
| static_cast<mkldnn::memory::data_type>(pd.desc().data.data_type), |
| static_cast<mkldnn::memory::format>(GetDefaultFormat(pd.desc()))}; |
| mkldnn::memory::primitive_desc new_pd(desc, CpuEngine::Get()->get_engine()); |
| mkldnn::memory new_mem(new_pd); |
| |
| std::vector<mkldnn::primitive> net; |
| net.push_back(mkldnn::reorder(mem, new_mem)); |
| mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); |
| VerifyDefMem(new_mem); |
| } |
| } |
| |
| static bool IsSameShape(mkldnn::memory::primitive_desc pd, TShape shape) { |
| if (pd.desc().data.ndims != shape.ndim()) return false; |
| for (size_t i = 0; i < shape.ndim(); i++) |
| if (pd.desc().data.dims[i] != shape[i]) return false; |
| return true; |
| } |
| |
| static mkldnn::memory::primitive_desc GetMemPD(const TShape s, int dtype, |
| mkldnn::memory::format format) { |
| mkldnn::memory::dims dims(s.ndim()); |
| for (size_t i = 0; i < dims.size(); i++) |
| dims[i] = s[i]; |
| mkldnn::memory::desc desc{dims, get_mkldnn_type(dtype), format}; |
| return mkldnn::memory::primitive_desc(desc, CpuEngine::Get()->get_engine()); |
| } |
| |
| static mkldnn::memory::primitive_desc GetExpandedMemPD( |
| mkldnn::memory::primitive_desc pd, float scale, int dim = 0) { |
  CHECK(dim < pd.desc().data.ndims)
      << "dimension index must be smaller than the number of dimensions of the input";
| nnvm::TShape s(pd.desc().data.ndims); |
| for (size_t i = 0; i < pd.desc().data.ndims; i++) |
| s[i] = pd.desc().data.dims[i]; |
| s[dim] = static_cast<int>(s[dim] * scale); |
| return GetMemPD(s, mshadow::DataType<mshadow::default_real_t>::kFlag, |
| static_cast<mkldnn::memory::format>(pd.desc().data.format)); |
| } |
| |
// This function gets special MKLDNN formats without knowing the specific
// hardware configuration. It may miss formats that are specific to certain
// array shapes, but it covers at least one special format for each of the
// nchw, oihw and goihw formats.
// To test the logic of the code in NDArray, these formats should be enough.
| static std::vector<mkldnn::memory::format> GetMKLDNNFormat(size_t num_dims, int dtype) { |
| if (num_dims == 4) { |
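    // An AlexNet-style first convolution (11x11 kernel, stride 4). With
    // format::any, MKLDNN chooses blocked layouts for the output and weight
    // memories, which we report as "special" formats.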
| mkldnn::memory::dims data_dims{1, 3, 224, 224}; |
| mkldnn::memory::desc data_md{data_dims, get_mkldnn_type(dtype), |
| mkldnn::memory::format::any}; |
| mkldnn::memory::dims weight_dims{96, 3, 11, 11}; |
| mkldnn::memory::desc weight_md{weight_dims, get_mkldnn_type(dtype), |
| mkldnn::memory::format::any}; |
| mkldnn::memory::dims output_dims{1, 96, 54, 54}; |
| mkldnn::memory::desc out_md{output_dims, get_mkldnn_type(dtype), |
| mkldnn::memory::format::any}; |
| mkldnn::memory::dims strides{4, 4}; |
| mkldnn::memory::dims padding{0, 0}; |
| |
| mkldnn::convolution_forward::desc desc(mkldnn::prop_kind::forward_training, |
| mkldnn::algorithm::convolution_direct, |
| data_md, weight_md, out_md, strides, |
| padding, padding, mkldnn::padding_kind::zero); |
| mkldnn::convolution_forward::primitive_desc pd(desc, CpuEngine::Get()->get_engine()); |
| std::vector<mkldnn::memory::format> ret(2); |
| ret[0] = static_cast<mkldnn::memory::format>(pd.dst_primitive_desc().desc().data.format); |
| ret[1] = static_cast<mkldnn::memory::format>(pd.weights_primitive_desc().desc().data.format); |
| printf("format: %d, %d\n", ret[0], ret[1]); |
| return ret; |
| } else if (num_dims == 5) { |
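    // A grouped (depthwise-style) convolution: the 5D weights {32, 1, 1, 3, 3}
    // follow goihw, so MKLDNN chooses a blocked 5D weight format.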
| mkldnn::memory::dims data_dims{1, 32, 112, 112}; |
| mkldnn::memory::desc data_md{data_dims, get_mkldnn_type(dtype), |
| mkldnn::memory::format::any}; |
| mkldnn::memory::dims weight_dims{32, 1, 1, 3, 3}; |
| mkldnn::memory::desc weight_md{weight_dims, get_mkldnn_type(dtype), |
| mkldnn::memory::format::any}; |
| mkldnn::memory::dims output_dims{1, 32, 112, 112}; |
| mkldnn::memory::desc out_md{output_dims, get_mkldnn_type(dtype), |
| mkldnn::memory::format::any}; |
| mkldnn::memory::dims strides{1, 1}; |
| mkldnn::memory::dims padding{1, 1}; |
| |
| mkldnn::convolution_forward::desc desc(mkldnn::prop_kind::forward_training, |
| mkldnn::algorithm::convolution_direct, |
| data_md, weight_md, out_md, strides, |
| padding, padding, mkldnn::padding_kind::zero); |
| mkldnn::convolution_forward::primitive_desc pd(desc, CpuEngine::Get()->get_engine()); |
| std::vector<mkldnn::memory::format> ret(1); |
| ret[0] = static_cast<mkldnn::memory::format>(pd.weights_primitive_desc().desc().data.format); |
| printf("format: %d\n", ret[0]); |
| return ret; |
| } else { |
| return std::vector<mkldnn::memory::format>(); |
| } |
| } |
| |
| struct TestArrayShapes { |
| std::vector<nnvm::TShape> shapes; |
| std::vector<mkldnn::memory::primitive_desc> pds; |
| }; |
| |
| static TestArrayShapes GetTestArrayShapes() { |
| int dtype = mshadow::DataType<mshadow::default_real_t>::kFlag; |
| std::vector<TShape> shapes; |
| std::vector<mkldnn::memory::primitive_desc> pds; |
| { |
| // 1D |
| TShape s(1); |
| s[0] = 279936; |
| shapes.push_back(s); |
| pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::x)); |
| s[0] = 34848; |
| shapes.push_back(s); |
| pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::x)); |
| } |
| { |
| // 2D |
| TShape s(2); |
| s[0] = 96; |
| s[1] = 2916; |
| shapes.push_back(s); |
| pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::nc)); |
| s[0] = 96; |
| s[1] = 363; |
| shapes.push_back(s); |
| pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::nc)); |
| } |
| { |
| // 4D |
| TShape s1(4); |
| s1[0] = 10; s1[1] = 96; s1[2] = 54; s1[3] = 54; |
| shapes.push_back(s1); |
| pds.push_back(GetMemPD(s1, dtype, mkldnn::memory::format::nchw)); |
| |
| TShape s2(4); |
| s2[0] = 96; s2[1] = 3; s2[2] = 11; s2[3] = 11; |
| shapes.push_back(s2); |
| pds.push_back(GetMemPD(s2, dtype, mkldnn::memory::format::oihw)); |
| |
| std::vector<mkldnn::memory::format> formats = GetMKLDNNFormat(4, dtype); |
| pds.push_back(GetMemPD(s1, dtype, formats[0])); |
| pds.push_back(GetMemPD(s2, dtype, formats[1])); |
| } |
| { |
| // 5D |
| TShape s(5); |
| s[0] = 96; s[1] = 1; s[2] = 3; s[3] = 11; s[4] = 11; |
| shapes.push_back(s); |
| pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::goihw)); |
| |
| std::vector<mkldnn::memory::format> formats = GetMKLDNNFormat(5, dtype); |
| pds.push_back(GetMemPD(s, dtype, formats[0])); |
| } |
| |
| TestArrayShapes ret; |
| ret.shapes = shapes; |
| ret.pds = pds; |
| return ret; |
| } |
| |
| TEST(MKLDNN_NDArray, GetDataReorder) { |
| TestArrayShapes tas = GetTestArrayShapes(); |
| std::vector<TShape> shapes = tas.shapes; |
| std::vector<mkldnn::memory::primitive_desc> pds = tas.pds; |
| |
| |
| // Reorder from the default to any other layout. |
| for (auto s : shapes) { |
| NDArray arr(s, Context()); |
| InitDefaultArray(&arr); |
| for (auto pd : pds) { |
| if (s.Size() == pd.get_size() / sizeof(mshadow::default_real_t)) { |
| const mkldnn::memory *mem = arr.GetMKLDNNDataReorder(pd); |
| printf("reorder from ("); |
| for (size_t i = 0; i < s.ndim(); i++) |
| printf("%ld, ", s[i]); |
| printf(") to ("); |
| for (int i = 0; i < pd.desc().data.ndims; i++) |
| printf("%d, ", pd.desc().data.dims[i]); |
| printf("), format: %d\n", pd.desc().data.format); |
| MKLDNNStream::Get()->Submit(false); |
| VerifyMem(*mem); |
| MKLDNNStream::Get()->Cleanup(); |
| } |
| } |
| } |
| |
| // Reorder from a special layout to another layout. |
| for (auto s : shapes) { |
| for (auto from_pd : pds) { |
| if (from_pd.get_size() / sizeof(mshadow::default_real_t) == s.Size()) { |
| NDArray arr(s, Context()); |
        // There is a possibility that the dimensions of an NDArray don't
        // match those of the MKLDNN memory inside it.
| printf("Init array ("); |
| for (size_t i = 0; i < s.ndim(); i++) |
| printf("%ld, ", s[i]); |
| printf(") with MKLDNN memory ("); |
| for (int i = 0; i < from_pd.desc().data.ndims; i++) |
| printf("%d, ", from_pd.desc().data.dims[i]); |
| printf("), format: %d\n", from_pd.desc().data.format); |
| InitMKLDNNArray(&arr, from_pd); |
| for (auto to_pd : pds) { |
| if (to_pd.get_size() / sizeof(mshadow::default_real_t) == s.Size()) { |
| const mkldnn::memory *mem = arr.GetMKLDNNDataReorder(to_pd); |
| printf("reorder from ("); |
| for (size_t i = 0; i < s.ndim(); i++) |
| printf("%ld, ", s[i]); |
| printf("), format: %d to (", |
| arr.GetMKLDNNData()->get_primitive_desc().desc().data.format); |
| for (int i = 0; i < to_pd.desc().data.ndims; i++) |
| printf("%d, ", to_pd.desc().data.dims[i]); |
| printf("), format: %d\n", to_pd.desc().data.format); |
| MKLDNNStream::Get()->Submit(false); |
| VerifyMem(*mem); |
| MKLDNNStream::Get()->Cleanup(); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| struct NDArrayAttrs { |
| NDArray arr; |
| std::string desc; |
| NDArrayAttrs(NDArray arr, std::string desc) : arr(arr), desc(desc) {} |
| }; |
| |
| struct OpAttrs { |
| nnvm::NodeAttrs attrs; |
| std::vector<DispatchMode> dispatches; |
| std::set<OpReqType> requests; |
| int num_inputs; |
| int num_outputs; |
| int input_types; |
| int output_types; |
| }; |
| |
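// Bit flags describing the kinds of test arrays GetTestInputArrays and
// GetTestOutputArrays can produce; they can be OR-ed together, and All
// enables every type.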
| enum ArrayTypes { |
| Normal = 1, |
| MKLDNN = 2, |
| MKLDNNDiffShape = 4, |
| MKLDNNDiffDim = 8, |
| NormalReshaped = 16, |
| MKLDNNReshaped = 32, |
| MKLDNNReshapedDiffShape = 64, |
| MKLDNNReshapedDiffDim = 128, |
| NormalReused = 256, |
| MKLDNNReused = 512, |
| MKLDNNReusedDiffDim = 1024, |
| NormalReshapedReused = 2048, |
| NormalReusedDiffDtype = 4096, |
| All = 8191, |
| }; |
| |
| OpAttrs GetCopyOp() { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("_copy"); |
| attrs.num_inputs = 1; |
| attrs.num_outputs = 1; |
| attrs.dispatches.resize(2); |
| attrs.dispatches[0] = DispatchMode::kFCompute; |
| attrs.dispatches[1] = DispatchMode::kFComputeEx; |
| attrs.requests.insert(OpReqType::kWriteTo); |
| attrs.requests.insert(OpReqType::kWriteInplace); |
| attrs.requests.insert(OpReqType::kAddTo); |
| return attrs; |
| } |
| |
| OpAttrs GetCopyBackwardsOp() { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("_backward_copy"); |
| attrs.num_inputs = 1; |
| attrs.num_outputs = 1; |
| attrs.dispatches.resize(2); |
| attrs.dispatches[0] = DispatchMode::kFCompute; |
| attrs.dispatches[1] = DispatchMode::kFComputeEx; |
| attrs.requests.insert(OpReqType::kWriteTo); |
| attrs.requests.insert(OpReqType::kWriteInplace); |
| attrs.requests.insert(OpReqType::kAddTo); |
| return attrs; |
| } |
| |
| OpAttrs GetReluOp() { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("Activation"); |
| attrs.attrs.dict.insert({"act_type", "relu"}); |
| attrs.attrs.op->attr_parser(&attrs.attrs); |
| attrs.num_inputs = 1; |
| attrs.num_outputs = 1; |
| attrs.dispatches.resize(2); |
| attrs.dispatches[0] = DispatchMode::kFCompute; |
| attrs.dispatches[1] = DispatchMode::kFComputeEx; |
| attrs.requests.insert(OpReqType::kWriteTo); |
| attrs.requests.insert(OpReqType::kWriteInplace); |
| attrs.requests.insert(OpReqType::kAddTo); |
| return attrs; |
| } |
| |
| OpAttrs GetReluBackwardsOp() { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("_backward_Activation"); |
| attrs.attrs.dict.insert({"act_type", "relu"}); |
| attrs.attrs.op->attr_parser(&attrs.attrs); |
| attrs.num_inputs = 2; |
| attrs.num_outputs = 1; |
| attrs.dispatches.resize(2); |
| attrs.dispatches[0] = DispatchMode::kFCompute; |
| attrs.dispatches[1] = DispatchMode::kFComputeEx; |
| attrs.requests.insert(OpReqType::kWriteTo); |
| attrs.requests.insert(OpReqType::kWriteInplace); |
| attrs.requests.insert(OpReqType::kAddTo); |
| return attrs; |
| } |
| |
| OpAttrs GetSumOp() { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("elemwise_add"); |
| attrs.num_inputs = 2; |
| attrs.num_outputs = 1; |
| attrs.dispatches.resize(2); |
| attrs.dispatches[0] = DispatchMode::kFCompute; |
| attrs.dispatches[1] = DispatchMode::kFComputeEx; |
| attrs.requests.insert(OpReqType::kWriteTo); |
| attrs.requests.insert(OpReqType::kWriteInplace); |
| attrs.requests.insert(OpReqType::kAddTo); |
| return attrs; |
| } |
| |
| OpAttrs GetSumBackwardsOp() { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("_backward_add"); |
| attrs.num_inputs = 1; |
| attrs.num_outputs = 2; |
| attrs.dispatches.resize(2); |
| attrs.dispatches[0] = DispatchMode::kFCompute; |
| attrs.dispatches[1] = DispatchMode::kFComputeEx; |
| attrs.requests.insert(OpReqType::kWriteTo); |
| attrs.requests.insert(OpReqType::kWriteInplace); |
| attrs.requests.insert(OpReqType::kAddTo); |
| return attrs; |
| } |
| |
| OpAttrs GetConcatOp(int num_args, int dim) { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("concat"); |
| attrs.num_inputs = num_args; |
| attrs.num_outputs = 1; |
| attrs.attrs.dict.insert({"num_args" , std::to_string(num_args)}); |
| attrs.attrs.dict.insert({"dim" , std::to_string(dim)}); |
| attrs.attrs.op->attr_parser(&attrs.attrs); |
| attrs.dispatches.resize(2); |
| attrs.dispatches[0] = DispatchMode::kFCompute; |
| attrs.dispatches[1] = DispatchMode::kFComputeEx; |
| return attrs; |
| } |
| |
| OpAttrs GetConcatBackwardsOp(int num_args, int dim) { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("_backward_Concat"); |
| attrs.num_inputs = 2; |
| attrs.num_outputs = num_args; |
| attrs.attrs.dict.insert({"num_args" , std::to_string(num_args)}); |
| attrs.attrs.dict.insert({"dim" , std::to_string(dim)}); |
| attrs.attrs.op->attr_parser(&attrs.attrs); |
| attrs.dispatches.resize(2); |
| attrs.dispatches[0] = DispatchMode::kFCompute; |
| attrs.dispatches[1] = DispatchMode::kFComputeEx; |
| return attrs; |
| } |
| |
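// Builds a square shape string for the operator dict,
// e.g. CreateShapeString(3, 2) returns "(3,3)".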
| std::string CreateShapeString(int value, int dim) { |
| std::stringstream ss; |
| ss << "("; |
| for (int i = 0; i < dim; i++) { |
| ss << value; |
| if (i != dim - 1) ss << ","; |
| } |
| ss << ")"; |
| return ss.str(); |
| } |
| |
| |
| OpAttrs GetPoolingOp(int kernel, int dim, int stride, int pad) { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("Pooling"); |
| attrs.num_inputs = 1; |
| attrs.num_outputs = dim == 2 ? 2 : 1; |
| attrs.attrs.dict.insert({"kernel" , CreateShapeString(kernel, dim)}); |
| attrs.attrs.dict.insert({"stride" , CreateShapeString(stride, dim)}); |
| attrs.attrs.dict.insert({"pad" , CreateShapeString(pad, dim)}); |
| attrs.attrs.dict.insert({"pool_type" , "max"}); |
| attrs.attrs.op->attr_parser(&attrs.attrs); |
| return attrs; |
| } |
| |
| OpAttrs GetPoolingBackwardsOp(int kernel, int dim, int stride, int pad) { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("_backward_Pooling"); |
| attrs.num_inputs = dim == 2 ? 5 : 3; |
| attrs.num_outputs = 1; |
| attrs.attrs.dict.insert({"kernel" , CreateShapeString(kernel, dim)}); |
| attrs.attrs.dict.insert({"stride" , CreateShapeString(stride, dim)}); |
| attrs.attrs.dict.insert({"pad" , CreateShapeString(pad, dim)}); |
| attrs.attrs.dict.insert({"pool_type" , "max"}); |
| attrs.attrs.op->attr_parser(&attrs.attrs); |
| return attrs; |
| } |
| |
| |
void PrintVerifyMsg(const NDArrayAttrs &arr1, const NDArrayAttrs &arr2) {
  TShape t1 = arr1.arr.shape();
  TShape t2 = arr2.arr.shape();
  std::cout << "Verifying: " << arr1.desc << " " <<
            t1 << " with " << arr2.desc << " " << t2 << "\n";
}
| |
| OpAttrs GetLRNOp() { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("LRN"); |
| attrs.num_inputs = 1; |
| attrs.num_outputs = 2; |
| attrs.attrs.dict.insert({"nsize" , "3"}); |
| attrs.attrs.op->attr_parser(&attrs.attrs); |
| attrs.dispatches.resize(2); |
| attrs.requests.insert(OpReqType::kWriteTo); |
| attrs.input_types = ArrayTypes::Normal | |
| ArrayTypes::MKLDNN | |
| ArrayTypes::NormalReshaped | |
| ArrayTypes::MKLDNNReshaped; |
| attrs.output_types = ArrayTypes::Normal | |
| ArrayTypes::MKLDNN | |
| ArrayTypes::NormalReshaped | |
| ArrayTypes::MKLDNNReshaped; |
| return attrs; |
| } |
| |
| OpAttrs GetLRNBackwardsOp() { |
| OpAttrs attrs; |
| attrs.attrs.op = Op::Get("_backward_LRN"); |
| attrs.num_inputs = 3; |
| attrs.num_outputs = 1; |
| attrs.attrs.dict.insert({"nsize" , "3"}); |
| attrs.attrs.op->attr_parser(&attrs.attrs); |
| attrs.dispatches.resize(2); |
| attrs.requests.insert(OpReqType::kWriteTo); |
| return attrs; |
| } |
| |
| /* |
| * We want to get a few types of NDArrays for testing: |
| * 1. Normal NDArray |
| * 2. Normal NDArray with MKLDNN layout (output from an MKLDNN operator) |
| * 3. Normal NDArray with MKLDNN layout whose MKLDNN memory may have different |
| * dimensions from the NDArray (result of MKLDNNDataReorderAsync). However, this |
| * type of NDArrays only exists for weight arrays. I don't think we should |
| * pass them to all operators. |
| * In the inference mode, the MKLDNN memory in the weight array will be |
| * reordered to 5 dimensions. |
| * 4. Reshaped/sliced NDArray |
| * 5. Reshaped/sliced NDArray with MKLDNN layout (reshape/slice from Normal NDArray |
| * with MKLDNN layout) |
| * 6. Reshaped/sliced NDArray with MKLDNN layout whose MKLDNN memory may have |
| * different dimensions from the NDArray (result of MKLDNNDataReorderAsync). |
| * However, this type of NDArrays only exists for weight arrays. I don't think |
| * we should pass them to all operators. |
| * In the inference mode, the MKLDNN memory in the weight array will be |
| * reordered to 5 dimensions. |
| * |
| * num_inputs / dim arguments used to scale shape (used for concat backwards to enlarge input shapes) |
| */ |
| std::vector<NDArrayAttrs> GetTestInputArrays( |
| int types = ArrayTypes::All, bool rand = false, |
| int num_inputs = 1, int dim = 0) { |
| TestArrayShapes tas = GetTestArrayShapes(); |
| std::vector<nnvm::TShape> shapes = tas.shapes; |
| std::vector<mkldnn::memory::primitive_desc> pds = tas.pds; |
| |
| std::vector<NDArrayAttrs> in_arrs; |
| std::string desc; |
| |
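  // When expanding along dim 0, slice by num_inputs so that an expanded sliced
  // array's shape stays exactly num_inputs times that of the corresponding
  // unexpanded sliced array.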
| int slice_amount = 1; |
| if (dim == 0) |
| slice_amount = num_inputs; |
| for (auto shape : shapes) { |
| if (dim >= shape.ndim()) |
| continue; |
| shape[dim] = shape[dim] * num_inputs; |
| |
| // Type 1. |
| NDArray arr(shape, Context()); |
| if (types & ArrayTypes::Normal) { |
| InitDefaultArray(&arr, rand); |
| in_arrs.emplace_back(arr, "Normal NDArray"); |
| } |
| |
| // Type 4 |
| arr = NDArray(shape, Context()); |
| if (types & ArrayTypes::NormalReshaped) { |
| InitDefaultArray(&arr, rand); |
| in_arrs.emplace_back(arr.Slice(slice_amount, arr.shape()[0] - slice_amount), |
| "Reshaped Normal NDArray"); |
| } |
| |
| |
| for (auto pd : pds) { |
| if (num_inputs > 1) { |
        // Preserve the pd if the layout matches; otherwise just expand along dim 0.
| if (shape.ndim() == pd.desc().data.ndims) |
| pd = GetExpandedMemPD(pd, num_inputs, dim); |
| else |
| pd = GetExpandedMemPD(pd, num_inputs); |
| } |
| |
| if (shape.Size() != pd.get_size() / sizeof(mshadow::default_real_t)) |
| continue; |
| |
| // Type 2, 3. |
| arr = NDArray(shape, Context()); |
| if (shape.ndim() == pd.desc().data.ndims && IsSameShape(pd, shape) |
| && types & ArrayTypes::MKLDNN) { |
| desc = "MKLDNN NDArray"; |
| InitMKLDNNArray(&arr, pd, rand); |
| in_arrs.emplace_back(arr, desc); |
| } else if (shape.ndim() == pd.desc().data.ndims && !IsSameShape(pd, shape) |
| && types & ArrayTypes::MKLDNNDiffShape) { |
| desc = "MKLDNN NDArray with different shape"; |
| InitMKLDNNArray(&arr, pd, rand); |
| in_arrs.emplace_back(arr, desc); |
| } else if (shape.ndim() != pd.desc().data.ndims && types & ArrayTypes::MKLDNNDiffDim) { |
| std::stringstream ss; |
| ss << "MKLDNN NDArray with different dim " << |
| shape.ndim() << "/" << pd.desc().data.ndims; |
| desc = ss.str(); |
| InitMKLDNNArray(&arr, pd, rand); |
| in_arrs.emplace_back(arr, desc); |
| } |
| |
| |
| // Type 5, 6. |
| arr = NDArray(shape, Context()); |
| if (shape.ndim() == pd.desc().data.ndims && IsSameShape(pd, shape) |
| && types & ArrayTypes::MKLDNNReshaped) { |
| desc = "Reshaped MKLDNN NDArray"; |
| InitMKLDNNArray(&arr, pd, rand); |
| in_arrs.emplace_back(arr.Slice(slice_amount, arr.shape()[0] - slice_amount), desc); |
| } else if (shape.ndim() == pd.desc().data.ndims && !IsSameShape(pd, shape) |
| && types & ArrayTypes::MKLDNNReshapedDiffShape) { |
| desc = "Reshaped MKLDNN NDArray with different shape"; |
| InitMKLDNNArray(&arr, pd, rand); |
| in_arrs.emplace_back(arr.Slice(slice_amount, arr.shape()[0] - slice_amount), desc); |
| } else if (shape.ndim() != pd.desc().data.ndims |
| && types & ArrayTypes::MKLDNNReshapedDiffDim) { |
| std::stringstream ss; |
| ss << "MKLDNN NDArray with different dim " << |
| shape.ndim() << "/" << pd.desc().data.ndims; |
| desc = ss.str(); |
| InitMKLDNNArray(&arr, pd, rand); |
| in_arrs.emplace_back(arr.Slice(slice_amount, arr.shape()[0] - slice_amount), desc); |
| } |
| } |
| } |
| return in_arrs; |
| } |
| |
| /* |
| * We want to get a few types of NDArrays for testing: |
| * 1. Normal NDArray |
| * 2. Normal NDArray with MKLDNN layout (output from an MKLDNN operator) |
| * 3. Normal NDArray with MKLDNN layout whose MKLDNN memory may have different |
| * dimensions from the NDArray (result of MKLDNNDataReorderAsync). However, this |
| * type of NDArrays only exists for weight arrays. I don't think we should |
| * pass them to all operators. |
| * In the inference mode, the MKLDNN memory in the weight array will be |
| * reordered to 5 dimensions. |
| * 4. Reshaped/sliced NDArray |
| * 5. Reused NDArray (this is created by the MXNet executor). This type of |
| * NDArrays can only be used as output arrays. |
| * 6. Reused NDArray converted from an array with a different data type. |
| * 7. Reused reshaped/sliced NDArray. |
| * 8. Reused NDArray with MKLDNN layout. |
| * 9. Reused NDArray with MKLDNN layout of different dimensions. |
| * |
| * Optional num_inputs / dim args can be passed to modify input shape (used for Concat test) |
| */ |
| std::vector<NDArrayAttrs> GetTestOutputArrays( |
| const TShape &shp, |
| const std::vector<mkldnn::memory::primitive_desc> &pds, |
    std::vector<float> scale = {1}, bool rand = true, int types = ArrayTypes::All) {
| TShape shape = shp; |
| |
  for (size_t dim = 0; dim < scale.size(); dim++)
| shape[dim] = static_cast<int>(shape[dim] * scale[dim]); |
| |
| std::vector<NDArrayAttrs> in_arrs; |
| std::string desc; |
| // Type 1. |
| NDArray arr(shape, Context()); |
| |
| if (types & ArrayTypes::Normal) { |
| in_arrs.emplace_back(arr, "Normal NDArray"); |
| InitDefaultArray(&in_arrs.back().arr, rand); |
| } |
| |
| TShape tmp_shape = shape; |
| if (types & ArrayTypes::NormalReshaped) { |
| // Type 4. |
| tmp_shape[0] = shape[0] * 2; |
| NDArray arr0(tmp_shape, Context()); |
| InitDefaultArray(&arr0, rand); |
| in_arrs.emplace_back(arr0.Slice(1, shape[0] + 1), "Reshaped NDArray"); |
| } |
| |
| nnvm::TShape s(1); |
| if (types & ArrayTypes::NormalReused) { |
| // Type 5. |
| // Get a reused version. |
| s[0] = shape.Size(); |
| NDArray arr1(s, Context()); |
| arr1 = arr1.AsArray(shape, arr1.dtype()); |
| InitDefaultArray(&arr1, rand); |
| in_arrs.emplace_back(arr1, "Reused NDArray"); |
| } |
| |
| if (types & ArrayTypes::NormalReusedDiffDtype) { |
| // Type 6. |
| s[0] = shape.Size() * GetTypeSize(mshadow::default_type_flag); |
| NDArray arr2(s, Context(), true, mshadow::kUint8); |
| arr2 = arr2.AsArray(shape, mshadow::default_type_flag); |
| InitDefaultArray(&arr2, rand); |
| in_arrs.emplace_back(arr2, "Reused NDArray with diff data type"); |
| } |
| |
| if (types & ArrayTypes::NormalReshapedReused) { |
| // Type 7 |
| s[0] = shape.Size() * GetTypeSize(mshadow::default_type_flag) * 2; |
| NDArray arr3(s, Context(), true, mshadow::kUint8); |
| tmp_shape[0] = shape[0] * 2; |
| arr3 = arr3.AsArray(tmp_shape, mshadow::default_type_flag); |
| InitDefaultArray(&arr3, rand); |
| in_arrs.emplace_back(arr3.Slice(1, shape[0] + 1), "Reused+Reshaped NDArray"); |
| } |
| |
| for (auto pd : pds) { |
| if (shape.Size() != pd.get_size() / sizeof(mshadow::default_real_t)) |
| continue; |
| |
    if (scale.size() > static_cast<size_t>(pd.desc().data.ndims))
| continue; |
| |
    for (size_t dim = 0; dim < scale.size(); dim++)
| pd = GetExpandedMemPD(pd, scale[dim]); |
| |
| // Type 2, 3. |
| arr = NDArray(shape, Context()); |
| desc = "MKLDNN NDArray"; |
| if (shape.ndim() != pd.desc().data.ndims) { |
| std::stringstream ss; |
| ss << "MKLDNN NDArray with different memory layout " |
| << shape.ndim() << "/" << pd.desc().data.ndims; |
| desc = ss.str(); |
| } |
| |
| if ((types & ArrayTypes::MKLDNN && shape.ndim() == pd.desc().data.ndims) || |
| (types & ArrayTypes::MKLDNNDiffDim && shape.ndim() != pd.desc().data.ndims)) { |
| in_arrs.emplace_back(arr, desc); |
| InitMKLDNNArray(&in_arrs.back().arr, pd, rand); |
| } |
| |
| // Type 8, 9. |
| // Get a reused version. |
| nnvm::TShape s(1); |
| s[0] = shape.Size(); |
| NDArray arr = NDArray(s, Context()); |
| arr = arr.AsArray(shape, arr.dtype()); |
| InitMKLDNNArray(&arr, pd, rand); |
| desc = "Reused MKLDNN NDArray"; |
| if (shape.ndim() != pd.desc().data.ndims) { |
| std::stringstream ss; |
| ss << "Reused MKLDNN NDArray with different memory layout " |
| << shape.ndim() << "/" << pd.desc().data.ndims; |
| desc = ss.str(); |
| } |
| |
| if ((types & ArrayTypes::MKLDNNReused && shape.ndim() == pd.desc().data.ndims) || |
| (types & ArrayTypes::MKLDNNReusedDiffDim && shape.ndim() != pd.desc().data.ndims)) { |
| in_arrs.emplace_back(arr, desc); |
| } |
| } |
| return in_arrs; |
| } |
| |
| TEST(MKLDNN_NDArray, GetTestInputArraysConcat) { |
| auto in_arrs = GetTestInputArrays(); |
| for (int dim = 0; dim < 5; dim++) { |
| for (int num_inputs = 2; num_inputs < 5; num_inputs++) { |
| std::vector<NDArrayAttrs> expanded_arrs = GetTestInputArrays( |
| ArrayTypes::All, false, num_inputs, dim); |
| int i = 0; |
| for (auto &arr : in_arrs) { |
| if (dim >= arr.arr.shape().ndim()) |
| continue; |
| auto ex_arr = expanded_arrs[i]; |
| PrintVerifyMsg(arr, ex_arr); |
| EXPECT_EQ(arr.arr.shape().Size() * num_inputs, ex_arr.arr.shape().Size()); |
| EXPECT_EQ(arr.arr.shape()[dim] * num_inputs, ex_arr.arr.shape()[dim]); |
| i++; |
| } |
| } |
| } |
| } |
| |
| TEST(MKLDNN_NDArray, GetTestOutputArraysConcat) { |
| auto shapes_pds = GetTestArrayShapes(); |
  std::vector<nnvm::TShape> shapes = shapes_pds.shapes;
| std::vector<mkldnn::memory::primitive_desc> pds = shapes_pds.pds; |
| for (auto &shape : shapes) { |
| for (int dim = 0; dim < 5; dim++) { |
| for (int num_inputs = 2; num_inputs < 5; num_inputs++) { |
| if (shape.ndim() <= dim) |
| continue; |
| std::cout << "Extending " << shape << " dim " << |
| dim << " and " << num_inputs << "num_inputs\n"; |
| std::vector<float> scale_vector(shape.ndim()); |
| for (int i = 0; i < shape.ndim(); i++) |
| scale_vector[i] = 1; |
| scale_vector[dim] = num_inputs; |
| auto output_arrs = GetTestOutputArrays(shape, pds, scale_vector); |
        for (auto &out_arr : output_arrs) {
          auto out_shape = out_arr.arr.shape();
          EXPECT_EQ(shape.Size() * num_inputs, out_shape.Size());
          EXPECT_EQ(shape[dim] * num_inputs, out_shape[dim]);
        }
| } |
| } |
| } |
| } |
| |
| void VerifyCopyResult(const std::vector<NDArray *> &in_arrs, |
| const std::vector<NDArray *> &out_arrs) { |
| NDArray tmp1 = in_arrs[0]->Reorder2Default(); |
| NDArray tmp2 = out_arrs[0]->Reorder2Default(); |
| EXPECT_EQ(tmp1.shape().Size(), tmp2.shape().Size()); |
| TBlob d1 = tmp1.data(); |
| TBlob d2 = tmp2.data(); |
| EXPECT_EQ(memcmp(d1.dptr_, d2.dptr_, |
| tmp1.shape().Size() * sizeof(mshadow::default_real_t)), 0); |
| } |
| |
| void AssertEqual(const std::vector<NDArray *> &in_arrs, |
| const std::vector<NDArray *> &out_arrs) { |
| NDArray tmp1 = in_arrs[0]->Reorder2Default(); |
| NDArray tmp2 = out_arrs[0]->Reorder2Default(); |
| EXPECT_EQ(tmp1.shape().Size(), tmp2.shape().Size()); |
| TBlob blob1 = tmp1.data(); |
| TBlob blob2 = tmp2.data(); |
| mshadow::default_real_t *d1 = static_cast<mshadow::default_real_t*>(blob1.dptr_); |
| mshadow::default_real_t *d2 = static_cast<mshadow::default_real_t*>(blob2.dptr_); |
  for (size_t i = 0; i < tmp1.shape().Size(); i++)
| ASSERT_FLOAT_EQ(d1[i], d2[i]); |
| } |
| |
| void VerifyActResult(const std::vector<NDArray *> &in_arrs, |
| const std::vector<NDArray *> &out_arrs) { |
| NDArray tmp1 = in_arrs[0]->Reorder2Default(); |
| NDArray tmp2 = out_arrs[0]->Reorder2Default(); |
| TBlob blob1 = tmp1.data(); |
| TBlob blob2 = tmp2.data(); |
| mshadow::default_real_t *d1 = static_cast<mshadow::default_real_t*>(blob1.dptr_); |
| mshadow::default_real_t *d2 = static_cast<mshadow::default_real_t*>(blob2.dptr_); |
| EXPECT_EQ(tmp1.shape().Size(), tmp2.shape().Size()); |
| for (size_t i = 0; i < tmp1.shape().Size(); i++) { |
| EXPECT_EQ(std::fmax(d1[i], 0), d2[i]); |
| } |
| } |
| |
| void VerifySumResult(const std::vector<NDArray *> &in_arrs, |
| const std::vector<NDArray *> &out_arrs) { |
| NDArray in1 = in_arrs[0]->Reorder2Default(); |
| NDArray in2 = in_arrs[1]->Reorder2Default(); |
| NDArray out = out_arrs[0]->Reorder2Default(); |
| EXPECT_EQ(in1.shape().Size(), in2.shape().Size()); |
| EXPECT_EQ(in1.shape().Size(), out.shape().Size()); |
| |
| mshadow::default_real_t *d1 = in1.data().dptr<mshadow::default_real_t>(); |
| mshadow::default_real_t *d2 = in2.data().dptr<mshadow::default_real_t>(); |
| mshadow::default_real_t *o = out.data().dptr<mshadow::default_real_t>(); |
| for (size_t i = 0; i < in1.shape().Size(); i++) |
| ASSERT_EQ(d1[i] + d2[i], o[i]); |
| } |
| |
| void VerifyActBackwardsResult(const std::vector<NDArray *> &in_arrs, |
| const std::vector<NDArray *> &out_arrs) { |
| NDArray tmp1 = in_arrs[0]->Reorder2Default(); // out grads |
| NDArray tmp2 = in_arrs[1]->Reorder2Default(); // input |
| NDArray tmp3 = out_arrs[0]->Reorder2Default(); // input grads |
| TBlob blob1 = tmp1.data(); |
| TBlob blob2 = tmp2.data(); |
| TBlob blob3 = tmp3.data(); |
| mshadow::default_real_t *d1 = static_cast<mshadow::default_real_t*>(blob1.dptr_); |
| mshadow::default_real_t *d2 = static_cast<mshadow::default_real_t*>(blob2.dptr_); |
| mshadow::default_real_t *d3 = static_cast<mshadow::default_real_t*>(blob3.dptr_); |
| EXPECT_EQ(tmp1.shape().Size(), tmp2.shape().Size()); |
| for (size_t i = 0; i < tmp1.shape().Size(); i++) { |
| ASSERT_EQ(d2[i] > 0 ? d1[i] : 0, d3[i]); |
| } |
| } |
| |
| void VerifySumBackwardsResult(const std::vector<NDArray *> &in_arrs, |
| const std::vector<NDArray *> &out_arrs) { |
| NDArray out_grads = in_arrs[0]->Reorder2Default(); // out grads |
| NDArray input_grads1 = out_arrs[0]->Reorder2Default(); // input grads |
| NDArray input_grads2 = out_arrs[1]->Reorder2Default(); // input grads |
| mshadow::default_real_t *og = out_grads.data().dptr<mshadow::default_real_t>(); |
| mshadow::default_real_t *ig1 = input_grads1.data().dptr<mshadow::default_real_t>(); |
| mshadow::default_real_t *ig2 = input_grads2.data().dptr<mshadow::default_real_t>(); |
| for (size_t i = 0; i < out_grads.shape().Size(); i++) { |
| ASSERT_EQ(og[i], ig1[i]); |
| ASSERT_EQ(og[i], ig2[i]); |
| } |
| } |
| |
| /* |
| * Determines axis ndarrays are concatenated by |
| * Used to verify concat/concat backwards operator |
| */ |
| int GetDim(TShape input_shape, TShape output_shape) { |
| CHECK(input_shape.Size() != output_shape.Size()); |
| for (size_t i = 0; i < input_shape.ndim(); i++) { |
| if (input_shape[i] != output_shape[i]) |
| return i; |
| } |
| return -1; |
| } |
| |
| /* |
| * Calculates the size of continuous block of array inside larger concatenated array |
| * Used to verify concat/concat backwards operator |
| */ |
| int GetBlockSize(TShape shape, int dim) { |
| int block_size = 1; |
| for (int i = shape.ndim() - 1; i >= dim; i--) |
| block_size *= shape[i]; |
| return block_size; |
| } |
| |
| void VerifyConcatResult(const std::vector<NDArray *> &in_arrs, |
| const std::vector<NDArray *> &out_arrs) { |
| int num_inputs = in_arrs.size(); |
| int input_size = in_arrs[0]->shape().Size(); |
| TShape input_shape = in_arrs[0]->shape(); |
| NDArray output = out_arrs[0]->Reorder2Default(); |
| size_t total_size = output.shape().Size(); |
| EXPECT_EQ(input_size * num_inputs, total_size); |
| mshadow::default_real_t *out_data = output.data().dptr<mshadow::default_real_t>(); |
| |
| int dim = GetDim(input_shape, output.shape()); |
| int block_size = GetBlockSize(input_shape, dim); |
| int num_blocks = input_size / block_size; |
| for (size_t input_num = 0; input_num < num_inputs; input_num++) { |
| NDArray tmp = in_arrs[input_num]->Reorder2Default(); |
| mshadow::default_real_t* data = tmp.data().dptr<mshadow::default_real_t>(); |
| for (size_t block_num = 0; block_num < num_blocks; block_num++) { |
| for (size_t i = 0; i < block_size; i++) |
| ASSERT_EQ(data[block_num * block_size + i], |
| out_data[(block_num * num_inputs + input_num) * block_size + i]); |
| } |
| } |
| } |
| |
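// For kAddTo requests: verifies new_output = original_output + computed result
// by subtracting the original outputs from the new ones and passing the
// differences to verify_fn.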
| void VerifyAddRequest(const std::vector<NDArray*> &in_arrs, |
| const std::vector<NDArray*> &original_outputs, |
| const std::vector<NDArray*> &new_outputs, |
| VerifyFunc verify_fn) { |
| CHECK(original_outputs.size() == new_outputs.size()); |
  // One temporary NDArray per output; reserve up front so that the stored
  // pointers stay valid (a single shared temporary would make all entries
  // alias the same array).
  std::vector<NDArray> tmps;
  tmps.reserve(new_outputs.size());
  std::vector<NDArray*> tmp_outputs;
  for (size_t i = 0; i < new_outputs.size(); i++) {
    tmps.push_back(new_outputs[i]->Reorder2Default() - original_outputs[i]->Reorder2Default());
    tmp_outputs.push_back(&tmps.back());
  }
| Engine::Get()->WaitForAll(); |
| verify_fn(in_arrs, tmp_outputs); |
| } |
| |
| void VerifyConcatBackwardsResult(const std::vector<NDArray *> &in_arrs, |
| const std::vector<NDArray *> &out_arrs) { |
  // in_arrs contains the larger (concatenated) array; out_arrs are the smaller ones
| int num_inputs = out_arrs.size(); |
| int input_size = out_arrs[0]->shape().Size(); |
| TShape input_shape = out_arrs[0]->shape(); |
| NDArray output = in_arrs[0]->Reorder2Default(); |
| size_t total_size = output.shape().Size(); |
| EXPECT_EQ(input_size * num_inputs, total_size); |
| mshadow::default_real_t *out_data = output.data().dptr<mshadow::default_real_t>(); |
| |
| int dim = GetDim(input_shape, output.shape()); |
| int block_size = GetBlockSize(input_shape, dim); |
| int num_blocks = input_size / block_size; |
| for (size_t input_num = 0; input_num < num_inputs; input_num++) { |
| NDArray tmp = out_arrs[input_num]->Reorder2Default(); |
| mshadow::default_real_t* data = tmp.data().dptr<mshadow::default_real_t>(); |
| for (size_t block_num = 0; block_num < num_blocks; block_num++) { |
| for (size_t i = 0; i < block_size; i++) |
| ASSERT_EQ(data[block_num * block_size + i], |
| out_data[(block_num * num_inputs + input_num) * block_size + i]); |
| } |
| } |
| } |
| |
| TEST(MKLDNN_NDArray, CopyFrom) { |
| TestArrayShapes tas = GetTestArrayShapes(); |
| std::vector<mkldnn::memory::primitive_desc> pds = tas.pds; |
| |
| std::vector<NDArrayAttrs> in_arrs = GetTestInputArrays(); |
| for (auto &in_arr : in_arrs) { |
| if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) |
| continue; |
| std::vector<NDArrayAttrs> out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds); |
| for (auto &out_arr : out_arrs) { |
| const mkldnn::memory *mem = in_arr.arr.GetMKLDNNData(); |
| out_arr.arr.CopyFrom(*mem); |
| MKLDNNStream::Get()->Submit(); |
| std::vector<NDArray *> inputs(1); |
| inputs[0] = &in_arr.arr; |
| VerifyCopyResult(inputs, {&out_arr.arr}); |
| } |
| } |
| } |
| |
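// Runs an operator under each request type it supports (kWriteTo,
// kWriteInplace, kAddTo) over all combinations of test input/output arrays
// and dispatch modes, then checks the results with verify_fn.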
| void TestOp(const OpAttrs &attrs, VerifyFunc verify_fn) { |
| std::vector<NDArray*> inputs(attrs.num_inputs); |
| std::vector<NDArray*> outputs(attrs.num_outputs); |
| std::vector<OpReqType> req(attrs.num_outputs); |
| std::vector<NDArrayAttrs> in_arrs; |
| std::vector<std::vector<NDArrayAttrs>> out_arrs(attrs.num_outputs); |
| std::vector<DispatchMode> dispatches = attrs.dispatches; |
| |
| TestArrayShapes tas = GetTestArrayShapes(); |
| std::vector<mkldnn::memory::primitive_desc> pds = tas.pds; |
| |
| if (attrs.requests.find(OpReqType::kWriteTo) != attrs.requests.end()) { |
| std::vector<NDArrayAttrs> in_arrs = GetTestInputArrays(); |
| for (auto &in_arr : in_arrs) { |
| for (auto &dispatch : dispatches) { |
| std::vector<std::vector<NDArrayAttrs>> out_arrs(attrs.num_outputs); |
| for (int i = 0; i < attrs.num_outputs; i++) |
| out_arrs[i] = GetTestOutputArrays(in_arr.arr.shape(), pds); |
| for (int i = 0; i < attrs.num_inputs; i++) |
| inputs[i] = &in_arr.arr; |
| for (size_t output_i = 0; output_i < out_arrs[0].size(); output_i++) { |
| for (int i = 0; i < attrs.num_outputs; i++) { |
| req[i] = kWriteTo; |
| outputs[i] = &out_arrs[i][output_i].arr; |
| } |
| PrintVerifyMsg(in_arr, out_arrs[0][output_i]); |
| Imperative::Get()->InvokeOp(Context(), attrs.attrs, inputs, |
| outputs, req, dispatch, mxnet::OpStatePtr()); |
| Engine::Get()->WaitForAll(); |
| verify_fn(inputs, outputs); |
| } |
| } |
| } |
| } |
| |
| if (attrs.requests.find(OpReqType::kWriteInplace) != attrs.requests.end()) { |
| for (auto &dispatch : dispatches) { |
| in_arrs = GetTestInputArrays(); |
| for (auto &arr : in_arrs) { |
| // If the array is a view, we shouldn't write data to it. |
| if (arr.arr.IsView()) |
| continue; |
| NDArrayAttrs orig(arr.arr.Copy(arr.arr.ctx()), "InPlace Copy"); |
| for (int i = 0; i < attrs.num_inputs; i++) |
| inputs[i] = &arr.arr; |
| for (int i = 0; i < attrs.num_outputs; i++) { |
| req[i] = kWriteInplace; |
| outputs[i] = &arr.arr; |
| } |
| PrintVerifyMsg(orig, arr); |
| Imperative::Get()->InvokeOp(Context(), attrs.attrs, inputs, outputs, req, |
| dispatch, mxnet::OpStatePtr()); |
| Engine::Get()->WaitForAll(); |
| std::vector<NDArray *> orig_inputs(attrs.num_inputs); |
| for (int i = 0; i < attrs.num_inputs; i++) |
| orig_inputs[i] = &orig.arr; |
| verify_fn(orig_inputs, outputs); |
| } |
| } |
| } |
| |
| if (attrs.requests.find(OpReqType::kAddTo) != attrs.requests.end()) { |
| std::vector<NDArray*> original_outputs(attrs.num_outputs); |
| in_arrs = GetTestInputArrays(); |
| for (auto &in_arr : in_arrs) { |
| for (auto &dispatch : dispatches) { |
| for (int i = 0; i < attrs.num_outputs; i++) |
| out_arrs[i] = GetTestOutputArrays(in_arr.arr.shape(), pds); |
      for (int i = 0; i < attrs.num_inputs; i++)
| inputs[i] = &in_arr.arr; |
| for (size_t output_i = 0; output_i < out_arrs[0].size(); output_i++) { |
        // Keep a separate copy per output; a single shared temporary would
        // make every entry of original_outputs alias the same NDArray.
        std::vector<NDArray> original_outputs_data(attrs.num_outputs);
        for (int i = 0; i < attrs.num_outputs; i++) {
          auto &out_arr = out_arrs[i][output_i];
          original_outputs_data[i] = out_arr.arr.Copy(out_arr.arr.ctx());
          original_outputs[i] = &original_outputs_data[i];
          outputs[i] = &out_arr.arr;
          req[i] = kAddTo;
        }
| PrintVerifyMsg(in_arr, out_arrs[0][output_i]); |
| Imperative::Get()->InvokeOp(Context(), attrs.attrs, inputs, |
| outputs, req, dispatch, mxnet::OpStatePtr()); |
| Engine::Get()->WaitForAll(); |
| VerifyAddRequest(inputs, original_outputs, outputs, verify_fn); |
| } |
| } |
| } |
| } |
| } |
| |
| void TestConcatOp(const OpAttrs &attrs, VerifyFunc verify_fn, |
| bool backwards = false) { |
| std::vector<NDArray*> inputs(attrs.num_inputs); |
| std::vector<NDArray*> outputs(attrs.num_outputs); |
| std::vector<OpReqType> req(attrs.num_outputs); |
| std::vector<DispatchMode> dispatches = attrs.dispatches; |
| |
| TestArrayShapes tas = GetTestArrayShapes(); |
| std::vector<mkldnn::memory::primitive_desc> pds = tas.pds; |
| |
| std::vector<NDArrayAttrs> in_arrs = GetTestInputArrays(); |
| |
| // concat backwards uses scaled up inputs |
| if (backwards) { |
    std::string str_dim = attrs.attrs.dict.at("dim");
    int dim = std::stoi(str_dim);
| in_arrs = GetTestInputArrays(ArrayTypes::All, false, attrs.num_outputs, dim); |
| } |
| |
| for (auto &in_arr : in_arrs) { |
| for (auto &dispatch : dispatches) { |
| std::vector<std::vector<NDArrayAttrs>> out_arrs(attrs.num_outputs); |
| |
      std::string str_dim = attrs.attrs.dict.at("dim");
      int dim = std::stoi(str_dim);
| if (dim >= in_arr.arr.shape().ndim()) |
| continue; |
| float scale = backwards ? 1 / static_cast<float>(attrs.num_outputs) : |
| static_cast<float>(attrs.num_inputs); |
| |
| std::vector<float> scale_vector(in_arr.arr.shape().ndim()); |
| for (int i = 0; i < in_arr.arr.shape().ndim(); i++) |
| scale_vector[i] = 1; |
| scale_vector[dim] = scale; |
| for (int i = 0; i < attrs.num_outputs; i++) |
| out_arrs[i] = GetTestOutputArrays(in_arr.arr.shape(), pds, scale_vector); |
| |
| for (int i = 0; i < attrs.num_inputs; i++) |
| inputs[i] = &in_arr.arr; |
| |
| for (size_t output_i = 0; output_i < out_arrs[0].size(); output_i++) { |
| for (int i = 0; i < attrs.num_outputs; i++) { |
| req[i] = kWriteTo; |
| outputs[i] = &out_arrs[i][output_i].arr; |
| } |
| PrintVerifyMsg(in_arr, out_arrs[0][output_i]); |
| Imperative::Get()->InvokeOp(Context(), attrs.attrs, inputs, |
| outputs, req, dispatch, mxnet::OpStatePtr()); |
| Engine::Get()->WaitForAll(); |
| verify_fn(inputs, outputs); |
| } |
| } |
| } |
| } |
| |
// Compares the output of FCompute with that of FComputeEx, for both the
// forward and the backward pass.
| void TestOpEx(const OpAttrs &forward_attrs, const OpAttrs &backwards_attrs) { |
| std::vector<NDArray*> inputs(forward_attrs.num_inputs); |
| std::vector<NDArray*> outputs(forward_attrs.num_outputs); |
| std::vector<NDArray*> ex_outputs(forward_attrs.num_outputs); |
| |
| std::vector<NDArray*> backwards_input(backwards_attrs.num_inputs); |
| std::vector<NDArray*> backwards_outputs(backwards_attrs.num_outputs); |
| std::vector<NDArray*> backwards_ex_outputs(backwards_attrs.num_outputs); |
| |
| |
| std::vector<OpReqType> req(forward_attrs.num_outputs); |
| std::vector<OpReqType> back_req(backwards_attrs.num_outputs); |
| |
| TestArrayShapes tas = GetTestArrayShapes(); |
| std::vector<mkldnn::memory::primitive_desc> pds = tas.pds; |
| |
| std::vector<NDArrayAttrs> in_arrs = GetTestInputArrays(forward_attrs.input_types, true); |
| std::vector<std::vector<NDArrayAttrs>> out_arrs(forward_attrs.num_outputs); |
| std::vector<std::vector<NDArrayAttrs>> ex_out_arrs(forward_attrs.num_outputs); |
| |
| if (forward_attrs.requests.find(OpReqType::kWriteTo) != forward_attrs.requests.end()) { |
    for (size_t i1 = 0; i1 < in_arrs.size(); i1++) {
| auto in_arr = in_arrs[i1]; |
| |
| // TODO(alex): (MXNET-845) Remove when MKLDNN supports other dims |
| if (in_arr.arr.shape().ndim() != 4) |
| continue; |
| |
| for (int i = 0; i < forward_attrs.num_outputs; i++) { |
| out_arrs[i] = |
| GetTestOutputArrays(in_arr.arr.shape(), pds, {1}, forward_attrs.output_types); |
| ex_out_arrs[i] = |
| GetTestOutputArrays(in_arr.arr.shape(), pds, {1}, forward_attrs.output_types); |
| } |
| |
| for (int i = 0; i < forward_attrs.num_inputs; i++) |
| inputs[i] = &in_arr.arr; |
| |
| for (size_t output_i = 0; output_i < out_arrs[0].size(); output_i++) { |
| if (out_arrs[0][output_i].arr.IsMKLDNNData()) |
| continue; |
| |
| for (int i = 0; i < forward_attrs.num_outputs; i++) { |
| req[i] = kWriteTo; |
| outputs[i] = &out_arrs[i][output_i].arr; |
| ex_outputs[i] = &ex_out_arrs[i][output_i].arr; |
| } |
| Imperative::Get()->set_is_training(true); |
| |
| PrintVerifyMsg(in_arr, out_arrs[0][output_i]); |
| Imperative::Get()->InvokeOp( |
| Context(), forward_attrs.attrs, inputs, outputs, req, |
| DispatchMode::kFCompute, mxnet::OpStatePtr()); |
| Imperative::Get()->InvokeOp( |
| Context(), forward_attrs.attrs, inputs, ex_outputs, req, |
| DispatchMode::kFComputeEx, mxnet::OpStatePtr()); |
| Engine::Get()->WaitForAll(); |
| AssertEqual(outputs, ex_outputs); |
| |
| // backwards test performed same time since output needed |
| backwards_input[0] = outputs[0]; // output grad |
| backwards_input[1] = inputs[0]; // input |
| backwards_input[2] = outputs[1]; // out norm |
| |
| auto tmp_output = GetTestInputArrays(forward_attrs.input_types, true)[i1]; |
| backwards_outputs[0] = &tmp_output.arr; |
| |
| auto tmp_output2 = GetTestInputArrays(forward_attrs.input_types, true)[i1]; |
| backwards_ex_outputs[0] = &tmp_output2.arr; |
| |
| for (int i = 0; i < backwards_attrs.num_outputs; i++) |
| back_req[i] = kWriteTo; |
| |
| std::cout << "Backwards: "; |
| PrintVerifyMsg(out_arrs[0][output_i], tmp_output); |
| Imperative::Get()->InvokeOp( |
| Context(), backwards_attrs.attrs, backwards_input, backwards_outputs, |
| back_req, DispatchMode::kFCompute, mxnet::OpStatePtr()); |
| Imperative::Get()->InvokeOp( |
| Context(), backwards_attrs.attrs, backwards_input, backwards_ex_outputs, |
| back_req, DispatchMode::kFComputeEx, mxnet::OpStatePtr()); |
| Engine::Get()->WaitForAll(); |
| AssertEqual(backwards_outputs, backwards_ex_outputs); |
| } |
| } |
| } |
| } |
| |
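// Standard pooling output size along one spatial axis. For example,
// width = 54, kernel = 3, padding = 1, stride = 2: (54 - 3 + 2) / 2 + 1 = 27.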
| int CalculateWidthPoolOutput(int width, int kernel, int padding, int stride) { |
| return (width - kernel + 2 * padding) / stride + 1; |
| } |
| |
| void TestPoolingOp(const OpAttrs &forward_attrs, const OpAttrs &backwards_attrs) { |
| std::vector<NDArray*> inputs(forward_attrs.num_inputs); |
| std::vector<NDArray*> outputs(forward_attrs.num_outputs); |
| std::vector<NDArray*> ex_outputs(forward_attrs.num_outputs); |
| |
| std::vector<NDArray*> backwards_input(backwards_attrs.num_inputs); |
| std::vector<NDArray*> backwards_outputs(backwards_attrs.num_outputs); |
| std::vector<NDArray*> backwards_ex_outputs(backwards_attrs.num_outputs); |
| |
| |
| std::vector<OpReqType> req(forward_attrs.num_outputs); |
| std::vector<OpReqType> back_req(backwards_attrs.num_outputs); |
| std::vector<DispatchMode> dispatches = forward_attrs.dispatches; |
| |
| TestArrayShapes tas = GetTestArrayShapes(); |
| std::vector<mkldnn::memory::primitive_desc> pds = tas.pds; |
| |
| mxnet::op::PoolingParam param; |
| param.Init(forward_attrs.attrs.dict); |
| TShape kernel = param.kernel; |
| TShape padding = param.pad; |
| TShape stride = param.stride; |
| |
| std::vector<NDArrayAttrs> in_arrs = GetTestInputArrays(); |
| std::vector<std::vector<NDArrayAttrs>> out_arrs(forward_attrs.num_outputs); |
| std::vector<std::vector<NDArrayAttrs>> ex_out_arrs(forward_attrs.num_outputs); |
| |
  for (size_t i1 = 0; i1 < in_arrs.size(); i1++) {
| auto in_arr = in_arrs[i1]; |
| |
    // Can only pool inputs whose ndim is kernel.ndim() + 2 (4D and 5D here).
| TShape input_shape = in_arr.arr.shape(); |
| if (input_shape.ndim() != kernel.ndim() + 2) |
| continue; |
| // cannot pool if ndarray and mkldnn memory have different ndim |
| if (in_arr.arr.IsView() || in_arr.arr.GetMKLDNNData()->get_primitive_desc().desc().data.ndims |
| != in_arr.arr.shape().ndim()) |
| continue; |
| std::vector<float> scale_vector(in_arr.arr.shape().ndim()); |
| for (int i = 0; i < in_arr.arr.shape().ndim(); i++) { |
| if (i < 2) |
| scale_vector[i] = 1; |
| else |
| scale_vector[i] = CalculateWidthPoolOutput( |
| input_shape[i], kernel[i-2], padding[i-2], stride[i-2]) / |
| static_cast<float>(input_shape[i]); |
| } |
| for (int i = 0; i < forward_attrs.num_outputs; i++) { |
| out_arrs[i] = GetTestOutputArrays(in_arr.arr.shape(), pds, scale_vector); |
| ex_out_arrs[i] = GetTestOutputArrays(in_arr.arr.shape(), pds, scale_vector); |
| } |
| |
| for (int i = 0; i < forward_attrs.num_inputs; i++) |
| inputs[i] = &in_arr.arr; |
| |
| for (size_t output_i = 0; output_i < out_arrs[0].size(); output_i++) { |
| for (int i = 0; i < forward_attrs.num_outputs; i++) { |
| req[i] = kWriteTo; |
| outputs[i] = &out_arrs[i][output_i].arr; |
| ex_outputs[i] = &ex_out_arrs[i][output_i].arr; |
| } |
| Imperative::Get()->set_is_training(true); |
| |
| PrintVerifyMsg(in_arr, out_arrs[0][output_i]); |
| Imperative::Get()->InvokeOp(Context(), forward_attrs.attrs, inputs, |
| outputs, req, DispatchMode::kFCompute, mxnet::OpStatePtr()); |
| Imperative::Get()->InvokeOp(Context(), forward_attrs.attrs, inputs, |
| ex_outputs, req, DispatchMode::kFComputeEx, mxnet::OpStatePtr()); |
| Engine::Get()->WaitForAll(); |
| VerifyCopyResult(outputs, ex_outputs); |
| |
| |
| // backwards test performed same time since output needed |
| if (backwards_attrs.num_inputs == 3) { |
| backwards_input[0] = outputs[0]; // output grad |
| backwards_input[1] = inputs[0]; // input |
| backwards_input[2] = outputs[0]; // output |
| } else if (backwards_attrs.num_inputs == 5) { |
| backwards_input[0] = outputs[0]; // output grad |
| backwards_input[1] = outputs[0]; // workspace grad |
| backwards_input[2] = inputs[0]; // input |
| backwards_input[3] = outputs[0]; // output |
| backwards_input[4] = ex_outputs[1]; // workspace |
| } |
| |
      // Needs copies of the inputs since they may be reused in the next iteration;
      // cannot use the Copy method since we need to maintain the MKLDNN format.
| auto tmp_output = GetTestInputArrays()[i1]; |
| auto tmp_output2 = GetTestInputArrays()[i1]; |
| backwards_outputs[0] = &tmp_output.arr; |
| backwards_ex_outputs[0] = &tmp_output2.arr; |
| back_req[0] = kWriteTo; |
| std::cout << "Backwards: "; |
| PrintVerifyMsg(out_arrs[0][output_i], tmp_output); |
| Imperative::Get()->InvokeOp( |
| Context(), backwards_attrs.attrs, backwards_input, backwards_outputs, |
| back_req, DispatchMode::kFCompute, mxnet::OpStatePtr()); |
| Imperative::Get()->InvokeOp( |
| Context(), backwards_attrs.attrs, backwards_input, backwards_ex_outputs, |
| back_req, DispatchMode::kFComputeEx, mxnet::OpStatePtr()); |
| Engine::Get()->WaitForAll(); |
| VerifyCopyResult(backwards_outputs, backwards_ex_outputs); |
| } |
| } |
| } |
| |
| TEST(IMPERATIVE, CopyOp) { |
| OpAttrs attrs = GetCopyOp(); |
| TestOp(attrs, VerifyCopyResult); |
| } |
| |
| TEST(IMPERATIVE, CopyBackwardsOp) { |
| OpAttrs attrs = GetCopyBackwardsOp(); |
| TestOp(attrs, VerifyCopyResult); |
| } |
| |
| TEST(IMPERATIVE, ActOp) { |
| OpAttrs attrs = GetReluOp(); |
| TestOp(attrs, VerifyActResult); |
| } |
| |
| TEST(IMPERATIVE, ActBackwardsOp) { |
| OpAttrs attrs = GetReluBackwardsOp(); |
| TestOp(attrs, VerifyActBackwardsResult); |
| } |
| |
| TEST(IMPERATIVE, SumOp) { |
| OpAttrs attrs = GetSumOp(); |
| TestOp(attrs, VerifySumResult); |
| } |
| |
| TEST(IMPERATIVE, SumBackwardsOp) { |
| OpAttrs attrs = GetSumBackwardsOp(); |
| TestOp(attrs, VerifySumBackwardsResult); |
| } |
| |
| TEST(IMPERATIVE, ConcatOp) { |
| for (int num_inputs = 2; num_inputs < 4; num_inputs++) { |
| for (int dim = 0; dim < 5; dim++) { |
| OpAttrs attrs = GetConcatOp(num_inputs, dim); |
| TestConcatOp(attrs, VerifyConcatResult); |
| } |
| } |
| } |
| |
| TEST(IMPERATIVE, ConcatBackwardsOp) { |
| for (int num_inputs = 2; num_inputs < 4; num_inputs++) { |
| for (int dim = 0; dim < 5; dim++) { |
| OpAttrs attrs = GetConcatBackwardsOp(num_inputs, dim); |
| TestConcatOp(attrs, VerifyConcatBackwardsResult, true); |
| } |
| } |
| } |
| |
| TEST(IMPERATIVE, LRNOp) { |
| OpAttrs forward_attrs = GetLRNOp(); |
| OpAttrs backwards_attrs = GetLRNBackwardsOp(); |
| TestOpEx(forward_attrs, backwards_attrs); |
| } |
| |
| TEST(IMPERATIVE, PoolingOp) { |
| for (int dim = 2; dim < 4; dim++) { |
| for (int kernel = 1; kernel < 4; kernel++) { |
| for (int stride = 1; stride < 3; stride++) { |
| for (int pad = 0; pad < 2; pad++) { |
| if (kernel / 2. < pad) |
| continue; |
| OpAttrs forward_attrs = GetPoolingOp(kernel, dim, stride, pad); |
| OpAttrs backwards_attrs = GetPoolingBackwardsOp(kernel, dim, stride, pad); |
| TestPoolingOp(forward_attrs, backwards_attrs); |
| } |
| } |
| } |
| } |
| } |
| |
| TEST(MKLDNN_BASE, MKLDNNSum) { |
| std::vector<NDArrayAttrs> in_arrs = GetTestInputArrays(); |
| std::vector<NDArrayAttrs> in_arrs2 = GetTestInputArrays(ArrayTypes::All, true); |
| TestArrayShapes tas = GetTestArrayShapes(); |
| std::vector<mkldnn::memory::primitive_desc> pds = tas.pds; |
| |
| for (int i = 0; i < in_arrs.size(); i++) { |
| auto in_arr = in_arrs[i]; |
| auto in_arr2 = in_arrs2[i]; |
| if (!SupportMKLDNN(in_arr.arr)) |
| continue; |
| if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { |
| continue; |
| } |
| std::vector<NDArrayAttrs> out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds); |
| for (auto &out_arr : out_arrs) { |
| auto in_mem1 = in_arr.arr.GetMKLDNNData(); |
| auto in_mem2 = in_arr2.arr.GetMKLDNNData(); |
| if (out_arr.arr.IsView()) |
| continue; |
| auto out_mem = out_arr.arr.GetMKLDNNData(); |
      PrintVerifyMsg(in_arr, out_arr);
| op::MKLDNNSum(*in_mem1, *in_mem2, *out_mem); |
| MKLDNNStream::Get()->Submit(); |
| VerifySumResult({&in_arr.arr, &in_arr2.arr}, {&out_arr.arr}); |
| } |
| } |
| |
| // in place |
| for (int i = 0; i < in_arrs.size(); i++) { |
| auto in_arr = in_arrs[i]; |
| auto in_arr2 = in_arrs2[i]; |
| if (!SupportMKLDNN(in_arr.arr)) |
| continue; |
| if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { |
| continue; |
| } |
| auto input_mem = in_arr.arr.GetMKLDNNData(); |
| auto input_mem2 = in_arr2.arr.GetMKLDNNData(); |
| NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy"); |
| orig_arr.arr.WaitToRead(); |
| PrintVerifyMsg(orig_arr, in_arr); |
| InitMKLDNNArray(&orig_arr.arr, input_mem->get_primitive_desc()); |
| orig_arr.arr.CopyFrom(*input_mem); |
| op::MKLDNNSum(*input_mem, *input_mem2, *input_mem); |
| MKLDNNStream::Get()->Submit(); |
| VerifySumResult({&orig_arr.arr, &in_arr2.arr}, {&in_arr.arr}); |
| } |
| } |
| |
| TEST(MKLDNN_BASE, CreateMKLDNNMem) { |
| std::vector<NDArrayAttrs> in_arrs = GetTestInputArrays(); |
| std::vector<NDArrayAttrs> in_arrs2 = GetTestInputArrays(ArrayTypes::All, true); |
| TestArrayShapes tas = GetTestArrayShapes(); |
| std::vector<mkldnn::memory::primitive_desc> pds = tas.pds; |
| MKLDNNStream *stream = MKLDNNStream::Get(); |
| |
| // kWriteTo |
| for (int i = 0; i < in_arrs.size(); i++) { |
| auto in_arr = in_arrs[i]; |
| auto in_arr2 = in_arrs2[i]; |
| if (!SupportMKLDNN(in_arr.arr)) |
| continue; |
| if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { |
| continue; |
| } |
| std::vector<NDArrayAttrs> out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds); |
| for (auto &out_arr : out_arrs) { |
| auto in_mem = in_arr.arr.GetMKLDNNData(); |
| auto in_mem2 = in_arr2.arr.GetMKLDNNData(); |
| NDArray orig_output = out_arr.arr.Copy(out_arr.arr.ctx()); |
| orig_output.WaitToRead(); |
| PrintVerifyMsg(in_arr, out_arr); |
| auto out_mem = out_arr.arr.GetMKLDNNData(); |
| auto output_mem_t = CreateMKLDNNMem(out_arr.arr, out_mem->get_primitive_desc(), kWriteTo); |
| op::MKLDNNSum(*in_mem, *in_mem2, *output_mem_t.second); |
| CommitOutput(out_arr.arr, output_mem_t); |
| stream->Submit(); |
| VerifySumResult({&in_arr.arr, &in_arr2.arr}, {&out_arr.arr}); |
| } |
| } |
| |
| // kWriteInPlace |
| for (int i = 0; i < in_arrs.size(); i++) { |
| auto in_arr = in_arrs[i]; |
| auto in_arr2 = in_arrs2[i]; |
| if (!SupportMKLDNN(in_arr.arr)) |
| continue; |
| if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { |
| continue; |
| } |
| auto input_mem = in_arr.arr.GetMKLDNNData(); |
| auto input_mem2 = in_arr2.arr.GetMKLDNNData(); |
| NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy"); |
| orig_arr.arr.WaitToRead(); |
| PrintVerifyMsg(orig_arr, in_arr); |
| InitMKLDNNArray(&orig_arr.arr, input_mem->get_primitive_desc()); |
| orig_arr.arr.CopyFrom(*input_mem); |
| auto output_mem_t = CreateMKLDNNMem(in_arr.arr, |
| input_mem->get_primitive_desc(), kWriteInplace, &in_arr.arr); |
| op::MKLDNNSum(*input_mem, *input_mem2, *output_mem_t.second); |
| CommitOutput(in_arr.arr, output_mem_t); |
| stream->Submit(); |
| VerifySumResult({&orig_arr.arr, &in_arr2.arr}, {&in_arr.arr}); |
| } |
| |
| // kAddTo |
| for (int i = 0; i < in_arrs.size(); i++) { |
| auto in_arr = in_arrs[i]; |
| auto in_arr2 = in_arrs2[i]; |
| if (!SupportMKLDNN(in_arr.arr)) |
| continue; |
| if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { |
| continue; |
| } |
| std::vector<NDArrayAttrs> out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds); |
| for (auto &out_arr : out_arrs) { |
| auto in_mem = in_arr.arr.GetMKLDNNData(); |
| auto in_mem2 = in_arr2.arr.GetMKLDNNData(); |
| NDArray orig_output = out_arr.arr.Copy(out_arr.arr.ctx()); |
| orig_output.WaitToRead(); |
| PrintVerifyMsg(in_arr, out_arr); |
| auto out_mem = out_arr.arr.GetMKLDNNData(); |
| auto output_mem_t = CreateMKLDNNMem(out_arr.arr, out_mem->get_primitive_desc(), kAddTo); |
| op::MKLDNNSum(*in_mem, *in_mem2, *output_mem_t.second); |
| CommitOutput(out_arr.arr, output_mem_t); |
| stream->Submit(); |
| VerifyAddRequest( |
| {&in_arr.arr, &in_arr2.arr}, {&orig_output}, {&out_arr.arr}, VerifySumResult); |
| } |
| } |
| |
| // kNullOp |
| for (int i = 0; i < in_arrs.size(); i++) { |
| auto in_arr = in_arrs[i]; |
| auto in_arr2 = in_arrs2[i]; |
| if (!SupportMKLDNN(in_arr.arr)) |
| continue; |
| if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { |
| continue; |
| } |
| auto input_mem = in_arr.arr.GetMKLDNNData(); |
| auto input_mem2 = in_arr2.arr.GetMKLDNNData(); |
| NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy"); |
| orig_arr.arr.WaitToRead(); |
| PrintVerifyMsg(orig_arr, in_arr); |
| InitMKLDNNArray(&orig_arr.arr, input_mem->get_primitive_desc()); |
| orig_arr.arr.CopyFrom(*input_mem); |
| auto output_mem_t = CreateMKLDNNMem(in_arr.arr, input_mem->get_primitive_desc(), kNullOp); |
| op::MKLDNNSum(*input_mem, *input_mem2, *output_mem_t.second); |
| CommitOutput(in_arr.arr, output_mem_t); |
| stream->Submit(); |
    // The original and the input should be the same since this is a no-op.
| VerifyCopyResult({&orig_arr.arr}, {&in_arr.arr}); |
| } |
| } |
| |
| #endif |