| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| |
| from __future__ import division |
| |
| import unittest |
| import numpy as np |
| |
| from singa import singa_wrap as singa_api |
| from singa import tensor |
| from cuda_helper import gpu_dev, cpu_dev |
| |
| |
| def _np_bn_training(x, scale, bias, rm, rv, momentum=0.1, e=1e-5): |
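    """NumPy reference for batch-norm forward in training mode.

    Returns the normalized output, the updated running mean and variance,
    and the saved batch mean and inverse standard deviation (the values
    cuDNN saves for the backward pass).
    """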
| channel = x.shape[1] |
| np.testing.assert_array_almost_equal(scale.shape, (1, channel, 1, 1)) |
| np.testing.assert_array_almost_equal(bias.shape, (1, channel, 1, 1)) |
| np.testing.assert_array_almost_equal(rm.shape, (1, channel, 1, 1)) |
| np.testing.assert_array_almost_equal(rv.shape, (1, channel, 1, 1)) |
| |
| batch_m = x.mean(axis=(0, 2, 3), keepdims=True) |
| batch_v = x.var(axis=(0, 2, 3), keepdims=True) |
| |
| x_norm = (x - batch_m) / np.sqrt(batch_v + e) |
| y_norm = x_norm * scale + bias |
| |
    # unbiased variance (Bessel's correction) for the running-average update,
    # as in the batch-norm paper: https://arxiv.org/pdf/1502.03167.pdf
    s = list(x.shape)
    s[1] = 1  # elements per channel: N * H * W
    batch_v_unbiased = np.prod(s) * batch_v / (np.prod(s) - 1)
| |
| rm = momentum * batch_m + (1 - momentum) * rm |
| rv = momentum * batch_v_unbiased + (1 - momentum) * rv |
| |
    # cuDNN also returns the saved inverse standard deviation for the
    # backward pass; see cudnnBatchNormalizationForwardTraining:
    # https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnBatchNormalizationForwardTraining
    # (epsilon is omitted here, hence the relaxed tolerance in the tests)
    resultSaveInvVariance = 1 / np.sqrt(batch_v)
| return y_norm, rm, rv, batch_m, resultSaveInvVariance |
| |
| |
| def _np_bn_testing(x, scale, bias, rm, rv, momentum=0.1, e=1e-5): |
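    """NumPy reference for batch-norm forward in inference mode: normalize
    with the running mean/variance instead of the batch statistics."""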
| channel = x.shape[1] |
| np.testing.assert_array_almost_equal(scale.shape, (1, channel, 1, 1)) |
| np.testing.assert_array_almost_equal(bias.shape, (1, channel, 1, 1)) |
| np.testing.assert_array_almost_equal(rm.shape, (1, channel, 1, 1)) |
| np.testing.assert_array_almost_equal(rv.shape, (1, channel, 1, 1)) |
| return scale * (x - rm) / np.sqrt(rv + e) + bias |
| |
| |
| def _cTensor_to_pyTensor(cTensor): |
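    """Wrap a raw C++ tensor (singa_api.Tensor) in a Python tensor.Tensor
    without copying the underlying data."""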
| new_t = tensor.Tensor() |
| new_t.data = cTensor |
| new_t.shape = tuple(new_t.data.shape()) |
| new_t.device = new_t.data.device() |
| new_t.dtype = new_t.data.data_type() |
| return new_t |
| |
| |
| def _ctensor_eq_ndarray(t1, np1): |
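    """Assert that a C++ tensor matches an ndarray in values, dtype and
    shape; the tensor is moved to the host for the check and restored to
    its original device afterwards."""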
| d = t1.device() |
| t1.ToHost() |
| if t1.data_type() == singa_api.kInt: |
| np.testing.assert_array_almost_equal(t1.GetIntValue(t1.Size()), |
| np1.flatten()) |
| elif t1.data_type() == singa_api.kFloat32: |
| np.testing.assert_array_almost_equal(t1.GetFloatValue(t1.Size()), |
| np1.flatten()) |
| |
| if np1.dtype == np.float32: |
| np.testing.assert_equal(t1.data_type(), singa_api.kFloat32) |
| elif np1.dtype == np.int32: |
| np.testing.assert_equal(t1.data_type(), singa_api.kInt) |
| |
| np.testing.assert_array_almost_equal(t1.shape(), np1.shape) |
| t1.ToDevice(d) |
| |
| |
| def print_t(t1): |
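    """Debug helper: print a C++ tensor's values without changing its
    device."""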
| d = t1.device() |
| t1.ToHost() |
| if t1.data_type() == singa_api.kInt: |
| print(t1.GetIntValue(t1.Size())) |
| elif t1.data_type() == singa_api.kFloat32: |
| print(t1.GetFloatValue(t1.Size())) |
| t1.ToDevice(d) |
| |
| |
| class TestAPI(unittest.TestCase): |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_batchnorm_training_gpu(self): |
| dev = gpu_dev |
| |
| def _run_training(x_0, s_0, b_0, rm_0, rv_0, m_0=0.1): |
| # np api |
| (y_1, rm_1, rv_1, bm_1, bv_1) = _np_bn_training(x_0, |
| s_0, |
| b_0, |
| rm_0, |
| rv_0, |
| momentum=m_0) |
| |
| # singa api |
| rm_t = tensor.Tensor(device=dev, data=rm_0) |
| rv_t = tensor.Tensor(device=dev, data=rv_0) |
| hndl = singa_api.CudnnBatchNormHandle( |
| m_0, |
| tensor.Tensor(device=dev, data=x_0).data) |
| (y_2_c, bm_2_c, bv_2_c) = singa_api.GpuBatchNormForwardTraining( |
| hndl, |
| tensor.Tensor(device=dev, data=x_0).data, |
| tensor.Tensor(device=dev, data=s_0).data, |
| tensor.Tensor(device=dev, data=b_0).data, rm_t.data, rv_t.data) |
| |
| np.testing.assert_array_almost_equal( |
| y_1, tensor.to_numpy(_cTensor_to_pyTensor(y_2_c)), decimal=4) |
| np.testing.assert_array_almost_equal( |
| bm_1, tensor.to_numpy(_cTensor_to_pyTensor(bm_2_c))) |
| np.testing.assert_array_almost_equal(rm_1, tensor.to_numpy(rm_t)) |
| #print(bv_1) |
| #print(tensor.to_numpy(_cTensor_to_pyTensor(bv_2_c))) |
| np.testing.assert_array_almost_equal( |
| bv_1, tensor.to_numpy(_cTensor_to_pyTensor(bv_2_c)), decimal=3) |
| np.testing.assert_array_almost_equal(rv_1, |
| tensor.to_numpy(rv_t), |
| decimal=4) |
| return |
| |
| x_0 = np.array([1, 1, 1, 1, 2, 2, 2, 2, 10, 10, 10, 10, 20, 20, 20, 20], |
| dtype=np.float32).reshape((2, 2, 2, 2)) |
| s_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| b_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| rm_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| rv_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| _run_training(x_0, s_0, b_0, rm_0, rv_0, m_0=0.0) |
| _run_training(x_0, s_0, b_0, rm_0, rv_0, m_0=1.0) |
| _run_training(x_0, s_0, b_0, rm_0, rv_0, m_0=0.2) |
| |
| c = 10 |
| x_0 = np.random.random((10, c, 20, 20)).astype(np.float32) |
| s_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| b_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| rm_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| rv_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| _run_training(x_0, s_0, b_0, rm_0, rv_0, m_0=0.2) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_batchnorm_testing_gpu(self): |
| dev = gpu_dev |
| |
| def _run_testing(x_0, s_0, b_0, rm_0, rv_0, m_0=0.1): |
| # np api |
| y_1 = _np_bn_testing(x_0, s_0, b_0, rm_0, rv_0, momentum=m_0) |
| |
| # singa api |
| hndl = singa_api.CudnnBatchNormHandle( |
| m_0, |
| tensor.Tensor(device=dev, data=x_0).data) |
| y_2_c = singa_api.GpuBatchNormForwardInference( |
| hndl, |
| tensor.Tensor(device=dev, data=x_0).data, |
| tensor.Tensor(device=dev, data=s_0).data, |
| tensor.Tensor(device=dev, data=b_0).data, |
| tensor.Tensor(device=dev, data=rm_0).data, |
| tensor.Tensor(device=dev, data=rv_0).data) |
| #print(y_1) |
| #print(tensor.to_numpy(_cTensor_to_pyTensor(y_2_c))) |
| |
| np.testing.assert_array_almost_equal( |
| y_1, tensor.to_numpy(_cTensor_to_pyTensor(y_2_c)), decimal=3) |
| return |
| |
| x_0 = np.array([1, 1, 1, 1, 2, 2, 2, 2, 10, 10, 10, 10, 20, 20, 20, 20], |
| dtype=np.float32).reshape((2, 2, 2, 2)) |
| s_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| b_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| rm_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| rv_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| _run_testing(x_0, s_0, b_0, rm_0, rv_0, m_0=1.0) |
| c = 10 |
| x_0 = np.random.random((10, c, 20, 20)).astype(np.float32) |
| s_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| b_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| rm_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| rv_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| _run_testing(x_0, s_0, b_0, rm_0, rv_0, m_0=1.0) |
| |
| def _softmax_api_helper(self, dev): |
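        """Check SoftMax with an explicit axis: dimensions before `axis` are
        folded into rows and the remaining ones into columns, so the result
        must equal a 2-d numpy softmax on the reshaped input (aft_shape)."""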
| |
| def _run_test(dev, org_shape, axis, aft_shape): |
| x_0 = np.random.random(org_shape).astype(np.float32) |
| x_0 = x_0 + 1000 |
| x0 = tensor.Tensor(device=dev, data=x_0) |
| |
| # test with axis |
| y0 = tensor._call_singa_func(singa_api.SoftMax, x0.data, axis) |
| |
| # test with numpy |
| x_0 = x_0.reshape(aft_shape) |
| x_0 = x_0 - np.max(x_0) |
| y1 = np.divide(np.exp(x_0), |
| np.sum(np.exp(x_0), axis=1).reshape(x_0.shape[0], |
| 1)) # 2d softmax |
| y1 = y1.reshape(org_shape) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y0), y1) |
| |
| _run_test(dev, [2, 2], 1, [2, 2]) |
| _run_test(dev, [2, 2], 0, [1, 4]) |
| _run_test(dev, [2, 2], -1, [2, 2]) |
| _run_test(dev, [2, 2], -2, [1, 4]) |
| _run_test(dev, [2, 2, 2], 2, [4, 2]) |
| _run_test(dev, [2, 2, 2], 1, [2, 4]) |
| _run_test(dev, [2, 2, 2], 0, [1, 8]) |
| _run_test(dev, [2, 2, 2], -1, [4, 2]) |
| _run_test(dev, [2, 2, 2], -2, [2, 4]) |
| _run_test(dev, [2, 2, 2], -3, [1, 8]) |
| _run_test(dev, [2, 2, 2, 2], 3, [8, 2]) |
| _run_test(dev, [2, 2, 2, 2], 2, [4, 4]) |
| _run_test(dev, [2, 2, 2, 2], 1, [2, 8]) |
| _run_test(dev, [2, 2, 2, 2], 0, [1, 16]) |
| _run_test(dev, [2, 2, 2, 2], -1, [8, 2]) |
| _run_test(dev, [2, 2, 2, 2], -2, [4, 4]) |
| _run_test(dev, [2, 2, 2, 2], -3, [2, 8]) |
| _run_test(dev, [2, 2, 2, 2], -4, [1, 16]) |
| |
| def test_softmax_api_cpu(self): |
| self._softmax_api_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_softmax_api_gpu(self): |
| self._softmax_api_helper(gpu_dev) |
| |
| def _tensor_arithmetic_op_broadcast_helper(self, dev): |
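        """Elementwise ops (Pow, +, -, *, /) must broadcast like numpy over
        a range of shape pairs, including scalar-like and lower-rank
        operands."""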
| |
| def _run_test(dev, singa_op, np_op, s1, s2): |
| x_0 = np.random.random(s1).astype(np.float32) |
| y_0 = np.random.random(s2).astype(np.float32) |
| x0 = tensor.Tensor(device=dev, data=x_0) |
| y0 = tensor.Tensor(device=dev, data=y_0) |
| |
| z0 = tensor._call_singa_func(singa_op, x0.data, y0.data) |
| z0.to_host() |
| np.testing.assert_array_almost_equal(tensor.to_numpy(z0), |
| np_op(x_0, y_0)) |
| return |
| |
| for s_op, n_op in zip([ |
| singa_api.Pow, |
| singa_api.__add__, |
| singa_api.__div__, |
| singa_api.__sub__, |
| singa_api.__mul__, |
| ], [np.power, np.add, np.divide, np.subtract, np.multiply]): |
| _run_test(dev, s_op, n_op, [6], [1]) |
| _run_test(dev, s_op, n_op, [2, 3], [2, 3]) |
| _run_test(dev, s_op, n_op, [3, 2], [1]) |
| _run_test(dev, s_op, n_op, [3, 1, 2], [3, 1, 1]) |
| _run_test(dev, s_op, n_op, [2, 3, 4, 5], [5]) |
| _run_test(dev, s_op, n_op, [2, 3, 4, 5], [1, 1, 1]) |
| _run_test(dev, s_op, n_op, [2, 3, 4, 5], [1, 1, 1, 1]) |
            _run_test(dev, s_op, n_op, [2, 3, 4, 5], [4, 5])  # (4,5) broadcasts to (2,3,4,5)
            _run_test(dev, s_op, n_op, [3, 1, 2, 1], [3, 1, 2])
            _run_test(dev, s_op, n_op, [4, 5], [2, 3, 4, 5])  # (4,5) broadcasts to (2,3,4,5)
            _run_test(dev, s_op, n_op, [1, 4, 5], [2, 3, 1, 1])  # (1,4,5) + (2,3,1,1) -> (2,3,4,5)
            _run_test(dev, s_op, n_op, [3, 4, 5], [2, 1, 1, 1])  # (3,4,5) + (2,1,1,1) -> (2,3,4,5)
| |
| def test_tensor_arithmetic_op_broadcast_cpu(self): |
| self._tensor_arithmetic_op_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_tensor_arithmetic_op_broadcast_gpu(self): |
| self._tensor_arithmetic_op_broadcast_helper(gpu_dev) |
| |
| def _transpose_and_arithmetic_op_broadcast_helper(self, dev): |
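        """Broadcasting must also hold for transposed (non-contiguous)
        inputs; results and output shapes are checked against numpy."""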
| |
| def _test(s1, s2, axis1, axis2, s3, s_op, n_op, dev): |
| x_0 = np.random.random(s1).astype(np.float32) |
| y_0 = np.random.random(s2).astype(np.float32) |
| |
| x0 = tensor.Tensor(device=dev, data=x_0) |
| y0 = tensor.Tensor(device=dev, data=y_0) |
| |
| x1 = x0.transpose(axis1) |
| y1 = y0.transpose(axis2) |
| |
| z0 = tensor._call_singa_func(s_op, x1.data, y1.data) |
| z0.to_host() |
| |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(z0), |
| n_op(x_0.transpose(axis1), y_0.transpose(axis2))) |
| np.testing.assert_array_almost_equal(z0.shape, s3) |
| return |
| |
| for s_op, n_op in zip([ |
| singa_api.Pow, |
| singa_api.__add__, |
| singa_api.__div__, |
| singa_api.__sub__, |
| singa_api.__mul__, |
| ], [np.power, np.add, np.divide, np.subtract, np.multiply]): |
| s1 = [1, 5, 1, 3] |
| s2 = [3, 1, 1, 4] |
            axis1 = [3, 2, 1, 0]  # transposed x: (3, 1, 5, 1)
            axis2 = [1, 0, 2, 3]  # transposed y: (1, 3, 1, 4)
| s3 = [3, 3, 5, 4] |
| _test(s1, s2, axis1, axis2, s3, s_op, n_op, dev) |
| |
| s1 = [1, 5, 1] |
| s2 = [1, 3, 2] |
            axis1 = [2, 1, 0]  # transposed x: (1, 5, 1)
            axis2 = [1, 0, 2]  # transposed y: (3, 1, 2)
| s3 = [3, 5, 2] |
| _test(s1, s2, axis1, axis2, s3, s_op, n_op, dev) |
| |
| s1 = [5, 1] |
| s2 = [1, 3] |
            axis1 = [1, 0]  # transposed x: (1, 5)
            axis2 = [1, 0]  # transposed y: (3, 1)
| s3 = [3, 5] |
| _test(s1, s2, axis1, axis2, s3, s_op, n_op, dev) |
| |
| def test_transpose_and_arithmetic_op_broadcast_cpu(self): |
| self._transpose_and_arithmetic_op_broadcast_helper(cpu_dev) |
| |
| def _erf(self, dev=cpu_dev): |
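        """Erf (Gauss error function) on random inputs; values are checked
        against scipy.special.erf when scipy is available."""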
| np1 = np.random.random((2, 3)).astype(np.float32) |
| |
| x1 = tensor.from_numpy(np1) |
| x1.to_device(dev) |
| y1 = tensor.from_raw_tensor(singa_api.Erf(x1.data)) |
| |
        # scipy is optional for the test suite; verify the values against
        # scipy.special.erf only when it is installed.
        try:
            from scipy.special import erf
            np.testing.assert_array_almost_equal(erf(np1), tensor.to_numpy(y1))
        except ImportError:
            pass
| |
| def test_erf_cpu(self): |
| self._erf(cpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_transpose_and_arithmetic_op_broadcast_gpu(self): |
| self._transpose_and_arithmetic_op_broadcast_helper(gpu_dev) |
| |
| def test_batchnorm_training_dnnl(self): |
| dev = cpu_dev |
| |
| def _run_training(x_0, s_0, b_0, rm_0, rv_0, m_0=0.1): |
| # np api |
| (y_1, rm_1, rv_1, bm_1, bv_1) = _np_bn_training(x_0, |
| s_0, |
| b_0, |
| rm_0, |
| rv_0, |
| momentum=m_0) |
| |
| # singa api |
| hndl = singa_api.BatchNormHandle( |
| m_0, |
| tensor.Tensor(device=dev, data=x_0).data) |
| (y_2_c, bm_2_c, bv_2_c) = singa_api.CpuBatchNormForwardTraining( |
| hndl, |
| tensor.Tensor(device=dev, data=x_0).data, |
| tensor.Tensor(device=dev, data=s_0).data, |
| tensor.Tensor(device=dev, data=b_0).data, |
| tensor.Tensor(device=dev, data=rm_0).data, |
| tensor.Tensor(device=dev, data=rv_0).data) |
| |
| np.testing.assert_array_almost_equal( |
| y_1, tensor.to_numpy(_cTensor_to_pyTensor(y_2_c)), decimal=5) |
| np.testing.assert_array_almost_equal( |
| bm_1, tensor.to_numpy(_cTensor_to_pyTensor(bm_2_c)), decimal=5) |
| #print(bv_1) |
| #print(tensor.to_numpy(_cTensor_to_pyTensor(bv_2_c))) |
| #np.testing.assert_array_almost_equal( |
| # bv_1, tensor.to_numpy(_cTensor_to_pyTensor(bv_2_c)), decimal=3) |
| return |
| |
| x_0 = np.array([1, 1, 1, 1, 2, 2, 2, 2, 10, 10, 10, 10, 20, 20, 20, 20], |
| dtype=np.float32).reshape((2, 2, 2, 2)) |
| s_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| b_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| rm_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| rv_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| _run_training(x_0, s_0, b_0, rm_0, rv_0, m_0=1.0) |
| _run_training(x_0, s_0, b_0, rm_0, rv_0, m_0=0.0) |
| _run_training(x_0, s_0, b_0, rm_0, rv_0, m_0=0.2) |
| |
| c = 10 |
| x_0 = np.random.random((10, c, 20, 20)).astype(np.float32) |
| s_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| b_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| rm_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| rv_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| _run_training(x_0, s_0, b_0, rm_0, rv_0, m_0=0.2) |
| |
| def test_batchnorm_testing_dnnl(self): |
| dev = cpu_dev |
| |
| def _run_testing(x_0, s_0, b_0, rm_0, rv_0, m_0=0.1): |
| # np api |
| y_1 = _np_bn_testing(x_0, s_0, b_0, rm_0, rv_0, momentum=m_0) |
| |
| # singa api |
| hndl = singa_api.BatchNormHandle( |
| m_0, |
| tensor.Tensor(device=dev, data=x_0).data) |
| y_2_c = singa_api.CpuBatchNormForwardInference( |
| hndl, |
| tensor.Tensor(device=dev, data=x_0).data, |
| tensor.Tensor(device=dev, data=s_0).data, |
| tensor.Tensor(device=dev, data=b_0).data, |
| tensor.Tensor(device=dev, data=rm_0).data, |
| tensor.Tensor(device=dev, data=rv_0).data) |
| #print(y_1) |
| #print(tensor.to_numpy(_cTensor_to_pyTensor(y_2_c))) |
| |
| np.testing.assert_array_almost_equal( |
| y_1, tensor.to_numpy(_cTensor_to_pyTensor(y_2_c)), decimal=5) |
| return |
| |
| x_0 = np.array([1, 1, 1, 1, 2, 2, 2, 2, 10, 10, 10, 10, 20, 20, 20, 20], |
| dtype=np.float32).reshape((2, 2, 2, 2)) |
| s_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| b_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| rm_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| rv_0 = np.array([1, 10], dtype=np.float32).reshape((1, 2, 1, 1)) |
| _run_testing(x_0, s_0, b_0, rm_0, rv_0, m_0=1.0) |
| c = 10 |
| x_0 = np.random.random((10, c, 20, 20)).astype(np.float32) |
| s_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| b_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| rm_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| rv_0 = np.random.random((1, c, 1, 1)).astype(np.float32) |
| _run_testing(x_0, s_0, b_0, rm_0, rv_0, m_0=1.0) |
| |
| def test_batchnorm_backward_dnnl(self): |
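        """Compare CpuBatchNormBackwardx against precomputed input gradients
        for a 1x3x2x2 input with identity scale and zero bias."""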
| dev = cpu_dev |
| N = 1 |
| C = 3 |
| H = 2 |
| W = 2 |
| |
| data_shape = [N, C, H, W] |
| param_shape = [1, C, 1, 1] |
| data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] |
| |
| x_0 = np.array(data, dtype=np.float32).reshape(data_shape) |
| y_0 = np.array(data, dtype=np.float32).reshape(data_shape) |
| dy_0 = np.array(data, dtype=np.float32).reshape(data_shape) |
| scale_0 = np.array([1] * C, dtype=np.float32).reshape(param_shape) |
| bias_0 = np.array([0] * C, dtype=np.float32).reshape(param_shape) |
| |
| mean_0 = x_0.mean(axis=(0, 2, 3), keepdims=True) |
| var_0 = x_0.var(axis=(0, 2, 3), keepdims=True) |
| |
| hndl = singa_api.BatchNormHandle( |
| 0.1, |
| tensor.Tensor(device=dev, data=x_0).data) |
| (dx_2_c, _, _) = singa_api.CpuBatchNormBackwardx( |
| hndl, |
| tensor.Tensor(device=dev, data=y_0).data, |
| tensor.Tensor(device=dev, data=dy_0).data, |
| tensor.Tensor(device=dev, data=x_0).data, |
| tensor.Tensor(device=dev, data=scale_0).data, |
| tensor.Tensor(device=dev, data=bias_0).data, |
| tensor.Tensor(device=dev, data=mean_0).data, |
| tensor.Tensor(device=dev, data=var_0).data, |
| ) |
| |
| dx_truth = np.array([[[[-1.0769e-05, -3.5985e-06], |
| [3.5985e-06, 1.0769e-05]], |
| [[-1.0769e-05, -3.5985e-06], |
| [3.5985e-06, 1.0769e-05]], |
| [[-1.0769e-05, -3.5985e-06], |
| [3.5985e-06, 1.0769e-05]]]]) |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(_cTensor_to_pyTensor(dx_2_c)), dx_truth) |
| |
| return |
| |
| def test_softmax_api_dnnl_backend(self): |
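        """Same checks as _softmax_api_helper, run explicitly on the CPU
        (DNNL) backend."""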
| dev = cpu_dev |
| |
| def _run_test(org_shape, axis, aft_shape): |
| x_0 = np.random.random(org_shape).astype(np.float32) |
| x_0 = x_0 + 1000 |
| x0 = tensor.Tensor(device=dev, data=x_0) |
| |
| # test with axis |
| y0 = tensor._call_singa_func(singa_api.SoftMax, x0.data, axis) |
| |
| # test with numpy |
| x_0 = x_0.reshape(aft_shape) |
| x_0 = x_0 - np.max(x_0) |
| y1 = np.divide(np.exp(x_0), |
| np.sum(np.exp(x_0), axis=1).reshape(x_0.shape[0], |
| 1)) # 2d softmax |
| y1 = y1.reshape(org_shape) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y0), y1) |
| |
| _run_test([2, 2], 1, [2, 2]) |
| _run_test([2, 2], 0, [1, 4]) |
| _run_test([2, 2], -1, [2, 2]) |
| _run_test([2, 2], -2, [1, 4]) |
| |
| _run_test([2, 2, 2], 2, [4, 2]) |
| _run_test([2, 2, 2], 1, [2, 4]) |
| _run_test([2, 2, 2], 0, [1, 8]) |
| _run_test([2, 2, 2], -1, [4, 2]) |
| _run_test([2, 2, 2], -2, [2, 4]) |
| _run_test([2, 2, 2], -3, [1, 8]) |
| |
| _run_test([2, 2, 2, 2], 3, [8, 2]) |
| _run_test([2, 2, 2, 2], 2, [4, 4]) |
| _run_test([2, 2, 2, 2], 1, [2, 8]) |
| _run_test([2, 2, 2, 2], 0, [1, 16]) |
| _run_test([2, 2, 2, 2], -1, [8, 2]) |
| _run_test([2, 2, 2, 2], -2, [4, 4]) |
| _run_test([2, 2, 2, 2], -3, [2, 8]) |
| _run_test([2, 2, 2, 2], -4, [1, 16]) |
| |
| def test_dnnl_pooling_max(self): |
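        """2x2 max pooling, stride 1, no padding: forward keeps each
        channel's maximum, and backward routes the whole gradient to the
        argmax position."""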
| dev = cpu_dev |
| N = 1 |
| C = 3 |
| H = 2 |
| W = 2 |
| |
| data_shape = [N, C, H, W] |
| data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] |
| |
| x0 = np.array(data, dtype=np.float32).reshape(data_shape) |
| x0_ct = tensor.Tensor(device=dev, data=x0).data |
| |
| dy0 = np.array([1, 2, 3], dtype=np.float32).reshape([1, 3, 1, 1]) |
| dy0_ct = tensor.Tensor(device=dev, data=dy0).data |
| |
| hndl = singa_api.PoolingHandle(x0_ct, [2, 2], [1, 1], [0, 0], True) |
| |
| y0_ct = singa_api.CpuPoolingForward(hndl, x0_ct) |
| y1 = np.array([[[[4.]], [[8.]], [[12.]]]]) |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(_cTensor_to_pyTensor(y0_ct)), y1) |
| |
| dx0_ct = singa_api.CpuPoolingBackward(hndl, dy0_ct, x0_ct, y0_ct) |
| dx1 = np.array([[[[0., 0.], [0., 1.]], [[0., 0.], [0., 2.]], |
| [[0., 0.], [0., 3.]]]]) |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(_cTensor_to_pyTensor(dx0_ct)), dx1) |
| |
| def test_dnnl_pooling_avg(self): |
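        """2x2 average pooling over the full window: forward is the channel
        mean, and backward spreads each gradient uniformly (dy / 4 per
        element)."""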
| dev = cpu_dev |
| N = 1 |
| C = 3 |
| H = 2 |
| W = 2 |
| |
| data_shape = [N, C, H, W] |
| data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] |
| |
| x0 = np.array(data, dtype=np.float32).reshape(data_shape) |
| x0_ct = tensor.Tensor(device=dev, data=x0).data |
| |
| dy0 = np.array([1, 2, 3], dtype=np.float32).reshape([1, 3, 1, 1]) |
| dy0_ct = tensor.Tensor(device=dev, data=dy0).data |
| |
| hndl = singa_api.PoolingHandle(x0_ct, [2, 2], [1, 1], [0, 0], False) |
| |
| y0_ct = singa_api.CpuPoolingForward(hndl, x0_ct) |
| |
| y1 = np.array([[[[2.5000]], [[6.5000]], [[10.5000]]]]) |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(_cTensor_to_pyTensor(y0_ct)), y1) |
| dx0_ct = singa_api.CpuPoolingBackward(hndl, dy0_ct, x0_ct, y0_ct) |
| dx1 = np.array([[[[0.2500, 0.2500], [0.2500, 0.2500]], |
| [[0.5000, 0.5000], [0.5000, 0.5000]], |
| [[0.7500, 0.7500], [0.7500, 0.7500]]]]) |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(_cTensor_to_pyTensor(dx0_ct)), dx1) |
| |
| def _concat_helper(self, dev): |
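        """ConcatOn joins a vector of tensors along the given axis; checked
        against np.concatenate on the last axis."""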
| np1 = np.random.random([5, 6, 7, 8]).astype(np.float32) |
| np2 = np.random.random([5, 6, 7, 1]).astype(np.float32) |
| np3 = np.concatenate((np1, np2), axis=3) |
| |
| t1 = tensor.Tensor(device=dev, data=np1) |
| t2 = tensor.Tensor(device=dev, data=np2) |
| |
| ctensors = singa_api.VecTensor() |
| ctensors.append(t1.data) |
| ctensors.append(t2.data) |
| |
| t3_ct = singa_api.ConcatOn(ctensors, 3) |
| |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(_cTensor_to_pyTensor(t3_ct)), np3) |
| |
| def test_concat_cpu(self): |
| self._concat_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_concat_gpu(self): |
| self._concat_helper(gpu_dev) |
| |
| def _ceil_helper(self, dev): |
| |
        np1 = np.random.random([5, 6, 7, 8]).astype(np.float32) * 10
| np2 = np.ceil(np1) |
| |
| t1 = tensor.Tensor(device=dev, data=np1) |
| |
| t2_ct = singa_api.Ceil(t1.data) |
| |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(_cTensor_to_pyTensor(t2_ct)), np2) |
| |
| def test_ceil_cpu(self): |
| self._ceil_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_ceil_gpu(self): |
| self._ceil_helper(gpu_dev) |
| |
| def _floor_helper(self, dev): |
| |
        np1 = np.random.random([5, 6, 7, 8]).astype(np.float32) * 10
| np2 = np.floor(np1) |
| |
| t1 = tensor.Tensor(device=dev, data=np1) |
| |
| t2_ct = singa_api.Floor(t1.data) |
| |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(_cTensor_to_pyTensor(t2_ct)), np2) |
| |
| def test_floor_cpu(self): |
| self._floor_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_floor_gpu(self): |
| self._floor_helper(gpu_dev) |
| |
| def _as_type_helper(self, dev): |
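        """AsType casting between kFloat32 and kInt; float -> int truncates
        toward zero, matching numpy's astype(np.int32)."""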
| |
| np1 = np.random.random([3]).astype(np.float32) |
| np1 = np1 * 10 - 5 |
| np2 = np1.astype(np.int32) |
| np3 = np2.astype(np.float32) |
| |
        t1 = tensor.Tensor(device=dev, data=np1)
| |
| t1_ct = t1.data |
| |
| self.assertEqual(t1_ct.data_type(), singa_api.kFloat32) |
| |
| t1_ct = t1_ct.AsType(singa_api.kInt) |
| |
| self.assertEqual(t1_ct.data_type(), singa_api.kInt) |
| |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(_cTensor_to_pyTensor(t1_ct)), np2) |
| |
| t1_ct = t1_ct.AsType(singa_api.kFloat32) |
| |
| self.assertEqual(t1_ct.data_type(), singa_api.kFloat32) |
| |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(_cTensor_to_pyTensor(t1_ct)), np3) |
| |
| def test_as_type_cpu(self): |
| self._as_type_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_as_type_gpu(self): |
| self._as_type_helper(gpu_dev) |
| |
| def _as_type2_helper(self, dev): |
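        """AsType after Reshape: the cast must follow the tensor's current
        shape, in both the int -> float and float -> int directions."""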
| shape1 = [1, 2, 3, 4] |
| shape2 = [4, 3, 2, 1] |
| np_int = np.random.randint(0, 10, shape1).astype(np.int32) |
| np_flt = np_int.astype(np.float32) |
| |
| t1 = singa_api.Tensor(shape1, dev, singa_api.kInt) |
| t1.CopyIntDataFromHostPtr(np_int.flatten()) |
| _ctensor_eq_ndarray(t1, np_int) |
| |
| t1 = singa_api.Reshape(t1, shape2) |
| t2 = t1.AsType(singa_api.kFloat32) |
| _ctensor_eq_ndarray(t2, np_flt.reshape(shape2)) |
| |
| t3 = t2.AsType(singa_api.kInt) |
| _ctensor_eq_ndarray(t3, np_int.reshape(shape2)) |
| |
| t1 = singa_api.Reshape(t1, shape1) |
| t4 = t1.AsType(singa_api.kFloat32) |
| _ctensor_eq_ndarray(t4, np_flt.reshape(shape1)) |
| |
| def test_as_type2_cpu(self): |
| self._as_type2_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_as_type2_gpu(self): |
| self._as_type2_helper(gpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_rnn_relu(self): |
| self._rnn_helper(0) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_rnn_tanh(self): |
| self._rnn_helper(1) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_rnn_lstm(self): |
| self._rnn_helper(2) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_rnn_gru(self): |
| self._rnn_helper(3) |
| |
| def _rnn_helper(self, mode): |
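        """Run cuDNN RNN forward and backward for one cell type; `mode`
        follows the cuDNN convention used by the drivers above:
        0 = ReLU, 1 = tanh, 2 = LSTM, 3 = GRU."""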
| dev = gpu_dev |
| |
| hidden_size = 7 |
| seq_length = 5 |
| batch_size = 6 |
| feature_size = 3 |
| directions = 2 |
| num_layers = 2 |
| |
| x = tensor.Tensor(shape=(seq_length, batch_size, feature_size), |
| device=dev).gaussian(0, 1) |
| hx = tensor.Tensor(shape=(num_layers * directions, batch_size, |
| hidden_size), |
| device=dev).gaussian(0, 1) |
| cx = tensor.Tensor(shape=(num_layers * directions, batch_size, |
| hidden_size), |
| device=dev).gaussian(0, 1) |
| |
| rnn_handle = singa_api.CudnnRNNHandle(x.data, |
| hidden_size, |
| mode, |
| num_layers=num_layers, |
| dropout=0.1, |
| bidirectional=1) |
| |
| w = tensor.Tensor(shape=(rnn_handle.weights_size,), |
| device=dev).gaussian(0, 1) |
| # print("weights size is ", rnn_handle.weights_size) |
| |
| (y, hy, cy) = singa_api.GpuRNNForwardTraining(x.data, hx.data, cx.data, |
| w.data, rnn_handle) |
| self.assertEqual(y.shape(), |
| (seq_length, batch_size, directions * hidden_size)) |
| self.assertEqual(hy.shape(), hx.shape) |
| self.assertEqual(cy.shape(), cx.shape) |
| |
| (y2, hy2, |
| cy2) = singa_api.GpuRNNForwardInference(x.data, hx.data, cx.data, |
| w.data, rnn_handle) |
| self.assertEqual(y2.shape(), |
| (seq_length, batch_size, directions * hidden_size)) |
| self.assertEqual(hy2.shape(), hx.shape) |
| self.assertEqual(cy2.shape(), cx.shape) |
| |
| dy = tensor.Tensor(shape=(seq_length, batch_size, |
| directions * hidden_size), |
| device=dev).gaussian(0, 1) |
| dhy = tensor.Tensor(shape=(num_layers * directions, batch_size, |
| hidden_size), |
| device=dev).gaussian(0, 1) |
| dcy = tensor.Tensor(shape=(num_layers * directions, batch_size, |
| hidden_size), |
| device=dev).gaussian(0, 1) |
| |
| (dx, dhx, dcx) = singa_api.GpuRNNBackwardx(y, dy.data, dhy.data, |
| dcy.data, w.data, hx.data, |
| cx.data, rnn_handle) |
| self.assertEqual(dx.shape(), (seq_length, batch_size, feature_size)) |
| self.assertEqual(dhx.shape(), hx.shape) |
| self.assertEqual(dcx.shape(), cx.shape) |
| |
| dW = singa_api.GpuRNNBackwardW(x.data, hx.data, y, rnn_handle) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_rnn_with_seq_lengths(self): |
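        """Exercise GpuRNNBackwardxEx, the backward entry point that takes
        per-sample sequence lengths; all samples use the full length here."""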
| dev = gpu_dev |
| |
| # params |
| hidden_size = 7 |
| seq_length = 5 |
| batch_size = 6 |
| feature_size = 3 |
| directions = 2 |
| num_layers = 2 |
| |
| # shapes |
| x_s = (seq_length, batch_size, feature_size) |
| y_s = (seq_length, batch_size, hidden_size) |
| states_s = (num_layers * directions, batch_size, hidden_size) |
| |
| # tensors |
| x = tensor.Tensor(x_s, dev).gaussian(0, 1) |
| y = tensor.Tensor(y_s, dev).gaussian(0, 1) |
| dy = tensor.Tensor(y_s, dev).gaussian(0, 1) |
| dhy = tensor.Tensor(states_s, dev).gaussian(0, 1) |
| dcy = tensor.Tensor(states_s, dev).gaussian(0, 1) |
| hx = tensor.Tensor(states_s, dev).gaussian(0, 1) |
| cx = tensor.Tensor(states_s, dev).gaussian(0, 1) |
| |
| # handle |
| rnn_handle = singa_api.CudnnRNNHandle(x.data, hidden_size, 2) |
| w = tensor.Tensor((rnn_handle.weights_size,), dev).gaussian(0, 1) |
| |
| # seq lengths |
        seq_lengths = tensor.from_numpy(
            np.array([seq_length] * batch_size, dtype=np.int32))
| |
| # operations |
| (dx, dhx, dcx) = singa_api.GpuRNNBackwardxEx(y.data, dy.data, dhy.data, |
| dcy.data, w.data, hx.data, |
| cx.data, seq_lengths.data, |
| rnn_handle) |
| |
| |
| def test_round_cpu(self): |
| self._round(cpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_round_gpu(self): |
| self._round(gpu_dev) |
| |
| def _round(self, dev=gpu_dev): |
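        """Round checked against np.round on random gaussian data (exact .5
        ties are vanishingly unlikely, so tie-breaking is not exercised)."""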
        x = tensor.Tensor(shape=(3, 4, 5), device=dev).gaussian(0, 1)
| y = tensor._call_singa_func(singa_api.Round, x.data) |
| np.testing.assert_array_almost_equal(np.round(tensor.to_numpy(x)), |
| tensor.to_numpy(y)) |
| |
| def test_round_even_cpu(self): |
| self._round_even(cpu_dev) |
| |
| @unittest.skipIf(not singa_api.USE_CUDA, 'CUDA is not enabled') |
| def test_round_even_gpu(self): |
| self._round_even(gpu_dev) |
| |
| def _round_even(self, dev=gpu_dev): |
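        """RoundE rounds half to even (banker's rounding), e.g. 0.5 -> 0,
        1.5 -> 2, 2.5 -> 2 and -2.5 -> -2."""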
        q = np.array([0.1, 0.5, 0.9, 1.2, 1.5,
                      1.8, 2.3, 2.5, 2.7, -1.1,
                      -1.5, -1.9, -2.2, -2.5, -2.8]).astype(np.float32)
        ans = np.array([0., 0., 1., 1., 2.,
                        2., 2., 2., 3., -1.,
                        -2., -2., -2., -2., -3.]).astype(np.float32)
| |
| x = tensor.Tensor(shape=q.shape, device=dev) |
| x.copy_from_numpy(q) |
| y = tensor._call_singa_func(singa_api.RoundE, x.data) |
| np.testing.assert_array_almost_equal(ans, tensor.to_numpy(y)) |
| |
| |
| if __name__ == '__main__': |
| unittest.main() |