| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================= |
| |
| import unittest |
| from builtins import str |
| |
| from singa import tensor |
| from singa import singa_wrap as singa |
| from singa import autograd |
| from singa import singa_wrap |
| from cuda_helper import gpu_dev, cpu_dev |
| |
| import numpy as np |
| |
# Alias for the tensor class exposed by the singa_wrap module.
CTensor = singa.Tensor

# Turn on autograd's training flag for every test in this module.
autograd.training = True

# Module-level tensor built from [2, 1, 2, 2] (presumably its shape) and
# filled by singa.Gaussian(0.0, 1.0, ...).
dy = CTensor([2, 1, 2, 2])
singa.Gaussian(0.0, 1.0, dy)
| |
| |
| def _tuple_to_string(t): |
| lt = [str(x) for x in t] |
| return '(' + ', '.join(lt) + ')' |
| |
| |
def axis_helper(y_shape, x_shape):
    """
    Find the axes of the result along which x was broadcast.

    The two shapes are aligned at their trailing dimension. An axis of
    the result is reported when x has no dimension at that position or
    when the aligned sizes differ.
    Args:
        y_shape: the shape of the result
        x_shape: the shape of x
    Return:
        a tuple of the broadcast axes of y_shape, in ascending order
    """
    # Number of leading result axes that have no counterpart in x.
    offset = len(y_shape) - len(x_shape)
    return tuple(
        axis for axis in range(len(y_shape))
        if axis < offset or x_shape[axis - offset] != y_shape[axis])
| |
| |
def prepare_inputs_targets_for_rnn_test(dev):
    """Create the tensors shared by the RNN/LSTM tests on ``dev``.

    Args:
        dev: device handed to every ``tensor.Tensor`` created here.
    Return:
        a tuple ``(inputs, targets, h0)``: a list of three random
        float32 (2, 3) tensors, a list of three random float32 (2, 2)
        tensors, and one all-zero float32 (2, 2) tensor.
    """

    def _on_device(array):
        return tensor.Tensor(device=dev, data=array)

    # Draw the inputs before the targets so the order of random draws
    # matches the order in which the arrays are consumed below.
    input_arrays = [
        np.random.random((2, 3)).astype(np.float32) for _ in range(3)
    ]
    hidden_array = np.zeros((2, 2)).astype(np.float32)
    target_arrays = [
        np.random.random((2, 2)).astype(np.float32) for _ in range(3)
    ]

    inputs = [_on_device(array) for array in input_arrays]
    h0 = _on_device(hidden_array)
    targets = [_on_device(array) for array in target_arrays]
    return inputs, targets, h0
| |
| |
| class TestPythonOperation(unittest.TestCase): |
| |
def check_shape(self, actual, expect):
    """Assert that two shapes are equal, reporting both on failure.

    Args:
        actual: the shape produced by the code under test.
        expect: the shape the test expects.
    """
    # The message is built eagerly, so both arguments must be iterable.
    self.assertEqual(
        actual, expect, 'shape mismatch, actual shape is %s'
        ' expected is %s' %
        (_tuple_to_string(actual), _tuple_to_string(expect)))
| |
def _greater_helper(self, dev):
    """Compare ``autograd.greater`` on ``dev`` with ``np.greater``."""
    lhs_np = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, 0.9],
                      dtype=np.float32).reshape(3, 2)
    rhs_np = np.array([0, -0.3, 0, 0.1, 0, 0.9],
                      dtype=np.float32).reshape(3, 2)
    expected = np.greater(lhs_np, rhs_np)

    lhs = tensor.from_numpy(lhs_np)
    rhs = tensor.from_numpy(rhs_np)
    for operand in (lhs, rhs):
        operand.to_device(dev)

    outcome = autograd.greater(lhs, rhs)

    np.testing.assert_array_almost_equal(tensor.to_numpy(outcome),
                                         expected,
                                         decimal=5)
| |
def test_Greater_cpu(self):
    # Run the greater-than check on the CPU device.
    self._greater_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Greater_gpu(self):
    # Run the greater-than check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._greater_helper(gpu_dev)
| |
def _conv2d_helper(self, dev):
    """Shape-check ``autograd.Conv2d`` forward and backward on ``dev``."""
    # Conv2d(in_channels, out_channels, kernel_size)
    conv_with_bias = autograd.Conv2d(3, 1, 2)
    conv_no_bias = autograd.Conv2d(3, 1, 2, bias=False)

    x = tensor.Tensor(shape=(2, 3, 3, 3), device=dev)
    x.gaussian(0.0, 1.0)

    upstream = tensor.Tensor(shape=(2, 1, 2, 2), device=dev)
    upstream.gaussian(0.0, 1.0)

    out = conv_with_bias(x)  # PyTensor
    dx, dW, db = out.creator.backward(upstream.data)  # CTensor

    self.check_shape(out.shape, (2, 1, 2, 2))
    expected_grad_shapes = (
        (dx, (2, 3, 3, 3)),
        (dW, (1, 3, 2, 2)),
        (db, (1,)),
    )
    for grad, expected in expected_grad_shapes:
        self.check_shape(grad.shape(), expected)

    # The bias-free layer is only exercised in the forward direction.
    out_no_bias = conv_no_bias(x)
    self.check_shape(out_no_bias.shape, (2, 1, 2, 2))
| |
def test_conv2d_cpu(self):
    # Run the Conv2d shape check on the CPU device.
    self._conv2d_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_conv2d_gpu(self):
    # Run the Conv2d shape check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._conv2d_helper(gpu_dev)
| |
def _conv_same_pad(self, dev, pad_mode, is_2d):
    """Shape-check ``autograd._Conv2d`` with an extra odd-padding tuple.

    Args:
        dev: device the tensors live on; also selects the handle class.
        pad_mode: "SAME_LOWER" selects one odd-padding tuple, any other
            value selects the other one.
        is_2d: True uses a height of 32 with a 4x4 kernel, False uses a
            height of 1 with a 1x4 kernel.
    """
    is_lower = pad_mode == "SAME_LOWER"
    if is_2d:
        x_h, w_h, k_h, p_h = 32, 4, 4, 1
        o_p = (0, 1, 0, 1) if is_lower else (1, 0, 1, 0)
    else:
        x_h, w_h, k_h, p_h = 1, 1, 1, 0
        o_p = (0, 0, 0, 1) if is_lower else (0, 0, 1, 0)

    x = tensor.Tensor(shape=(3, 3, x_h, 32), device=dev)
    x.gaussian(0.0, 1.0)

    w = tensor.Tensor(shape=(3, 3, w_h, 4), device=dev)
    w.gaussian(0.0, 1.0)

    # with the same padding, the padding should be 3
    # for SAME_UPPER, is (1, 1) + (0, 1)
    # for SAME_LOWER, is (1, 1) + (1, 0)

    kernel, padding, stride = (k_h, 4), (p_h, 1), (1, 1)
    group = 1
    bias = False
    in_channels = x.shape[1]
    w_shape = w.shape
    out_channels = w_shape[0]
    assert w_shape[1] == in_channels // group

    # Only the selected branch is evaluated, so the Cudnn handle class
    # is not looked up on the CPU path.
    handle_cls = (singa.ConvHandle
                  if dev == cpu_dev else singa.CudnnConvHandle)
    handle = handle_cls(x.data, kernel, stride, padding, in_channels,
                        out_channels, bias, group)
    y = autograd._Conv2d(handle, o_p)(x, w)[0]

    upstream = tensor.from_numpy(np.ones((3, 3, x_h, 32), dtype=np.float32))
    upstream.to_device(dev)

    dx, dW = y.creator.backward(upstream.data)
    # Output and input gradient keep the input's shape.
    self.check_shape(y.shape, (3, 3, x_h, 32))
    self.check_shape(dx.shape(), (3, 3, x_h, 32))
    self.check_shape(dW.shape(), (3, 3, w_h, 4))
| |
def test_conv2d_same_pad_cpu(self):
    # 2-D same-padding convolution on the CPU, both pad modes in turn.
    for mode in ("SAME_LOWER", "SAME_UPPER"):
        self._conv_same_pad(cpu_dev, mode, True)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_conv2d_same_pad_gpu(self):
    # 2-D same-padding convolution on the GPU, both pad modes in turn;
    # skipped when singa_wrap.USE_CUDA is false.
    for mode in ("SAME_LOWER", "SAME_UPPER"):
        self._conv_same_pad(gpu_dev, mode, True)
| |
def test_conv1d_same_pad_cpu(self):
    # 1-D (height 1) same-padding convolution on the CPU, both pad modes.
    for mode in ("SAME_LOWER", "SAME_UPPER"):
        self._conv_same_pad(cpu_dev, mode, False)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_conv1d_same_pad_gpu(self):
    # 1-D (height 1) same-padding convolution on the GPU, both pad
    # modes; skipped when singa_wrap.USE_CUDA is false.
    for mode in ("SAME_LOWER", "SAME_UPPER"):
        self._conv_same_pad(gpu_dev, mode, False)
| |
def _pooling_same_pad(self, dev, pad_mode, is_2d):
    """Shape-check ``autograd._Pooling2d`` with an extra odd-padding tuple.

    Args:
        dev: device the tensors live on; also selects the handle class.
        pad_mode: "SAME_LOWER" selects one odd-padding tuple, any other
            value selects the other one.
        is_2d: True uses a height of 32 with a 4x4 kernel, False uses a
            height of 1 with a 1x4 kernel.
    """
    is_lower = pad_mode == "SAME_LOWER"
    if is_2d:
        x_h, k_h, p_h = 32, 4, 1
        o_p = (0, 1, 0, 1) if is_lower else (1, 0, 1, 0)
    else:
        x_h, k_h, p_h = 1, 1, 0
        o_p = (0, 0, 0, 1) if is_lower else (0, 0, 1, 0)

    x = tensor.Tensor(shape=(3, 3, x_h, 32), device=dev)
    x.gaussian(0.0, 1.0)

    # with the same padding, the padding should be 3
    # for SAME_UPPER, is (1, 1) + (0, 1)
    # for SAME_LOWER, is (1, 1) + (1, 0)

    x_shape = x.shape
    kernel, stride = (k_h, 4), (1, 1)
    # we add 4 padding here and hope the conv and trim one padding then
    padding = (p_h, 1)

    # Only the selected branch is evaluated, so the Cudnn handle class
    # is not looked up on the CPU path.
    handle_cls = (singa.PoolingHandle
                  if dev == cpu_dev else singa.CudnnPoolingHandle)
    handle = handle_cls(x.data, kernel, stride, padding, True)

    y = autograd._Pooling2d(handle, o_p)(x)[0]

    upstream = tensor.from_numpy(np.ones((3, 3, x_h, 32), dtype=np.float32))
    upstream.to_device(dev)

    dx = y.creator.backward(upstream.data)
    # Output and input gradient keep the input's shape.
    self.check_shape(y.shape, (3, 3, x_h, 32))
    self.check_shape(dx.shape(), (3, 3, x_h, 32))
| |
def test_pooling2d_same_pad_cpu(self):
    # 2-D same-padding pooling on the CPU, both pad modes in turn.
    for mode in ("SAME_LOWER", "SAME_UPPER"):
        self._pooling_same_pad(cpu_dev, mode, True)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_pooling2d_same_pad_gpu(self):
    # 2-D same-padding pooling on the GPU, both pad modes in turn;
    # skipped when singa_wrap.USE_CUDA is false.
    for mode in ("SAME_LOWER", "SAME_UPPER"):
        self._pooling_same_pad(gpu_dev, mode, True)
| |
def test_pooling1d_same_pad_cpu(self):
    # 1-D (height 1) same-padding pooling on the CPU, both pad modes.
    for mode in ("SAME_LOWER", "SAME_UPPER"):
        self._pooling_same_pad(cpu_dev, mode, False)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_pooling1d_same_pad_gpu(self):
    # 1-D (height 1) same-padding pooling on the GPU, both pad modes;
    # skipped when singa_wrap.USE_CUDA is false.
    for mode in ("SAME_LOWER", "SAME_UPPER"):
        self._pooling_same_pad(gpu_dev, mode, False)
| |
def _sum_helper(self, dev):
    """Check ``autograd.sum`` of two tensors and both gradients on ``dev``."""
    a_np = np.array([0.1, -1.0, 0.4, 4.0, -0.9, 9.0],
                    dtype=np.float32).reshape(3, 2)
    b_np = np.array([0.1, 1.0, 0.4, 4.0, 0.9, 9.0],
                    dtype=np.float32).reshape(3, 2)
    expected = a_np + b_np
    # The all-ones upstream gradient is also the expected gradient of
    # each operand.
    ones_np = np.ones((3, 2), dtype=np.float32)

    a = tensor.from_numpy(a_np)
    b = tensor.from_numpy(b_np)
    upstream = tensor.from_numpy(ones_np)
    for item in (a, b, upstream):
        item.to_device(dev)

    total = autograd.sum(a, b)
    dx0, dx1 = total.creator.backward(upstream.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(total),
                                         expected,
                                         decimal=5)
    for raw_grad in (dx0, dx1):
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(tensor.from_raw_tensor(raw_grad)),
            ones_np,
            decimal=5)
| |
def test_sum_cpu(self):
    # Run the sum check on the CPU device.
    self._sum_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_sum_gpu(self):
    # Run the sum check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._sum_helper(gpu_dev)
| |
def _SeparableConv2d_helper(self, dev):
    """Shape-check ``autograd.SeparableConv2d`` and its two stages on ``dev``."""
    # SeparableConv2d(in_channels, out_channels, kernel_size)
    in_channels = 1 if dev == cpu_dev else 8
    separ_conv = autograd.SeparableConv2d(in_channels, 16, 3, padding=1)

    x = tensor.Tensor(
        device=dev,
        data=np.random.random((10, in_channels, 28, 28)).astype(np.float32))

    depth_out = separ_conv.depthwise_conv(x)
    point_out = separ_conv.point_conv(depth_out)

    # Back-propagate through the point-wise stage first (its own output
    # serves as the upstream gradient), then through the depth-wise one.
    d_depth_out, d_point_w = point_out.creator.backward(point_out.data)
    dx, d_depth_w = depth_out.creator.backward(d_depth_out)

    self.check_shape(point_out.shape, (10, 16, 28, 28))

    self.check_shape(d_depth_out.shape(), (10, in_channels, 28, 28))
    self.check_shape(d_point_w.shape(), (16, in_channels, 1, 1))

    self.check_shape(dx.shape(), (10, in_channels, 28, 28))
    self.check_shape(d_depth_w.shape(), (in_channels, 1, 3, 3))

    # The layer called as a whole must give the same output shape.
    fused_out = separ_conv(x)
    self.check_shape(fused_out.shape, (10, 16, 28, 28))
| |
def test_SeparableConv2d_cpu(self):
    # Run the SeparableConv2d shape check on the CPU device.
    self._SeparableConv2d_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_SeparableConv2d_gpu(self):
    # Run the SeparableConv2d shape check on the GPU device; skipped
    # when singa_wrap.USE_CUDA is false.
    self._SeparableConv2d_helper(gpu_dev)
| |
def _batchnorm2d_helper(self, dev):
    """Shape-check ``autograd.BatchNorm2d(3)`` forward and backward on ``dev``."""
    bn = autograd.BatchNorm2d(3)

    x = tensor.Tensor(shape=(2, 3, 3, 3), device=dev)
    x.gaussian(0.0, 1.0)

    # A clone of the input doubles as the upstream gradient.
    upstream = x.clone().data

    y = bn(x)
    dx, ds, db = y.creator.backward(upstream)

    self.check_shape(y.shape, (2, 3, 3, 3))
    expected_grad_shapes = (
        (dx, (2, 3, 3, 3)),
        (ds, (3,)),
        (db, (3,)),
    )
    for grad, expected in expected_grad_shapes:
        self.check_shape(grad.shape(), expected)
| |
def test_batchnorm2d_cpu(self):
    # Run the BatchNorm2d shape check on the CPU device.
    self._batchnorm2d_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_batchnorm2d_gpu(self):
    # Run the BatchNorm2d shape check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._batchnorm2d_helper(gpu_dev)
| |
def gradients_check(self,
                    func,
                    param,
                    autograds,
                    h=0.0005,
                    df=1,
                    dev=cpu_dev):
    """Compare analytic gradients with central finite differences.

    Each element of ``param`` is shifted by ``+h`` and ``-h`` in turn,
    ``func`` is re-evaluated at both points, and the resulting estimate
    ``sum((f(p + h) - f(p - h)) * df) / (2 * h)`` is compared with the
    matching entry of ``autograds``.

    Args:
        func: zero-argument callable returning a tensor; it must read
            ``param`` on every call.
        param: PyTensor that is perturbed in place.
        autograds: numpy array of analytic gradients, indexed like
            ``param``.
        h: step size of the finite difference.
        df: factor applied to ``f(p + h) - f(p - h)`` before summing.
        dev: device the perturbation tensor is moved to.
    """
    p = tensor.to_numpy(param)
    it = np.nditer(p, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        # One-hot perturbation of size h at the current element.
        diff = np.zeros_like(p)
        diff[idx] += h
        diff = tensor.from_numpy(diff)
        diff.to_device(dev)

        param += diff
        pos = tensor.to_numpy(func())

        # Two subtractions move the element from p + h to p - h.
        param -= diff
        param -= diff
        neg = tensor.to_numpy(func())

        # Put the element back (up to float rounding). Without this each
        # probed element stays at p - h and every later estimate is
        # taken at a drifted parameter point.
        param += diff

        numerical_grad = np.sum((pos - neg) * df) / (2 * h)
        # Absolute agreement to two decimal places.
        self.assertAlmostEqual(autograds[idx] - numerical_grad,
                               0.,
                               places=2)

        it.iternext()
| |
def _vanillaRNN_gpu_tiny_ops_shape_check_helper(self, dev):
    """Check that every gradient of an ``autograd.RNN(3, 2)`` loss has its tensor's shape."""
    inputs, target, h0 = prepare_inputs_targets_for_rnn_test(dev)
    rnn = autograd.RNN(3, 2)

    hs, _ = rnn(inputs, h0)

    # Sum the per-step softmax cross-entropy losses.
    total = autograd.softmax_cross_entropy(hs[0], target[0])
    for step in range(1, len(hs)):
        step_loss = autograd.softmax_cross_entropy(hs[step], target[step])
        total = autograd.add(total, step_loss)

    for param, grad in autograd.backward(total):
        self.check_shape(param.shape, grad.shape)
| |
def test_vanillaRNN_gpu_tiny_ops_shape_check_cpu(self):
    # Run the vanilla RNN gradient-shape check on the CPU device.
    self._vanillaRNN_gpu_tiny_ops_shape_check_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_vanillaRNN_gpu_tiny_ops_shape_check_gpu(self):
    # Run the vanilla RNN gradient-shape check on the GPU device;
    # skipped when singa_wrap.USE_CUDA is false.
    self._vanillaRNN_gpu_tiny_ops_shape_check_helper(gpu_dev)
| |
def _LSTM_gpu_tiny_ops_shape_check_helper(self, dev):
    """Check that every gradient of an ``autograd.LSTM(3, 2)`` loss has its tensor's shape."""
    inputs, target, h0 = prepare_inputs_targets_for_rnn_test(dev)
    # NOTE(review): the cell state is (2, 1) here but (2, 2) in
    # _numerical_gradients_check_for_lstm_helper; confirm it is intended.
    cell_np = np.random.random((2, 1)).astype(np.float32)
    c0 = tensor.Tensor(device=dev, data=cell_np)

    rnn = autograd.LSTM(3, 2)

    hs, _, _ = rnn(inputs, (h0, c0))

    # Sum the per-step softmax cross-entropy losses.
    total = autograd.softmax_cross_entropy(hs[0], target[0])
    for step in range(1, len(hs)):
        step_loss = autograd.softmax_cross_entropy(hs[step], target[step])
        total = autograd.add(total, step_loss)

    for param, grad in autograd.backward(total):
        self.check_shape(param.shape, grad.shape)
| |
def test_LSTM_gpu_tiny_ops_shape_check_cpu(self):
    # Run the LSTM gradient-shape check on the CPU device.
    self._LSTM_gpu_tiny_ops_shape_check_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_LSTM_gpu_tiny_ops_shape_check_gpu(self):
    # Run the LSTM gradient-shape check on the GPU device; skipped
    # when singa_wrap.USE_CUDA is false.
    self._LSTM_gpu_tiny_ops_shape_check_helper(gpu_dev)
| |
def _numerical_gradients_check_for_vallina_rnn_helper(self, dev):
    """Compare autograd gradients of an ``autograd.RNN(3, 2)`` loss with finite differences."""
    inputs, target, h0 = prepare_inputs_targets_for_rnn_test(dev)

    rnn = autograd.RNN(3, 2)

    def summed_loss():
        # Fresh forward pass; gradients_check calls this again after
        # perturbing a parameter in place.
        hs, _ = rnn(inputs, h0)

        total = autograd.softmax_cross_entropy(hs[0], target[0])
        for step in range(1, len(hs)):
            step_loss = autograd.softmax_cross_entropy(
                hs[step], target[step])
            total = autograd.add(total, step_loss)
        return total

    analytic = autograd.gradients(summed_loss())

    for param in rnn.params:
        analytic_np = tensor.to_numpy(analytic[param])
        self.gradients_check(summed_loss, param, analytic_np, dev=dev)
| |
def test_numerical_gradients_check_for_vallina_rnn_cpu(self):
    # Run the vanilla RNN numerical gradient check on the CPU device.
    self._numerical_gradients_check_for_vallina_rnn_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_numerical_gradients_check_for_vallina_rnn_gpu(self):
    # Run the vanilla RNN numerical gradient check on the GPU device;
    # skipped when singa_wrap.USE_CUDA is false.
    self._numerical_gradients_check_for_vallina_rnn_helper(gpu_dev)
| |
def _numerical_gradients_check_for_lstm_helper(self, dev):
    """Compare autograd gradients of an ``autograd.LSTM(3, 2)`` loss with finite differences."""
    inputs, target, h0 = prepare_inputs_targets_for_rnn_test(dev)
    # All-zero initial cell state.
    c0 = tensor.Tensor(device=dev,
                       data=np.zeros((2, 2)).astype(np.float32))

    rnn = autograd.LSTM(3, 2)

    def summed_loss():
        # Fresh forward pass; gradients_check calls this again after
        # perturbing a parameter in place.
        hs, _, _ = rnn(inputs, (h0, c0))

        total = autograd.softmax_cross_entropy(hs[0], target[0])
        for step in range(1, len(hs)):
            step_loss = autograd.softmax_cross_entropy(
                hs[step], target[step])
            total = autograd.add(total, step_loss)
        return total

    analytic = autograd.gradients(summed_loss())

    for param in rnn.params:
        analytic_np = tensor.to_numpy(analytic[param])
        self.gradients_check(summed_loss, param, analytic_np, dev=dev)
| |
def test_numerical_gradients_check_for_lstm_cpu(self):
    # Run the LSTM numerical gradient check on the CPU device.
    self._numerical_gradients_check_for_lstm_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_numerical_gradients_check_for_lstm_gpu(self):
    # Run the LSTM numerical gradient check on the GPU device; skipped
    # when singa_wrap.USE_CUDA is false.
    self._numerical_gradients_check_for_lstm_helper(gpu_dev)
| |
def _MeanSquareError_helper(self, dev):
    """Check the value of ``autograd.mse_loss`` and its gradient shape on ``dev``."""
    pred_np = np.array([4.3, 5.4, 3.3, 3.6, 5.7, 6.0],
                       dtype=np.float32).reshape(3, 2)
    target_np = np.array([4.4, 5.3, 3.2, 3.7, 5.4, 6.3],
                         dtype=np.float32).reshape(3, 2)
    pred = tensor.from_numpy(pred_np)
    target = tensor.from_numpy(target_np)
    for item in (pred, target):
        item.to_device(dev)

    loss = autograd.mse_loss(pred, target)
    dx = loss.creator.backward()[0]

    loss_value = tensor.to_numpy(loss)[0]
    self.assertAlmostEqual(loss_value, 0.0366666, places=4)
    self.check_shape(dx.shape(), (3, 2))
| |
def test_MeanSquareError_cpu(self):
    # Run the mean-square-error check on the CPU device.
    self._MeanSquareError_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_MeanSquareError_gpu(self):
    # Run the mean-square-error check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._MeanSquareError_helper(gpu_dev)
| |
def _Abs_helper(self, dev):
    """Check ``autograd.abs`` output values and gradient shape on ``dev``."""
    x_np = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5],
                    dtype=np.float32).reshape(3, 2)
    expected = np.array([0.8, 1.2, 3.3, 3.6, 0.5, 0.5],
                        dtype=np.float32).reshape(3, 2)
    x = tensor.from_numpy(x_np)
    x.to_device(dev)

    out = autograd.abs(x)
    # The input doubles as the upstream gradient; only the shape of the
    # result is checked.
    dx = out.creator.backward(x.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out), expected)
    self.check_shape(dx.shape(), (3, 2))
| |
def test_Abs_cpu(self):
    # Run the abs check on the CPU device.
    self._Abs_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Abs_gpu(self):
    # Run the abs check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Abs_helper(gpu_dev)
| |
def _Mean_helper(self, dev):
    """Check ``autograd.mean`` of two tensors and both gradients on ``dev``."""
    a_np = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, 0.9],
                    dtype=np.float32).reshape(3, 2)
    b_np = np.array([0, -0.3, 0, 0.1, 0, 0.9],
                    dtype=np.float32).reshape(3, 2)
    expected = (a_np + b_np) / 2
    # With an all-ones upstream gradient each operand receives 1/2.
    expected_grad = np.ones(a_np.shape) / 2

    a = tensor.from_numpy(a_np)
    b = tensor.from_numpy(b_np)
    for operand in (a, b):
        operand.to_device(dev)

    averaged = autograd.mean(a, b)
    upstream = tensor.from_numpy(np.ones((3, 2)).astype(np.float32))
    upstream.to_device(dev)
    dx0, dx1 = averaged.creator.backward(upstream.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(averaged),
                                         expected,
                                         decimal=5)
    for raw_grad in (dx0, dx1):
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(tensor.from_raw_tensor(raw_grad)),
            expected_grad,
            decimal=5)
| |
def test_Mean_cpu(self):
    # Run the mean check on the CPU device.
    self._Mean_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Mean_gpu(self):
    # Run the mean check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Mean_helper(gpu_dev)
| |
def _Exp_helper(self, dev):
    """Check ``autograd.exp`` output values and gradient shape on ``dev``."""
    x_np = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5],
                    dtype=np.float32).reshape(3, 2)
    expected = np.exp(x_np)
    x = tensor.from_numpy(x_np)
    x.to_device(dev)

    out = autograd.exp(x)
    # The input doubles as the upstream gradient; only the shape of the
    # result is checked.
    dx = out.creator.backward(x.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected,
                                         decimal=5)
    self.check_shape(dx.shape(), (3, 2))
| |
def test_Exp_cpu(self):
    # Run the exp check on the CPU device.
    self._Exp_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Exp_gpu(self):
    # Run the exp check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Exp_helper(gpu_dev)
| |
def _Identity_helper(self, dev):
    """Check ``autograd.identity`` output and gradient on ``dev``."""
    x_np = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, 0.9],
                    dtype=np.float32).reshape(3, 2)
    expected = x_np.copy()
    # An all-ones upstream gradient is expected to pass through unchanged.
    expected_grad = np.ones(x_np.shape)
    x = tensor.from_numpy(x_np)
    x.to_device(dev)

    out = autograd.identity(x)
    upstream = tensor.from_numpy(np.ones((3, 2)).astype(np.float32))
    upstream.to_device(dev)
    dx = out.creator.backward(upstream.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(dx)),
        expected_grad,
        decimal=5)
    self.check_shape(dx.shape(), (3, 2))
| |
def test_Identity_cpu(self):
    # Run the identity check on the CPU device.
    self._Identity_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Identity_gpu(self):
    # Run the identity check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Identity_helper(gpu_dev)
| |
def _LeakyRelu_helper(self, dev):
    """Check ``autograd.leakyrelu`` output values and gradient shape on ``dev``."""
    x_np = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5],
                    dtype=np.float32).reshape(3, 2)
    # Expected output: negative inputs scaled by 0.01, others unchanged.
    expected = np.array([0.8, -0.012, 3.3, -0.036, -0.005, 0.5],
                        dtype=np.float32).reshape(3, 2)
    x = tensor.from_numpy(x_np)
    x.to_device(dev)

    out = autograd.leakyrelu(x)

    # The input doubles as the upstream gradient; only the shape of the
    # result is checked.
    dx = out.creator.backward(x.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out), expected)
    self.check_shape(dx.shape(), (3, 2))
| |
def test_LeakyRelu_cpu(self):
    # Run the leaky-ReLU check on the CPU device.
    self._LeakyRelu_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_LeakyRelu_gpu(self):
    # Run the leaky-ReLU check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._LeakyRelu_helper(gpu_dev)
| |
def _Relu_helper(self, dev):
    """Check ``autograd.relu`` forward output and input gradient on ``dev``."""
    x_np = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.maximum(x_np, 0)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.relu(x)
    raw_dx = y.creator.backward(upstream.data)

    # Gradient is 1 where x > 0 and 0 elsewhere, times the upstream gradient.
    positive_mask = (x_np > 0).astype(np.float32)
    expected_dx = np.multiply(positive_mask, upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Relu_cpu(self):
    # Run the ReLU check on the CPU device.
    self._Relu_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Relu_gpu(self):
    # Run the ReLU check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Relu_helper(gpu_dev)
| |
def _Cos_helper(self, dev):
    """Check ``autograd.cos`` forward output and input gradient on ``dev``."""
    x_np = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.cos(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.cos(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx cos(x) = -sin(x), times the upstream gradient.
    expected_dx = np.multiply(-np.sin(x_np), upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Cos_cpu(self):
    # Run the cos check on the CPU device.
    self._Cos_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Cos_gpu(self):
    # Run the cos check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Cos_helper(gpu_dev)
| |
def _Cosh_helper(self, dev):
    """Check ``autograd.cosh`` forward output and input gradient on ``dev``."""
    x_np = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.cosh(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.cosh(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx cosh(x) = sinh(x), times the upstream gradient.
    expected_dx = np.multiply(np.sinh(x_np), upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Cosh_cpu(self):
    # Run the cosh check on the CPU device.
    self._Cosh_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Cosh_gpu(self):
    # Run the cosh check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Cosh_helper(gpu_dev)
| |
def _Acos_helper(self, dev):
    """Check ``autograd.acos`` forward output and input gradient on ``dev``."""
    x_np = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, 0.9],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.arccos(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.acos(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx acos(x) = -1 / sqrt(1 - x^2), times the upstream gradient.
    local_grad = -1.0 / np.sqrt(1.0 - np.square(x_np))
    expected_dx = np.multiply(local_grad, upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Acos_cpu(self):
    # Run the acos check on the CPU device.
    self._Acos_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Acos_gpu(self):
    # Run the acos check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Acos_helper(gpu_dev)
| |
def _Acosh_helper(self, dev):
    """Check ``autograd.acosh`` forward output and input gradient on ``dev``."""
    # All inputs are greater than 1, inside the domain of arccosh.
    x_np = np.array([1.1, 1.5, 1.9, 2.2, 2.5, 2.8],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.arccosh(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.acosh(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx acosh(x) = 1 / (sqrt(x - 1) * sqrt(x + 1)), times the
    # upstream gradient.
    local_grad = 1.0 / np.multiply(np.sqrt(x_np - 1.0),
                                   np.sqrt(x_np + 1.0))
    expected_dx = np.multiply(local_grad, upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Acosh_cpu(self):
    # Run the acosh check on the CPU device.
    self._Acosh_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Acosh_gpu(self):
    # Run the acosh check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Acosh_helper(gpu_dev)
| |
def _Sin_helper(self, dev):
    """Check ``autograd.sin`` forward output and input gradient on ``dev``."""
    x_np = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.sin(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.sin(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx sin(x) = cos(x), times the upstream gradient.
    expected_dx = np.multiply(np.cos(x_np), upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Sin_cpu(self):
    # Run the sin check on the CPU device.
    self._Sin_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Sin_gpu(self):
    # Run the sin check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Sin_helper(gpu_dev)
| |
def _Sinh_helper(self, dev):
    """Check ``autograd.sinh`` forward output and input gradient on ``dev``."""
    x_np = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.sinh(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.sinh(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx sinh(x) = cosh(x), times the upstream gradient.
    expected_dx = np.multiply(np.cosh(x_np), upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Sinh_cpu(self):
    # Run the sinh check on the CPU device.
    self._Sinh_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Sinh_gpu(self):
    # Run the sinh check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Sinh_helper(gpu_dev)
| |
def _Asin_helper(self, dev):
    """Check ``autograd.asin`` forward output and input gradient on ``dev``."""
    x_np = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, 0.9],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.arcsin(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.asin(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx asin(x) = 1 / sqrt(1 - x^2), times the upstream gradient.
    local_grad = 1.0 / np.sqrt(1.0 - np.square(x_np))
    expected_dx = np.multiply(local_grad, upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Asin_cpu(self):
    # Run the asin check on the CPU device.
    self._Asin_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Asin_gpu(self):
    # Run the asin check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Asin_helper(gpu_dev)
| |
def _Asinh_helper(self, dev):
    """Check ``autograd.asinh`` forward output and input gradient on ``dev``."""
    x_np = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, 0.9],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.arcsinh(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.asinh(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx asinh(x) = 1 / sqrt(x^2 + 1), times the upstream gradient.
    local_grad = 1.0 / np.sqrt(np.square(x_np) + 1.0)
    expected_dx = np.multiply(local_grad, upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Asinh_cpu(self):
    # Run the asinh check on the CPU device.
    self._Asinh_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Asinh_gpu(self):
    # Run the asinh check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Asinh_helper(gpu_dev)
| |
def _Tan_helper(self, dev):
    """Check ``autograd.tan`` forward output and input gradient on ``dev``."""
    x_np = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.tan(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.tan(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx tan(x) = 1 / cos(x)^2, times the upstream gradient.
    local_grad = 1.0 / np.square(np.cos(x_np))
    expected_dx = np.multiply(local_grad, upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Tan_cpu(self):
    # Run the tan check on the CPU device.
    self._Tan_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Tan_gpu(self):
    # Run the tan check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Tan_helper(gpu_dev)
| |
def _Tanh_helper(self, dev):
    """Check ``autograd.tanh`` forward output and input gradient on ``dev``."""
    x_np = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.tanh(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.tanh(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx tanh(x) = 1 / cosh(x)^2, times the upstream gradient.
    local_grad = 1.0 / np.square(np.cosh(x_np))
    expected_dx = np.multiply(local_grad, upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Tanh_cpu(self):
    # Run the tanh check on the CPU device.
    self._Tanh_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Tanh_gpu(self):
    # Run the tanh check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Tanh_helper(gpu_dev)
| |
def _Atan_helper(self, dev):
    """Check ``autograd.atan`` forward output and input gradient on ``dev``."""
    x_np = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, 0.9],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.arctan(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.atan(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx atan(x) = 1 / (1 + x^2), times the upstream gradient.
    local_grad = 1.0 / (1.0 + np.square(x_np))
    expected_dx = np.multiply(local_grad, upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
def test_Atan_cpu(self):
    # Run the atan check on the CPU device.
    self._Atan_helper(cpu_dev)
| |
@unittest.skipUnless(singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Atan_gpu(self):
    # Run the atan check on the GPU device; skipped when
    # singa_wrap.USE_CUDA is false.
    self._Atan_helper(gpu_dev)
| |
def _Atanh_helper(self, dev):
    """Check ``autograd.atanh`` forward output and input gradient on ``dev``."""
    x_np = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, 0.9],
                    dtype=np.float32).reshape(3, 2)
    upstream_np = np.ones((3, 2), dtype=np.float32)
    expected_y = np.arctanh(x_np)

    x = tensor.from_numpy(x_np)
    upstream = tensor.from_numpy(upstream_np)
    x.to_device(dev)
    upstream.to_device(dev)

    y = autograd.atanh(x)
    raw_dx = y.creator.backward(upstream.data)

    # d/dx atanh(x) = 1 / (1 - x^2), times the upstream gradient.
    local_grad = 1.0 / (1.0 - np.square(x_np))
    expected_dx = np.multiply(local_grad, upstream_np)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y),
                                         expected_y,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)
| |
| def test_Atanh_cpu(self): |
| self._Atanh_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Atanh_gpu(self): |
| self._Atanh_helper(gpu_dev) |
| |
def _Less_helper(self, dev):
    """Check ``autograd.less`` against ``np.less`` on ``dev``.

    Only the forward result is compared; no backward pass is run.
    """
    lhs_np = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, 0.9])
    lhs_np = lhs_np.reshape(3, 2).astype(np.float32)
    rhs_np = np.array([0, -0.3, 0, 0.1, 0, 0.9])
    rhs_np = rhs_np.reshape(3, 2).astype(np.float32)
    expected = lhs_np < rhs_np

    lhs = tensor.from_numpy(lhs_np)
    rhs = tensor.from_numpy(rhs_np)
    for operand in (lhs, rhs):
        operand.to_device(dev)

    outcome = autograd.less(lhs, rhs)
    np.testing.assert_array_almost_equal(tensor.to_numpy(outcome),
                                         expected,
                                         decimal=5)

def test_Less_cpu(self):
    # CPU entry point for the elementwise less-than check.
    self._Less_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Less_gpu(self):
    # GPU entry point for the elementwise less-than check.
    self._Less_helper(gpu_dev)
| |
def _Sub_helper(self, dev):
    """Check ``autograd.sub`` forward and both input gradients on ``dev``.

    With an all-ones upstream gradient the expected gradients are +1 for
    the minuend and -1 for the subtrahend.
    """
    minuend = np.array([7, -5, 0.2, -0.1, 0.3, 4])
    minuend = minuend.reshape(3, 2).astype(np.float32)
    subtrahend = np.array([0.6, -1.3, 0.1, -0.1, 0.4, 0.3])
    subtrahend = subtrahend.reshape(3, 2).astype(np.float32)
    expected_out = minuend - subtrahend

    upstream = np.ones((3, 2), dtype=np.float32)
    a = tensor.from_numpy(minuend)
    b = tensor.from_numpy(subtrahend)
    grad_out = tensor.from_numpy(upstream)
    for t in (a, b, grad_out):
        t.to_device(dev)

    out = autograd.sub(a, b)
    raw_da, raw_db = out.creator.backward(grad_out.data)

    expected_da = upstream * 1.0
    expected_db = upstream * -1.0

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    for raw, expected in ((raw_da, expected_da), (raw_db, expected_db)):
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(tensor.from_raw_tensor(raw)),
            expected,
            decimal=5)

def test_Sub_cpu(self):
    # CPU entry point for the subtraction check.
    self._Sub_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Sub_gpu(self):
    # GPU entry point for the subtraction check.
    self._Sub_helper(gpu_dev)
| |
def _Pow_helper(self, dev):
    """Check ``autograd.pow`` forward and both input gradients on ``dev``.

    Bases are strictly positive so ``log(base)`` is finite. The forward
    result is compared to 5 decimals and the gradients to 4 decimals.
    """
    base_np = np.array([7, 5, 0.2, 0.1, 0.3, 4])
    base_np = base_np.reshape(3, 2).astype(np.float32)
    expo_np = np.array([-1.0, 2.0, -1.0, -2.1, 1.0, -2.0])
    expo_np = expo_np.reshape(3, 2).astype(np.float32)
    expected_out = np.power(base_np, expo_np)

    upstream = np.ones((3, 2), dtype=np.float32)
    base = tensor.from_numpy(base_np)
    expo = tensor.from_numpy(expo_np)
    grad_out = tensor.from_numpy(upstream)
    base.to_device(dev)
    expo.to_device(dev)
    grad_out.to_device(dev)

    out = autograd.pow(base, expo)
    raw_dbase, raw_dexpo = out.creator.backward(grad_out.data)

    # d/da a^b = b * a^(b-1);  d/db a^b = a^b * ln(a)
    slope_base = expo_np * np.power(base_np, (expo_np - 1.0))
    slope_expo = np.power(base_np, expo_np) * np.log(base_np)
    expected_dbase = slope_base * upstream
    expected_dexpo = slope_expo * upstream

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    gradient_checks = ((raw_dbase, expected_dbase),
                       (raw_dexpo, expected_dexpo))
    for raw, expected in gradient_checks:
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(tensor.from_raw_tensor(raw)),
            expected,
            decimal=4)

def test_Pow_cpu(self):
    # CPU entry point for the power check.
    self._Pow_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Pow_gpu(self):
    # GPU entry point for the power check.
    self._Pow_helper(gpu_dev)
| |
def _SoftSign_helper(self, dev):
    """Check ``autograd.softsign`` forward and backward results on ``dev``.

    Reference forward: ``x / (1 + |x|)``; reference gradient:
    ``1 / (1 + |x|)^2`` times an all-ones upstream gradient.
    """
    points = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5])
    points = points.reshape(3, 2).astype(np.float32)
    ones = np.ones((3, 2), dtype=np.float32)
    reference = points / (1 + np.absolute(points))

    x_t = tensor.from_numpy(points)
    dy_t = tensor.from_numpy(ones)
    x_t.to_device(dev)
    dy_t.to_device(dev)

    y_t = autograd.softsign(x_t)
    dx_raw = y_t.creator.backward(dy_t.data)

    reference_grad = (1.0 / np.square(np.absolute(points) + 1.0)) * ones

    def as_numpy(raw):
        # Wrap a raw backend tensor and copy it back to the host.
        return tensor.to_numpy(tensor.from_raw_tensor(raw))

    np.testing.assert_array_almost_equal(tensor.to_numpy(y_t),
                                         reference,
                                         decimal=5)
    np.testing.assert_array_almost_equal(as_numpy(dx_raw),
                                         reference_grad,
                                         decimal=5)

def test_SoftSign_cpu(self):
    # CPU entry point for the softsign check.
    self._SoftSign_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_SoftSign_gpu(self):
    # GPU entry point for the softsign check.
    self._SoftSign_helper(gpu_dev)
| |
def _SoftPlus_helper(self, dev):
    """Check ``autograd.softplus`` forward and backward results on ``dev``.

    Reference forward: ``log(exp(x) + 1)``; reference gradient: the
    logistic function ``1 / (1 + exp(-x))`` times an all-ones upstream
    gradient.
    """
    points = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5])
    points = points.reshape(3, 2).astype(np.float32)
    ones = np.ones((3, 2), dtype=np.float32)
    reference = np.log(np.exp(points) + 1)

    x_t = tensor.from_numpy(points)
    dy_t = tensor.from_numpy(ones)
    for t in (x_t, dy_t):
        t.to_device(dev)

    y_t = autograd.softplus(x_t)
    dx_raw = y_t.creator.backward(dy_t.data)

    logistic = 1.0 / (1.0 + np.exp(-points))
    reference_grad = logistic * ones

    np.testing.assert_array_almost_equal(tensor.to_numpy(y_t),
                                         reference,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(dx_raw)),
        reference_grad,
        decimal=5)

def test_SoftPlus_cpu(self):
    # CPU entry point for the softplus check.
    self._SoftPlus_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_SoftPlus_gpu(self):
    # GPU entry point for the softplus check.
    self._SoftPlus_helper(gpu_dev)
| |
def _unsqueeze_helper(self, dev):
    """Check ``autograd.unsqueeze`` with axes ``[0, 4]`` on ``dev``.

    A (1, 2, 3) input must become (1, 1, 2, 3, 1), and the gradient must
    come back in the input's (1, 2, 3) shape.
    """
    raw_values = [0.1, -1.0, 0.4, 4.0, -0.9, 9.0]

    source = np.array(raw_values).reshape(1, 2, 3).astype(np.float32)
    expanded = source.reshape(1, 1, 2, 3, 1)
    upstream = np.ones((1, 1, 2, 3, 1), dtype=np.float32)
    expected_grad = upstream.reshape(1, 2, 3)

    src_t = tensor.from_numpy(source)
    up_t = tensor.from_numpy(upstream)
    src_t.to_device(dev)
    up_t.to_device(dev)

    out = autograd.unsqueeze(src_t, [0, 4])
    raw_grad = out.creator.backward(up_t.data)

    comparisons = (
        (tensor.to_numpy(out), expanded),
        (tensor.to_numpy(tensor.from_raw_tensor(raw_grad)), expected_grad),
    )
    for actual, desired in comparisons:
        np.testing.assert_array_almost_equal(actual, desired, decimal=5)

def test_unsqueeze_cpu(self):
    # CPU entry point for the unsqueeze check.
    self._unsqueeze_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_unsqueeze_gpu(self):
    # GPU entry point for the unsqueeze check.
    self._unsqueeze_helper(gpu_dev)
| |
def _Sqrt_helper(self, dev):
    """Check ``autograd.sqrt`` forward and backward results on ``dev``.

    Inputs are strictly positive. The expected gradient is
    ``0.5 * x^(-0.5)`` times an all-ones upstream gradient.
    """
    radicands = np.array([0.1, 1.0, 0.4, 4.0, 0.9, 9.0])
    radicands = radicands.reshape(3, 2).astype(np.float32)
    upstream = np.ones((3, 2), dtype=np.float32)
    expected_out = np.sqrt(radicands)

    inp = tensor.from_numpy(radicands)
    grad_out = tensor.from_numpy(upstream)
    inp.to_device(dev)
    grad_out.to_device(dev)

    out = autograd.sqrt(inp)
    raw_dx = out.creator.backward(grad_out.data)

    local_slope = 0.5 * np.power(radicands, -0.5)
    expected_dx = local_slope * upstream

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)

def test_Sqrt_cpu(self):
    # CPU entry point for the square-root check.
    self._Sqrt_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Sqrt_gpu(self):
    # GPU entry point for the square-root check.
    self._Sqrt_helper(gpu_dev)
| |
def _transpose_helper(self, dev):
    """Check ``autograd.transpose`` with permutation (1, 2, 0) on ``dev``.

    The gradient is expected to be the upstream gradient permuted by
    (2, 0, 1), which is the inverse of the forward permutation.
    """
    forward_perm = (1, 2, 0)
    inverse_perm = (2, 0, 1)

    # Draw order (input first, then upstream gradient) is kept as-is.
    source = np.random.randn(3, 2, 1)
    permuted = source.transpose(*forward_perm)
    upstream = np.random.randn(*(permuted.shape))
    expected_grad = upstream.transpose(inverse_perm)

    src_t = tensor.from_numpy(source)
    up_t = tensor.from_numpy(upstream)
    for t in (src_t, up_t):
        t.to_device(dev)

    out = autograd.transpose(src_t, forward_perm)
    raw_grad = out.creator.backward(up_t.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         permuted,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_grad)),
        expected_grad,
        decimal=5)

def test_transpose_cpu(self):
    # CPU entry point for the transpose check.
    self._transpose_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_transpose_gpu(self):
    # GPU entry point for the transpose check.
    self._transpose_helper(gpu_dev)
| |
def _Sign_helper(self, dev):
    """Check ``autograd.sign`` forward and backward results on ``dev``.

    The forward output is compared with ``np.sign``; the expected
    gradient is all zeros.
    """
    points = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5])
    points = points.reshape(3, 2).astype(np.float32)
    ones = np.ones((3, 2), dtype=np.float32)
    expected_out = np.sign(points)
    expected_dx = ones * 0

    x_t = tensor.from_numpy(points)
    dy_t = tensor.from_numpy(ones)
    x_t.to_device(dev)
    dy_t.to_device(dev)

    y_t = autograd.sign(x_t)
    dx_raw = y_t.creator.backward(dy_t.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y_t),
                                         expected_out,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(dx_raw)),
        expected_dx,
        decimal=5)

def test_Sign_cpu(self):
    # CPU entry point for the sign check.
    self._Sign_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Sign_gpu(self):
    # GPU entry point for the sign check.
    self._Sign_helper(gpu_dev)
| |
def _Log_helper(self, dev):
    """Check ``autograd.log`` forward and backward results on ``dev``.

    Inputs are strictly positive. The expected gradient is ``1 / x``
    times an all-ones upstream gradient.
    """
    positives = np.array([0.1, 1.0, 0.4, 1.4, 0.9, 2.0])
    positives = positives.reshape(3, 2).astype(np.float32)
    upstream = np.ones((3, 2), dtype=np.float32)
    expected_out = np.log(positives)

    inp = tensor.from_numpy(positives)
    grad_out = tensor.from_numpy(upstream)
    for t in (inp, grad_out):
        t.to_device(dev)

    out = autograd.log(inp)
    raw_dx = out.creator.backward(grad_out.data)

    # d/dx log(x) = 1 / x
    expected_dx = (1.0 / positives) * upstream

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)

def test_Log_cpu(self):
    # CPU entry point for the natural-log check.
    self._Log_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Log_gpu(self):
    # GPU entry point for the natural-log check.
    self._Log_helper(gpu_dev)
| |
def _mul_helper(self, dev):
    """Check ``autograd.mul`` forward and both input gradients on ``dev``.

    For ``y = a * b`` the expected gradients are ``b * dy`` for ``a`` and
    ``a * dy`` for ``b``.
    """
    left_np = np.array([0.1, -1.0, 0.4, 4.0, -0.9, 9.0])
    left_np = left_np.reshape(3, 2).astype(np.float32)
    right_np = np.array([0.1, 1.0, 0.4, 4.0, 0.9, 9.0])
    right_np = right_np.reshape(3, 2).astype(np.float32)
    upstream = np.array([0.1, 1.0, 0.4, 4.0, 0.9, 9.0])
    upstream = upstream.reshape(3, 2).astype(np.float32)

    expected_out = left_np * right_np
    expected_dleft = right_np * upstream
    expected_dright = left_np * upstream

    left = tensor.from_numpy(left_np)
    right = tensor.from_numpy(right_np)
    grad_out = tensor.from_numpy(upstream)
    left.to_device(dev)
    right.to_device(dev)
    grad_out.to_device(dev)

    out = autograd.mul(left, right)
    raw_dleft, raw_dright = out.creator.backward(grad_out.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    for raw, expected in ((raw_dleft, expected_dleft),
                          (raw_dright, expected_dright)):
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(tensor.from_raw_tensor(raw)),
            expected,
            decimal=5)

def test_mul_cpu(self):
    # CPU entry point for the elementwise multiply check.
    self._mul_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_mul_gpu(self):
    # GPU entry point for the elementwise multiply check.
    self._mul_helper(gpu_dev)
| |
def _reshape_helper(self, dev):
    """Check ``autograd.reshape`` from (3, 2) to (2, 3) on ``dev``.

    The gradient is expected to be the upstream gradient reshaped back
    to the input's (3, 2) shape.
    """
    in_shape, out_shape = (3, 2), (2, 3)

    source = np.array([0.1, -1.0, 0.4, 4.0, -0.9, 9.0])
    source = source.reshape(*in_shape).astype(np.float32)
    expected_out = source.reshape(*out_shape)
    upstream = np.array([1, 2, 3, 4, 5, 6])
    upstream = upstream.reshape(*out_shape).astype(np.float32)
    expected_grad = upstream.reshape(*in_shape)

    src_t = tensor.from_numpy(source)
    up_t = tensor.from_numpy(upstream)
    src_t.to_device(dev)
    up_t.to_device(dev)

    out = autograd.reshape(src_t, out_shape)
    raw_grad = out.creator.backward(up_t.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_grad)),
        expected_grad,
        decimal=5)

def test_reshape_cpu(self):
    # CPU entry point for the reshape check.
    self._reshape_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_reshape_gpu(self):
    # GPU entry point for the reshape check.
    self._reshape_helper(gpu_dev)
| |
def _max_helper(self, dev):
    """Check two-input ``autograd.max`` forward and gradients on ``dev``.

    The expected gradient is 1 for whichever input is strictly larger at
    each position and 0 for the other; the data contains no ties.
    """
    first_np = np.array([0.1, 0.2, 2.0, 0.0, 0.1, 0.2])
    first_np = first_np.reshape(3, 2).astype(np.float32)
    second_np = np.array([1.0, 2.0, 1.0, 2.1, 0.0, 2.0])
    second_np = second_np.reshape(3, 2).astype(np.float32)
    expected_out = np.maximum(first_np, second_np)

    upstream = np.ones((3, 2), dtype=np.float32)
    first = tensor.from_numpy(first_np)
    second = tensor.from_numpy(second_np)
    grad_out = tensor.from_numpy(upstream)
    for t in (first, second, grad_out):
        t.to_device(dev)

    out = autograd.max(first, second)
    raw_d0, raw_d1 = out.creator.backward(grad_out.data)

    gap = first_np - second_np
    expected_d0 = np.where(gap > 0, 1, gap * 0)
    expected_d1 = np.where(gap < 0, 1, gap * 0)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    for raw, expected in ((raw_d0, expected_d0), (raw_d1, expected_d1)):
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(tensor.from_raw_tensor(raw)),
            expected,
            decimal=5)

def test_max_cpu(self):
    # CPU entry point for the two-input max check.
    self._max_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_max_gpu(self):
    # GPU entry point for the two-input max check.
    self._max_helper(gpu_dev)
| |
def _max_3inputs_helper(self, dev):
    """Check three-input ``autograd.max`` forward and gradients on ``dev``.

    Each position has a unique maximum, so each expected gradient is a
    one-hot mask marking where that input supplied the maximum.
    """
    host_inputs = [
        np.array([3, 2, 1]).astype(np.float32),
        np.array([1, 4, 4]).astype(np.float32),
        np.array([2, 5, 3]).astype(np.float32),
    ]
    expected_out = np.array([3, 5, 4]).astype(np.float32)
    upstream = np.array([1, 1, 1]).astype(np.float32)

    dev_inputs = [tensor.from_numpy(arr) for arr in host_inputs]
    grad_out = tensor.from_numpy(upstream)
    for t in dev_inputs:
        t.to_device(dev)
    grad_out.to_device(dev)

    out = autograd.max(*dev_inputs)
    raw_d0, raw_d1, raw_d2 = out.creator.backward(grad_out.data)

    expected_grads = (
        np.array([1, 0, 0]).astype(np.float32),
        np.array([0, 0, 1]).astype(np.float32),
        np.array([0, 1, 0]).astype(np.float32),
    )

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    for raw, expected in zip((raw_d0, raw_d1, raw_d2), expected_grads):
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(tensor.from_raw_tensor(raw)),
            expected,
            decimal=5)

def test_max_3inputs_cpu(self):
    # CPU entry point for the three-input max check.
    self._max_3inputs_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_max_3inputs_gpu(self):
    # GPU entry point for the three-input max check.
    self._max_3inputs_helper(gpu_dev)
| |
def _max_1inputs_helper(self, dev):
    """Check single-input ``autograd.max`` forward and backward on ``dev``.

    With one input the forward output is expected to equal the input.
    The backward result is compared against an all-ones gradient, which
    is the same expectation the sibling single-input ``min`` test uses.
    """
    data_0 = np.array([3, 2, 1]).astype(np.float32)
    XT = np.array([3, 2, 1]).astype(np.float32)

    DY = np.array([1, 1, 1]).astype(np.float32)
    x0 = tensor.from_numpy(data_0)
    dy = tensor.from_numpy(DY)
    x0.to_device(dev)
    dy.to_device(dev)

    result = autograd.max(x0)
    dx0 = result.creator.backward(dy.data)

    DX0 = np.array([1, 1, 1]).astype(np.float32)

    np.testing.assert_array_almost_equal(tensor.to_numpy(result),
                                         XT,
                                         decimal=5)
    # The gradient was computed but never compared, so the backward pass
    # went untested; assert it as the single-input min test does.
    np.testing.assert_array_almost_equal(tensor.to_numpy(
        tensor.from_raw_tensor(dx0)),
                                         DX0,
                                         decimal=5)

def test_max_1inputs_cpu(self):
    # CPU entry point for the single-input max check.
    self._max_1inputs_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_max_1inputs_gpu(self):
    # GPU entry point for the single-input max check.
    self._max_1inputs_helper(gpu_dev)
| |
def _Div_helper(self, dev):
    """Check ``autograd.div`` forward and both input gradients on ``dev``.

    For ``y = a / b`` the expected gradients are ``1 / b`` for ``a`` and
    ``-a / b^2`` for ``b``, each times an all-ones upstream gradient.
    """
    numer_np = np.array([7, -5, 0.2, -0.1, 0.3, 4])
    numer_np = numer_np.reshape(3, 2).astype(np.float32)
    denom_np = np.array([0.6, -1.3, 0.1, -0.1, 0.4, 0.3])
    denom_np = denom_np.reshape(3, 2).astype(np.float32)
    expected_out = numer_np / denom_np

    upstream = np.ones((3, 2), dtype=np.float32)
    numer = tensor.from_numpy(numer_np)
    denom = tensor.from_numpy(denom_np)
    grad_out = tensor.from_numpy(upstream)
    numer.to_device(dev)
    denom.to_device(dev)
    grad_out.to_device(dev)

    out = autograd.div(numer, denom)
    raw_dnumer, raw_ddenom = out.creator.backward(grad_out.data)

    slope_numer = 1.0 / denom_np
    slope_denom = -numer_np / np.square(denom_np)
    expected_dnumer = slope_numer * upstream
    expected_ddenom = slope_denom * upstream

    def as_numpy(raw):
        # Wrap a raw backend tensor and copy it back to the host.
        return tensor.to_numpy(tensor.from_raw_tensor(raw))

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    np.testing.assert_array_almost_equal(as_numpy(raw_dnumer),
                                         expected_dnumer,
                                         decimal=5)
    np.testing.assert_array_almost_equal(as_numpy(raw_ddenom),
                                         expected_ddenom,
                                         decimal=5)

def test_Div_cpu(self):
    # CPU entry point for the division check.
    self._Div_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_Div_gpu(self):
    # GPU entry point for the division check.
    self._Div_helper(gpu_dev)
| |
def _squeeze_helper(self, dev):
    """Check ``autograd.squeeze`` over axes ``[1, 3, 4]`` on ``dev``.

    A (3, 1, 2, 1, 1) input must collapse to (3, 2), and the gradient
    must come back in the original five-dimensional shape.
    """
    full_shape = (3, 1, 2, 1, 1)
    squeezed_shape = (3, 2)

    # Draw order (input first, then upstream gradient) is kept as-is.
    source = np.random.randn(*full_shape)
    expected_out = source.reshape(*squeezed_shape)
    upstream = np.random.randn(*squeezed_shape)
    expected_grad = upstream.reshape(*full_shape)

    src_t = tensor.from_numpy(source)
    up_t = tensor.from_numpy(upstream)
    for t in (src_t, up_t):
        t.to_device(dev)

    out = autograd.squeeze(src_t, [1, 3, 4])
    raw_grad = out.creator.backward(up_t.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_grad)),
        expected_grad,
        decimal=5)

def test_squeeze_cpu(self):
    # CPU entry point for the squeeze check.
    self._squeeze_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_squeeze_gpu(self):
    # GPU entry point for the squeeze check.
    self._squeeze_helper(gpu_dev)
| |
def _shape_helper(self, dev):
    """Check ``autograd.shape`` forward and backward results on ``dev``.

    The forward output is compared with the input's shape as a list.
    The backward result is compared directly (without tensor conversion)
    with the upstream gradient's shape list.
    """
    source = np.array([0.1, -1.0, 0.4, 4.0, -0.9, 9.0])
    source = source.reshape(3, 2).astype(np.float32)
    upstream = np.ones((3, 2), dtype=np.float32)
    expected_out = list(source.shape)
    expected_grad = list(upstream.shape)

    src_t = tensor.from_numpy(source)
    up_t = tensor.from_numpy(upstream)
    src_t.to_device(dev)
    up_t.to_device(dev)

    out = autograd.shape(src_t)
    back = out.creator.backward(up_t.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    # The backward value is compared as returned, not via from_raw_tensor.
    np.testing.assert_array_almost_equal(back, expected_grad, decimal=5)

def test_shape_cpu(self):
    # CPU entry point for the shape-operator check.
    self._shape_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_shape_gpu(self):
    # GPU entry point for the shape-operator check.
    self._shape_helper(gpu_dev)
| |
def _min_helper(self, dev):
    """Check two-input ``autograd.min`` forward and gradients on ``dev``.

    The expected gradient is 1 for whichever input is strictly smaller at
    each position and 0 for the other; the data contains no ties.
    """
    first_np = np.array([0.1, 0.2, 2.0, 0.0, 0.1, 0.2])
    first_np = first_np.reshape(3, 2).astype(np.float32)
    second_np = np.array([1.0, 2.0, 1.0, 2.1, 0.0, 2.0])
    second_np = second_np.reshape(3, 2).astype(np.float32)
    expected_out = np.minimum(first_np, second_np)

    upstream = np.ones((3, 2), dtype=np.float32)
    first = tensor.from_numpy(first_np)
    second = tensor.from_numpy(second_np)
    grad_out = tensor.from_numpy(upstream)
    for t in (first, second, grad_out):
        t.to_device(dev)

    out = autograd.min(first, second)
    raw_d0, raw_d1 = out.creator.backward(grad_out.data)

    gap = first_np - second_np
    expected_d0 = np.where(gap < 0, 1, gap * 0)
    expected_d1 = np.where(gap > 0, 1, gap * 0)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    for raw, expected in ((raw_d0, expected_d0), (raw_d1, expected_d1)):
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(tensor.from_raw_tensor(raw)),
            expected,
            decimal=5)

def test_min_cpu(self):
    # CPU entry point for the two-input min check.
    self._min_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_min_gpu(self):
    # GPU entry point for the two-input min check.
    self._min_helper(gpu_dev)
| |
def _min_3inputs_helper(self, dev):
    """Check three-input ``autograd.min`` forward and gradients on ``dev``.

    Each position has a unique minimum, so each expected gradient is a
    one-hot mask marking where that input supplied the minimum.
    """
    host_inputs = [
        np.array([3, 2, 1]).astype(np.float32),
        np.array([1, 4, 4]).astype(np.float32),
        np.array([2, 5, 0]).astype(np.float32),
    ]
    expected_out = np.array([1, 2, 0]).astype(np.float32)
    upstream = np.array([1, 1, 1]).astype(np.float32)

    dev_inputs = [tensor.from_numpy(arr) for arr in host_inputs]
    grad_out = tensor.from_numpy(upstream)
    for t in dev_inputs:
        t.to_device(dev)
    grad_out.to_device(dev)

    out = autograd.min(*dev_inputs)
    raw_d0, raw_d1, raw_d2 = out.creator.backward(grad_out.data)

    expected_grads = (
        np.array([0, 1, 0]).astype(np.float32),
        np.array([1, 0, 0]).astype(np.float32),
        np.array([0, 0, 1]).astype(np.float32),
    )

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    for raw, expected in zip((raw_d0, raw_d1, raw_d2), expected_grads):
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(tensor.from_raw_tensor(raw)),
            expected,
            decimal=5)

def test_min_3inputs_cpu(self):
    # CPU entry point for the three-input min check.
    self._min_3inputs_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_min_3inputs_gpu(self):
    # GPU entry point for the three-input min check.
    self._min_3inputs_helper(gpu_dev)
| |
def _min_1inputs_helper(self, dev):
    """Check single-input ``autograd.min`` forward and backward on ``dev``.

    With one input the forward output is expected to equal the input and
    the gradient is expected to be all ones.
    """
    only_input = np.array([3, 2, 1]).astype(np.float32)
    expected_out = np.array([3, 2, 1]).astype(np.float32)
    upstream = np.array([1, 1, 1]).astype(np.float32)
    expected_grad = np.array([1, 1, 1]).astype(np.float32)

    inp = tensor.from_numpy(only_input)
    grad_out = tensor.from_numpy(upstream)
    inp.to_device(dev)
    grad_out.to_device(dev)

    out = autograd.min(inp)
    raw_grad = out.creator.backward(grad_out.data)

    comparisons = (
        (tensor.to_numpy(out), expected_out),
        (tensor.to_numpy(tensor.from_raw_tensor(raw_grad)), expected_grad),
    )
    for actual, desired in comparisons:
        np.testing.assert_array_almost_equal(actual, desired, decimal=5)

def test_min_1inputs_cpu(self):
    # CPU entry point for the single-input min check.
    self._min_1inputs_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_min_1inputs_gpu(self):
    # GPU entry point for the single-input min check.
    self._min_1inputs_helper(gpu_dev)
| |
def _HardSigmoid_helper(self, dev):
    """Check ``autograd.hardsigmoid`` forward and backward on ``dev``.

    Reference forward: ``clip(alpha * x + gamma, 0, 1)`` with alpha=0.2
    and gamma=0.5. The reference gradient is ``alpha * dy`` where the
    clipped value lies strictly between 0 and 1, and 0 elsewhere.
    """
    alpha = 0.2
    gamma = 0.5

    # Draw order (input first, then upstream gradient) is kept as-is.
    data = np.random.randn(3, 2)
    expected_out = np.clip(data * alpha + gamma, 0, 1)
    upstream = np.random.randn(3, 2)

    # Boolean mask of positions in the unsaturated (linear) region.
    linear_region = np.logical_and(0 < expected_out, expected_out < 1)
    expected_grad = linear_region * alpha * upstream

    x_t = tensor.from_numpy(data)
    dy_t = tensor.from_numpy(upstream)
    x_t.to_device(dev)
    dy_t.to_device(dev)

    y_t = autograd.hardsigmoid(x_t, alpha, gamma)
    dx_raw = y_t.creator.backward(dy_t.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y_t),
                                         expected_out,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(dx_raw)),
        expected_grad,
        decimal=5)

def test_HardSigmoid_cpu(self):
    # CPU entry point for the hard-sigmoid check.
    self._HardSigmoid_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_HardSigmoid_gpu(self):
    # GPU entry point for the hard-sigmoid check.
    self._HardSigmoid_helper(gpu_dev)
| |
def _prelu_helper(self, dev):
    """Check ``autograd.prelu`` forward and both gradients on ``dev``.

    Reference forward: ``max(x, 0) + min(x, 0) * slope``. The reference
    gradients are ``dy`` (x > 0) or ``slope * dy`` (otherwise) for ``x``,
    and ``x * dy`` on the non-positive side for ``slope``.
    """
    # Draw order (x, slope, dy) is kept as-is.
    data = np.random.randn(3, 2)
    slope_np = np.random.randn(3, 2)
    expected_out = (np.clip(data, 0, np.inf) +
                    np.clip(data, -np.inf, 0) * slope_np)
    upstream = np.random.randn(3, 2)

    # 1.0 where the input is strictly positive, 0.0 elsewhere.
    positive = np.where(data > 0, 1.0, 0.0)
    expected_dx = (positive + (1 - positive) * slope_np) * upstream
    expected_dslope = (1 - positive) * data * upstream

    x_t = tensor.from_numpy(data)
    slope_t = tensor.from_numpy(slope_np)
    dy_t = tensor.from_numpy(upstream)
    for t in (x_t, slope_t, dy_t):
        t.to_device(dev)

    out = autograd.prelu(x_t, slope_t)
    raw_dx, raw_dslope = out.creator.backward(dy_t.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    for raw, expected in ((raw_dx, expected_dx),
                          (raw_dslope, expected_dslope)):
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(tensor.from_raw_tensor(raw)),
            expected,
            decimal=5)

def test_prelu_cpu(self):
    # CPU entry point for the PReLU check.
    self._prelu_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_prelu_gpu(self):
    # GPU entry point for the PReLU check.
    self._prelu_helper(gpu_dev)
| |
def _SeLU_helper(self, dev):
    """Check ``autograd.selu`` forward and backward results on ``dev``.

    Uses alpha=0.2 and gamma=0.3. Reference forward:
    ``gamma * max(x, 0) + alpha * gamma * (exp(min(x, 0)) - 1)``.
    Reference gradient: ``gamma`` for x > 0 and
    ``alpha * gamma * exp(x)`` otherwise, times the upstream gradient.
    """
    alpha = 0.2
    gamma = 0.3

    # Draw order (input first, then upstream gradient) is kept as-is.
    data = np.random.randn(3, 2)
    exp_neg = np.exp(np.clip(data, -np.inf, 0))
    expected_out = (np.clip(data, 0, np.inf) * gamma +
                    (exp_neg - 1) * alpha * gamma)
    upstream = np.random.randn(3, 2)

    base_slope = exp_neg * gamma
    non_positive = data <= 0
    expected_grad = np.where(non_positive, base_slope * alpha,
                             base_slope) * upstream

    x_t = tensor.from_numpy(data)
    dy_t = tensor.from_numpy(upstream)
    x_t.to_device(dev)
    dy_t.to_device(dev)

    y_t = autograd.selu(x_t, alpha, gamma)
    dx_raw = y_t.creator.backward(dy_t.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(y_t),
                                         expected_out,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(dx_raw)),
        expected_grad,
        decimal=5)

def test_SeLU_cpu(self):
    # CPU entry point for the SeLU check.
    self._SeLU_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_SeLU_gpu(self):
    # GPU entry point for the SeLU check.
    self._SeLU_helper(gpu_dev)
| |
def _and_helper(self, dev):
    """Check ``autograd._and`` against ``np.logical_and`` on ``dev``.

    Only the forward result is compared; no backward pass is run.
    """
    lhs_np = np.array([0, -0.3, -0.1, 0.1, 0.5, 0.9])
    lhs_np = lhs_np.reshape(3, 2).astype(np.float32)
    rhs_np = np.array([0, -0.3, 0, 0.1, 0.5, 0.9])
    rhs_np = rhs_np.reshape(3, 2).astype(np.float32)
    expected = np.logical_and(lhs_np, rhs_np)

    lhs = tensor.from_numpy(lhs_np)
    rhs = tensor.from_numpy(rhs_np)
    for operand in (lhs, rhs):
        operand.to_device(dev)

    outcome = autograd._and(lhs, rhs)

    np.testing.assert_array_almost_equal(tensor.to_numpy(outcome),
                                         expected,
                                         decimal=5)

def test_and_cpu(self):
    # CPU entry point for the logical-and check.
    self._and_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_and_gpu(self):
    # GPU entry point for the logical-and check.
    self._and_helper(gpu_dev)
| |
def _or_helper(self, dev):
    """Check ``autograd._or`` against ``np.logical_or`` on ``dev``.

    Only the forward result is compared; no backward pass is run.
    """
    lhs_np = np.array([1.0, 1.0, 2.0, -3.0, 0, -7.0])
    lhs_np = lhs_np.reshape(3, 2).astype(np.float32)
    rhs_np = np.array([-1.0, 0, 2.0, 4.0, 0, -7.0])
    rhs_np = rhs_np.reshape(3, 2).astype(np.float32)
    expected = np.logical_or(lhs_np, rhs_np)

    lhs = tensor.from_numpy(lhs_np)
    rhs = tensor.from_numpy(rhs_np)
    for operand in (lhs, rhs):
        operand.to_device(dev)

    outcome = autograd._or(lhs, rhs)

    np.testing.assert_array_almost_equal(tensor.to_numpy(outcome),
                                         expected,
                                         decimal=5)

def test_or_cpu(self):
    # CPU entry point for the logical-or check.
    self._or_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_or_gpu(self):
    # GPU entry point for the logical-or check.
    self._or_helper(gpu_dev)
| |
def _not_helper(self, dev):
    """Check ``autograd._not`` against ``np.logical_not`` on ``dev``.

    Only the forward result is compared; no backward pass is run.
    """
    operand_np = np.array([1.0, -1.0, 0, -0.1, 0, -7.0])
    operand_np = operand_np.reshape(3, 2).astype(np.float32)
    expected = np.logical_not(operand_np)

    operand = tensor.from_numpy(operand_np)
    operand.to_device(dev)

    outcome = autograd._not(operand)

    np.testing.assert_array_almost_equal(tensor.to_numpy(outcome),
                                         expected,
                                         decimal=5)

def test_not_cpu(self):
    # CPU entry point for the logical-not check.
    self._not_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_not_gpu(self):
    # GPU entry point for the logical-not check.
    self._not_helper(gpu_dev)
| |
def _xor_helper(self, dev):
    """Check ``autograd._xor`` against ``np.logical_xor`` on ``dev``.

    Only the forward result is compared; no backward pass is run.
    """
    lhs_np = np.array([0, -0.3, -0.1, 0.1, 0.5, 9.0])
    lhs_np = lhs_np.reshape(3, 2).astype(np.float32)
    rhs_np = np.array([0, -0.3, 0, 0.1, 0, 0.9])
    rhs_np = rhs_np.reshape(3, 2).astype(np.float32)
    expected = np.logical_xor(lhs_np, rhs_np)

    lhs = tensor.from_numpy(lhs_np)
    rhs = tensor.from_numpy(rhs_np)
    for operand in (lhs, rhs):
        operand.to_device(dev)

    outcome = autograd._xor(lhs, rhs)

    np.testing.assert_array_almost_equal(tensor.to_numpy(outcome),
                                         expected,
                                         decimal=5)

def test_xor_cpu(self):
    # CPU entry point for the logical-xor check.
    self._xor_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_xor_gpu(self):
    # GPU entry point for the logical-xor check.
    self._xor_helper(gpu_dev)
| |
def _negative_helper(self, dev):
    """Check ``autograd.negative`` forward and backward results on ``dev``.

    The forward output is compared with ``np.negative`` of the input and
    the gradient with the negated all-ones upstream gradient.
    """
    # The fourth entry is the expression 1.0 - 4 (i.e. -3.0), as written
    # in the original data.
    samples = np.array([0.1, 0, 0.4, 1.0 - 4, 0.9, -2.0])
    samples = samples.reshape(3, 2).astype(np.float32)
    upstream = np.ones((3, 2), dtype=np.float32)
    expected_out = np.negative(samples)
    expected_dx = np.negative(upstream)

    inp = tensor.from_numpy(samples)
    grad_out = tensor.from_numpy(upstream)
    for t in (inp, grad_out):
        t.to_device(dev)

    out = autograd.negative(inp)
    raw_dx = out.creator.backward(grad_out.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected_out,
                                         decimal=5)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(tensor.from_raw_tensor(raw_dx)),
        expected_dx,
        decimal=5)

def test_negative_cpu(self):
    # CPU entry point for the negation check.
    self._negative_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_negative_gpu(self):
    # GPU entry point for the negation check.
    self._negative_helper(gpu_dev)
| |
def _reciprocal_helper(self, dev):
    """Check ``autograd.reciprocal`` forward and backward on ``dev``.

    The input deliberately contains a zero, so the NumPy reference
    values are computed with divide-by-zero warnings suppressed. The
    reference gradient is ``-1 / x^2``.
    """
    # The fourth entry is the expression 1.0 - 4 (i.e. -3.0), as written
    # in the original data.
    samples = np.array([0.1, 0, 0.4, 1.0 - 4, 0.9, -2.0])
    samples = samples.reshape(3, 2).astype(np.float32)
    upstream = np.ones((3, 2), dtype=np.float32)

    inp = tensor.from_numpy(samples)
    grad_out = tensor.from_numpy(upstream)
    inp.to_device(dev)
    grad_out.to_device(dev)

    out = autograd.reciprocal(inp)
    raw_dx = out.creator.backward(grad_out.data)

    # d/dx (1/x) = -1 / x^2; the zero entry produces infinities here.
    with np.errstate(divide='ignore'):
        expected_out = np.reciprocal(samples)
        expected_dx = -1 / np.square(samples)

    comparisons = (
        (tensor.to_numpy(out), expected_out),
        (tensor.to_numpy(tensor.from_raw_tensor(raw_dx)), expected_dx),
    )
    for actual, desired in comparisons:
        np.testing.assert_array_almost_equal(actual, desired, decimal=5)

def test_reciprocal_cpu(self):
    # CPU entry point for the reciprocal check.
    self._reciprocal_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_reciprocal_gpu(self):
    # GPU entry point for the reciprocal check.
    self._reciprocal_helper(gpu_dev)
| |
def _and_broadcast_helper(self, dev):
    """Check ``autograd._and`` with broadcast operand shapes on ``dev``.

    For each shape pair, random 0/1 float inputs are generated and the
    forward result is compared with ``np.logical_and``.
    """
    shape_pairs = (
        ((3, 4, 5), (5,)),  # 3d vs 1d
        ((3, 4, 5), (4, 5)),  # 3d vs 2d
        ((3, 4, 5, 6), (5, 6)),  # 4d vs 2d
        ((3, 4, 5, 6), (4, 5, 6)),  # 4d vs 3d
        ((1, 4, 1, 6), (3, 1, 5, 6)),  # 4d vs 4d
    )
    for lhs_shape, rhs_shape in shape_pairs:
        lhs_np = (np.random.randn(*lhs_shape) > 0).astype(np.float32)
        rhs_np = (np.random.randn(*rhs_shape) > 0).astype(np.float32)
        expected = np.logical_and(lhs_np, rhs_np)

        lhs = tensor.from_numpy(lhs_np)
        rhs = tensor.from_numpy(rhs_np)
        lhs.to_device(dev)
        rhs.to_device(dev)

        outcome = autograd._and(lhs, rhs)
        np.testing.assert_array_almost_equal(tensor.to_numpy(outcome),
                                             expected,
                                             decimal=5)

def test_and_broadcast_cpu(self):
    # CPU entry point for the broadcast logical-and check.
    self._and_broadcast_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_and_broadcast_gpu(self):
    # GPU entry point for the broadcast logical-and check.
    self._and_broadcast_helper(gpu_dev)
| |
def _or_broadcast_helper(self, dev):
    """Check ``autograd._or`` with broadcast operand shapes on ``dev``.

    For each shape pair, random 0/1 float inputs are generated and the
    forward result is compared with ``np.logical_or``.
    """
    shape_pairs = (
        ((3, 4, 5), (5,)),  # 3d vs 1d
        ((3, 4, 5), (4, 5)),  # 3d vs 2d
        ((3, 4, 5, 6), (5, 6)),  # 4d vs 2d
        ((3, 4, 5, 6), (4, 5, 6)),  # 4d vs 3d
        ((1, 4, 1, 6), (3, 1, 5, 6)),  # 4d vs 4d
    )
    for lhs_shape, rhs_shape in shape_pairs:
        lhs_np = (np.random.randn(*lhs_shape) > 0).astype(np.float32)
        rhs_np = (np.random.randn(*rhs_shape) > 0).astype(np.float32)
        expected = np.logical_or(lhs_np, rhs_np)

        lhs = tensor.from_numpy(lhs_np)
        rhs = tensor.from_numpy(rhs_np)
        lhs.to_device(dev)
        rhs.to_device(dev)

        outcome = autograd._or(lhs, rhs)
        np.testing.assert_array_almost_equal(tensor.to_numpy(outcome),
                                             expected,
                                             decimal=5)

def test_or_broadcast_cpu(self):
    # CPU entry point for the broadcast logical-or check.
    self._or_broadcast_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_or_broadcast_gpu(self):
    # GPU entry point for the broadcast logical-or check.
    self._or_broadcast_helper(gpu_dev)
| |
def _xor_broadcast_helper(self, dev):
    """Check ``autograd._xor`` with broadcast operand shapes on ``dev``.

    For each shape pair, random 0/1 float inputs are generated and the
    forward result is compared with ``np.logical_xor``.
    """
    shape_pairs = (
        ((3, 4, 5), (5,)),  # 3d vs 1d
        ((3, 4, 5), (4, 5)),  # 3d vs 2d
        ((3, 4, 5, 6), (5, 6)),  # 4d vs 2d
        ((3, 4, 5, 6), (4, 5, 6)),  # 4d vs 3d
        ((1, 4, 1, 6), (3, 1, 5, 6)),  # 4d vs 4d
    )
    for lhs_shape, rhs_shape in shape_pairs:
        lhs_np = (np.random.randn(*lhs_shape) > 0).astype(np.float32)
        rhs_np = (np.random.randn(*rhs_shape) > 0).astype(np.float32)
        expected = np.logical_xor(lhs_np, rhs_np)

        lhs = tensor.from_numpy(lhs_np)
        rhs = tensor.from_numpy(rhs_np)
        lhs.to_device(dev)
        rhs.to_device(dev)

        outcome = autograd._xor(lhs, rhs)
        np.testing.assert_array_almost_equal(tensor.to_numpy(outcome),
                                             expected,
                                             decimal=5)

def test_xor_broadcast_cpu(self):
    # CPU entry point for the broadcast logical-xor check.
    self._xor_broadcast_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_xor_broadcast_gpu(self):
    # GPU entry point for the broadcast logical-xor check.
    self._xor_broadcast_helper(gpu_dev)
| |
def _greater_broadcast_helper(self, dev):
    """Check ``autograd.greater`` with broadcast operand shapes on ``dev``.

    For each shape pair, random float32 inputs are generated and the
    forward result is compared with ``np.greater``.
    """
    shape_pairs = (
        ((3, 4, 5), (5,)),  # 3d vs 1d
        ((3, 4, 5), (4, 5)),  # 3d vs 2d
        ((3, 4, 5, 6), (5, 6)),  # 4d vs 2d
        ((3, 4, 5, 6), (4, 5, 6)),  # 4d vs 3d
        ((1, 4, 1, 6), (3, 1, 5, 6)),  # 4d vs 4d
    )
    for lhs_shape, rhs_shape in shape_pairs:
        lhs_np = np.random.randn(*lhs_shape).astype(np.float32)
        rhs_np = np.random.randn(*rhs_shape).astype(np.float32)
        expected = lhs_np > rhs_np

        lhs = tensor.from_numpy(lhs_np)
        rhs = tensor.from_numpy(rhs_np)
        lhs.to_device(dev)
        rhs.to_device(dev)

        outcome = autograd.greater(lhs, rhs)
        np.testing.assert_array_almost_equal(tensor.to_numpy(outcome),
                                             expected,
                                             decimal=5)

def test_greater_broadcast_cpu(self):
    # CPU entry point for the broadcast greater-than check.
    self._greater_broadcast_helper(cpu_dev)

# Skipped when singa_wrap.USE_CUDA is false.
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_greater_broadcast_gpu(self):
    # GPU entry point for the broadcast greater-than check.
    self._greater_broadcast_helper(gpu_dev)
| |
def _less_broadcast_helper(self, dev):
    """Compare autograd.less with np.less on broadcastable shape pairs.

    The ``dev`` argument is honoured as given. It was previously overwritten
    with ``cpu_dev``, which made the GPU test run on the CPU.

    Args:
        dev: device passed to ``to_device`` for both operands.
    """
    cases = [
        ([3, 4, 5], [5]),  # 3d vs 1d
        ([3, 4, 5], [4, 5]),  # 3d vs 2d
        ([3, 4, 5, 6], [5, 6]),  # 4d vs 2d
        ([3, 4, 5, 6], [4, 5, 6]),  # 4d vs 3d
        ([1, 4, 1, 6], [3, 1, 5, 6]),  # 4d vs 4d
    ]
    for in1, in2 in cases:
        x = np.random.randn(*in1).astype(np.float32)
        x1 = np.random.randn(*in2).astype(np.float32)
        y = np.less(x, x1)

        x = tensor.from_numpy(x)
        x1 = tensor.from_numpy(x1)
        x.to_device(dev)
        x1.to_device(dev)

        result = autograd.less(x, x1)
        np.testing.assert_array_almost_equal(tensor.to_numpy(result),
                                             y,
                                             decimal=5)
| |
def test_less_broadcast_cpu(self):
    """Run the less broadcast helper with ``cpu_dev``."""
    self._less_broadcast_helper(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_less_broadcast_gpu(self):
    """Run the less broadcast helper with ``gpu_dev``; skipped when USE_CUDA is false."""
    self._less_broadcast_helper(gpu_dev)
| |
def _add_broadcast_helper(self, dev):
    """Check autograd.add forward and backward on broadcastable shape pairs.

    The expected gradient of each operand is ``dy`` summed over the axes
    reported by ``axis_helper`` and reshaped to the operand's shape.

    Args:
        dev: device passed to ``to_device`` for the operands and ``dy``.
    """
    shape_pairs = [
        ([3, 4, 5], [5]),  # 3d vs 1d
        ([3, 4, 5], [4, 5]),  # 3d vs 2d
        ([3, 4, 5, 6], [5, 6]),  # 4d vs 2d
        ([3, 4, 5, 6], [4, 5, 6]),  # 4d vs 3d
        ([1, 4, 1, 6], [3, 1, 5, 6]),  # 4d vs 4d
    ]
    for lhs_shape, rhs_shape in shape_pairs:
        lhs_np = np.random.randn(*lhs_shape).astype(np.float32)
        rhs_np = np.random.randn(*rhs_shape).astype(np.float32)
        expected = lhs_np + rhs_np

        dy_np = np.random.randn(*expected.shape)
        lhs_axes = axis_helper(expected.shape, lhs_np.shape)
        expected_dlhs = np.sum(dy_np, axis=lhs_axes).reshape(lhs_np.shape)
        rhs_axes = axis_helper(expected.shape, rhs_np.shape)
        expected_drhs = np.sum(dy_np, axis=rhs_axes).reshape(rhs_np.shape)

        lhs = tensor.from_numpy(lhs_np)
        rhs = tensor.from_numpy(rhs_np)
        dy = tensor.from_numpy(dy_np)
        lhs.to_device(dev)
        rhs.to_device(dev)
        dy.to_device(dev)

        out = autograd.add(lhs, rhs)
        dlhs, drhs = out.creator.backward(dy.data)

        np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                             expected,
                                             decimal=5)
        got_dlhs = tensor.to_numpy(tensor.from_raw_tensor(dlhs))
        np.testing.assert_array_almost_equal(got_dlhs,
                                             expected_dlhs,
                                             decimal=5)
        got_drhs = tensor.to_numpy(tensor.from_raw_tensor(drhs))
        np.testing.assert_array_almost_equal(got_drhs,
                                             expected_drhs,
                                             decimal=5)
| |
def test_add_broadcast_cpu(self):
    """Run the add broadcast helper with ``cpu_dev``."""
    self._add_broadcast_helper(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_add_broadcast_gpu(self):
    """Run the add broadcast helper with ``gpu_dev``; skipped when USE_CUDA is false."""
    self._add_broadcast_helper(gpu_dev)
| |
def _sub_broadcast_helper(self, dev):
    """Check autograd.sub forward and backward on broadcastable shape pairs.

    The expected gradients are ``dy`` (first operand) and ``-dy`` (second
    operand), each summed over the axes reported by ``axis_helper`` and
    reshaped to the operand's shape.

    Args:
        dev: device passed to ``to_device`` for the operands and ``dy``.
    """
    shape_pairs = [
        ([3, 4, 5], [5]),  # 3d vs 1d
        ([3, 4, 5], [4, 5]),  # 3d vs 2d
        ([3, 4, 5, 6], [5, 6]),  # 4d vs 2d
        ([3, 4, 5, 6], [4, 5, 6]),  # 4d vs 3d
        ([1, 4, 1, 6], [3, 1, 5, 6]),  # 4d vs 4d
    ]
    for lhs_shape, rhs_shape in shape_pairs:
        lhs_np = np.random.randn(*lhs_shape).astype(np.float32)
        rhs_np = np.random.randn(*rhs_shape).astype(np.float32)
        expected = lhs_np - rhs_np

        dy_np = np.random.randn(*expected.shape)
        lhs_axes = axis_helper(expected.shape, lhs_np.shape)
        expected_dlhs = np.sum(dy_np, axis=lhs_axes).reshape(lhs_np.shape)
        rhs_axes = axis_helper(expected.shape, rhs_np.shape)
        expected_drhs = np.sum(-dy_np, axis=rhs_axes).reshape(rhs_np.shape)

        lhs = tensor.from_numpy(lhs_np)
        rhs = tensor.from_numpy(rhs_np)
        dy = tensor.from_numpy(dy_np)
        lhs.to_device(dev)
        rhs.to_device(dev)
        dy.to_device(dev)

        out = autograd.sub(lhs, rhs)
        dlhs, drhs = out.creator.backward(dy.data)

        np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                             expected,
                                             decimal=5)
        got_dlhs = tensor.to_numpy(tensor.from_raw_tensor(dlhs))
        np.testing.assert_array_almost_equal(got_dlhs,
                                             expected_dlhs,
                                             decimal=5)
        got_drhs = tensor.to_numpy(tensor.from_raw_tensor(drhs))
        np.testing.assert_array_almost_equal(got_drhs,
                                             expected_drhs,
                                             decimal=5)
| |
def test_sub_broadcast_cpu(self):
    """Run the sub broadcast helper with ``cpu_dev``."""
    self._sub_broadcast_helper(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_sub_broadcast_gpu(self):
    """Run the sub broadcast helper with ``gpu_dev``; skipped when USE_CUDA is false."""
    self._sub_broadcast_helper(gpu_dev)
| |
def _mul_broadcast_helper(self, dev):
    """Check autograd.mul forward and backward on broadcastable shape pairs.

    The expected gradient of each operand is the other operand times ``dy``,
    summed over the axes reported by ``axis_helper`` and reshaped to the
    operand's shape.

    Args:
        dev: device passed to ``to_device`` for the operands and ``dy``.
    """
    shape_pairs = [
        ([3, 4, 5], [5]),  # 3d vs 1d
        ([3, 4, 5], [4, 5]),  # 3d vs 2d
        ([3, 4, 5, 6], [5, 6]),  # 4d vs 2d
        ([3, 4, 5, 6], [4, 5, 6]),  # 4d vs 3d
        ([1, 4, 1, 6], [3, 1, 5, 6]),  # 4d vs 4d
    ]
    for lhs_shape, rhs_shape in shape_pairs:
        lhs_np = np.random.randn(*lhs_shape).astype(np.float32)
        rhs_np = np.random.randn(*rhs_shape).astype(np.float32)
        expected = lhs_np * rhs_np

        dy_np = np.random.randn(*expected.shape)
        lhs_axes = axis_helper(expected.shape, lhs_np.shape)
        expected_dlhs = np.sum(rhs_np * dy_np,
                               axis=lhs_axes).reshape(lhs_np.shape)
        rhs_axes = axis_helper(expected.shape, rhs_np.shape)
        expected_drhs = np.sum(lhs_np * dy_np,
                               axis=rhs_axes).reshape(rhs_np.shape)

        lhs = tensor.from_numpy(lhs_np)
        rhs = tensor.from_numpy(rhs_np)
        dy = tensor.from_numpy(dy_np)
        lhs.to_device(dev)
        rhs.to_device(dev)
        dy.to_device(dev)

        out = autograd.mul(lhs, rhs)
        dlhs, drhs = out.creator.backward(dy.data)

        np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                             expected,
                                             decimal=5)
        got_dlhs = tensor.to_numpy(tensor.from_raw_tensor(dlhs))
        np.testing.assert_array_almost_equal(got_dlhs,
                                             expected_dlhs,
                                             decimal=5)
        got_drhs = tensor.to_numpy(tensor.from_raw_tensor(drhs))
        np.testing.assert_array_almost_equal(got_drhs,
                                             expected_drhs,
                                             decimal=5)
| |
def test_mul_broadcast_cpu(self):
    """Run the mul broadcast helper with ``cpu_dev``."""
    self._mul_broadcast_helper(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_mul_broadcast_gpu(self):
    """Run the mul broadcast helper with ``gpu_dev``; skipped when USE_CUDA is false."""
    self._mul_broadcast_helper(gpu_dev)
| |
def _div_broadcast_helper(self, dev):
    """Check autograd.div forward and backward on broadcastable shape pairs.

    The result and both gradients are compared with NumPy references using
    ``assert_allclose`` with rtol=1e-4 and atol=1e-4.

    Args:
        dev: device passed to ``to_device`` for the operands and ``dy``.
    """
    shape_pairs = [
        ([3, 4, 5], [5]),  # 3d vs 1d
        ([3, 4, 5], [4, 5]),  # 3d vs 2d
        ([3, 4, 5, 6], [5, 6]),  # 4d vs 2d
        ([3, 4, 5, 6], [4, 5, 6]),  # 4d vs 3d
        ([1, 4, 1, 6], [3, 1, 5, 6]),  # 4d vs 4d
    ]
    # Relative and absolute tolerances are used here instead of a fixed
    # number of decimals.
    tolerance = dict(rtol=1e-4, atol=1e-4)
    for num_shape, den_shape in shape_pairs:
        num_np = np.random.randn(*num_shape).astype(np.float32)
        den_np = np.random.randn(*den_shape).astype(np.float32) + 1.0
        expected = num_np / den_np

        dy_np = np.random.randn(*expected.shape).astype(np.float32)
        # d(x / x1)/dx = x1 ** -1
        num_axes = axis_helper(expected.shape, num_np.shape)
        expected_dnum = np.sum(np.power(den_np, -1) * dy_np,
                               axis=num_axes).reshape(num_np.shape)
        # d(x / x1)/dx1 = -x * x1 ** -2
        den_axes = axis_helper(expected.shape, den_np.shape)
        expected_dden = np.sum(num_np * -np.power(den_np, -2) * dy_np,
                               axis=den_axes).reshape(den_np.shape)

        num = tensor.from_numpy(num_np)
        den = tensor.from_numpy(den_np)
        dy = tensor.from_numpy(dy_np)
        num.to_device(dev)
        den.to_device(dev)
        dy.to_device(dev)

        out = autograd.div(num, den)
        dnum, dden = out.creator.backward(dy.data)

        np.testing.assert_allclose(tensor.to_numpy(out), expected,
                                   **tolerance)
        got_dnum = tensor.to_numpy(tensor.from_raw_tensor(dnum))
        np.testing.assert_allclose(got_dnum, expected_dnum, **tolerance)
        got_dden = tensor.to_numpy(tensor.from_raw_tensor(dden))
        np.testing.assert_allclose(got_dden, expected_dden, **tolerance)
| |
def test_div_broadcast_cpu(self):
    """Run the div broadcast helper with ``cpu_dev``."""
    self._div_broadcast_helper(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_div_broadcast_gpu(self):
    """Run the div broadcast helper with ``gpu_dev``; skipped when USE_CUDA is false."""
    self._div_broadcast_helper(gpu_dev)
| |
def _pow_broadcast_helper(self, dev):
    """Check autograd.pow forward and backward on broadcastable shape pairs.

    Bases are random integers in [1, 10) and exponents random integers in
    [1, 5), both stored as float32. Results are compared to two decimals.

    Args:
        dev: device passed to ``to_device`` for the operands and ``dy``.
    """
    shape_pairs = [
        ([3, 4, 5], [5]),  # 3d vs 1d
        ([3, 4, 5], [4, 5]),  # 3d vs 2d
        ([3, 4, 5, 6], [5, 6]),  # 4d vs 2d
        ([3, 4, 5, 6], [4, 5, 6]),  # 4d vs 3d
        ([1, 4, 1, 6], [3, 1, 5, 6]),  # 4d vs 4d
    ]
    for base_shape, exp_shape in shape_pairs:
        base_np = np.random.randint(1, 10, size=base_shape).astype(np.float32)
        exp_np = np.random.randint(1, 5, size=exp_shape).astype(np.float32)
        expected = np.power(base_np, exp_np).astype(np.float32)

        dy_np = np.random.randn(*expected.shape).astype(np.float32)
        # d(x ** x1)/dx = x1 * x ** (x1 - 1)
        base_axes = axis_helper(expected.shape, base_np.shape)
        expected_dbase = np.sum(
            exp_np * np.power(base_np, exp_np - 1) * dy_np,
            axis=base_axes).reshape(base_np.shape)
        # d(x ** x1)/dx1 = x ** x1 * log(x)
        exp_axes = axis_helper(expected.shape, exp_np.shape)
        expected_dexp = np.sum(
            np.power(base_np, exp_np) * np.log(base_np) * dy_np,
            axis=exp_axes).reshape(exp_np.shape)

        base = tensor.from_numpy(base_np)
        exponent = tensor.from_numpy(exp_np)
        dy = tensor.from_numpy(dy_np)
        base.to_device(dev)
        exponent.to_device(dev)
        dy.to_device(dev)

        out = autograd.pow(base, exponent)
        dbase, dexp = out.creator.backward(dy.data)

        np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                             expected,
                                             decimal=2)
        got_dbase = tensor.to_numpy(tensor.from_raw_tensor(dbase))
        np.testing.assert_array_almost_equal(got_dbase,
                                             expected_dbase,
                                             decimal=2)
        got_dexp = tensor.to_numpy(tensor.from_raw_tensor(dexp))
        np.testing.assert_array_almost_equal(got_dexp,
                                             expected_dexp,
                                             decimal=2)
| |
def test_pow_broadcast_cpu(self):
    """Run the pow broadcast helper with ``cpu_dev``."""
    self._pow_broadcast_helper(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_pow_broadcast_gpu(self):
    """Run the pow broadcast helper with ``gpu_dev``; skipped when USE_CUDA is false."""
    self._pow_broadcast_helper(gpu_dev)
| |
def _prelu_broadcast_helper(self, dev):
    """Check autograd.prelu forward and backward on broadcastable shape pairs.

    The reference output keeps the positive part of ``x`` and scales the
    non-positive part by ``slope``. Reference gradients are built from a 0/1
    mask marking where ``x`` is positive.

    Args:
        dev: device passed to ``to_device`` for ``x``, ``slope`` and ``dy``.
    """
    shape_pairs = [
        ([3, 4, 5], [5]),  # 3d vs 1d
        ([3, 4, 5], [4, 5]),  # 3d vs 2d
        ([3, 4, 5, 6], [5, 6]),  # 4d vs 2d
        ([3, 4, 5, 6], [4, 5, 6]),  # 4d vs 3d
        ([1, 4, 1, 6], [3, 1, 5, 6]),  # 4d vs 4d
    ]
    for x_shape, slope_shape in shape_pairs:
        x_np = np.random.randn(*x_shape).astype(np.float32)
        slope_np = np.random.randn(*slope_shape).astype(np.float32)
        positive_part = np.clip(x_np, 0, np.inf)
        negative_part = np.clip(x_np, -np.inf, 0)
        expected = positive_part + negative_part * slope_np

        dy_np = np.random.randn(*expected.shape).astype(np.float32)
        # 1 where x is positive, 0 elsewhere
        mask = (x_np > 0).astype(np.float32)
        x_axes = axis_helper(expected.shape, x_np.shape)
        expected_dx = np.sum((mask + (1 - mask) * slope_np) * dy_np,
                             axis=x_axes).reshape(x_np.shape)
        slope_axes = axis_helper(expected.shape, slope_np.shape)
        expected_dslope = np.sum((1 - mask) * x_np * dy_np,
                                 axis=slope_axes).reshape(slope_np.shape)

        x = tensor.from_numpy(x_np)
        slope = tensor.from_numpy(slope_np)
        dy = tensor.from_numpy(dy_np)
        x.to_device(dev)
        slope.to_device(dev)
        dy.to_device(dev)

        out = autograd.prelu(x, slope)
        dx, dslope = out.creator.backward(dy.data)

        np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                             expected,
                                             decimal=5)
        got_dx = tensor.to_numpy(tensor.from_raw_tensor(dx))
        np.testing.assert_array_almost_equal(got_dx, expected_dx, decimal=5)
        got_dslope = tensor.to_numpy(tensor.from_raw_tensor(dslope))
        np.testing.assert_array_almost_equal(got_dslope,
                                             expected_dslope,
                                             decimal=5)
| |
def test_prelu_broadcast_cpu(self):
    """Run the prelu broadcast helper with ``cpu_dev``."""
    self._prelu_broadcast_helper(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_prelu_broadcast_gpu(self):
    """Run the prelu broadcast helper with ``gpu_dev``; skipped when USE_CUDA is false."""
    self._prelu_broadcast_helper(gpu_dev)
| |
def _gemm_helper(self, dev):
    """Check autograd.gemm forward and backward for every transA/transB combination.

    The reference is ``Y = alpha * A' * B' + beta * C`` where ``A'`` and ``B'``
    are the optionally transposed inputs. The upstream gradient is all ones.

    Args:
        dev: device passed to ``to_device`` for A, B, C and ``dy``.
    """
    configs = [
        # alpha, beta, transA, transB, shapeA, shapeB, shapeC, shapeY
        [0.25, 0.35, 0, 0, (3, 4), (4, 5), (1, 5), (3, 5)],
        [0.25, 0.35, 0, 1, (3, 4), (5, 4), (1, 5), (3, 5)],
        [0.25, 0.35, 1, 0, (4, 3), (4, 5), (1, 5), (3, 5)],
        [0.25, 0.35, 1, 1, (4, 3), (5, 4), (1, 5), (3, 5)],
    ]
    for (alpha, beta, transA, transB, shapeA, shapeB, shapeC,
         shapeY) in configs:
        A = np.random.randn(*shapeA).astype(np.float32)
        B = np.random.randn(*shapeB).astype(np.float32)
        C = np.random.randn(*shapeC).astype(np.float32)
        DY = np.ones(shapeY, dtype=np.float32)

        # Wrap each array and move it to the device, one array at a time.
        on_device = []
        for array in (A, B, C, DY):
            wrapped = tensor.from_numpy(array)
            wrapped.to_device(dev)
            on_device.append(wrapped)
        a, b, c, dy = on_device

        result = autograd.gemm(a, b, c, alpha, beta, transA, transB)
        da, db, dc = result.creator.backward(dy.data)

        # Y = alpha * A' * B' + beta * C
        _A = A.T if transA != 0 else A
        _B = B.T if transB != 0 else B
        if C is None:
            C = np.array(0)
        Y = alpha * np.dot(_A, _B) + beta * C

        DA = alpha * np.matmul(DY, _B.T)
        if transA != 0:
            DA = DA.T
        DB = alpha * np.matmul(_A.T, DY)
        if transB != 0:
            DB = DB.T
        c_axes = axis_helper(Y.shape, C.shape)
        DC = beta * np.sum(DY, axis=c_axes).reshape(C.shape)

        np.testing.assert_array_almost_equal(tensor.to_numpy(result),
                                             Y,
                                             decimal=5)
        got_da = tensor.to_numpy(tensor.from_raw_tensor(da))
        np.testing.assert_array_almost_equal(got_da, DA, decimal=5)
        got_db = tensor.to_numpy(tensor.from_raw_tensor(db))
        np.testing.assert_array_almost_equal(got_db, DB, decimal=5)
        got_dc = tensor.to_numpy(tensor.from_raw_tensor(dc))
        np.testing.assert_array_almost_equal(got_dc, DC, decimal=5)
| |
def test_gemm_cpu(self):
    """Run the gemm helper with ``cpu_dev``."""
    self._gemm_helper(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_gemm_gpu(self):
    """Run the gemm helper with ``gpu_dev``; skipped when USE_CUDA is false."""
    self._gemm_helper(gpu_dev)
| |
def globalaveragepool_channel_first(self, dev):
    """Check autograd.globalaveragepool on a (1, 1, 3, 3) input with default layout.

    The expected output is the single value 5. The expected input gradient
    is ``DY`` divided by the product of the trailing two dimensions.

    Args:
        dev: device passed to ``to_device`` for the input and ``dy``.
    """
    grid = [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
    X = np.array([[grid]]).astype(np.float32)
    expected = np.array([[[[5]]]]).astype(np.float32)
    DY = np.ones((1, 1, 1, 1), dtype=np.float32)

    x = tensor.from_numpy(X)
    x.to_device(dev)
    dy = tensor.from_numpy(DY)
    dy.to_device(dev)

    out = autograd.globalaveragepool(x)
    dx = out.creator.backward(dy.data)

    pooled_count = np.prod(X.shape[2:])
    expected_dx = np.multiply(np.ones(X.shape, dtype=np.float32),
                              DY) / pooled_count

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected,
                                         decimal=5)
    got_dx = tensor.to_numpy(tensor.from_raw_tensor(dx))
    np.testing.assert_array_almost_equal(got_dx, expected_dx, decimal=5)
| |
def globalaveragepool_channel_last(self, dev):
    """Check autograd.globalaveragepool on a (1, 3, 3, 1) input with 'channel_last'.

    The expected output is the single value 5. The expected input gradient
    is ``DY`` divided by the product of the two middle dimensions.

    Args:
        dev: device passed to ``to_device`` for the input and ``dy``.
    """
    rows = [
        [[1], [2], [3]],
        [[4], [5], [6]],
        [[7], [8], [9]],
    ]
    X = np.array([rows]).astype(np.float32)
    expected = np.array([[[[5]]]]).astype(np.float32)
    DY = np.ones((1, 1, 1, 1), dtype=np.float32)

    x = tensor.from_numpy(X)
    x.to_device(dev)
    dy = tensor.from_numpy(DY)
    dy.to_device(dev)

    out = autograd.globalaveragepool(x, 'channel_last')
    dx = out.creator.backward(dy.data)

    pooled_count = np.prod(X.shape[1:-1])
    expected_dx = np.multiply(np.ones(X.shape, dtype=np.float32),
                              DY) / pooled_count

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected,
                                         decimal=5)
    got_dx = tensor.to_numpy(tensor.from_raw_tensor(dx))
    np.testing.assert_array_almost_equal(got_dx, expected_dx, decimal=5)
| |
def test_globalaveragepool_cpu(self):
    """Run both global-average-pool layout checks with ``cpu_dev``."""
    self.globalaveragepool_channel_first(cpu_dev)
    self.globalaveragepool_channel_last(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_globalaveragepool_gpu(self):
    """Run both global-average-pool layout checks with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.globalaveragepool_channel_first(gpu_dev)
    self.globalaveragepool_channel_last(gpu_dev)
| |
def constantOfShape_test(self, dev):
    """Check autograd.constant_of_shape against np.ones for two shape/value cases.

    Args:
        dev: device passed to ``to_device`` for the shape tensor.
    """
    cases = (
        ([4, 3, 2], np.float32, 1.0),  # float32 ones
        ([10, 6], np.int32, 1),  # int32 ones
    )
    for dims, np_dtype, fill_value in cases:
        shape_np = np.array(dims).astype(np.int64)
        shape_tensor = tensor.from_numpy(shape_np)
        shape_tensor.to_device(dev)

        expected = np.ones(shape_np, dtype=np_dtype)
        out = autograd.constant_of_shape(shape_tensor, fill_value)

        np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                             expected,
                                             decimal=5)
| |
def test_constantOfShape_cpu(self):
    """Run the constant-of-shape check with ``cpu_dev``."""
    self.constantOfShape_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_constantOfShape_gpu(self):
    """Run the constant-of-shape check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.constantOfShape_test(gpu_dev)
| |
def dropout_test(self, dev):
    """Check that autograd.dropout keeps the (3, 4, 5) shape forward and backward.

    Only shapes are checked; values are not compared.

    Args:
        dev: device passed to ``to_device`` for the input and ``dy``.
    """
    dims = (3, 4, 5)
    x_np = np.random.randn(*dims).astype(np.float32)
    dy_np = np.random.randn(*dims).astype(np.float32)

    x = tensor.from_numpy(x_np)
    dy = tensor.from_numpy(dy_np)
    x.to_device(dev)
    dy.to_device(dev)

    out = autograd.dropout(x, 0.5)
    dx = out.creator.backward(dy.data)

    self.check_shape(out.shape, (3, 4, 5))
    self.check_shape(dx.shape(), (3, 4, 5))
| |
def test_dropout_cpu(self):
    """Run the dropout shape check with ``cpu_dev``."""
    self.dropout_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_dropout_gpu(self):
    """Run the dropout shape check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.dropout_test(gpu_dev)
| |
def reduceSum_test(self, dev):
    """Check autograd.reduce_sum against np.sum for several axes/keepdims pairs.

    The forward result is compared by value; the backward result is only
    checked for having the input's shape.

    Args:
        dev: device passed to ``to_device`` for the input and ``dy``.
    """
    in_shape = [3, 2, 2]
    # (axes, keepdims)
    cases = [(None, 1), ([1], 0), ([1], 1), ([-2], 1), ([1, 2], 1)]
    for axes, keepdims in cases:
        x_np = np.random.uniform(-10, 10, in_shape).astype(np.float32)
        np_axes = None if axes is None else tuple(axes)
        expected = np.sum(x_np, axis=np_axes, keepdims=keepdims == 1)
        dy_np = np.random.randn(*expected.shape).astype(np.float32)

        x = tensor.from_numpy(x_np)
        dy = tensor.from_numpy(dy_np)
        x.to_device(dev)
        dy.to_device(dev)

        out = autograd.reduce_sum(x, axes, keepdims)
        dx = out.creator.backward(dy.data)

        np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                             expected,
                                             decimal=5)
        self.check_shape(dx.shape(), tuple(in_shape))
| |
def test_reduceSum_cpu(self):
    """Run the reduce-sum check with ``cpu_dev``."""
    self.reduceSum_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_reduceSum_gpu(self):
    """Run the reduce-sum check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.reduceSum_test(gpu_dev)
| |
def reduceMean_test(self, dev):
    """Check autograd.reduce_mean against np.mean for several axes/keepdims pairs.

    The forward result is compared by value; the backward result is only
    checked for having the input's shape.

    Args:
        dev: device passed to ``to_device`` for the input and ``dy``.
    """
    in_shape = [3, 2, 2]
    # (axes, keepdims)
    cases = [(None, 1), ([1], 0), ([1], 1), ([-2], 1), ([1, 2], 1)]
    for axes, keepdims in cases:
        x_np = np.random.uniform(-10, 10, in_shape).astype(np.float32)
        np_axes = None if axes is None else tuple(axes)
        expected = np.mean(x_np, axis=np_axes, keepdims=keepdims == 1)
        dy_np = np.random.randn(*expected.shape).astype(np.float32)

        x = tensor.from_numpy(x_np)
        dy = tensor.from_numpy(dy_np)
        x.to_device(dev)
        dy.to_device(dev)

        out = autograd.reduce_mean(x, axes, keepdims)
        dx = out.creator.backward(dy.data)

        np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                             expected,
                                             decimal=5)
        self.check_shape(dx.shape(), tuple(in_shape))
| |
def test_reduceMean_cpu(self):
    """Run the reduce-mean check with ``cpu_dev``."""
    self.reduceMean_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_reduceMean_gpu(self):
    """Run the reduce-mean check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.reduceMean_test(gpu_dev)
| |
def slice_test(self, dev):
    """Check autograd.slice forward and backward against NumPy slicing.

    Each config pairs the slice arguments with the expected NumPy slice of
    ``X`` and with the same slice of an index array. The index slice lists
    the flat positions of ``X`` that the slice selects. With an all-ones
    upstream gradient, the expected input gradient is 1 at those positions
    and 0 elsewhere.

    Args:
        dev: device passed to ``to_device`` for the input and ``dy``.
    """
    X = np.random.randn(20, 10, 5).astype(np.float32)
    indexes = np.array(range(20 * 10 * 5)).reshape(20, 10, 5)
    configs = [
        # starts, ends, axes, steps, y, flat indexes selected by the slice
        [[0, 0], [3, 10], [0, 1], [1, 1], X[0:3, 0:10],
         indexes[0:3, 0:10]],  # slice
        [[0, 0, 3], [20, 10, 4], None, None, X[:, :, 3:4],
         indexes[:, :, 3:4]],  # slice_default_axes
        [[1], [1000], [1], [1], X[:, 1:1000],
         indexes[:, 1:1000]],  # slice_end_out_of_bounds
        [[0], [-1], [1], [1], X[:, 0:-1],
         indexes[:, 0:-1]],  # slice_negative_end
        [[20, 10, 4], [0, 0, 1], [0, 1, 2], [-1, -3, -2],
         X[20:0:-1, 10:0:-3, 4:1:-2],
         indexes[20:0:-1, 10:0:-3, 4:1:-2]],  # slice_neg_steps
        [[0, 0, 3], [20, 10, 4], [0, -2, -1], None, X[:, :, 3:4],
         indexes[:, :, 3:4]],  # slice_negative_axes
        # slice_start_out_of_bounds is left out: empty tensors are not supported.
        # [[1000], [1000], [1], [1], X[:, 1000:1000], indexes[:, 1000:1000]],
    ]
    for starts, ends, axes, steps, y, dx_idx in configs:
        dy = np.ones(y.shape).astype(np.float32)

        x = tensor.from_numpy(X)
        dy = tensor.from_numpy(dy)
        x.to_device(dev)
        dy.to_device(dev)

        result = autograd.slice(x, starts, ends, axes, steps)
        dx = result.creator.backward(dy.data)

        # Mark the selected flat positions with one index assignment rather
        # than testing every element for membership in a tuple.
        dX = np.zeros(X.size)
        dX[dx_idx.flatten()] = 1.
        dX = dX.reshape(X.shape)

        np.testing.assert_array_almost_equal(tensor.to_numpy(result),
                                             y,
                                             decimal=5)
        np.testing.assert_array_almost_equal(tensor.to_numpy(
            tensor.from_raw_tensor(dx)),
                                             dX,
                                             decimal=5)
| |
def test_slice_cpu(self):
    """Run the slice check with ``cpu_dev``."""
    self.slice_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_slice_gpu(self):
    """Run the slice check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.slice_test(gpu_dev)
| |
def ceil_test(self, dev):
    """Check autograd.ceil against np.ceil and expect an all-zero input gradient.

    Args:
        dev: device passed to ``to_device`` for the input and ``dy``.
    """
    x_np = np.array([-1.5, 1.2]).astype(np.float32)
    dy_np = np.ones((2), dtype=np.float32)
    expected = np.ceil(x_np)
    expected_dx = np.zeros((2), dtype=np.float32)

    x = tensor.from_numpy(x_np)
    dy = tensor.from_numpy(dy_np)
    x.to_device(dev)
    dy.to_device(dev)

    out = autograd.ceil(x)
    dx = out.creator.backward(dy.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                         expected,
                                         decimal=5)
    got_dx = tensor.to_numpy(tensor.from_raw_tensor(dx))
    np.testing.assert_array_almost_equal(got_dx, expected_dx, decimal=5)
| |
def test_ceil_cpu(self):
    """Run the ceil check with ``cpu_dev``."""
    self.ceil_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_ceil_gpu(self):
    """Run the ceil check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.ceil_test(gpu_dev)
| |
def split_test(self, dev):
    """Check autograd.split of a length-6 vector into parts of size 2 and 4.

    Both output parts are compared with the expected arrays. With all-ones
    upstream gradients, the input gradient is expected to be all ones.

    Args:
        dev: device passed to ``to_device`` for the input and both ``dy`` tensors.
    """
    x_np = np.array([1., 2., 3., 4., 5., 6.]).astype(np.float32)
    dy_head_np = np.ones((2), dtype=np.float32)
    dy_tail_np = np.ones((4), dtype=np.float32)
    expected_parts = [
        np.array([1., 2.]).astype(np.float32),
        np.array([3., 4., 5., 6.]).astype(np.float32),
    ]
    expected_dx = np.ones((6), dtype=np.float32)

    x = tensor.from_numpy(x_np)
    dy_head = tensor.from_numpy(dy_head_np)
    dy_tail = tensor.from_numpy(dy_tail_np)
    x.to_device(dev)
    dy_head.to_device(dev)
    dy_tail.to_device(dev)

    parts = autograd.split(x, 0, (2, 4))
    dx = parts[0].creator.backward(dy_head.data, dy_tail.data)

    for position, part in enumerate(parts):
        np.testing.assert_array_almost_equal(tensor.to_numpy(part),
                                             expected_parts[position],
                                             decimal=5)
    got_dx = tensor.to_numpy(tensor.from_raw_tensor(dx))
    np.testing.assert_array_almost_equal(got_dx, expected_dx, decimal=5)
| |
def test_split_cpu(self):
    """Run the split check with ``cpu_dev``."""
    self.split_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_split_gpu(self):
    """Run the split check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.split_test(gpu_dev)
| |
def gather_test(self, dev):
    """Check autograd.gather against np.take for several index/axis pairs.

    The forward result is compared by value; the backward result is only
    checked for having the input's shape.

    Args:
        dev: device passed to ``to_device`` for the input and ``dy``.
    """
    # (indices, axis)
    cases = [
        ([0, 1, 3], 0),
        ([0, 1, 3], 1),
        ([[0, 1], [1, 2], [2, 3]], 1),
        ([0, -1, -2], 0),
    ]
    for indices, axis in cases:
        x_np = np.random.randn(5, 4, 3, 2).astype(np.float32)
        expected = np.take(x_np, indices, axis=axis)
        dy_np = np.ones(expected.shape, dtype=np.float32)

        x = tensor.from_numpy(x_np)
        dy = tensor.from_numpy(dy_np)
        x.to_device(dev)
        dy.to_device(dev)

        out = autograd.gather(x, axis, indices)
        dx = out.creator.backward(dy.data)

        np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                             expected,
                                             decimal=5)
        self.check_shape(dx.shape(), tuple(x_np.shape))
| |
def test_gather_cpu(self):
    """Run the gather check with ``cpu_dev``."""
    self.gather_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_gather_gpu(self):
    """Run the gather check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.gather_test(gpu_dev)
| |
def tile_test(self, dev):
    """Check autograd.tile against np.tile for scalar and list repeats.

    The upstream gradient is a copy of the tiled output, and the expected
    input gradient is the input multiplied by the product of ``repeats``.

    Args:
        dev: device passed to ``to_device`` for the input and ``dy``.
    """
    repeat_cases = (2, [2, 2], [2, 1, 2])
    for repeats in repeat_cases:
        x_np = np.array([0, 1, 2]).astype(np.float32)
        expected = np.tile(x_np, repeats)
        dy_np = np.copy(expected)

        x = tensor.from_numpy(x_np)
        dy = tensor.from_numpy(dy_np)
        x.to_device(dev)
        dy.to_device(dev)

        out = autograd.tile(x, repeats)
        dx = out.creator.backward(dy.data)
        expected_dx = np.multiply(x_np, np.prod(repeats))

        np.testing.assert_array_almost_equal(tensor.to_numpy(out),
                                             expected,
                                             decimal=5)
        got_dx = tensor.to_numpy(tensor.from_raw_tensor(dx))
        np.testing.assert_array_almost_equal(got_dx, expected_dx, decimal=5)
| |
def test_tile_cpu(self):
    """Run the tile check with ``cpu_dev``."""
    self.tile_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_tile_gpu(self):
    """Run the tile check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.tile_test(gpu_dev)
| |
def noneZero_test(self, dev):
    """Check autograd.nonzero against np.nonzero on a 2x2 matrix.

    Args:
        dev: device passed to ``to_device`` for the input.
    """
    x_np = np.array([[1, 0], [1, 1]]).astype(np.float32)
    expected = np.array((np.nonzero(x_np)))

    x = tensor.from_numpy(x_np)
    x.to_device(dev)

    actual = tensor.to_numpy(autograd.nonzero(x))
    np.testing.assert_array_almost_equal(actual, expected, decimal=5)
| |
def test_noneZero_cpu(self):
    """Run the nonzero check with ``cpu_dev``."""
    self.noneZero_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_noneZero_gpu(self):
    """Run the nonzero check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.noneZero_test(gpu_dev)
| |
def cast_test(self, dev):
    """Check autograd.cast between float32 and int32 in both directions.

    Both the dtype of the converted result and its values are verified.

    Args:
        dev: device passed to ``to_device`` for the input.
    """
    data = [[1, 0], [1, 1]]
    # (source numpy dtype, expected numpy dtype, target tensor dtype)
    conversions = [
        (np.float32, np.int32, tensor.int32),
        (np.int32, np.float32, tensor.float32),
    ]
    for src_dtype, dst_dtype, target in conversions:
        x_np = np.array(data).astype(src_dtype)
        expected = np.array(data).astype(dst_dtype)

        x = tensor.from_numpy(x_np)
        x.to_device(dev)

        actual = tensor.to_numpy(autograd.cast(x, target))
        assert actual.dtype == expected.dtype, "type %s != %s." % (
            actual.dtype, expected.dtype)
        np.testing.assert_array_almost_equal(actual, expected, decimal=5)
| |
def test_cast_cpu(self):
    """Run the cast check with ``cpu_dev``."""
    self.cast_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_cast_gpu(self):
    """Run the cast check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.cast_test(gpu_dev)
| |
def onehot_test(self, dev):
    """Check autograd.onehot against a NumPy one-hot reference.

    The reference 0/1 encoding is rescaled so that "off" entries equal
    ``off_value`` and "on" entries equal ``on_value``.

    Args:
        dev: device passed to ``to_device`` for the indices tensor.
    """

    def one_hot(indices, depth, axis=-1, dtype=np.float32):  # type: ignore
        ''' Compute one hot from indices at a specific axis '''
        idx = np.asarray(indices)
        ndim = len(idx.shape)
        classes = np.arange(depth)
        if axis < 0:
            # a negative axis counts from the end of the output rank (ndim + 1)
            axis += (ndim + 1)
        lead = idx.shape[0:axis]
        trail = idx.shape[axis:ndim]
        # Place the class range on the new axis and a singleton axis in the
        # indices at the same position, so the comparison broadcasts.
        class_grid = np.reshape(
            classes, (1,) * len(lead) + classes.shape + (1,) * len(trail))
        idx = np.reshape(np.mod(idx, depth), lead + (1,) + trail)
        return np.asarray(class_grid == idx, dtype=dtype)

    axisValue = 1
    on_value = 3
    off_value = 1
    output_type = np.float32
    indices = np.array([[1, 9], [2, 4]], dtype=np.float32)
    depth = np.array([10], dtype=np.float32)
    values = np.array([off_value, on_value], dtype=output_type)

    expected = one_hot(indices, depth, axis=axisValue, dtype=output_type)
    expected = expected * (on_value - off_value) + off_value

    x = tensor.from_numpy(indices)
    x.to_device(dev)

    actual = tensor.to_numpy(autograd.onehot(axisValue, x, depth, values))
    np.testing.assert_array_almost_equal(actual, expected, decimal=5)
| |
def test_onehot_cpu(self):
    """Run the one-hot check with ``cpu_dev``."""
    self.onehot_test(cpu_dev)
| |
@unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
def test_onehot_gpu(self):
    """Run the one-hot check with ``gpu_dev``; skipped when USE_CUDA is false."""
    self.onehot_test(gpu_dev)
| |
| |
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()