| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================= |
| |
| import unittest |
| from builtins import str |
| |
from singa import tensor
from singa import autograd
from singa import layer
from singa import singa_wrap
from singa import singa_wrap as singa  # alias used by the raw CTensor setup below
from cuda_helper import gpu_dev, cpu_dev
| |
| import numpy as np |
| |
| autograd.training = True |
| |
| CTensor = singa.Tensor |
| |
# a raw CTensor of shape (2, 1, 2, 2) filled with samples from N(0, 1)
dy = CTensor([2, 1, 2, 2])
singa.Gaussian(0.0, 1.0, dy)
| |
| |
| def _tuple_to_string(t): |
| lt = [str(x) for x in t] |
| return '(' + ', '.join(lt) + ')' |
| |
| |
| def axis_helper(y_shape, x_shape): |
| """ |
| check which axes the x has been broadcasted |
| Args: |
| y_shape: the shape of result |
| x_shape: the shape of x |
| Return: |
| a tuple refering the axes |
| """ |
| res = [] |
| j = len(x_shape) - 1 |
| for i in range(len(y_shape) - 1, -1, -1): |
| if j < 0 or x_shape[j] != y_shape[i]: |
| res.append(i) |
| j -= 1 |
| return tuple(res[::-1]) |
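

# A quick sanity check: broadcasting x of shape (3, 1) to y of shape
# (2, 3, 4) stretches axes 0 and 2, so a gradient w.r.t. y is reduced over
# exactly those axes to recover x's shape.
assert axis_helper((2, 3, 4), (3, 1)) == (0, 2)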
| |
| |
| def prepare_inputs_targets_for_rnn_test(dev): |
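    # three timesteps for a batch of 2: inputs are (2, 3) feature vectors;
    # the initial hidden state and the targets are (2, 2)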
| x_0 = np.random.random((2, 3)).astype(np.float32) |
| x_1 = np.random.random((2, 3)).astype(np.float32) |
| x_2 = np.random.random((2, 3)).astype(np.float32) |
| |
| h_0 = np.zeros((2, 2)).astype(np.float32) |
| |
| t_0 = np.random.random((2, 2)).astype(np.float32) |
| t_1 = np.random.random((2, 2)).astype(np.float32) |
| t_2 = np.random.random((2, 2)).astype(np.float32) |
| |
| x0 = tensor.Tensor(device=dev, data=x_0) |
| x1 = tensor.Tensor(device=dev, data=x_1) |
| x2 = tensor.Tensor(device=dev, data=x_2) |
| |
| h0 = tensor.Tensor(device=dev, data=h_0) |
| |
| t0 = tensor.Tensor(device=dev, data=t_0) |
| t1 = tensor.Tensor(device=dev, data=t_1) |
| t2 = tensor.Tensor(device=dev, data=t_2) |
| |
| inputs = [x0, x1, x2] |
| targets = [t0, t1, t2] |
| return inputs, targets, h0 |
| |
| |
| class TestPythonOperation(unittest.TestCase): |
| |
| def check_shape(self, actual, expect): |
        self.assertEqual(
            actual, expect, 'shape mismatch, actual shape is %s,'
            ' expected is %s' %
            (_tuple_to_string(actual), _tuple_to_string(expect)))
| |
| def _greater_helper(self, dev): |
| x0 = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, |
| 0.9]).reshape(3, 2).astype(np.float32) |
| x1 = np.array([0, -0.3, 0, 0.1, 0, 0.9]).reshape(3, |
| 2).astype(np.float32) |
| y = np.greater(x0, x1) |
| x0 = tensor.from_numpy(x0) |
| x1 = tensor.from_numpy(x1) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd.greater(x0, x1) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_Greater_cpu(self): |
| self._greater_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Greater_gpu(self): |
| self._greater_helper(gpu_dev) |
| |
| def _conv2d_helper(self, dev): |
| # (out_channels, kernel_size) |
| conv_0 = layer.Conv2d(1, 2) |
| conv_without_bias_0 = layer.Conv2d(1, 2, bias=False) |
| |
        input_tensor = tensor.Tensor(shape=(2, 3, 3, 3), device=dev)
        input_tensor.gaussian(0.0, 1.0)

        dy = tensor.Tensor(shape=(2, 1, 2, 2), device=dev)
        dy.gaussian(0.0, 1.0)

        y = conv_0(input_tensor)  # PyTensor
        dx, dW, db = y.creator.backward(dy.data)  # CTensor
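        # with no padding and stride 1, the output spatial size is
        # (in - kernel) / stride + 1 = (3 - 2) / 1 + 1 = 2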
| |
| self.check_shape(y.shape, (2, 1, 2, 2)) |
| self.check_shape(dx.shape(), (2, 3, 3, 3)) |
| self.check_shape(dW.shape(), (1, 3, 2, 2)) |
| self.check_shape(db.shape(), (1,)) |
| |
| # forward without bias |
        y_without_bias = conv_without_bias_0(input_tensor)
| self.check_shape(y_without_bias.shape, (2, 1, 2, 2)) |
| |
| def test_conv2d_cpu(self): |
| self._conv2d_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_conv2d_gpu(self): |
| self._conv2d_helper(gpu_dev) |
| |
| def _conv_same_pad(self, dev, pad_mode, is_2d): |
        if is_2d:
            x_h, w_h, k_h = 32, 4, 4
        else:
            x_h, w_h, k_h = 1, 1, 1
| |
| x = tensor.Tensor(shape=(3, 3, x_h, 32), device=dev) |
| x.gaussian(0.0, 1.0) |
| |
        # with SAME padding and stride 1, the total padding along a
        # dimension is kernel - 1 (3 for the kernel-4 width here):
        # SAME_UPPER splits 3 as (1, 2), i.e. (1, 1) + (0, 1)
        # SAME_LOWER splits 3 as (2, 1), i.e. (1, 1) + (1, 0)

        kernel = (k_h, 4)
        stride = (1, 1)
        group = 1
        bias = False
        out_channels = 3
| |
| conv_0 = layer.Conv2d(out_channels, |
| kernel, |
| stride=stride, |
| group=group, |
| bias=bias, |
| pad_mode=pad_mode) |
| |
| y = conv_0(x) |
| dy = np.ones((3, 3, x_h, 32), dtype=np.float32) |
| dy = tensor.from_numpy(dy) |
| dy.to_device(dev) |
| |
| dx, dW = y.creator.backward(dy.data) |
| self.check_shape(y.shape, (3, 3, x_h, 32)) |
| self.check_shape(dx.shape(), (3, 3, x_h, 32)) |
| self.check_shape(dW.shape(), (3, 3, w_h, 4)) |
| |
| def test_conv2d_same_pad_cpu(self): |
| self._conv_same_pad(cpu_dev, "SAME_LOWER", True) |
| self._conv_same_pad(cpu_dev, "SAME_UPPER", True) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_conv2d_same_pad_gpu(self): |
| self._conv_same_pad(gpu_dev, "SAME_LOWER", True) |
| self._conv_same_pad(gpu_dev, "SAME_UPPER", True) |
| |
| def test_conv1d_same_pad_cpu(self): |
| self._conv_same_pad(cpu_dev, "SAME_LOWER", False) |
| self._conv_same_pad(cpu_dev, "SAME_UPPER", False) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_conv1d_same_pad_gpu(self): |
| self._conv_same_pad(gpu_dev, "SAME_LOWER", False) |
| self._conv_same_pad(gpu_dev, "SAME_UPPER", False) |
| |
| def _pooling_same_pad(self, dev, pad_mode, is_2d): |
        if is_2d:
            x_h, k_h = 32, 4
        else:
            x_h, k_h = 1, 1
| |
| x = tensor.Tensor(shape=(3, 3, x_h, 32), device=dev) |
| x.gaussian(0.0, 1.0) |
| |
        # with SAME padding and stride 1, the total padding along a
        # dimension is kernel - 1 (3 for the kernel-4 width here):
        # SAME_UPPER splits 3 as (1, 2), i.e. (1, 1) + (0, 1)
        # SAME_LOWER splits 3 as (2, 1), i.e. (1, 1) + (1, 0)

        kernel = (k_h, 4)
        stride = (1, 1)
| |
| pooling = layer.Pooling2d(kernel, stride=stride, pad_mode=pad_mode) |
| |
| y = pooling(x) |
| |
| dy = np.ones((3, 3, x_h, 32), dtype=np.float32) |
| dy = tensor.from_numpy(dy) |
| dy.to_device(dev) |
| |
| dx = y.creator.backward(dy.data) |
| self.check_shape(y.shape, (3, 3, x_h, 32)) |
| self.check_shape(dx.shape(), (3, 3, x_h, 32)) |
| |
| def test_pooling2d_same_pad_cpu(self): |
| self._pooling_same_pad(cpu_dev, "SAME_LOWER", True) |
| self._pooling_same_pad(cpu_dev, "SAME_UPPER", True) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_pooling2d_same_pad_gpu(self): |
| self._pooling_same_pad(gpu_dev, "SAME_LOWER", True) |
| self._pooling_same_pad(gpu_dev, "SAME_UPPER", True) |
| |
| def test_pooling1d_same_pad_cpu(self): |
| self._pooling_same_pad(cpu_dev, "SAME_LOWER", False) |
| self._pooling_same_pad(cpu_dev, "SAME_UPPER", False) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_pooling1d_same_pad_gpu(self): |
| self._pooling_same_pad(gpu_dev, "SAME_LOWER", False) |
| self._pooling_same_pad(gpu_dev, "SAME_UPPER", False) |
| |
| def _sum_helper(self, dev): |
| x = np.array([0.1, -1.0, 0.4, 4.0, -0.9, |
| 9.0]).reshape(3, 2).astype(np.float32) |
| x1 = np.array([0.1, 1.0, 0.4, 4.0, 0.9, |
| 9.0]).reshape(3, 2).astype(np.float32) |
| y = x + x1 |
| dy = np.ones((3, 2), dtype=np.float32) |
| grad0 = dy |
| grad1 = dy |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.sum(x, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| grad0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| grad1, |
| decimal=5) |
| |
| def test_sum_cpu(self): |
| self._sum_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_sum_gpu(self): |
| self._sum_helper(gpu_dev) |
| |
| def _SeparableConv2d_helper(self, dev): |
        # SeparableConv2d(out_channels, kernel_size); in_channels is taken
        # from the input at the first forward pass
        if dev == cpu_dev:
            in_channels = 1
        else:
            in_channels = 8
        separ_conv = layer.SeparableConv2d(16, 3, padding=1)
| |
| x = np.random.random((10, in_channels, 28, 28)).astype(np.float32) |
| x = tensor.Tensor(device=dev, data=x) |
| |
| y = separ_conv(x) |
| self.check_shape(y.shape, (10, 16, 28, 28)) |
| |
        y1 = separ_conv.depthwise_conv(x)
        y2 = separ_conv.point_conv(y1)

        # the depthwise (3x3) conv filters each input channel separately;
        # the pointwise (1x1) conv then mixes the channels
        dy1, dW_point = y2.creator.backward(y2.data)
        dx, dW_depth = y1.creator.backward(dy1)

        self.check_shape(y2.shape, (10, 16, 28, 28))

        self.check_shape(dy1.shape(), (10, in_channels, 28, 28))
        self.check_shape(dW_point.shape(), (16, in_channels, 1, 1))

        self.check_shape(dx.shape(), (10, in_channels, 28, 28))
        self.check_shape(dW_depth.shape(), (in_channels, 1, 3, 3))
| |
| def test_SeparableConv2d_cpu(self): |
| self._SeparableConv2d_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_SeparableConv2d_gpu(self): |
| self._SeparableConv2d_helper(gpu_dev) |
| |
| def _batchnorm2d_helper(self, dev): |
| batchnorm_0 = layer.BatchNorm2d(3) |
| |
        input_tensor = tensor.Tensor(shape=(2, 3, 3, 3), device=dev)
        input_tensor.gaussian(0.0, 1.0)

        dy = input_tensor.clone().data

        y = batchnorm_0(input_tensor)
| dx, ds, db = y.creator.backward(dy) |
| |
| self.check_shape(y.shape, (2, 3, 3, 3)) |
| self.check_shape(dx.shape(), (2, 3, 3, 3)) |
| self.check_shape(ds.shape(), (3,)) |
| self.check_shape(db.shape(), (3,)) |
| |
| def test_batchnorm2d_cpu(self): |
| self._batchnorm2d_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_batchnorm2d_gpu(self): |
| self._batchnorm2d_helper(gpu_dev) |
| |
| def gradients_check(self, |
| func, |
| param, |
| autograds, |
| h=0.0005, |
| df=1, |
| dev=cpu_dev): |
        """Check autograd gradients against central-difference estimates.

        Each entry of param is perturbed by +/- h and the numerical
        gradient (loss(p + h) - loss(p - h)) / (2 * h), scaled by df, is
        compared with the autograd value.

        Args:
            param: PyTensor holding the parameter to check.
            autograds: numpy array, autograd's gradient w.r.t. param.
        """
        p = tensor.to_numpy(param)
        it = np.nditer(p, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            diff = np.zeros_like(p)
            diff[idx] += h
            diff = tensor.from_numpy(diff)
            diff.to_device(dev)

            param += diff
            pos = func()
            pos = tensor.to_numpy(pos)

            param -= diff
            param -= diff
            neg = func()
            neg = tensor.to_numpy(neg)

            # restore the perturbed entry before checking the next one
            param += diff

            numerical_grad = np.sum((pos - neg) * df) / (2 * h)
            self.assertAlmostEqual(autograds[idx] - numerical_grad,
                                   0.,
                                   places=2)
| |
| it.iternext() |
| |
    def _vanillaRNN_tiny_ops_shape_check_helper(self, dev):
| # gradients shape check. |
| inputs, target, h0 = prepare_inputs_targets_for_rnn_test(dev) |
| rnn = layer.RNN(3, 2) |
| |
| hs, _ = rnn(inputs, h0) |
| |
| loss = autograd.softmax_cross_entropy(hs[0], target[0]) |
| for i in range(1, len(hs)): |
| l = autograd.softmax_cross_entropy(hs[i], target[i]) |
| loss = autograd.add(loss, l) |
| for t, dt in autograd.backward(loss): |
| self.check_shape(t.shape, dt.shape) |
| |
    def test_vanillaRNN_tiny_ops_shape_check_cpu(self):
        self._vanillaRNN_tiny_ops_shape_check_helper(cpu_dev)

    @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
    def test_vanillaRNN_tiny_ops_shape_check_gpu(self):
        self._vanillaRNN_tiny_ops_shape_check_helper(gpu_dev)
| |
    def _LSTM_tiny_ops_shape_check_helper(self, dev):
| # gradients shape check. |
| inputs, target, h0 = prepare_inputs_targets_for_rnn_test(dev) |
        # the cell state matches the hidden state shape (batch 2, hidden 2)
        c_0 = np.random.random((2, 2)).astype(np.float32)
| c0 = tensor.Tensor(device=dev, data=c_0) |
| |
| rnn = layer.LSTM(3, 2) |
| |
| hs, _, _ = rnn(inputs, (h0, c0)) |
| loss = autograd.softmax_cross_entropy(hs[0], target[0]) |
| |
| for i in range(1, len(hs)): |
| l = autograd.softmax_cross_entropy(hs[i], target[i]) |
| loss = autograd.add(loss, l) |
| for t, dt in autograd.backward(loss): |
| self.check_shape(t.shape, dt.shape) |
| |
    def test_LSTM_tiny_ops_shape_check_cpu(self):
        self._LSTM_tiny_ops_shape_check_helper(cpu_dev)

    @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled')
    def test_LSTM_tiny_ops_shape_check_gpu(self):
        self._LSTM_tiny_ops_shape_check_helper(gpu_dev)
| |
    def _numerical_gradients_check_for_vanilla_rnn_helper(self, dev):
| inputs, target, h0 = prepare_inputs_targets_for_rnn_test(dev) |
| |
| rnn = layer.RNN(3, 2) |
| |
        def vanilla_rnn_forward():
| hs, _ = rnn(inputs, h0) |
| |
| loss = autograd.softmax_cross_entropy(hs[0], target[0]) |
| for i in range(1, len(hs)): |
| l = autograd.softmax_cross_entropy(hs[i], target[i]) |
| loss = autograd.add(loss, l) |
| return loss |
| |
        loss1 = vanilla_rnn_forward()
| auto_grads = autograd.gradients(loss1) |
| |
| params = rnn.get_params() |
| for key, param in params.items(): |
| auto_grad = tensor.to_numpy(auto_grads[id(param)]) |
| |
            self.gradients_check(vanilla_rnn_forward, param, auto_grad, dev=dev)
| |
| def _gradient_check_cudnn_rnn(self, mode="vanilla", dev=gpu_dev): |
| seq = 10 |
| bs = 2 |
| fea = 10 |
| hid = 10 |
| x = np.random.random((seq, bs, fea)).astype(np.float32) |
| tx = tensor.Tensor(device=dev, data=x) |
| y = np.random.random((seq, bs, hid)).astype(np.float32) |
| y = np.reshape(y, (-1, hid)) |
| ty = tensor.Tensor(device=dev, data=y) |
| rnn = layer.CudnnRNN(hid, rnn_mode=mode, return_sequences=True) |
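        # return_sequences=True makes the layer emit a (seq, bs, hid) output,
        # flattened below to match the (seq * bs, hid) targets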
| |
        def rnn_forward():
| out = rnn(tx) |
| out = autograd.reshape(out, (-1, hid)) |
| loss = autograd.softmax_cross_entropy(out, ty) |
| return loss |
| |
        loss = rnn_forward()
| auto_grads = autograd.gradients(loss) |
| |
| params = rnn.get_params() |
| for key, param in params.items(): |
| auto_grad = tensor.to_numpy(auto_grads[id(param)]) |
            self.gradients_check(rnn_forward, param, auto_grad, dev=dev)
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_gradient_check_cudnn_rnn_vanilla(self): |
| self._gradient_check_cudnn_rnn(mode="vanilla", dev=gpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_gradient_check_cudnn_rnn_lstm(self): |
| self._gradient_check_cudnn_rnn(mode="lstm", dev=gpu_dev) |
| |
| # Cos Sim Gradient Check |
| def _gradient_check_cossim(self, dev=gpu_dev): |
| bs = 2 |
| vec = 3 |
| ta = tensor.random((bs, vec), dev) |
| tb = tensor.random((bs, vec), dev) |
| # treat ta, tb as params |
| ta.stores_grad = True |
| tb.stores_grad = True |
| ty = tensor.random((bs,), dev) |
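        # cossim(a, b) = <a, b> / (|a| * |b|), computed per batch row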
| |
| def _forward(): |
| out = autograd.cossim(ta, tb) |
| loss = autograd.mse_loss(out, ty) |
| return loss |
| |
| loss = _forward() |
| auto_grads = autograd.gradients(loss) |
| |
| params = {id(ta): ta, id(tb): tb} |
| |
| for key, param in params.items(): |
| auto_grad = tensor.to_numpy(auto_grads[id(param)]) |
| self.gradients_check(_forward, param, auto_grad, dev=dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_gradient_check_cossim_gpu(self): |
| self._gradient_check_cossim(dev=gpu_dev) |
| |
| def test_gradient_check_cossim_cpu(self): |
| self._gradient_check_cossim(dev=cpu_dev) |
| |
    def test_numerical_gradients_check_for_vanilla_rnn_cpu(self):
        self._numerical_gradients_check_for_vanilla_rnn_helper(cpu_dev)
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
    def test_numerical_gradients_check_for_vanilla_rnn_gpu(self):
        self._numerical_gradients_check_for_vanilla_rnn_helper(gpu_dev)
| |
| def _numerical_gradients_check_for_lstm_helper(self, dev): |
| inputs, target, h0 = prepare_inputs_targets_for_rnn_test(dev) |
| c_0 = np.zeros((2, 2)).astype(np.float32) |
| c0 = tensor.Tensor(device=dev, data=c_0) |
| |
| rnn = layer.LSTM(3, 2) |
| |
| def lstm_forward(): |
| hs, _, _ = rnn(inputs, (h0, c0)) |
| |
| loss = autograd.softmax_cross_entropy(hs[0], target[0]) |
| for i in range(1, len(hs)): |
| l = autograd.softmax_cross_entropy(hs[i], target[i]) |
| loss = autograd.add(loss, l) |
| return loss |
| |
| loss1 = lstm_forward() |
| auto_grads = autograd.gradients(loss1) |
| |
| params = rnn.get_params() |
| for key, param in params.items(): |
| auto_grad = tensor.to_numpy(auto_grads[id(param)]) |
| |
| self.gradients_check(lstm_forward, param, auto_grad, dev=dev) |
| |
| def test_numerical_gradients_check_for_lstm_cpu(self): |
| self._numerical_gradients_check_for_lstm_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_numerical_gradients_check_for_lstm_gpu(self): |
| self._numerical_gradients_check_for_lstm_helper(gpu_dev) |
| |
| def _MeanSquareError_helper(self, dev): |
| X = np.array([4.3, 5.4, 3.3, 3.6, 5.7, |
| 6.0]).reshape(3, 2).astype(np.float32) |
| T = np.array([4.4, 5.3, 3.2, 3.7, 5.4, |
| 6.3]).reshape(3, 2).astype(np.float32) |
| x = tensor.from_numpy(X) |
| t = tensor.from_numpy(T) |
| x.to_device(dev) |
| t.to_device(dev) |
| |
| loss = autograd.mse_loss(x, t) |
| dx = loss.creator.backward() |
| |
| loss_np = tensor.to_numpy(loss)[0] |
| self.assertAlmostEqual(loss_np, 0.0366666, places=4) |
| self.check_shape(dx.shape(), (3, 2)) |
| |
| def test_MeanSquareError_cpu(self): |
| self._MeanSquareError_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_MeanSquareError_gpu(self): |
| self._MeanSquareError_helper(gpu_dev) |
| |
| def _Abs_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.array([0.8, 1.2, 3.3, 3.6, 0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| result = autograd.abs(x) |
| dx = result.creator.backward(x.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), XT) |
| self.check_shape(dx.shape(), (3, 2)) |
| |
| def test_Abs_cpu(self): |
| self._Abs_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Abs_gpu(self): |
| self._Abs_helper(gpu_dev) |
| |
| def _Mean_helper(self, dev): |
| x0 = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, |
| 0.9]).reshape(3, 2).astype(np.float32) |
| x1 = np.array([0, -0.3, 0, 0.1, 0, 0.9]).reshape(3, |
| 2).astype(np.float32) |
| y = (x0 + x1) / 2 |
| grad = np.ones(x0.shape) / 2 |
| x0 = tensor.from_numpy(x0) |
| x1 = tensor.from_numpy(x1) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd.mean(x0, x1) |
| dy = tensor.from_numpy(np.ones((3, 2)).astype(np.float32)) |
| dy.to_device(dev) |
| dx0, dx1 = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| grad, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| grad, |
| decimal=5) |
| |
| def test_Mean_cpu(self): |
| self._Mean_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Mean_gpu(self): |
| self._Mean_helper(gpu_dev) |
| |
| def _Exp_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.exp(X) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| result = autograd.exp(x) |
| dx = result.creator.backward(x.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| self.check_shape(dx.shape(), (3, 2)) |
| |
| def test_Exp_cpu(self): |
| self._Exp_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Exp_gpu(self): |
| self._Exp_helper(gpu_dev) |
| |
| def _Identity_helper(self, dev): |
| x = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, |
| 0.9]).reshape(3, 2).astype(np.float32) |
| y = x.copy() |
| grad = np.ones(x.shape) |
| x = tensor.from_numpy(x) |
| x.to_device(dev) |
| |
| result = autograd.identity(x) |
| dy = tensor.from_numpy(np.ones((3, 2)).astype(np.float32)) |
| dy.to_device(dev) |
| dx = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| grad, |
| decimal=5) |
| self.check_shape(dx.shape(), (3, 2)) |
| |
| def test_Identity_cpu(self): |
| self._Identity_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Identity_gpu(self): |
| self._Identity_helper(gpu_dev) |
| |
| def _LeakyRelu_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.array([0.8, -0.012, 3.3, -0.036, -0.005, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| result = autograd.leakyrelu(x) |
| |
| dx = result.creator.backward(x.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), XT) |
| self.check_shape(dx.shape(), (3, 2)) |
| |
| def test_LeakyRelu_cpu(self): |
| self._LeakyRelu_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_LeakyRelu_gpu(self): |
| self._LeakyRelu_helper(gpu_dev) |
| |
| def _Relu_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.maximum(X, 0) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.relu(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = (X > 0).astype(np.float32) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Relu_cpu(self): |
| self._Relu_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Relu_gpu(self): |
| self._Relu_helper(gpu_dev) |
| |
| def _Cos_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.cos(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.cos(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = -np.sin(X) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Cos_cpu(self): |
| self._Cos_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Cos_gpu(self): |
| self._Cos_helper(gpu_dev) |
| |
| def _Cosh_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.cosh(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.cosh(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = np.sinh(X) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Cosh_cpu(self): |
| self._Cosh_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Cosh_gpu(self): |
| self._Cosh_helper(gpu_dev) |
| |
| def _Acos_helper(self, dev): |
| X = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, |
| 0.9]).reshape(3, 2).astype(np.float32) |
| XT = np.arccos(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.acos(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = -1.0 / np.sqrt(1.0 - np.square(X)) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Acos_cpu(self): |
| self._Acos_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Acos_gpu(self): |
| self._Acos_helper(gpu_dev) |
| |
| def _Acosh_helper(self, dev): |
| X = np.array([1.1, 1.5, 1.9, 2.2, 2.5, |
| 2.8]).reshape(3, 2).astype(np.float32) |
| XT = np.arccosh(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.acosh(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = 1.0 / np.multiply(np.sqrt(X - 1.0), np.sqrt(X + 1.0)) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Acosh_cpu(self): |
| self._Acosh_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Acosh_gpu(self): |
| self._Acosh_helper(gpu_dev) |
| |
| def _Sin_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.sin(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.sin(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = np.cos(X) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Sin_cpu(self): |
| self._Sin_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Sin_gpu(self): |
| self._Sin_helper(gpu_dev) |
| |
| def _Sinh_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.sinh(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.sinh(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = np.cosh(X) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Sinh_cpu(self): |
| self._Sinh_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Sinh_gpu(self): |
| self._Sinh_helper(gpu_dev) |
| |
| def _Asin_helper(self, dev): |
| X = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, |
| 0.9]).reshape(3, 2).astype(np.float32) |
| XT = np.arcsin(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.asin(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = 1.0 / np.sqrt(1.0 - np.square(X)) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Asin_cpu(self): |
| self._Asin_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Asin_gpu(self): |
| self._Asin_helper(gpu_dev) |
| |
| def _Asinh_helper(self, dev): |
| X = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, |
| 0.9]).reshape(3, 2).astype(np.float32) |
| XT = np.arcsinh(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.asinh(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = 1.0 / np.sqrt(np.square(X) + 1.0) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Asinh_cpu(self): |
| self._Asinh_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Asinh_gpu(self): |
| self._Asinh_helper(gpu_dev) |
| |
| def _Tan_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.tan(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.tan(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = 1.0 / np.square(np.cos(X)) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Tan_cpu(self): |
| self._Tan_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Tan_gpu(self): |
| self._Tan_helper(gpu_dev) |
| |
| def _Tanh_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.tanh(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.tanh(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = 1.0 / np.square(np.cosh(X)) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Tanh_cpu(self): |
| self._Tanh_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Tanh_gpu(self): |
| self._Tanh_helper(gpu_dev) |
| |
| def _Atan_helper(self, dev): |
| X = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, |
| 0.9]).reshape(3, 2).astype(np.float32) |
| XT = np.arctan(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.atan(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = 1.0 / (1.0 + np.square(X)) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Atan_cpu(self): |
| self._Atan_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Atan_gpu(self): |
| self._Atan_helper(gpu_dev) |
| |
| def _Atanh_helper(self, dev): |
| X = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, |
| 0.9]).reshape(3, 2).astype(np.float32) |
| XT = np.arctanh(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.atanh(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = 1.0 / (1.0 - np.square(X)) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Atanh_cpu(self): |
| self._Atanh_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Atanh_gpu(self): |
| self._Atanh_helper(gpu_dev) |
| |
| def _Less_helper(self, dev): |
| x0 = np.array([-0.9, -0.3, -0.1, 0.1, 0.5, |
| 0.9]).reshape(3, 2).astype(np.float32) |
| x1 = np.array([0, -0.3, 0, 0.1, 0, 0.9]).reshape(3, |
| 2).astype(np.float32) |
| y = np.less(x0, x1) |
| x0 = tensor.from_numpy(x0) |
| x1 = tensor.from_numpy(x1) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd.less(x0, x1) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_Less_cpu(self): |
| self._Less_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Less_gpu(self): |
| self._Less_helper(gpu_dev) |
| |
| def _Sub_helper(self, dev): |
| X0 = np.array([7, -5, 0.2, -0.1, 0.3, 4]).reshape(3, |
| 2).astype(np.float32) |
| X1 = np.array([0.6, -1.3, 0.1, -0.1, 0.4, |
| 0.3]).reshape(3, 2).astype(np.float32) |
| XT = np.subtract(X0, X1) |
| |
| DY = np.ones((3, 2), dtype=np.float32) |
| x0 = tensor.from_numpy(X0) |
| x1 = tensor.from_numpy(X1) |
| dy = tensor.from_numpy(DY) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.sub(x0, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
| |
| DX0 = np.multiply(DY, 1.0) |
| DX1 = np.multiply(DY, -1.0) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| DX0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| DX1, |
| decimal=5) |
| |
| def test_Sub_cpu(self): |
| self._Sub_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Sub_gpu(self): |
| self._Sub_helper(gpu_dev) |
| |
| def _Pow_helper(self, dev): |
| X0 = np.array([7, 5, 0.2, 0.1, 0.3, 4]).reshape(3, 2).astype(np.float32) |
| X1 = np.array([-1.0, 2.0, -1.0, -2.1, 1.0, |
| -2.0]).reshape(3, 2).astype(np.float32) |
| XT = np.power(X0, X1) |
| |
| DY = np.ones((3, 2), dtype=np.float32) |
| x0 = tensor.from_numpy(X0) |
| x1 = tensor.from_numpy(X1) |
| dy = tensor.from_numpy(DY) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.pow(x0, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
| |
| G0 = np.multiply(X1, np.power(X0, (X1 - 1.0))) |
| DX0 = np.multiply(G0, DY) |
| G1 = np.multiply(np.power(X0, X1), np.log(X0)) |
| DX1 = np.multiply(G1, DY) |
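        # X0 is kept strictly positive so that np.log(X0) in G1 is defined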
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| DX0, |
| decimal=4) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| DX1, |
| decimal=4) |
| |
| def test_Pow_cpu(self): |
| self._Pow_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Pow_gpu(self): |
| self._Pow_helper(gpu_dev) |
| |
| def _SoftSign_helper(self, dev): |
| # y = x / (1 + np.abs(x)) |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = X / (1 + np.absolute(X)) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.softsign(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = 1.0 / np.square(np.absolute(X) + 1.0) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_SoftSign_cpu(self): |
| self._SoftSign_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_SoftSign_gpu(self): |
| self._SoftSign_helper(gpu_dev) |
| |
| def _SoftPlus_helper(self, dev): |
        # y = np.log(np.exp(x) + 1)
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.log(np.exp(X) + 1) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.softplus(x) |
| dx = result.creator.backward(dy.data) |
| |
        G = 1.0 / (1.0 + np.exp(-X))  # the sigmoid: d/dx log(1 + e^x)
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_SoftPlus_cpu(self): |
| self._SoftPlus_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_SoftPlus_gpu(self): |
| self._SoftPlus_helper(gpu_dev) |
| |
| def _unsqueeze_helper(self, dev): |
| data = [0.1, -1.0, 0.4, 4.0, -0.9, 9.0] |
| |
| x = np.array(data).reshape(1, 2, 3).astype(np.float32) |
| y = x.reshape(1, 1, 2, 3, 1) |
| dy = np.ones((1, 1, 2, 3, 1), dtype=np.float32) |
| grad = dy.reshape(1, 2, 3) |
| |
| x = tensor.from_numpy(x) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.unsqueeze(x, [0, 4]) |
| dx = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| grad, |
| decimal=5) |
| |
| def test_unsqueeze_cpu(self): |
| self._unsqueeze_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_unsqueeze_gpu(self): |
| self._unsqueeze_helper(gpu_dev) |
| |
| def _Sqrt_helper(self, dev): |
| X = np.array([0.1, 1.0, 0.4, 4.0, 0.9, |
| 9.0]).reshape(3, 2).astype(np.float32) |
| XT = np.sqrt(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.sqrt(x) |
| dx = result.creator.backward(dy.data) |
| |
| G = 0.5 * np.power(X, -0.5) |
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Sqrt_cpu(self): |
| self._Sqrt_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Sqrt_gpu(self): |
| self._Sqrt_helper(gpu_dev) |
| |
| def _transpose_helper(self, dev): |
| x = np.random.randn(3, 2, 1) |
| y = x.transpose(1, 2, 0) |
| dy = np.random.randn(*(y.shape)) |
| grad = dy.transpose((2, 0, 1)) |
| |
| x = tensor.from_numpy(x) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.transpose(x, (1, 2, 0)) |
| dx = result.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| grad, |
| decimal=5) |
| |
| def test_transpose_cpu(self): |
| self._transpose_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_transpose_gpu(self): |
| self._transpose_helper(gpu_dev) |
| |
| def _Sign_helper(self, dev): |
| X = np.array([0.8, -1.2, 3.3, -3.6, -0.5, |
| 0.5]).reshape(3, 2).astype(np.float32) |
| XT = np.sign(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| result = autograd.sign(x) |
| dx = result.creator.backward(dy.data) |
        DX = np.multiply(DY, 0)  # d(sign)/dx is 0 almost everywhere
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Sign_cpu(self): |
| self._Sign_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Sign_gpu(self): |
| self._Sign_helper(gpu_dev) |
| |
| def _Log_helper(self, dev): |
| X = np.array([0.1, 1.0, 0.4, 1.4, 0.9, |
| 2.0]).reshape(3, 2).astype(np.float32) |
| XT = np.log(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| result = autograd.log(x) |
| dx = result.creator.backward(dy.data) |
        # d(log x)/dx = 1 / x
        G = 1.0 / X
| DX = np.multiply(G, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_Log_cpu(self): |
| self._Log_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Log_gpu(self): |
| self._Log_helper(gpu_dev) |
| |
| def _mul_helper(self, dev): |
| x = np.array([0.1, -1.0, 0.4, 4.0, -0.9, |
| 9.0]).reshape(3, 2).astype(np.float32) |
| x1 = np.array([0.1, 1.0, 0.4, 4.0, 0.9, |
| 9.0]).reshape(3, 2).astype(np.float32) |
| y = x * x1 |
| dy = np.array([0.1, 1.0, 0.4, 4.0, 0.9, |
| 9.0]).reshape(3, 2).astype(np.float32) |
| grad0 = x1 * dy |
| grad1 = x * dy |
| |
| x = tensor.from_numpy(x) |
        x1 = tensor.from_numpy(x1)
        dy = tensor.from_numpy(dy)
        x.to_device(dev)
        x1.to_device(dev)
        dy.to_device(dev)

        result = autograd.mul(x, x1)
| dx0, dx1 = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| grad0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| grad1, |
| decimal=5) |
| |
| def test_mul_cpu(self): |
| self._mul_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_mul_gpu(self): |
| self._mul_helper(gpu_dev) |
| |
| def _reshape_helper(self, dev): |
| x = np.array([0.1, -1.0, 0.4, 4.0, -0.9, |
| 9.0]).reshape(3, 2).astype(np.float32) |
| y = x.reshape(2, 3) |
| dy = np.array([1, 2, 3, 4, 5, 6]).reshape(2, 3).astype(np.float32) |
| grad = dy.reshape(3, 2) |
| |
| x = tensor.from_numpy(x) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.reshape(x, (2, 3)) |
| dx = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| grad, |
| decimal=5) |
| |
| def test_reshape_cpu(self): |
| self._reshape_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_reshape_gpu(self): |
| self._reshape_helper(gpu_dev) |
| |
| def _max_helper(self, dev): |
| X0 = np.array([0.1, 0.2, 2.0, 0.0, 0.1, |
| 0.2]).reshape(3, 2).astype(np.float32) |
| X1 = np.array([1.0, 2.0, 1.0, 2.1, 0.0, |
| 2.0]).reshape(3, 2).astype(np.float32) |
| XT = np.maximum(X0, X1) |
| |
| DY = np.ones((3, 2), dtype=np.float32) |
| x0 = tensor.from_numpy(X0) |
| x1 = tensor.from_numpy(X1) |
| dy = tensor.from_numpy(DY) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.max(x0, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
| |
| G = np.subtract(X0, X1) |
| DX0 = np.where(G > 0, 1, G * 0) |
| DX1 = np.where(G < 0, 1, G * 0) |
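        # subgradient of max: 1 flows to the larger input, 0 to the other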
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| DX0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| DX1, |
| decimal=5) |
| |
| def test_max_cpu(self): |
| self._max_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_max_gpu(self): |
| self._max_helper(gpu_dev) |
| |
| def _max_3inputs_helper(self, dev): |
| data_0 = np.array([3, 2, 1]).astype(np.float32) |
| data_1 = np.array([1, 4, 4]).astype(np.float32) |
| data_2 = np.array([2, 5, 3]).astype(np.float32) |
| XT = np.array([3, 5, 4]).astype(np.float32) |
| |
| DY = np.array([1, 1, 1]).astype(np.float32) |
| x0 = tensor.from_numpy(data_0) |
| x1 = tensor.from_numpy(data_1) |
| x2 = tensor.from_numpy(data_2) |
| dy = tensor.from_numpy(DY) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| x2.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.max(x0, x1, x2) |
| dx0, dx1, dx2 = result.creator.backward(dy.data) |
| |
| DX0 = np.array([1, 0, 0]).astype(np.float32) |
| DX1 = np.array([0, 0, 1]).astype(np.float32) |
| DX2 = np.array([0, 1, 0]).astype(np.float32) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| DX0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| DX1, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx2)), |
| DX2, |
| decimal=5) |
| |
| def test_max_3inputs_cpu(self): |
| self._max_3inputs_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_max_3inputs_gpu(self): |
| self._max_3inputs_helper(gpu_dev) |
| |
| def _max_1inputs_helper(self, dev): |
| data_0 = np.array([3, 2, 1]).astype(np.float32) |
| XT = np.array([3, 2, 1]).astype(np.float32) |
| |
| DY = np.array([1, 1, 1]).astype(np.float32) |
| x0 = tensor.from_numpy(data_0) |
| dy = tensor.from_numpy(DY) |
| x0.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.max(x0) |
| dx0 = result.creator.backward(dy.data) |
| |
| DX0 = np.array([1, 1, 1]).astype(np.float32) |
| |
        np.testing.assert_array_almost_equal(tensor.to_numpy(result),
                                             XT,
                                             decimal=5)
        np.testing.assert_array_almost_equal(tensor.to_numpy(
            tensor.from_raw_tensor(dx0)),
                                             DX0,
                                             decimal=5)
| |
| def test_max_1inputs_cpu(self): |
| self._max_1inputs_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_max_1inputs_gpu(self): |
| self._max_1inputs_helper(gpu_dev) |
| |
| def _Div_helper(self, dev): |
| X0 = np.array([7, -5, 0.2, -0.1, 0.3, 4]).reshape(3, |
| 2).astype(np.float32) |
| X1 = np.array([0.6, -1.3, 0.1, -0.1, 0.4, |
| 0.3]).reshape(3, 2).astype(np.float32) |
| XT = np.divide(X0, X1) |
| |
| DY = np.ones((3, 2), dtype=np.float32) |
| x0 = tensor.from_numpy(X0) |
| x1 = tensor.from_numpy(X1) |
| dy = tensor.from_numpy(DY) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.div(x0, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
| |
| G0 = 1.0 / X1 |
| DX0 = np.multiply(G0, DY) |
| G1 = np.divide(-X0, np.square(X1)) |
| DX1 = np.multiply(G1, DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| DX0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| DX1, |
| decimal=5) |
| |
| def test_Div_cpu(self): |
| self._Div_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_Div_gpu(self): |
| self._Div_helper(gpu_dev) |
| |
| def _squeeze_helper(self, dev): |
| x = np.random.randn(3, 1, 2, 1, 1) |
| y = x.reshape(3, 2) |
| dy = np.random.randn(3, 2) |
| grad = dy.reshape(3, 1, 2, 1, 1) |
| |
| x = tensor.from_numpy(x) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.squeeze(x, [1, 3, 4]) |
| dx = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| grad, |
| decimal=5) |
| |
| def test_squeeze_cpu(self): |
| self._squeeze_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_squeeze_gpu(self): |
| self._squeeze_helper(gpu_dev) |
| |
| def _shape_helper(self, dev): |
| x = np.array([0.1, -1.0, 0.4, 4.0, -0.9, |
| 9.0]).reshape(3, 2).astype(np.float32) |
| y = list(x.shape) |
| dy = np.ones((3, 2), dtype=np.float32) |
| grad = list(dy.shape) |
| |
| x = tensor.from_numpy(x) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.shape(x) |
| dx = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(dx, grad, decimal=5) |
| |
| def test_shape_cpu(self): |
| self._shape_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_shape_gpu(self): |
| self._shape_helper(gpu_dev) |
| |
| def _min_helper(self, dev): |
| X0 = np.array([0.1, 0.2, 2.0, 0.0, 0.1, |
| 0.2]).reshape(3, 2).astype(np.float32) |
| X1 = np.array([1.0, 2.0, 1.0, 2.1, 0.0, |
| 2.0]).reshape(3, 2).astype(np.float32) |
| XT = np.minimum(X0, X1) |
| |
| DY = np.ones((3, 2), dtype=np.float32) |
| x0 = tensor.from_numpy(X0) |
| x1 = tensor.from_numpy(X1) |
| dy = tensor.from_numpy(DY) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.min(x0, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
| |
| G = np.subtract(X0, X1) |
| DX0 = np.where(G < 0, 1, G * 0) |
| DX1 = np.where(G > 0, 1, G * 0) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| DX0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| DX1, |
| decimal=5) |
| |
| def test_min_cpu(self): |
| self._min_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_min_gpu(self): |
| self._min_helper(gpu_dev) |
| |
| def _min_3inputs_helper(self, dev): |
| data_0 = np.array([3, 2, 1]).astype(np.float32) |
| data_1 = np.array([1, 4, 4]).astype(np.float32) |
| data_2 = np.array([2, 5, 0]).astype(np.float32) |
| XT = np.array([1, 2, 0]).astype(np.float32) |
| |
| DY = np.array([1, 1, 1]).astype(np.float32) |
| x0 = tensor.from_numpy(data_0) |
| x1 = tensor.from_numpy(data_1) |
| x2 = tensor.from_numpy(data_2) |
| dy = tensor.from_numpy(DY) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| x2.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.min(x0, x1, x2) |
| dx0, dx1, dx2 = result.creator.backward(dy.data) |
| |
| DX0 = np.array([0, 1, 0]).astype(np.float32) |
| DX1 = np.array([1, 0, 0]).astype(np.float32) |
| DX2 = np.array([0, 0, 1]).astype(np.float32) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| DX0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| DX1, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx2)), |
| DX2, |
| decimal=5) |
| |
| def test_min_3inputs_cpu(self): |
| self._min_3inputs_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_min_3inputs_gpu(self): |
| self._min_3inputs_helper(gpu_dev) |
| |
| def _min_1inputs_helper(self, dev): |
| data_0 = np.array([3, 2, 1]).astype(np.float32) |
| XT = np.array([3, 2, 1]).astype(np.float32) |
| |
| DY = np.array([1, 1, 1]).astype(np.float32) |
| x0 = tensor.from_numpy(data_0) |
| dy = tensor.from_numpy(DY) |
| x0.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.min(x0) |
| dx0 = result.creator.backward(dy.data) |
| |
| DX0 = np.array([1, 1, 1]).astype(np.float32) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| DX0, |
| decimal=5) |
| |
| def test_min_1inputs_cpu(self): |
| self._min_1inputs_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_min_1inputs_gpu(self): |
| self._min_1inputs_helper(gpu_dev) |
| |
| def _HardSigmoid_helper(self, dev): |
        x = np.random.randn(3, 2)
        # y = max(0, min(1, a * x + g))
        a = 0.2
        g = 0.5
        y = np.clip(x * a + g, 0, 1)
        dy = np.random.randn(3, 2)
        # the derivative is a inside the linear region 0 < y < 1, 0 outside
        grad = ((y > 0) & (y < 1)) * a * dy
| x = tensor.from_numpy(x) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.hardsigmoid(x, a, g) |
| dx = result.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| grad, |
| decimal=5) |
| |
| def test_HardSigmoid_cpu(self): |
| self._HardSigmoid_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_HardSigmoid_gpu(self): |
| self._HardSigmoid_helper(gpu_dev) |
| |
| def _prelu_helper(self, dev): |
| x = np.random.randn(3, 2) |
| slope = np.random.randn(3, 2) |
| y = np.clip(x, 0, np.inf) + np.clip(x, -np.inf, 0) * slope |
| dy = np.random.randn(3, 2) |
        # indicator of the positive region: 1 where x > 0, else 0
        pos = (x > 0).astype(x.dtype)
        # dy/dx is 1 on the positive side and slope on the negative side
        grad0 = (pos + (1 - pos) * slope) * dy
        # dy/dslope is 0 on the positive side and x on the negative side
        grad1 = (1 - pos) * x * dy
| x = tensor.from_numpy(x) |
| slope = tensor.from_numpy(slope) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| slope.to_device(dev) |
| dy.to_device(dev) |
| result = autograd.prelu(x, slope) |
| dx0, dx1 = result.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| grad0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| grad1, |
| decimal=5) |
| |
| def test_prelu_cpu(self): |
| self._prelu_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_prelu_gpu(self): |
| self._prelu_helper(gpu_dev) |
| |
| def _SeLU_helper(self, dev): |
| x = np.random.randn(3, 2) |
| a = 0.2 |
| g = 0.3 |
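        # selu: y = g * x for x > 0, and y = g * a * (exp(x) - 1) for x <= 0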
| y = np.clip(x, 0, |
| np.inf) * g + (np.exp(np.clip(x, -np.inf, 0)) - 1) * a * g |
| dy = np.random.randn(3, 2) |
| grad = (np.exp(np.clip(x, -np.inf, 0))) * g |
| grad[x <= 0] = grad[x <= 0] * a |
| grad *= dy |
| |
| x = tensor.from_numpy(x) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| result = autograd.selu(x, a, g) |
| dx = result.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| grad, |
| decimal=5) |
| |
| def test_SeLU_cpu(self): |
| self._SeLU_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_SeLU_gpu(self): |
| self._SeLU_helper(gpu_dev) |
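
    # The reference gradient in _SeLU_helper follows from
    #     selu(x) = g * x                  for x > 0
    #     selu(x) = g * a * (exp(x) - 1)   for x <= 0
    # so d selu / dx is g on the positive side and g * a * exp(x) on the
    # non-positive side, which is exactly the grad[x <= 0] *= a adjustment.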
| |
| def _and_helper(self, dev): |
| x0 = np.array([0, -0.3, -0.1, 0.1, 0.5, |
| 0.9]).reshape(3, 2).astype(np.float32) |
| x1 = np.array([0, -0.3, 0, 0.1, 0.5, 0.9]).reshape(3, |
| 2).astype(np.float32) |
| |
| y = np.logical_and(x0, x1) |
| x0 = tensor.from_numpy(x0) |
| x1 = tensor.from_numpy(x1) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd._and(x0, x1) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_and_cpu(self): |
| self._and_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_and_gpu(self): |
| self._and_helper(gpu_dev) |
| |
| def _or_helper(self, dev): |
| x0 = np.array([1.0, 1.0, 2.0, -3.0, 0, |
| -7.0]).reshape(3, 2).astype(np.float32) |
| x1 = np.array([-1.0, 0, 2.0, 4.0, 0, |
| -7.0]).reshape(3, 2).astype(np.float32) |
| |
| y = np.logical_or(x0, x1) |
| x0 = tensor.from_numpy(x0) |
| x1 = tensor.from_numpy(x1) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd._or(x0, x1) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_or_cpu(self): |
| self._or_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_or_gpu(self): |
| self._or_helper(gpu_dev) |
| |
| def _not_helper(self, dev): |
| x = np.array([1.0, -1.0, 0, -0.1, 0, |
| -7.0]).reshape(3, 2).astype(np.float32) |
| |
| y = np.logical_not(x) |
| x = tensor.from_numpy(x) |
| x.to_device(dev) |
| |
| result = autograd._not(x) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_not_cpu(self): |
| self._not_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_not_gpu(self): |
| self._not_helper(gpu_dev) |
| |
| def _xor_helper(self, dev): |
| x0 = np.array([0, -0.3, -0.1, 0.1, 0.5, |
| 9.0]).reshape(3, 2).astype(np.float32) |
| x1 = np.array([0, -0.3, 0, 0.1, 0, 0.9]).reshape(3, |
| 2).astype(np.float32) |
| |
| y = np.logical_xor(x0, x1) |
| x0 = tensor.from_numpy(x0) |
| x1 = tensor.from_numpy(x1) |
| x0.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd._xor(x0, x1) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_xor_cpu(self): |
| self._xor_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_xor_gpu(self): |
| self._xor_helper(gpu_dev) |
| |
| def _negative_helper(self, dev): |
        X = np.array([0.1, 0, 0.4, -3.0, 0.9,
                      -2.0]).reshape(3, 2).astype(np.float32)
| XT = np.negative(X) |
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.negative(x) |
| dx = result.creator.backward(dy.data) |
| DX = np.negative(DY) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_negative_cpu(self): |
| self._negative_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_negative_gpu(self): |
| self._negative_helper(gpu_dev) |
| |
| def _reciprocal_helper(self, dev): |
        X = np.array([0.1, 0, 0.4, -3.0, 0.9,
                      -2.0]).reshape(3, 2).astype(np.float32)
| DY = np.ones((3, 2), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.reciprocal(x) |
| dx = result.creator.backward(dy.data) |
| #dy/dx = -1/x**2 |
| with np.errstate(divide='ignore'): |
| XT = np.reciprocal(X) |
| DX = -1 / np.square(X) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_reciprocal_cpu(self): |
| self._reciprocal_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_reciprocal_gpu(self): |
| self._reciprocal_helper(gpu_dev) |
| |
| def _and_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = (np.random.randn(*in1) > 0).astype(np.float32) |
| x1 = (np.random.randn(*in2) > 0).astype(np.float32) |
| y = np.logical_and(x, x1) |
| |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| x.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd._and(x, x1) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_and_broadcast_cpu(self): |
| self._and_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_and_broadcast_gpu(self): |
| self._and_broadcast_helper(gpu_dev) |
| |
| def _or_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = (np.random.randn(*in1) > 0).astype(np.float32) |
| x1 = (np.random.randn(*in2) > 0).astype(np.float32) |
| y = np.logical_or(x, x1) |
| |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| x.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd._or(x, x1) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_or_broadcast_cpu(self): |
| self._or_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_or_broadcast_gpu(self): |
| self._or_broadcast_helper(gpu_dev) |
| |
| def _xor_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = (np.random.randn(*in1) > 0).astype(np.float32) |
| x1 = (np.random.randn(*in2) > 0).astype(np.float32) |
| y = np.logical_xor(x, x1) |
| |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| x.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd._xor(x, x1) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_xor_broadcast_cpu(self): |
| self._xor_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_xor_broadcast_gpu(self): |
| self._xor_broadcast_helper(gpu_dev) |
| |
| def _greater_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = np.random.randn(*in1).astype(np.float32) |
| x1 = np.random.randn(*in2).astype(np.float32) |
| y = np.greater(x, x1) |
| |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| x.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd.greater(x, x1) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_greater_broadcast_cpu(self): |
| self._greater_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_greater_broadcast_gpu(self): |
| self._greater_broadcast_helper(gpu_dev) |
| |
| def _less_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = np.random.randn(*in1).astype(np.float32) |
| x1 = np.random.randn(*in2).astype(np.float32) |
| y = np.less(x, x1) |
| |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| x.to_device(dev) |
| x1.to_device(dev) |
| |
| result = autograd.less(x, x1) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_less_broadcast_cpu(self): |
| self._less_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_less_broadcast_gpu(self): |
| self._less_broadcast_helper(gpu_dev) |
| |
| def _add_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = np.random.randn(*in1).astype(np.float32) |
| x1 = np.random.randn(*in2).astype(np.float32) |
| y = x + x1 |
| |
| dy = np.random.randn(*y.shape) |
| grad0 = np.sum(dy, axis=axis_helper(y.shape, |
| x.shape)).reshape(x.shape) |
| grad1 = np.sum(dy, axis=axis_helper(y.shape, |
| x1.shape)).reshape(x1.shape) |
| |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.add(x, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| grad0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| grad1, |
| decimal=5) |
| |
| def test_add_broadcast_cpu(self): |
| self._add_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_add_broadcast_gpu(self): |
| self._add_broadcast_helper(gpu_dev) |
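
    # A minimal numpy sketch (illustrative only, not run by the suite) of the
    # rule these broadcast tests rely on: an input that was broadcast along
    # some axes feeds several outputs, so its gradient is dy summed over
    # exactly the axes reported by axis_helper, then reshaped back.
    def _broadcast_grad_sketch(self):
        x1 = np.random.randn(5).astype(np.float32)  # broadcast over axes 0, 1
        dy = np.random.randn(3, 4, 5).astype(np.float32)
        axes = axis_helper(dy.shape, x1.shape)      # -> (0, 1)
        return np.sum(dy, axis=axes).reshape(x1.shape)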
| |
| def _sub_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = np.random.randn(*in1).astype(np.float32) |
| x1 = np.random.randn(*in2).astype(np.float32) |
| y = x - x1 |
| |
| dy = np.random.randn(*y.shape) |
| grad0 = np.sum(dy, axis=axis_helper(y.shape, |
| x.shape)).reshape(x.shape) |
| grad1 = np.sum(-dy, axis=axis_helper(y.shape, |
| x1.shape)).reshape(x1.shape) |
| |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.sub(x, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| grad0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| grad1, |
| decimal=5) |
| |
| def test_sub_broadcast_cpu(self): |
| self._sub_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_sub_broadcast_gpu(self): |
| self._sub_broadcast_helper(gpu_dev) |
| |
| def _mul_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = np.random.randn(*in1).astype(np.float32) |
| x1 = np.random.randn(*in2).astype(np.float32) |
| y = x * x1 |
| |
| dy = np.random.randn(*y.shape) |
| grad0 = np.sum(x1 * dy, axis=axis_helper(y.shape, |
| x.shape)).reshape(x.shape) |
| grad1 = np.sum(x * dy, axis=axis_helper(y.shape, |
| x1.shape)).reshape(x1.shape) |
| |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.mul(x, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| grad0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| grad1, |
| decimal=5) |
| |
| def test_mul_broadcast_cpu(self): |
| self._mul_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_mul_broadcast_gpu(self): |
| self._mul_broadcast_helper(gpu_dev) |
| |
| def _div_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = np.random.randn(*in1).astype(np.float32) |
| x1 = np.random.randn(*in2).astype(np.float32) + 1.0 |
| y = x / x1 |
| |
| dy = np.random.randn(*y.shape).astype(np.float32) |
| grad0 = np.sum(np.power(x1, -1) * dy, |
| axis=axis_helper(y.shape, x.shape)).reshape(x.shape) |
| grad1 = np.sum(x * -np.power(x1, -2) * dy, |
| axis=axis_helper(y.shape, |
| x1.shape)).reshape(x1.shape) |
| |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.div(x, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
            # use relative and absolute tolerances instead of decimal places
| np.testing.assert_allclose(tensor.to_numpy(result), |
| y, |
| rtol=1e-4, |
| atol=1e-4) |
| np.testing.assert_allclose(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| grad0, |
| rtol=1e-4, |
| atol=1e-4) |
| np.testing.assert_allclose(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| grad1, |
| rtol=1e-4, |
| atol=1e-4) |
| |
| def test_div_broadcast_cpu(self): |
| self._div_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_div_broadcast_gpu(self): |
| self._div_broadcast_helper(gpu_dev) |
| |
| def _pow_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = np.random.randint(1, 10, size=in1).astype(np.float32) |
| x1 = np.random.randint(1, 5, size=in2).astype(np.float32) |
| y = np.power(x, x1).astype(np.float32) |
| |
| dy = np.random.randn(*y.shape).astype(np.float32) |
| grad0 = np.sum(x1 * np.power(x, x1 - 1) * dy, |
| axis=axis_helper(y.shape, x.shape)).reshape(x.shape) |
| grad1 = np.sum(np.power(x, x1) * np.log(x) * dy, |
| axis=axis_helper(y.shape, |
| x1.shape)).reshape(x1.shape) |
| |
| x = tensor.from_numpy(x) |
| x1 = tensor.from_numpy(x1) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| x1.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.pow(x, x1) |
| dx0, dx1 = result.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=2) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| grad0, |
| decimal=2) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| grad1, |
| decimal=2) |
| |
| def test_pow_broadcast_cpu(self): |
| self._pow_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_pow_broadcast_gpu(self): |
| self._pow_broadcast_helper(gpu_dev) |
| |
| def _prelu_broadcast_helper(self, dev): |
| cases = [ |
| ([3, 4, 5], [5]), # 3d vs 1d |
| ([3, 4, 5], [4, 5]), # 3d vs 2d |
| ([3, 4, 5, 6], [5, 6]), # 4d vs 2d |
| ([3, 4, 5, 6], [4, 5, 6]), # 4d vs 3d |
| ([1, 4, 1, 6], [3, 1, 5, 6]) # 4d vs 4d |
| ] |
| for in1, in2 in cases: |
| x = np.random.randn(*in1).astype(np.float32) |
| slope = np.random.randn(*in2).astype(np.float32) |
| y = np.clip(x, 0, np.inf) + np.clip(x, -np.inf, 0) * slope |
| |
| dy = np.random.randn(*y.shape).astype(np.float32) |
            # x0 becomes the indicator of x > 0: positives -> 1, all else -> 0
            x0 = x.copy()
            x0[x0 > 0] = 1
            x0[x0 < 1] = 0
| grad0 = np.sum((x0 + (1 - x0) * slope) * dy, |
| axis=axis_helper(y.shape, x.shape)).reshape(x.shape) |
| grad1 = np.sum((1 - x0) * x * dy, |
| axis=axis_helper(y.shape, |
| slope.shape)).reshape(slope.shape) |
| |
| x = tensor.from_numpy(x) |
| slope = tensor.from_numpy(slope) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| slope.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.prelu(x, slope) |
| dx0, dx1 = result.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx0)), |
| grad0, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx1)), |
| grad1, |
| decimal=5) |
| |
| def test_prelu_broadcast_cpu(self): |
| self._prelu_broadcast_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_prelu_broadcast_gpu(self): |
| self._prelu_broadcast_helper(gpu_dev) |
| |
| def _gemm_helper(self, dev): |
| configs = [ |
| # alpha, beta, transA, transB, shapeA, shapeB, shapeC, shapeY |
| [0.25, 0.35, 0, 0, (3, 4), (4, 5), (1, 5), (3, 5)], |
| [0.25, 0.35, 0, 1, (3, 4), (5, 4), (1, 5), (3, 5)], |
| [0.25, 0.35, 1, 0, (4, 3), (4, 5), (1, 5), (3, 5)], |
| [0.25, 0.35, 1, 1, (4, 3), (5, 4), (1, 5), (3, 5)], |
| ] |
| for config in configs: |
| alpha = config[0] |
| beta = config[1] |
| transA = config[2] |
| transB = config[3] |
| shapeA = config[4] |
| shapeB = config[5] |
| shapeC = config[6] |
| shapeY = config[7] |
| |
| A = np.random.randn(*shapeA).astype(np.float32) |
| DY = np.ones(shapeY, dtype=np.float32) |
| |
| if transB == 0: |
| out_features = shapeB[1] |
| else: |
| out_features = shapeB[0] |
| |
| a = tensor.from_numpy(A) |
| a.to_device(dev) |
| dy = tensor.from_numpy(DY) |
| dy.to_device(dev) |
| |
| gemm = layer.Gemm(out_features, alpha, beta, transA == 1, |
| transB == 1) |
| result = gemm(a) |
| |
| params = gemm.get_params() |
| B = tensor.to_numpy(params['W']) |
| C = tensor.to_numpy(params['b']) |
| |
| da, db, dc = result.creator.backward(dy.data) |
| |
| # Y = alpha * A' * B' + beta * C |
| _A = A if transA == 0 else A.T |
| _B = B if transB == 0 else B.T |
| C = C if C is not None else np.array(0) |
| Y = alpha * np.dot(_A, _B) + beta * C |
| |
| DA = alpha * np.matmul(DY, _B.T) |
| DA = DA if transA == 0 else DA.T |
| DB = alpha * np.matmul(_A.T, DY) |
| DB = DB if transB == 0 else DB.T |
| DC = beta * np.sum(DY, axis=axis_helper(Y.shape, C.shape)).reshape( |
| C.shape) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| Y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(da)), |
| DA, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(db)), |
| DB, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dc)), |
| DC, |
| decimal=5) |
| |
| def test_gemm_cpu(self): |
| self._gemm_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_gemm_gpu(self): |
| self._gemm_helper(gpu_dev) |
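
    # The reference gradients in _gemm_helper follow from
    #     Y = alpha * A' @ B' + beta * C    (A', B' after optional transposes)
    # so dA' = alpha * DY @ B'.T and dB' = alpha * A'.T @ DY (each transposed
    # back when transA/transB is set), while dC = beta * DY summed over the
    # axes where C was broadcast, mirroring the elementwise-add backward.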
| |
| def globalaveragepool_channel_first(self, dev): |
| X = np.array([[[ |
| [1, 2, 3], |
| [4, 5, 6], |
| [7, 8, 9], |
| ]]]).astype(np.float32) |
| XT = np.array([[[[5]]]]).astype(np.float32) |
| DY = np.ones((1, 1, 1, 1), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| dy = tensor.from_numpy(DY) |
| dy.to_device(dev) |
| |
| result = autograd.globalaveragepool(x) |
| dx = result.creator.backward(dy.data) |
| |
| DX = np.ones(X.shape, dtype=np.float32) |
| DX = np.multiply(DX, DY) / np.prod(X.shape[2:]) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def globalaveragepool_channel_last(self, dev): |
| X = np.array([[ |
| [[1], [2], [3]], |
| [[4], [5], [6]], |
| [[7], [8], [9]], |
| ]]).astype(np.float32) |
| XT = np.array([[[[5]]]]).astype(np.float32) |
| DY = np.ones((1, 1, 1, 1), dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| dy = tensor.from_numpy(DY) |
| dy.to_device(dev) |
| |
| result = autograd.globalaveragepool(x, 'channel_last') |
| dx = result.creator.backward(dy.data) |
| |
| DX = np.ones(X.shape, dtype=np.float32) |
| DX = np.multiply(DX, DY) / np.prod(X.shape[1:-1]) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| XT, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_globalaveragepool_cpu(self): |
| self.globalaveragepool_channel_first(cpu_dev) |
| self.globalaveragepool_channel_last(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_globalaveragepool_gpu(self): |
| self.globalaveragepool_channel_first(gpu_dev) |
| self.globalaveragepool_channel_last(gpu_dev) |
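
    # Backward of global average pooling spreads the upstream value evenly
    # over the averaged positions: every input cell receives
    # dy / (number of pooled elements), e.g. dy / 9 for the 3x3 maps above,
    # which is what the np.prod(...) division in DX computes.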
| |
| def constantOfShape_test(self, dev): |
| # float_ones |
| X = np.array([4, 3, 2]).astype(np.int64) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| y = np.ones(X, dtype=np.float32) |
| result = autograd.constant_of_shape(x, 1.0) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
        # int32_ones
| X = np.array([10, 6]).astype(np.int64) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| y = np.ones(X, dtype=np.int32) |
| result = autograd.constant_of_shape(x, 1) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_constantOfShape_cpu(self): |
| self.constantOfShape_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_constantOfShape_gpu(self): |
| self.constantOfShape_test(gpu_dev) |
| |
| def dropout_test(self, dev): |
| X = np.random.randn(3, 4, 5).astype(np.float32) |
| dy = np.random.randn(3, 4, 5).astype(np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.dropout(x, 0.5) |
| dx = result.creator.backward(dy.data) |
| self.check_shape(result.shape, (3, 4, 5)) |
| self.check_shape(dx.shape(), (3, 4, 5)) |
| |
| def test_dropout_cpu(self): |
| self.dropout_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_dropout_gpu(self): |
| self.dropout_test(gpu_dev) |
| |
| def reduceSum_test(self, dev): |
| shape = [3, 2, 2] |
| cases = [(None, 1), ([1], 0), ([1], 1), ([-2], 1), ([1, 2], 1)] |
| for axes, keepdims in cases: |
| X = np.random.uniform(-10, 10, shape).astype(np.float32) |
| _axes = tuple(axes) if axes is not None else None |
| y = np.sum(X, axis=_axes, keepdims=keepdims == 1) |
| dy = np.random.randn(*y.shape).astype(np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.reduce_sum(x, axes, keepdims) |
| dx = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| self.check_shape(dx.shape(), tuple(shape)) |
| |
| def test_reduceSum_cpu(self): |
| self.reduceSum_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_reduceSum_gpu(self): |
| self.reduceSum_test(gpu_dev) |
| |
| def reduceMean_test(self, dev): |
| shape = [3, 2, 2] |
| cases = [(None, 1), ([1], 0), ([1], 1), ([-2], 1), ([1, 2], 1)] |
| for axes, keepdims in cases: |
| X = np.random.uniform(-10, 10, shape).astype(np.float32) |
| _axes = tuple(axes) if axes is not None else None |
| y = np.mean(X, axis=_axes, keepdims=keepdims == 1) |
| dy = np.random.randn(*y.shape).astype(np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.reduce_mean(x, axes, keepdims) |
| dx = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| self.check_shape(dx.shape(), tuple(shape)) |
| |
| def test_reduceMean_cpu(self): |
| self.reduceMean_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_reduceMean_gpu(self): |
| self.reduceMean_test(gpu_dev) |
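
    # For both reductions the backward pass broadcasts dy back to the full
    # input shape (reduce_mean additionally divides by the count of reduced
    # elements); only that restored shape is asserted here via check_shape.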
| |
| def slice_test(self, dev): |
| X = np.random.randn(20, 10, 5).astype(np.float32) |
| indexes = np.array(range(20 * 10 * 5)).reshape(20, 10, 5) |
| configs = [ |
| # starts, ends, axes, steps, y |
| [[0, 0], [3, 10], [0, 1], [1, 1], X[0:3, 0:10], |
| indexes[0:3, 0:10]], # slice |
| [[0, 0, 3], [20, 10, 4], None, None, X[:, :, 3:4], |
| indexes[:, :, 3:4]], # slice_default_axes |
| [[1], [1000], [1], [1], X[:, 1:1000], |
| indexes[:, 1:1000]], # slice_end_out_of_bounds |
| [[0], [-1], [1], [1], X[:, 0:-1], |
| indexes[:, 0:-1]], # slice_end_out_of_bounds |
| [[20, 10, 4], [0, 0, 1], [0, 1, 2], [-1, -3, -2], |
| X[20:0:-1, 10:0:-3, 4:1:-2], indexes[20:0:-1, 10:0:-3, |
| 4:1:-2]], # slice_neg_steps |
| [[0, 0, 3], [20, 10, 4], [0, -2, -1], None, X[:, :, 3:4], |
| indexes[:, :, 3:4]], # slice_negative_axes |
| # [[1000], [1000], [1], [1], X[:, 1000:1000], indexes[:, 1000:1000]], # slice_start_out_of_bounds # cannot support empty tensor |
| ] |
| for starts, ends, axes, steps, y, dx_idx in configs: |
| dy = np.ones(y.shape).astype(np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(dy) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.slice(x, starts, ends, axes, steps) |
| dx = result.creator.backward(dy.data) |
| |
| dx_idx = tuple(dx_idx.flatten().tolist()) |
| dX = np.array([ |
| 1. if i in dx_idx else 0. for i in range(20 * 10 * 5) |
| ]).reshape(X.shape) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| dX, |
| decimal=5) |
| |
| def test_slice_cpu(self): |
| self.slice_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_slice_gpu(self): |
| self.slice_test(gpu_dev) |
| |
| def ceil_test(self, dev): |
| X = np.array([-1.5, 1.2]).astype(np.float32) |
| DY = np.ones((2), dtype=np.float32) |
| y = np.ceil(X) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.ceil(x) |
| dx = result.creator.backward(dy.data) |
| DX = np.zeros((2), dtype=np.float32) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_ceil_cpu(self): |
| self.ceil_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_ceil_gpu(self): |
| self.ceil_test(gpu_dev) |
| |
    def floor_test(self, dev):
        X = np.array([-1.9, 1.2]).astype(np.float32)
        DY = np.ones((2), dtype=np.float32)
        y = np.floor(X)

        x = tensor.from_numpy(X)
        dy = tensor.from_numpy(DY)
        x.to_device(dev)
        dy.to_device(dev)

        result = autograd.floor(x)
        dx = result.creator.backward(dy.data)
        DX = np.zeros((2), dtype=np.float32)

        np.testing.assert_array_almost_equal(tensor.to_numpy(result),
                                             y,
                                             decimal=5)
        np.testing.assert_array_almost_equal(tensor.to_numpy(
            tensor.from_raw_tensor(dx)),
                                             DX,
                                             decimal=5)
| |
| def test_floor_cpu(self): |
| self.floor_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_floor_gpu(self): |
| self.floor_test(gpu_dev) |
| |
| def _test_scatter_elements(self, dev): |
        # testing without axis (defaults to axis 0)
| data = np.zeros((3, 3), dtype=np.float32) |
| indices = np.array([[1, 0, 2], [0, 2, 1]], dtype=np.int32) |
| updates = np.array([[1.0, 1.1, 1.2], [2.0, 2.1, 2.2]], dtype=np.float32) |
| output = np.array([[2.0, 1.1, 0.0], [1.0, 0.0, 2.2], [0.0, 2.1, 1.2]], |
| dtype=np.float32) |
| |
| data = tensor.from_numpy(data) |
| indices = tensor.from_numpy(indices) |
| updates = tensor.from_numpy(updates) |
| data.to_device(dev) |
| indices.to_device(dev) |
| updates.to_device(dev) |
| |
| result = autograd.scatter_elements(data, indices, updates) |
| dy = tensor.from_numpy(np.ones(data.shape, dtype=np.float32)) |
| dx = result.creator.backward(dy.data) |
| np.testing.assert_almost_equal(tensor.to_numpy(result), |
| output, |
| decimal=5) |
| self.check_shape(dx.shape(), data.shape) |
| |
| # testing with axis |
| data = np.array([[1.0, 2.0, 3.0, 4.0, 5.0]], dtype=np.float32) |
| indices = np.array([[1, 3]], dtype=np.int32) |
| updates = np.array([[1.1, 2.1]], dtype=np.float32) |
| output = np.array([[1.0, 1.1, 3.0, 2.1, 5.0]], dtype=np.float32) |
| |
| data = tensor.from_numpy(data) |
| indices = tensor.from_numpy(indices) |
| updates = tensor.from_numpy(updates) |
| data.to_device(dev) |
| indices.to_device(dev) |
| updates.to_device(dev) |
| |
| result = autograd.scatter_elements(data, indices, updates, axis=1) |
| dy = tensor.from_numpy(np.ones(data.shape, dtype=np.float32)) |
| dx = result.creator.backward(dy.data) |
| np.testing.assert_almost_equal(tensor.to_numpy(result), |
| output, |
| decimal=5) |
| self.check_shape(dx.shape(), data.shape) |
| |
| # testing with negative indices: |
| data = np.array([[1.0, 2.0, 3.0, 4.0, 5.0]], dtype=np.float32) |
| indices = np.array([[1, -3]], dtype=np.int64) |
| updates = np.array([[1.1, 2.1]], dtype=np.float32) |
| output = np.array([[1.0, 1.1, 2.1, 4.0, 5.0]], dtype=np.float32) |
| |
| data = tensor.from_numpy(data) |
| indices = tensor.from_numpy(indices) |
| updates = tensor.from_numpy(updates) |
| data.to_device(dev) |
| indices.to_device(dev) |
| updates.to_device(dev) |
| |
| result = autograd.scatter_elements(data, indices, updates, axis=1) |
| dy = tensor.from_numpy(np.ones(data.shape, dtype=np.float32)) |
| dx = result.creator.backward(dy.data) |
| np.testing.assert_almost_equal(tensor.to_numpy(result), |
| output, |
| decimal=5) |
| self.check_shape(dx.shape(), data.shape) |
| |
| def test_cpu_scatter_elements(self): |
| self._test_scatter_elements(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_gpu_scatter_elements(self): |
| self._test_scatter_elements(gpu_dev) |
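
    # A hedged numpy sketch (illustrative only, not run by the suite) of the
    # axis-0 scatter semantics tested above: output starts as a copy of data
    # and output[indices[i][j], j] is overwritten with updates[i][j].
    def _scatter_elements_sketch(self):
        data = np.zeros((3, 3), dtype=np.float32)
        indices = np.array([[1, 0, 2], [0, 2, 1]])
        updates = np.array([[1.0, 1.1, 1.2], [2.0, 2.1, 2.2]],
                           dtype=np.float32)
        out = data.copy()
        for i in range(indices.shape[0]):
            for j in range(indices.shape[1]):
                out[indices[i, j], j] = updates[i, j]
        return out  # [[2.0, 1.1, 0.0], [1.0, 0.0, 2.2], [0.0, 2.1, 1.2]]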
| |
| def split_test(self, dev): |
| X = np.array([1., 2., 3., 4., 5., 6.]).astype(np.float32) |
| DY1 = np.ones((2), dtype=np.float32) |
| DY2 = np.ones((4), dtype=np.float32) |
| y = [ |
| np.array([1., 2.]).astype(np.float32), |
| np.array([3., 4., 5., 6.]).astype(np.float32) |
| ] |
| |
| x = tensor.from_numpy(X) |
| dy1 = tensor.from_numpy(DY1) |
| dy2 = tensor.from_numpy(DY2) |
| x.to_device(dev) |
| dy1.to_device(dev) |
| dy2.to_device(dev) |
| |
| result = autograd.split(x, 0, (2, 4)) |
| dx = result[0].creator.backward(dy1.data, dy2.data) |
| DX = np.ones((6), dtype=np.float32) |
| |
| for idx, _r in enumerate(result): |
| np.testing.assert_array_almost_equal(tensor.to_numpy(_r), |
| y[idx], |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_split_cpu(self): |
| self.split_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_split_gpu(self): |
| self.split_test(gpu_dev) |
| |
| def gather_test(self, dev): |
| config = [([0, 1, 3], 0), ([0, 1, 3], 1), ([[0, 1], [1, 2], [2, 3]], 1), |
| ([0, -1, -2], 0)] # (indices, axis) |
| for indices, _axis in config: |
| X = np.random.randn(5, 4, 3, 2).astype(np.float32) |
| y = np.take(X, indices, axis=_axis) |
| DY = np.ones(y.shape, dtype=np.float32) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.gather(x, _axis, indices) |
| dx = result.creator.backward(dy.data) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| self.check_shape(dx.shape(), tuple(X.shape)) |
| |
| def test_gather_cpu(self): |
| self.gather_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_gather_gpu(self): |
| self.gather_test(gpu_dev) |
| |
| def tile_test(self, dev): |
| config_repeats = [ |
| 2, |
| [2, 2], |
| [2, 1, 2], |
| ] |
| for repeats in config_repeats: |
| X = np.array([0, 1, 2]).astype(np.float32) |
| y = np.tile(X, repeats) |
| DY = np.copy(y) |
| |
| x = tensor.from_numpy(X) |
| dy = tensor.from_numpy(DY) |
| x.to_device(dev) |
| dy.to_device(dev) |
| |
| result = autograd.tile(x, repeats) |
| dx = result.creator.backward(dy.data) |
| DX = np.multiply(X, np.prod(repeats)) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy( |
| tensor.from_raw_tensor(dx)), |
| DX, |
| decimal=5) |
| |
| def test_tile_cpu(self): |
| self.tile_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_tile_gpu(self): |
| self.tile_test(gpu_dev) |
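
    # Backward of tile sums dy over every copy of the input. Since dy above
    # is the tiled output itself (np.prod(repeats) copies of X), that sum
    # collapses to X * np.prod(repeats), which is the DX being checked.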
| |
| def noneZero_test(self, dev): |
| X = np.array([[1, 0], [1, 1]]).astype(np.float32) |
| y = np.array((np.nonzero(X))) |
| |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| result = autograd.nonzero(x) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_noneZero_cpu(self): |
| self.noneZero_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_noneZero_gpu(self): |
| self.noneZero_test(gpu_dev) |
| |
| def cast_test(self, dev): |
| config = [ |
| (np.float32, np.int32, tensor.int32), |
| (np.int32, np.float32, tensor.float32), |
| ] |
| for t1, t2, t3 in config: |
| X = np.array([[1, 0], [1, 1]]).astype(t1) |
| y = np.array([[1, 0], [1, 1]]).astype(t2) |
| |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| result = autograd.cast(x, t3) |
| result_np = tensor.to_numpy(result) |
| assert result_np.dtype == y.dtype, "type %s != %s." % ( |
| result_np.dtype, y.dtype) |
| np.testing.assert_array_almost_equal(result_np, y, decimal=5) |
| |
| def test_cast_cpu(self): |
| self.cast_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_cast_gpu(self): |
| self.cast_test(gpu_dev) |
| |
| def onehot_test(self, dev): |
| |
| def one_hot(indices, depth, axis=-1, dtype=np.float32): # type: ignore |
| ''' Compute one hot from indices at a specific axis ''' |
| values = np.asarray(indices) |
| rank = len(values.shape) |
| depth_range = np.arange(depth) |
| if axis < 0: |
| axis += (rank + 1) |
| ls = values.shape[0:axis] |
| rs = values.shape[axis:rank] |
| targets = np.reshape(depth_range, (1,) * len(ls) + |
| depth_range.shape + (1,) * len(rs)) |
| values = np.reshape(np.mod(values, depth), ls + (1,) + rs) |
| return np.asarray(targets == values, dtype=dtype) |
| |
| axisValue = 1 |
| on_value = 3 |
| off_value = 1 |
| output_type = np.float32 |
| indices = np.array([[1, 9], [2, 4]], dtype=np.float32) |
| depth = np.array([10], dtype=np.float32) |
| values = np.array([off_value, on_value], dtype=output_type) |
| y = one_hot(indices, depth, axis=axisValue, dtype=output_type) |
| y = y * (on_value - off_value) + off_value |
| |
| x = tensor.from_numpy(indices) |
| x.to_device(dev) |
| |
| result = autograd.onehot(axisValue, x, depth, values) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(result), |
| y, |
| decimal=5) |
| |
| def test_onehot_cpu(self): |
| self.onehot_test(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_onehot_gpu(self): |
| self.onehot_test(gpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_cudnn_rnn_operation(self, dev=gpu_dev): |
| # init params, inputs |
| hidden_size = 7 |
| seq_length = 5 |
| batch_size = 6 |
| feature_size = 3 |
| directions = 2 |
| num_layers = 2 |
| |
| for mode in [0, 1, 2, 3]: # 0-relu, 1-tanh, 2-lstm, 3-gru |
| x = tensor.Tensor(shape=(seq_length, batch_size, feature_size), |
| device=dev).gaussian(0, 1) |
| hx = tensor.Tensor(shape=(num_layers * directions, batch_size, |
| hidden_size), |
| device=dev).gaussian(0, 1) |
| cx = tensor.Tensor(shape=(num_layers * directions, batch_size, |
| hidden_size), |
| device=dev).gaussian(0, 1) |
| dy = tensor.Tensor(shape=(seq_length, batch_size, |
| directions * hidden_size), |
| device=dev).gaussian(0, 1) |
| |
| # init cudnn rnn op |
| rnn_handle = singa.CudnnRNNHandle(x.data, |
| hidden_size, |
| mode, |
| num_layers=num_layers, |
| dropout=0.1, |
| bidirectional=1) |
| |
| w = tensor.Tensor(shape=(rnn_handle.weights_size,), |
| device=dev).gaussian(0, 1) |
| |
| # return sequence, y shape = {seq, bs, hidden} |
| # init operator/operation |
| _rnn = autograd._RNN(rnn_handle, return_sequences=True) |
| |
| # forward |
| y = _rnn(x, hx, cx, w)[0] |
| assert y.shape == dy.shape |
| |
| # backward |
| dx, dhx, dcx, dw = _rnn.backward(dy.data) |
| |
| # return no sequence, y shape = {bs, hidden} |
| _rnn = autograd._RNN(rnn_handle, return_sequences=False) |
| dy = tensor.Tensor(shape=(batch_size, directions * hidden_size), |
| device=dev).gaussian(0, 1) |
| y = _rnn(x, hx, cx, w)[0] |
| |
| assert y.shape == dy.shape |
| # backward |
| dx, dhx, dcx, dw = _rnn.backward(dy.data) |
| |
| def cossim_helper(self, dev): |
| A = np.random.randn(*[3, 10]).astype(np.float32) |
| B = np.random.randn(*[3, 10]).astype(np.float32) |
| |
| a = tensor.from_numpy(A) |
| a.to_device(dev) |
| b = tensor.from_numpy(B) |
| b.to_device(dev) |
| |
| DY = np.random.randn(3).astype(np.float32) |
| dy = tensor.from_numpy(DY) |
| dy.to_device(dev) |
| |
| y = autograd.cossim(a, b) |
| da, db = y.creator.backward(dy.data) # CTensor |
| |
| self.check_shape(y.shape, (3,)) |
| self.check_shape(da.shape(), (3, 10)) |
| self.check_shape(db.shape(), (3, 10)) |
| |
| def test_cossim_cpu(self): |
| self.cossim_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_cossim_gpu(self): |
| self.cossim_helper(gpu_dev) |
| |
| def expand_helper(self, dev): |
| shape = [3, 1] |
| X = np.reshape(np.arange(1, np.prod(shape) + 1, dtype=np.float32), |
| shape) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| # dim_changed |
| new_shape = [2, 1, 6] |
| y_t = X * np.ones(new_shape, dtype=np.float32) |
| dy = tensor.from_numpy(y_t) |
| dy.to_device(dev) |
| y = autograd.expand(x, new_shape) |
| dx = y.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y), y_t) |
| self.check_shape(dx.shape(), tuple(shape)) |
| |
| # dim_unchanged |
| new_shape_2 = [3, 4] |
| y_t2 = np.tile(X, 4) |
| dy2 = tensor.from_numpy(y_t2) |
| dy2.to_device(dev) |
| y2 = autograd.expand(x, new_shape_2) |
| dx2 = y2.creator.backward(dy2.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y2), y_t2) |
| self.check_shape(dx2.shape(), tuple(shape)) |
| |
| def test_expand_cpu(self): |
| self.expand_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_expand_gpu(self): |
| self.expand_helper(gpu_dev) |
| |
| def pad_helper(self, dev): |
| X = np.array([ |
| [1.0, 1.2], |
| [2.3, 3.4], |
| [4.5, 5.7], |
| ]).astype(np.float32) |
| Y1 = np.array([ |
| [0.0, 0.0, 1.0, 1.2], |
| [0.0, 0.0, 2.3, 3.4], |
| [0.0, 0.0, 4.5, 5.7], |
| ],).astype(np.float32) |
| Y2 = np.array([ |
| [1.0, 1.2, 1.0, 1.2], |
| [2.3, 3.4, 2.3, 3.4], |
| [4.5, 5.7, 4.5, 5.7], |
| ],).astype(np.float32) |
| Y3 = np.array([ |
| [1.0, 1.0, 1.0, 1.2], |
| [2.3, 2.3, 2.3, 3.4], |
| [4.5, 4.5, 4.5, 5.7], |
| ],).astype(np.float32) |
| |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| pads = [0, 2, 0, 0] |
| |
| DY = np.random.randn(3, 4).astype(np.float32) |
| dy = tensor.from_numpy(DY) |
| dy.to_device(dev) |
| |
| y1 = autograd.pad(x, "constant", pads) |
| y2 = autograd.pad(x, "reflect", pads) |
| y3 = autograd.pad(x, "edge", pads) |
| dx1 = y1.creator.backward(dy.data) |
| dx2 = y2.creator.backward(dy.data) |
| dx3 = y3.creator.backward(dy.data) |
| pad_width = [] |
| half_width = len(pads) // 2 |
| for i in range(half_width): |
| pad_width += [[pads[i], pads[i + half_width]]] |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y1), |
| np.pad( |
| X, |
| pad_width=pad_width, |
| mode="constant", |
| constant_values=0., |
| ), |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y2), |
| np.pad( |
| X, |
| pad_width=pad_width, |
| mode="reflect", |
| ), |
| decimal=5) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y3), |
| np.pad( |
| X, |
| pad_width=pad_width, |
| mode="edge", |
| ), |
| decimal=5) |
| self.check_shape(dx1.shape(), (3, 2)) |
| self.check_shape(dx2.shape(), (3, 2)) |
| self.check_shape(dx3.shape(), (3, 2)) |
| |
| def test_pad_cpu(self): |
| self.pad_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_pad_gpu(self): |
| self.pad_helper(gpu_dev) |
| |
| def upsample_helper(self, dev): |
| X = np.array([[[ |
| [1, 2], |
| [3, 4], |
| ]]], dtype=np.float32) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| scales = np.array([1.0, 1.0, 2.0, 3.0], dtype=np.float32) |
| y_t = np.array([[[ |
| [1, 1, 1, 2, 2, 2], |
| [1, 1, 1, 2, 2, 2], |
| [3, 3, 3, 4, 4, 4], |
| [3, 3, 3, 4, 4, 4], |
| ]]], |
| dtype=np.float32) |
| dy = tensor.from_numpy(y_t) |
| dy.to_device(dev) |
| |
| y = autograd.upsample(x, "nearest", scales) |
| dx = y.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y), y_t) |
| self.check_shape(dx.shape(), tuple(X.shape)) |
| |
| def test_upsample_cpu(self): |
| self.upsample_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_upsample_gpu(self): |
| self.upsample_helper(gpu_dev) |
| |
| def depth_space_helper(self, dev): |
| # (1, 8, 2, 3) input tensor |
| X = np.array( |
| [[[[0., 1., 2.], [3., 4., 5.]], [[9., 10., 11.], [12., 13., 14.]], |
| [[18., 19., 20.], [21., 22., 23.]], |
| [[27., 28., 29.], [30., 31., 32.]], |
| [[36., 37., 38.], [39., 40., 41.]], |
| [[45., 46., 47.], [48., 49., 50.]], |
| [[54., 55., 56.], [57., 58., 59.]], |
| [[63., 64., 65.], [66., 67., 68.]]]], |
| dtype=np.float32) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| # (1, 2, 4, 6) output tensor |
| y_t = np.array( |
| [[[[0., 18., 1., 19., 2., 20.], [36., 54., 37., 55., 38., 56.], |
| [3., 21., 4., 22., 5., 23.], [39., 57., 40., 58., 41., 59.]], |
| [[9., 27., 10., 28., 11., 29.], [45., 63., 46., 64., 47., 65.], |
| [12., 30., 13., 31., 14., 32.], [48., 66., 49., 67., 50., 68.]]] |
| ], |
| dtype=np.float32) |
| dy = tensor.from_numpy(y_t) |
| dy.to_device(dev) |
| y = autograd.depth_to_space(x, 2, "DCR") |
| dx = y.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y), y_t) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(tensor.from_raw_tensor(dx)), X) |
| |
| y = autograd.space_to_depth(dy, 2, "DCR") |
| dx = y.creator.backward(x.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y), X) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(tensor.from_raw_tensor(dx)), y_t) |
| |
| y_t = np.array( |
| [[[[0., 9., 1., 10., 2., 11.], [18., 27., 19., 28., 20., 29.], |
| [3., 12., 4., 13., 5., 14.], [21., 30., 22., 31., 23., 32.]], |
| [[36., 45., 37., 46., 38., 47.], [54., 63., 55., 64., 56., 65.], |
| [39., 48., 40., 49., 41., 50.], [57., 66., 58., 67., 59., 68.]]] |
| ], |
| dtype=np.float32) |
| dy = tensor.from_numpy(y_t) |
| dy.to_device(dev) |
| y = autograd.depth_to_space(x, 2, "CRD") |
| dx = y.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y), y_t) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(tensor.from_raw_tensor(dx)), X) |
| |
| y = autograd.space_to_depth(dy, 2, "CRD") |
| dx = y.creator.backward(x.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y), X) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(tensor.from_raw_tensor(dx)), y_t) |
| |
| def test_depth_space_cpu(self): |
| self.depth_space_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_depth_space_gpu(self): |
| self.depth_space_helper(gpu_dev) |
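
    # A numpy sketch (illustrative only, following the ONNX reference) of the
    # DCR depth_to_space layout checked above: with block size b, depth is
    # split first and then interleaved into rows and columns.
    def _depth_to_space_dcr_sketch(self, x, b=2):
        n, c, h, w = x.shape
        y = x.reshape(n, b, b, c // (b * b), h, w)
        y = y.transpose(0, 3, 4, 1, 5, 2)
        return y.reshape(n, c // (b * b), h * b, w * b)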
| |
| def test_invalid_inputs(self, dev=cpu_dev): |
| _1d = tensor.Tensor((10,), dev) |
| _2d = tensor.Tensor((10, 10), dev) |
| _3d = tensor.Tensor((10, 10, 10), dev) |
| self.assertRaises(AssertionError, autograd.softmax_cross_entropy, _2d, |
| _3d) |
| self.assertRaises(AssertionError, autograd.mse_loss, _2d, _3d) |
| self.assertRaises(AssertionError, autograd.add_bias, _2d, _1d, 3) |
| self.assertRaises(AssertionError, autograd.ranking_loss, _2d, _1d) |
| |
| def where_helper(self, dev): |
| X = np.array([[1, 2], [3, 4]], dtype=np.float32) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| X2 = np.array([[9, 8], [7, 6]], dtype=np.float32) |
| x2 = tensor.from_numpy(X2) |
| x2.to_device(dev) |
| |
| condition = [[True, False], [True, True]] |
| y_t = np.where(condition, X, X2) |
| dx1_t = np.array([[1, 0], [3, 4]], dtype=np.float32) |
| dx2_t = np.array([[0, 8], [0, 0]], dtype=np.float32) |
| dy = tensor.from_numpy(y_t) |
| dy.to_device(dev) |
| |
| y = autograd.where(x, x2, condition) |
| dx1, dx2 = y.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y), y_t) |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(tensor.from_raw_tensor(dx1)), dx1_t) |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(tensor.from_raw_tensor(dx2)), dx2_t) |
| |
| def test_where_cpu(self): |
| self.where_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_where_gpu(self): |
| self.where_helper(gpu_dev) |
| |
| def rounde_helper(self, dev): |
| X = np.array([ |
| 0.1, 0.5, 0.9, 1.2, 1.5, 1.8, 2.3, 2.5, 2.7, -1.1, -1.5, -1.9, -2.2, |
| -2.5, -2.8 |
| ]).astype(np.float32) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| y_t = np.array( |
| [0., 0., 1., 1., 2., 2., 2., 2., 3., -1., -2., -2., -2., -2., |
| -3.]).astype(np.float32) |
| dy = tensor.from_numpy(y_t) |
| dy.to_device(dev) |
| |
| y = autograd.rounde(x) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y), y_t) |
| |
| def test_rounde_cpu(self): |
| self.rounde_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_rounde_gpu(self): |
| self.rounde_helper(gpu_dev) |
| |
| def round_helper(self, dev): |
| X = np.array([ |
| 0.1, 0.5, 0.9, 1.2, 1.5, 1.8, 2.3, 2.5, 2.7, -1.1, -1.5, -1.9, -2.2, |
| -2.5, -2.8 |
| ]).astype(np.float32) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| y_t = np.array( |
| [0., 1., 1., 1., 2., 2., 2., 3., 3., -1., -2., -2., -2., -3., |
| -3.]).astype(np.float32) |
| dy = tensor.from_numpy(y_t) |
| dy.to_device(dev) |
| |
| y = autograd.round(x) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y), y_t) |
| |
| def test_round_cpu(self): |
| self.round_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_round_gpu(self): |
| self.round_helper(gpu_dev) |
| |
| def embedding_helper(self, dev): |
| embedding = layer.Embedding(10, 3) |
| |
| X = np.array([[0, 1, 2, 3], [9, 8, 7, 6]]) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| dy = tensor.Tensor(shape=(2, 4, 3), device=dev) |
| dy.gaussian(0.0, 1.0) |
| |
| y = embedding(x) # PyTensor |
| dx, dW = y.creator.backward(dy.data) # CTensor |
| |
| self.check_shape(y.shape, (2, 4, 3)) |
| self.check_shape(dx.shape(), (2, 4)) |
| self.check_shape(dW.shape(), (10, 3)) |
| |
| def test_embedding_cpu(self): |
| self.embedding_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_embedding_gpu(self): |
| self.embedding_helper(gpu_dev) |
| |
    # no skip decorator here: this helper also backs the CPU test below, and
    # unittest.skipIf would make a direct call raise SkipTest on CPU-only
    # builds
    def _cossim_value(self, dev=gpu_dev):
| # numpy val |
| np.random.seed(0) |
| bs = 1000 |
| vec_s = 1200 |
| a = np.random.random((bs, vec_s)).astype(np.float32) |
| b = np.random.random((bs, vec_s)).astype(np.float32) |
| dy = np.random.random((bs,)).astype(np.float32) |
| |
| # singa tensor |
| ta = tensor.from_numpy(a) |
| tb = tensor.from_numpy(b) |
| tdy = tensor.from_numpy(dy) |
| ta.to_device(dev) |
| tb.to_device(dev) |
| tdy.to_device(dev) |
| |
| # singa forward and backward |
| ty = autograd.cossim(ta, tb) |
| tda, tdb = ty.creator.backward(tdy.data) |
| |
| np_forward = list() |
| for i in range(len(a)): |
| a_norm = np.linalg.norm(a[i]) |
| b_norm = np.linalg.norm(b[i]) |
| ab_dot = np.dot(a[i], b[i]) |
| out = ab_dot / (a_norm * b_norm) |
| np_forward.append(out) |
| |
| np_backward_a = list() |
| np_backward_b = list() |
| for i in range(len(a)): |
| a_norm = np.linalg.norm(a[i]) |
| b_norm = np.linalg.norm(b[i]) |
| da = dy[i] * (b[i] / (a_norm * b_norm) - (np_forward[i] * a[i]) / |
| (a_norm * a_norm)) |
| db = dy[i] * (a[i] / (a_norm * b_norm) - (np_forward[i] * b[i]) / |
| (b_norm * b_norm)) |
| np_backward_a.append(da) |
| np_backward_b.append(db) |
| |
| np.testing.assert_array_almost_equal(tensor.to_numpy(ty), |
| np.array(np_forward)) |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(tensor.from_raw_tensor(tda)), np_backward_a) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_cossim_value_gpu(self): |
| self._cossim_value(gpu_dev) |
| |
| def test_cossim_value_cpu(self): |
| self._cossim_value(cpu_dev) |
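
    # The reference gradients in _cossim_value come from differentiating
    #     cossim(a, b) = (a . b) / (|a| * |b|)
    # row by row, which gives
    #     d cossim / da = b / (|a||b|) - cossim(a, b) * a / |a|^2
    # (and symmetrically for b), each scaled by the upstream dy.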
| |
| def test_mse_loss_value(self, dev=cpu_dev): |
| y = np.random.random((1000, 1200)).astype(np.float32) |
| tar = np.random.random((1000, 1200)).astype(np.float32) |
| # get singa value |
| sy = tensor.from_numpy(y, dev) |
| starget = tensor.from_numpy(tar, dev) |
| sloss = autograd.mse_loss(sy, starget) |
| sgrad = sloss.creator.backward() |
| # get np value result |
| np_loss = np.mean(np.square(tar - y)) |
| np_grad = -2 * (tar - y) / np.prod(tar.shape) |
| # value check |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(tensor.from_raw_tensor(sgrad)), np_grad) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(sloss), np_loss) |
| |
| def erf_helper(self, dev): |
| X = np.array([ |
| 0.1, 0.5, 0.9, 1.2, 1.5, 1.8, 2.3, 2.5, 2.7, -1.1, -1.5, -1.9, -2.2, |
| -2.5, -2.8 |
| ]).astype(np.float32) |
| x = tensor.from_numpy(X) |
| x.to_device(dev) |
| |
| import math |
| |
| y_t = np.vectorize(math.erf)(X) |
| dy = tensor.from_numpy(y_t) |
| dy.to_device(dev) |
        # erf'(x) = 2/sqrt(pi) * exp(-x**2), scaled by the upstream dy (= y_t)
        dx_t = y_t * 2. / np.pi**0.5 * np.exp(-np.power(X, 2))
| |
| y = autograd.erf(x) |
| dx = y.creator.backward(dy.data) |
| np.testing.assert_array_almost_equal(tensor.to_numpy(y), y_t) |
| np.testing.assert_array_almost_equal( |
| tensor.to_numpy(tensor.from_raw_tensor(dx)), dx_t) |
| |
| def test_erf_cpu(self): |
| self.erf_helper(cpu_dev) |
| |
| @unittest.skipIf(not singa_wrap.USE_CUDA, 'CUDA is not enabled') |
| def test_erf_gpu(self): |
| self.erf_helper(gpu_dev) |
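
    # erf(x) = 2/sqrt(pi) * integral_0^x exp(-t**2) dt, so by the fundamental
    # theorem of calculus erf'(x) = 2/sqrt(pi) * exp(-x**2); the helper
    # multiplies that by the upstream gradient dy (chosen as erf(X) above).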
| |
| |
| if __name__ == '__main__': |
| unittest.main() |