| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # ============================================================================= |
| from __future__ import division |
| |
| import math |
| import unittest |
| import numpy as np |
| import functools |
| |
| |
| from singa import tensor |
| from singa import singa_wrap as singa |
| from singa import opt |
| |
| from cuda_helper import gpu_dev, cpu_dev |
| |
def assertTensorEqual(x, y, decimal=6):
    """Assert that two tensors agree in metadata and (approximately) in values.

    The shapes, dtypes and device ids must match exactly. The element values
    are then read back on the host and compared with
    ``np.testing.assert_array_almost_equal``.

    Args:
        x: first tensor.
        y: second tensor; must live on a device with the same id as ``x``.
        decimal: number of decimal places the values must agree to.

    Raises:
        AssertionError: if the metadata differs or the values do not agree
            to ``decimal`` places.
    """
    assert x.shape == y.shape, "tensor shapes differ"
    assert x.dtype == y.dtype, "tensor dtypes differ"
    assert x.device.id() == y.device.id(), "tensors are on different devices"

    # Remember the device before moving the data so it can be restored.
    original_device = x.device
    try:
        x.to_host()
        y.to_host()
        np.testing.assert_array_almost_equal(
            x.data.GetFloatValue(int(x.size())),
            y.data.GetFloatValue(int(y.size())),
            decimal)
    finally:
        # Restore both tensors even when the comparison fails, so a failed
        # check does not leave the caller's tensors stranded on the host.
        x.to_device(original_device)
        y.to_device(original_device)
| |
def on_cpu_gpu(func):
    """Decorate a test so it runs once per available device.

    The wrapped callable is always invoked with ``dev=cpu_dev``. When
    ``singa.USE_CUDA`` is truthy it is invoked a second time with
    ``dev=gpu_dev``. The wrapper itself returns nothing.
    """

    @functools.wraps(func)
    def run_on_each_device(*args, **kwargs):
        func(*args, dev=cpu_dev, **kwargs)
        if not singa.USE_CUDA:
            return
        func(*args, dev=gpu_dev, **kwargs)

    return run_on_each_device
| |
class TestDecayScheduler(unittest.TestCase):
    """Tests for the learning-rate schedulers exposed by ``singa.opt``."""

    def test_exponential_decay_cpu(self):
        """With staircase on, the rate is 0.1 * 0.5 ** (step // 2)."""
        lr = opt.ExponentialDecay(0.1, 2, 0.5, True)
        sgd1 = opt.SGD(lr=lr)
        for i in range(5):
            np.testing.assert_array_almost_equal(
                tensor.to_numpy(sgd1.lr_value), [0.1 * 0.5**(i // 2)])
            sgd1.step()

    def test_exponential_decay_no_staircase_cpu(self):
        """With staircase off, the rate is 0.1 * 0.5 ** (step / 2)."""
        lr = opt.ExponentialDecay(0.1, 2, 0.5, False)
        sgd1 = opt.SGD(lr=lr)
        for i in range(5):
            # True division is intended here (see the __future__ import).
            np.testing.assert_array_almost_equal(
                tensor.to_numpy(sgd1.lr_value), [0.1 * 0.5**(i / 2)])
            sgd1.step()

    @on_cpu_gpu
    def test_const_decay_scheduler(self, dev):
        """A constant scheduler yields the same value whatever the step is."""
        c1 = opt.Constant(0.2)
        step = tensor.Tensor((1,), device=dev).set_value(0)
        np.testing.assert_array_almost_equal(tensor.to_numpy(c1(step)), [0.2])
        step += 1
        np.testing.assert_array_almost_equal(tensor.to_numpy(c1(step)), [0.2])
| |
class TestOptimizer(unittest.TestCase):
    """Single-step numerical checks for the optimizers in ``singa.opt``.

    Each test builds the expected parameter value by hand with tensor
    arithmetic, applies the optimizer once (or twice), and compares the
    updated parameter against that expectation.
    """

    @on_cpu_gpu
    def test_optimizer(self, dev):
        """Step counting and state save/restore on the base optimizer."""
        o1 = opt.Optimizer(0.1)

        # test step
        o1.step()
        o1.step()

        # test get states
        s1 = o1.get_states()
        self.assertAlmostEqual(s1['step_counter'], 2)

        # test set states
        s2 = {'step_counter': 5}
        o1.set_states(s2)
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(o1.step_counter), [5])

    @on_cpu_gpu
    def test_sgd_const_lr(self, dev=cpu_dev):
        """Plain SGD: w <- w - lr * g."""
        # Disable graph buffering on the device actually under test.
        dev.EnableGraph(False)
        sgd1 = opt.SGD(lr=0.1)
        w_shape = (2, 3)
        w = tensor.Tensor(w_shape, device=dev).set_value(0.1)
        g = tensor.Tensor(w_shape, device=dev).set_value(0.1)

        w_step1 = w - 0.1 * g
        sgd1.apply(w.name, w, g)

        assertTensorEqual(w, w_step1)

    @on_cpu_gpu
    def test_RMSProp_const_lr(self, dev=cpu_dev):
        """One RMSProp step starting from an empty running average."""
        # Disable graph buffering on the device actually under test.
        dev.EnableGraph(False)
        opt1 = opt.RMSProp(lr=0.1)
        w_shape = (2, 3)
        w = tensor.Tensor(w_shape, device=dev).set_value(0.1)
        g = tensor.Tensor(w_shape, device=dev).set_value(0.1)

        # running_average = running_average * rho + param_grad * param_grad * (1 - rho)
        # param_value = param_value - lr * param_grad / sqrt(running_average + epsilon)

        # The 0.1 factor corresponds to (1 - rho); presumably the optimizer's
        # default rho is 0.9 -- confirm against singa.opt.RMSProp.
        running_average = 0.1 * tensor.square(g)
        tmp = running_average + 1e-8
        tmp = tensor.sqrt(tmp)
        tmp = g / tmp

        w_step1 = w - 0.1 * tmp
        opt1.apply(w.name, w, g)

        assertTensorEqual(w, w_step1)

    @on_cpu_gpu
    def test_AdaGrad_const_lr(self, dev=cpu_dev):
        """One AdaGrad step starting from an empty history."""
        # Disable graph buffering on the device actually under test.
        dev.EnableGraph(False)
        opt1 = opt.AdaGrad(lr=0.1)
        w_shape = (2, 3)
        w = tensor.Tensor(w_shape, device=dev).set_value(0.1)
        g = tensor.Tensor(w_shape, device=dev).set_value(0.1)

        # history = history + param_grad * param_grad
        # param_value = param_value - lr * param_grad / sqrt(history + epsilon)

        history = tensor.square(g)
        tmp = history + 1e-8
        tmp = tensor.sqrt(tmp)
        tmp = g / tmp

        w_step1 = w - 0.1 * tmp
        opt1.apply(w.name, w, g)

        assertTensorEqual(w, w_step1)

    @on_cpu_gpu
    def test_Adam_const_lr(self, dev=cpu_dev):
        """One Adam step starting from zero first and second moments."""
        # Disable graph buffering on the device actually under test.
        dev.EnableGraph(False)
        opt1 = opt.Adam(lr=0.1)
        w_shape = (2, 3)
        w = tensor.Tensor(w_shape, device=dev).set_value(1.0)
        g = tensor.Tensor(w_shape, device=dev).set_value(0.1)

        # m := beta_1 * m + (1 - beta_1) * grad
        # v := beta_2 * v + (1 - beta_2) * grad * grad
        # m_norm = m / (1 - beta_1 ^ step)
        # v_norm = v / (1 - beta_2 ^ step)
        # param := param - (lr * m_norm) / (sqrt(v_norm) + epsilon)

        # The 0.1 and 0.001 factors correspond to (1 - beta_1) and
        # (1 - beta_2); presumably the defaults are 0.9 and 0.999 -- confirm
        # against singa.opt.Adam.
        m = 0.1 * g
        tmp = tensor.square(g)
        v = 0.001 * tmp

        m_norm = m / 0.1
        v_norm = v / 0.001

        tmp = tensor.sqrt(v_norm) + 1e-8
        tmp = m_norm / tmp

        w_step1 = w - 0.1 * tmp
        opt1.apply(w.name, w, g)

        # Compared at a looser tolerance than the other optimizer tests.
        assertTensorEqual(w, w_step1, decimal=5)

    @on_cpu_gpu
    def test_sgd_const_lr_momentum(self, dev=cpu_dev):
        """Two SGD steps with momentum 0.9; the buffer carries over."""
        sgd1 = opt.SGD(lr=0.1, momentum=0.9)
        w_shape = (2, 3)
        w = tensor.Tensor(w_shape, device=dev).set_value(0.1)
        g = tensor.Tensor(w_shape, device=dev).set_value(0.01)

        # First step: the momentum buffer is just the gradient.
        w_step1 = w - 0.1 * g
        buf = g

        sgd1.apply(w.name, w, g)
        sgd1.step()

        assertTensorEqual(w, w_step1)

        # Second step: buffer = grad + momentum * previous buffer.
        buf = g + buf * 0.9
        w_step2 = w - 0.1 * buf

        sgd1.apply(w.name, w, g)

        assertTensorEqual(w, w_step2)

    @on_cpu_gpu
    def test_sgd_const_lr_momentum_weight_decay(self, dev=cpu_dev):
        """SGD with weight decay: w <- w - lr * (g + weight_decay * w).

        Despite the test name, the optimizer here is constructed without a
        momentum argument; only weight decay is exercised.
        """
        sgd1 = opt.SGD(lr=0.1, weight_decay=0.2)
        w_shape = (2, 3)
        w = tensor.Tensor(w_shape, device=dev).set_value(0.1)
        g = tensor.Tensor(w_shape, device=dev).set_value(0.01)

        w_step1 = w - 0.1 * (g + 0.2 * w)

        sgd1.apply(w.name, w, g)

        assertTensorEqual(w, w_step1)

    # NOTE(review): the decorator below is disabled, so this test only runs
    # with the default ``dev=cpu_dev``.
    # @on_cpu_gpu
    def test_sgd_const_lr_momentum_nesterov(self, dev=cpu_dev):
        """One Nesterov SGD step: w <- w - lr * (g + momentum * buf)."""
        sgd1 = opt.SGD(lr=0.1, momentum=0.9, nesterov=True)
        w_shape = (2, 3)
        w = tensor.Tensor(w_shape, device=dev).set_value(0.1)
        g = tensor.Tensor(w_shape, device=dev).set_value(0.1)

        # On the first step the momentum buffer equals the gradient.
        buf = g
        w_step1 = w - 0.1 * (g + 0.9 * buf)

        sgd1.apply(w.name, w, g)

        assertTensorEqual(w, w_step1)
| |
# Script entry point: run every TestCase defined in this module.
if __name__ == '__main__':
    unittest.main()