# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
context("optimizer")
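# Tests run on the CPU by default; set the environment variable R_GPU_ENABLE=1
# to switch the default context to the GPU.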
if (Sys.getenv("R_GPU_ENABLE") != "" && as.integer(Sys.getenv("R_GPU_ENABLE")) == 1) {
  mx.ctx.default(new = mx.gpu())
  message("Using GPU for testing.")
}
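# Shared fixture used by every test below: a one-unit FullyConnected layer with
# no bias and a squared-error (LinearRegressionOutput) loss, fed 3 samples of
# 2 features each, with the weight initialised to w1 = (1.1, 1.8). Only the
# weight gradient is requested ("write"); data and label gradients are "null".
# With this setup the weight gradient works out to roughly (-0.3, -0.8), so
# each test checks a single optimizer step starting from w1.

# SGD with learning.rate = 1 and no momentum: one step is w - lr * grad,
# i.e. (1.1, 1.8) - (-0.3, -0.8) = (1.4, 2.6).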
test_that("sgd", {
data <- mx.symbol.Variable("data")
label <- mx.symbol.Variable("label")
fc_weight <- mx.symbol.Variable("fc_weight")
fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = TRUE,
name = "fc1", num_hidden = 1)
loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss")
x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x,
fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write",
"null"))
optimizer <- mx.opt.create("sgd", learning.rate = 1, momentum = 0, wd = 0, rescale.grad = 1,
clip_gradient = -1)
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default())
mx.exec.forward(exec, is.train = TRUE)
mx.exec.backward(exec)
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
expect_equal(as.array(arg.blocks[[2]]), array(c(1.4, 2.6), dim = c(2, 1)), tolerance = 0.1)
})
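# Centered RMSProp (gamma1 = 0.95, gamma2 = 0.9): because the running gradient
# and squared-gradient estimates start at zero, the first step is much larger
# than the raw gradient, landing near (5.64, 6.38).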
test_that("rmsprop", {
data <- mx.symbol.Variable("data")
label <- mx.symbol.Variable("label")
fc_weight <- mx.symbol.Variable("fc_weight")
fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = TRUE,
name = "fc1", num_hidden = 1)
loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss")
x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x,
fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write",
"null"))
optimizer <- mx.opt.create("rmsprop", learning.rate = 1, centered = TRUE, gamma1 = 0.95,
gamma2 = 0.9, epsilon = 1e-04, wd = 0, rescale.grad = 1, clip_gradient = -1)
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default())
mx.exec.forward(exec, is.train = TRUE)
mx.exec.backward(exec)
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
expect_equal(as.array(arg.blocks[[2]]), array(c(5.64, 6.38), dim = c(2, 1)),
tolerance = 0.1)
})
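# Adam (beta1 = 0.9, beta2 = 0.999): keeps exponential moving averages of the
# gradient and squared gradient; with zero-initialised state the first step
# moves each weight by about 3.16, landing near (4.26, 4.96).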
test_that("adam", {
data <- mx.symbol.Variable("data")
label <- mx.symbol.Variable("label")
fc_weight <- mx.symbol.Variable("fc_weight")
fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = TRUE,
name = "fc1", num_hidden = 1)
loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss")
x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x,
fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write",
"null"))
optimizer <- mx.opt.create("adam", learning.rate = 1, beta1 = 0.9, beta2 = 0.999,
epsilon = 1e-08, wd = 0, rescale.grad = 1, clip_gradient = -1)
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default())
mx.exec.forward(exec, is.train = TRUE)
mx.exec.backward(exec)
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
expect_equal(as.array(arg.blocks[[2]]), array(c(4.26, 4.96), dim = c(2, 1)),
tolerance = 0.1)
})
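# AdaGrad with learning.rate = 1: the squared-gradient accumulator starts at
# zero, so the first step reduces to roughly w - lr * sign(grad),
# i.e. (1.1 + 1, 1.8 + 1) = (2.1, 2.8).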
test_that("adagrad", {
data <- mx.symbol.Variable("data")
label <- mx.symbol.Variable("label")
fc_weight <- mx.symbol.Variable("fc_weight")
fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = TRUE,
name = "fc1", num_hidden = 1)
loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss")
x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x,
fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write",
"null"))
optimizer <- mx.opt.create("adagrad", learning.rate = 1, epsilon = 1e-08, wd = 0,
rescale.grad = 1, clip_gradient = -1)
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default())
mx.exec.forward(exec, is.train = TRUE)
mx.exec.backward(exec)
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
expect_equal(as.array(arg.blocks[[2]]), array(c(2.1, 2.8), dim = c(2, 1)), tolerance = 0.1)
})
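# AdaDelta (rho = 0.9, epsilon = 1e-05): with both accumulators starting at
# zero, the first step is tiny (on the order of sqrt(epsilon / (1 - rho)) = 0.01
# per element), so the weights barely move: roughly (1.11, 1.81).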
test_that("adadelta", {
data <- mx.symbol.Variable("data")
label <- mx.symbol.Variable("label")
fc_weight <- mx.symbol.Variable("fc_weight")
fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = TRUE,
name = "fc1", num_hidden = 1)
loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss")
x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x,
fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write",
"null"))
optimizer <- mx.opt.create("adadelta", rho = 0.9, epsilon = 1e-05, wd = 0, rescale.grad = 1,
clip_gradient = -1)
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default())
mx.exec.forward(exec, is.train = TRUE)
mx.exec.backward(exec)
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
expect_equal(as.array(arg.blocks[[2]]), array(c(1.11, 1.81), dim = c(2, 1)),
tolerance = 0.1)
})
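# NAG with momentum = 0 degenerates to plain SGD, so one step gives the same
# result as the sgd test above: (1.4, 2.6).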
test_that("nag_no_momentum", {
data <- mx.symbol.Variable("data")
label <- mx.symbol.Variable("label")
fc_weight <- mx.symbol.Variable("fc_weight")
fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = TRUE,
name = "fc1", num_hidden = 1)
loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss")
x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x,
fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write", "null"))
optimizer <- mx.opt.create("nag", learning.rate = 1, momentum = 0, wd = 0, rescale.grad = 1,
clip_gradient = -1)
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default())
mx.exec.forward(exec, is.train = TRUE)
mx.exec.backward(exec)
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
expect_equal(as.array(arg.blocks[[2]]), array(c(1.4, 2.6), dim = c(2, 1)), tolerance = 0.05)
})
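# NAG with momentum = 0.1: the Nesterov look-ahead makes the step slightly
# larger than the plain SGD step, and clip_gradient = 5 is a no-op here because
# the gradients are well below the threshold. Expected weights: about (1.45, 2.65).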
test_that("nag_momentum", {
data <- mx.symbol.Variable("data")
label <- mx.symbol.Variable("label")
fc_weight <- mx.symbol.Variable("fc_weight")
fc <- mx.symbol.FullyConnected(data = data, weight = fc_weight, no.bias = TRUE,
name = "fc1", num_hidden = 1)
loss <- mx.symbol.LinearRegressionOutput(data = fc, label = label, name = "loss")
x <- mx.nd.array(array(1:6, dim = 2:3))
y <- mx.nd.array(c(5, 11, 16))
w1 <- mx.nd.array(array(c(1.1, 1.8), dim = c(2, 1)))
exec <- mxnet:::mx.symbol.bind(symbol = loss, ctx = mx.ctx.default(), arg.arrays = list(data = x,
fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write", "null"))
optimizer <- mx.opt.create("nag", learning.rate = 1, momentum = 0.1, wd = 0, rescale.grad = 1,
clip_gradient = 5)
updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default())
mx.exec.forward(exec, is.train = TRUE)
mx.exec.backward(exec)
arg.blocks <- updaters(exec$ref.arg.arrays, exec$ref.grad.arrays)
mx.exec.update.arg.arrays(exec, arg.blocks, skip.null = TRUE)
expect_equal(as.array(arg.blocks[[2]]), array(c(1.45, 2.65), dim = c(2, 1)), tolerance = 0.1)
})