# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import numpy as np
import tvm
import tvm.testing
from tvm import relax
from tvm.relay.testing import rand
from tvm.testing import assert_allclose
from tvm.testing.utils import check_numerical_grads
from tvm.script.parser import ir as I, relax as R


def _legalize_and_build(mod, target, dev):
    """Build the module for the given target and wrap the executable in a VirtualMachine."""
    ex = relax.build(mod, target)
    vm = relax.VirtualMachine(ex, dev)
    return vm


@tvm.testing.parametrize_targets("llvm")
def test_manual_gradient(target, dev):
    # The expression computed is sum((2x - 2y) * (y + z)).
    # The gradient of x is broadcast_to(2y + 2z, x.shape),
    # the gradient of y is collapse_sum_to(2x - 4y - 2z, y.shape),
    # the gradient of z is collapse_sum_to(2x - 2y, z.shape),
    # and the gradient of u is 0 (u does not appear in the expression).
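    # Sanity-check derivation of the expected gradients, written out by hand to match the
    # numpy expressions asserted below:
    #   f = sum_{i,j} (2*x[i,j] - 2*y[j]) * (y[j] + z[j])
    #   df/dx[i,j] = 2*(y[j] + z[j])                               -> broadcast 2y + 2z to x.shape
    #   df/dy[j]   = sum_i [(-2)*(y[j] + z[j]) + (2*x[i,j] - 2*y[j])]
    #              = sum_i (2*x[i,j] - 4*y[j] - 2*z[j])            -> collapse-sum over axis 0
    #   df/dz[j]   = sum_i (2*x[i,j] - 2*y[j])                     -> collapse-sum over axis 0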
    @I.ir_module
    class Before:
        @R.function
        def main(
            x: R.Tensor((3, 5), "float32"),
            y: R.Tensor((5,), "float32"),
            z: R.Tensor((5,), "float32"),
            u: R.Tensor((5,), "float32"),
        ):
            with R.dataflow():
                lv1 = R.add(x, x)
                lv2 = R.subtract(lv1, y)
                lv3 = R.subtract(lv2, y)
                lv4 = R.add(y, z)
                lv5 = R.multiply(lv3, lv4)
                lv6 = R.sum(lv5)
                R.output(lv6)
            return lv6

    After = relax.transform.Gradient("main")(Before)
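    # Gradient is called here without restricting the inputs that require gradients, so the
    # adjoint function differentiates with respect to every parameter of "main"; the unused
    # input u therefore receives an all-zero gradient, checked against np.zeros_like(u) below.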
    args = [rand("float32", 3, 5), rand("float32", 5), rand("float32", 5), rand("float32", 5)]
    args_np = [x.numpy() for x in args]

    vm = _legalize_and_build(After, target, dev)
    output, grads = vm["main_adjoint"](*args)

    output_np = np.sum((2 * args_np[0] - 2 * args_np[1]) * (args_np[1] + args_np[2]))
    assert_allclose(output.numpy(), output_np, atol=1e-4)

    expected_grads_nd = [
        (2 * args_np[1] + 2 * args_np[2]) * np.ones_like(args_np[0]),
        np.sum((2 * args_np[0] - 4 * args_np[1] - 2 * args_np[2]), axis=0),
        np.sum((2 * args_np[0] - 2 * args_np[1]), axis=0),
        np.zeros_like(args_np[3]),
    ]
    for i, j in zip(grads, expected_grads_nd):
        assert_allclose(i.numpy(), j, atol=1e-4)


@tvm.testing.parametrize_targets("llvm")
def test_mlp_blockbuilder(target, dev):
    layers, in_size, out_size, hidden_size, batch_size = 3, 5, 5, 5, 4

    input_list = [relax.Var("x", R.Tensor((batch_size, in_size), "float32"))]
    w_list = (
        [relax.Var("w_0", R.Tensor((in_size, hidden_size), "float32"))]
        + [
            relax.Var("w_" + str(i + 1), R.Tensor((hidden_size, hidden_size), "float32"))
            for i in range(layers - 2)
        ]
        + [relax.Var("w_" + str(layers - 1), R.Tensor((hidden_size, out_size), "float32"))]
    )
    b_list = [
        relax.Var("b_" + str(i), R.Tensor((hidden_size,), "float32")) for i in range(layers - 1)
    ] + [relax.Var("b_" + str(layers - 1), R.Tensor((out_size,), "float32"))]
    label_list = [relax.Var("y", R.Tensor((batch_size,), "int64"))]
    args_list = input_list + w_list + b_list + label_list

    bb = relax.BlockBuilder()
    with bb.function("MLP", args_list):
        with bb.dataflow():
            current = input_list[0]
            for i in range(layers):
                lv0 = bb.emit(R.matmul(current, w_list[i]))
                lv1 = bb.emit(R.add(lv0, b_list[i]))
                current = bb.emit(R.nn.relu(lv1) if i < layers - 1 else lv1)
            logits = bb.emit(R.nn.log_softmax(current))
            loss = bb.emit(R.nn.nll_loss(logits, label_list[0]))
            gv0 = bb.emit_output(loss)
        bb.emit_func_output(gv0)
    Before = bb.get()

    After = relax.transform.Gradient("MLP", w_list + b_list)(Before)
    # Check that the computed gradients match numerical (finite-difference) gradients.
    args = []
    for arg in After["MLP_adjoint"].params:
        shape = [int(l) for l in arg.struct_info.shape]
        if arg.struct_info.dtype == "int64":
            args.append(tvm.nd.array(np.random.randint(0, out_size, size=shape).astype(np.int64)))
        else:  # float32
            args.append(rand("float32", *shape))

    vm_before = _legalize_and_build(Before, target, dev)
    vm_after = _legalize_and_build(After, target, dev)
    _, grad = vm_after["MLP_adjoint"](*args)

    def func(*inputs):
        loss = vm_before["MLP"](args[0], *[tvm.nd.array(i) for i in inputs], args[-1])
        return loss.numpy()

    check_numerical_grads(func, [i.numpy() for i in args[1:-1]], [i.numpy() for i in grad])


@tvm.testing.parametrize_targets("llvm")
def test_complex(target, dev):
    cst = relax.const(np.ones((6,)), dtype="float32")
    cst1 = relax.const(np.array(3), dtype="int64")

    @tvm.script.ir_module
    class Before:
        @R.function
        def main(x: R.Tensor((6,), "float32"), y: R.Tensor((6, 3, 4), "float32")):
            with R.dataflow():
                lv1 = R.split(x, 2)
                lv2 = lv1[0]
                lv3 = lv1[1]
                lv4 = lv2 + lv3
                lv5 = (lv4, lv3)
                lv6 = R.concat(lv5)
                lv7 = (x, x)
                lv8 = R.concat(lv7)
                lv9 = R.concat(lv7)
                lv10 = R.add(lv8, lv9)
                lv11 = R.split(lv10, 2)
                lv12 = R.add(lv6, lv11[0])
                lv13 = cst
                lv14 = R.add(lv12, lv13)
                lv15 = R.subtract(lv13, lv14)
                lv16 = R.multiply(lv14, lv15)
                lv17 = R.multiply(lv15, lv16)
                lv18 = R.tanh(lv17)
                lv19 = R.sigmoid(lv18)
                lv20 = R.permute_dims(y, axes=[0, 2, 1])
                lv21 = R.sigmoid(lv20)
                lv22 = R.matmul(y, lv21)
                lv23 = R.sum(lv22, axis=[1, 2])
                lv24 = R.add(lv19, lv23)
                lv25 = R.nn.log_softmax(lv24)
                gv = R.nn.nll_loss(lv25, cst1)
                R.output(gv)
            return gv

    After = relax.transform.Gradient("main")(Before)

    args = []
    for arg in After["main_adjoint"].params:
        shape = [int(l) for l in arg.struct_info.shape]
        args.append(rand("float32", *shape))

    vm_before = _legalize_and_build(Before, target, dev)
    vm_after = _legalize_and_build(After, target, dev)
    _, grad = vm_after["main_adjoint"](*args)

    def func(*inputs):
        loss = vm_before["main"](*[tvm.nd.array(i) for i in inputs])
        return loss.numpy()

    check_numerical_grads(func, [i.numpy() for i in args], [i.numpy() for i in grad])


@tvm.testing.parametrize_targets("llvm")
def test_matmul(target, dev):
    @tvm.script.ir_module
    class Before:
        @R.function
        def main(x: R.Tensor((3, 3), "float32"), y: R.Tensor((3, 3), "float32")):
            with R.dataflow():
                lv1 = R.matmul(x, y)
                lv2 = R.permute_dims(x)
                lv3 = R.matmul(lv2, y)
                lv4 = R.permute_dims(y)
                lv5 = R.matmul(x, lv4)
                lv6 = R.permute_dims(x)
                lv7 = R.permute_dims(y)
                lv8 = R.matmul(lv6, lv7)
                lv9 = lv1 + lv3 + lv5 + lv8
                gv = R.sum(lv9)
                R.output(gv)
            return gv

    After = relax.transform.Gradient("main")(Before)

    args = []
    for arg in After["main_adjoint"].params:
        shape = [int(l) for l in arg.struct_info.shape]
        args.append(rand("float32", *shape))

    vm_before = _legalize_and_build(Before, target, dev)
    vm_after = _legalize_and_build(After, target, dev)
    _, grad = vm_after["main_adjoint"](*args)

    def func(*inputs):
        loss = vm_before["main"](*[tvm.nd.array(i) for i in inputs])
        return loss.numpy()

    check_numerical_grads(func, [i.numpy() for i in args], [i.numpy() for i in grad])


if __name__ == "__main__":
    tvm.testing.main()