tests/python/relay/test_cpp_build_module.py - tvm - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 import numpy as np

 import tvm
 from tvm import te
 from tvm import relay, runtime
 from tvm.contrib.nvcc import have_fp16
 import tvm.testing


 def test_basic_build():
     tgt = "llvm"
     dev = tvm.cpu()
     # func
     a = relay.var("a", dtype="float32", shape=(16, 8))
     b = relay.var("b", dtype="float32", shape=(8, 8))
     c = relay.var("c", dtype="float32", shape=(16, 8))
     x = relay.nn.dense(a, b)
     y = relay.nn.relu(x)
     z = y + c
     func = relay.Function([a, b, c], z)
     A = tvm.nd.array(np.random.uniform(-1, 1, (16, 8)).astype("float32"), device=dev)
     B = tvm.nd.array(np.random.uniform(-1, 1, (8, 8)).astype("float32"), device=dev)
     C = tvm.nd.array(np.random.uniform(-1, 1, (16, 8)).astype("float32"), device=dev)
     params = {"b": B, "c": C}
     # build
     targets = {tvm.tir.IntImm("int32", dev.device_type): tgt}
     mod = tvm.IRModule.from_expr(func)
     func_in_mod = mod["main"]
     assert mod["main"] == func_in_mod, "cannot compare function to itself"

     lib = relay.build(mod, targets, "llvm", params=params)
     assert mod["main"] == func_in_mod, "relay.build changed module in-place"

     # test
     rt = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
     rt.set_input("a", A)
     rt.run()
     out = rt.get_output(0)

     np.testing.assert_allclose(
         out.numpy(),
         np.maximum(np.dot(A.numpy(), B.numpy().T), 0) + C.numpy(),
         atol=1e-5,
         rtol=1e-5,
     )


 @tvm.testing.requires_cuda
 def test_fp16_build():
     dtype = "float16"

     dev = tvm.cuda(0)
     if dtype == "float16" and not have_fp16(dev.compute_version):
         print("skip because gpu does not support fp16")
         return

     x = relay.var("x", dtype=dtype, shape=(4, 4))
     y = relay.var("y", dtype=dtype, shape=(4, 4))
     z = x + y
     func = relay.Function([x, y], z)
     X = tvm.nd.array(np.random.uniform(-1, 1, (4, 4)).astype(dtype), device=dev)
     Y = tvm.nd.array(np.random.uniform(-1, 1, (4, 4)).astype(dtype), device=dev)
     params = {
         "x": X,
         "y": Y,
     }

     # build
     g_json, mmod, params = relay.build(func, "cuda", params=params)

     # test
     rt = tvm.contrib.graph_executor.create(g_json, mmod, dev)
     rt.load_params(runtime.save_param_dict(params))
     rt.run()
     out = rt.get_output(0)

     np.testing.assert_allclose(out.numpy(), X.numpy() + Y.numpy(), atol=1e-5, rtol=1e-5)


 @tvm.testing.requires_llvm
 def test_bf16_build():
     data = relay.var("data", shape=(1, 3, 224, 224), dtype="float32")
     weight = relay.var("weight", shape=(64, 3, 7, 7), dtype="float32")
     bn_gamma = relay.var("gamma", shape=(64,), dtype="float32")
     bn_beta = relay.var("beta", shape=(64,), dtype="float32")
     bn_mean = relay.var("mean", shape=(64,), dtype="float32")
     bn_var = relay.var("var", shape=(64,), dtype="float32")
     params = {
         "weight": np.random.uniform(-1, 1, size=(64, 3, 7, 7)).astype("float32"),
         "gamma": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
         "beta": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
         "mean": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
         "var": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
     }
     conv_bf16 = relay.nn.conv2d(
         relay.cast(data, "bfloat16"),
         relay.cast(weight, "bfloat16"),
         strides=(2, 2),
         padding=(3, 3, 3, 3),
         channels=64,
         kernel_size=(7, 7),
         out_dtype="bfloat16",
     )
     bn_bf16 = relay.nn.batch_norm(
         conv_bf16,
         relay.cast(bn_gamma, "bfloat16"),
         relay.cast(bn_beta, "bfloat16"),
         relay.cast(bn_mean, "bfloat16"),
         relay.cast(bn_var, "bfloat16"),
     )
     relu_bf16 = relay.nn.relu(bn_bf16[0])
     maxpool_bf16 = relay.nn.max_pool2d(relu_bf16, pool_size=(2, 2), strides=(2, 2))
     avgpool_bf16 = relay.nn.avg_pool2d(maxpool_bf16, pool_size=(2, 2), strides=(2, 2))
     flattened_bf16 = relay.nn.batch_flatten(avgpool_bf16)
     softmax_bf16 = relay.nn.softmax(flattened_bf16)
     mod_bf16 = tvm.IRModule.from_expr(softmax_bf16)
     with tvm.transform.PassContext(opt_level=3):
         relay.build(mod_bf16, target="llvm", params=params)


 @tvm.testing.parametrize_targets("llvm", "cuda")
 def test_fp16_conversion(target, dev):
     if target == "cuda" and not have_fp16(dev.compute_version):
         print("skip because gpu does not support fp16")
         return

     n = 10

     for (src, dst) in [("float32", "float16"), ("float16", "float32")]:
         x = relay.var("x", relay.TensorType((n,), src))
         y = x.astype(dst)
         func = relay.Function([x], y)

         # init input
         X = tvm.nd.array(n * np.random.randn(n).astype(src) - n / 2)

         # build
         with tvm.transform.PassContext(opt_level=1):
             g_json, mmod, params = relay.build(tvm.IRModule.from_expr(func), target)

         # test
         rt = tvm.contrib.graph_executor.create(g_json, mmod, dev)
         rt.set_input("x", X)
         rt.run()
         out = rt.get_output(0)

         np.testing.assert_allclose(out.numpy(), X.numpy().astype(dst), atol=1e-5, rtol=1e-5)


 if __name__ == "__main__":
     test_basic_build()
     test_fp16_build()
     test_fp16_conversion()
     test_bf16_build()
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	import numpy as np

	import tvm
	from tvm import te
	from tvm import relay, runtime
	from tvm.contrib.nvcc import have_fp16
	import tvm.testing


	def test_basic_build():
	tgt = "llvm"
	dev = tvm.cpu()
	# func
	a = relay.var("a", dtype="float32", shape=(16, 8))
	b = relay.var("b", dtype="float32", shape=(8, 8))
	c = relay.var("c", dtype="float32", shape=(16, 8))
	x = relay.nn.dense(a, b)
	y = relay.nn.relu(x)
	z = y + c
	func = relay.Function([a, b, c], z)
	A = tvm.nd.array(np.random.uniform(-1, 1, (16, 8)).astype("float32"), device=dev)
	B = tvm.nd.array(np.random.uniform(-1, 1, (8, 8)).astype("float32"), device=dev)
	C = tvm.nd.array(np.random.uniform(-1, 1, (16, 8)).astype("float32"), device=dev)
	params = {"b": B, "c": C}
	# build
	targets = {tvm.tir.IntImm("int32", dev.device_type): tgt}
	mod = tvm.IRModule.from_expr(func)
	func_in_mod = mod["main"]
	assert mod["main"] == func_in_mod, "cannot compare function to itself"

	lib = relay.build(mod, targets, "llvm", params=params)
	assert mod["main"] == func_in_mod, "relay.build changed module in-place"

	# test
	rt = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
	rt.set_input("a", A)
	rt.run()
	out = rt.get_output(0)

	np.testing.assert_allclose(
	out.numpy(),
	np.maximum(np.dot(A.numpy(), B.numpy().T), 0) + C.numpy(),
	atol=1e-5,
	rtol=1e-5,
	)


	@tvm.testing.requires_cuda
	def test_fp16_build():
	dtype = "float16"

	dev = tvm.cuda(0)
	if dtype == "float16" and not have_fp16(dev.compute_version):
	print("skip because gpu does not support fp16")
	return

	x = relay.var("x", dtype=dtype, shape=(4, 4))
	y = relay.var("y", dtype=dtype, shape=(4, 4))
	z = x + y
	func = relay.Function([x, y], z)
	X = tvm.nd.array(np.random.uniform(-1, 1, (4, 4)).astype(dtype), device=dev)
	Y = tvm.nd.array(np.random.uniform(-1, 1, (4, 4)).astype(dtype), device=dev)
	params = {
	"x": X,
	"y": Y,
	}

	# build
	g_json, mmod, params = relay.build(func, "cuda", params=params)

	# test
	rt = tvm.contrib.graph_executor.create(g_json, mmod, dev)
	rt.load_params(runtime.save_param_dict(params))
	rt.run()
	out = rt.get_output(0)

	np.testing.assert_allclose(out.numpy(), X.numpy() + Y.numpy(), atol=1e-5, rtol=1e-5)


	@tvm.testing.requires_llvm
	def test_bf16_build():
	data = relay.var("data", shape=(1, 3, 224, 224), dtype="float32")
	weight = relay.var("weight", shape=(64, 3, 7, 7), dtype="float32")
	bn_gamma = relay.var("gamma", shape=(64,), dtype="float32")
	bn_beta = relay.var("beta", shape=(64,), dtype="float32")
	bn_mean = relay.var("mean", shape=(64,), dtype="float32")
	bn_var = relay.var("var", shape=(64,), dtype="float32")
	params = {
	"weight": np.random.uniform(-1, 1, size=(64, 3, 7, 7)).astype("float32"),
	"gamma": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
	"beta": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
	"mean": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
	"var": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
	}
	conv_bf16 = relay.nn.conv2d(
	relay.cast(data, "bfloat16"),
	relay.cast(weight, "bfloat16"),
	strides=(2, 2),
	padding=(3, 3, 3, 3),
	channels=64,
	kernel_size=(7, 7),
	out_dtype="bfloat16",
	)
	bn_bf16 = relay.nn.batch_norm(
	conv_bf16,
	relay.cast(bn_gamma, "bfloat16"),
	relay.cast(bn_beta, "bfloat16"),
	relay.cast(bn_mean, "bfloat16"),
	relay.cast(bn_var, "bfloat16"),
	)
	relu_bf16 = relay.nn.relu(bn_bf16[0])
	maxpool_bf16 = relay.nn.max_pool2d(relu_bf16, pool_size=(2, 2), strides=(2, 2))
	avgpool_bf16 = relay.nn.avg_pool2d(maxpool_bf16, pool_size=(2, 2), strides=(2, 2))
	flattened_bf16 = relay.nn.batch_flatten(avgpool_bf16)
	softmax_bf16 = relay.nn.softmax(flattened_bf16)
	mod_bf16 = tvm.IRModule.from_expr(softmax_bf16)
	with tvm.transform.PassContext(opt_level=3):
	relay.build(mod_bf16, target="llvm", params=params)


	@tvm.testing.parametrize_targets("llvm", "cuda")
	def test_fp16_conversion(target, dev):
	if target == "cuda" and not have_fp16(dev.compute_version):
	print("skip because gpu does not support fp16")
	return

	n = 10

	for (src, dst) in [("float32", "float16"), ("float16", "float32")]:
	x = relay.var("x", relay.TensorType((n,), src))
	y = x.astype(dst)
	func = relay.Function([x], y)

	# init input
	X = tvm.nd.array(n * np.random.randn(n).astype(src) - n / 2)

	# build
	with tvm.transform.PassContext(opt_level=1):
	g_json, mmod, params = relay.build(tvm.IRModule.from_expr(func), target)

	# test
	rt = tvm.contrib.graph_executor.create(g_json, mmod, dev)
	rt.set_input("x", X)
	rt.run()
	out = rt.get_output(0)

	np.testing.assert_allclose(out.numpy(), X.numpy().astype(dst), atol=1e-5, rtol=1e-5)


	if __name__ == "__main__":
	test_basic_build()
	test_fp16_build()
	test_fp16_conversion()
	test_bf16_build()