| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| import numpy as np |
| import pytest |
| |
| import tvm |
| import tvm.testing |
| from tvm import relax |
| from tvm.relax.backend.contrib.cudnn import partition_for_cudnn |
| from tvm.script import relax as R |
| |
| from tvm.script.ir_builder import IRBuilder |
| from tvm.script.ir_builder import relax as relax_builder |
| |
| |
| @pytest.fixture(autouse=True) |
| def reset_seed(): |
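    """Seed NumPy so every test sees deterministic random inputs."""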
| np.random.seed(0) |
| |
| |
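# Skip every test in this file when cuDNN is not available.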
| pytestmark = tvm.testing.requires_cudnn.marks() |
| |
| |
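# Maps activation names used in test parameters to their Relax operators.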
| _activation_table = { |
| "none": None, |
| "relu": R.nn.relu, |
| "gelu": R.nn.gelu, |
| "silu": R.nn.silu, |
| } |
| |
| |
| def get_relax_conv2d_module( |
| data_shape, |
| weight_shape, |
| dtype, |
| with_bias=False, |
| activation=None, |
| residual_bin_op=None, |
| residual_activation=None, |
| data_layout="NHWC", |
| kernel_layout="OHWI", |
| ): |
| with IRBuilder() as builder: |
| with relax_builder.function(): |
| R.func_name("main") |
| data = R.arg("data", R.Tensor(data_shape, dtype)) |
| weight = R.arg("weight", R.Tensor(weight_shape, dtype)) |
| if with_bias: |
| if data_layout == "NHWC": |
| bias = R.arg("bias", R.Tensor((1, 1, 1, weight_shape[0]), dtype)) |
| elif data_layout == "NCHW": |
| bias = R.arg("bias", R.Tensor((1, weight_shape[0], 1, 1), dtype)) |
| else: |
                    raise ValueError(f"Unsupported data_layout: {data_layout}")
| |
| with R.dataflow() as frame: |
| output = R.emit( |
| R.nn.conv2d( |
| data, |
| weight, |
| out_dtype=dtype, |
| padding=(1, 1), |
| data_layout=data_layout, |
| kernel_layout=kernel_layout, |
| ) |
| ) |
| if with_bias: |
| output = R.emit(output + bias) |
| if activation is not None: |
| output = R.emit(activation(output)) |
| if residual_bin_op is not None: |
| output = R.emit(residual_bin_op(output, data)) |
| if residual_activation is not None: |
| output = R.emit(residual_activation(output)) |
| R.output(output) |
| |
| R.func_ret_value(frame.output_vars[0]) |
| |
| func = builder.get() |
| return tvm.IRModule({"main": func}) |
| |
| |
| def get_result_with_relax_cudnn_offload(mod, np_inputs, cuda_graph=False): |
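    """Partition the module for cuDNN, run codegen, and execute it on CUDA."""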
| mod = partition_for_cudnn(mod) |
| mod = relax.transform.RunCodegen()(mod) |
    return build_and_run(mod, np_inputs, "cuda", cuda_graph=cuda_graph)
| |
| |
| def build_and_run(mod, inputs_np, target, legalize=False, cuda_graph=False): |
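    """Compile the module for the given target and run "main" on the NumPy inputs."""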
| dev = tvm.device(target, 0) |
| with tvm.transform.PassContext( |
| config={ |
| "relax.backend.use_cuda_graph": cuda_graph, |
| "relax.transform.apply_legalize_ops": legalize, |
| } |
| ): |
| ex = relax.build(mod, target) |
| vm = relax.VirtualMachine(ex, dev) |
| f = vm["main"] |
| inputs = [tvm.nd.array(inp, dev) for inp in inputs_np] |
| |
    # With cuda_graph enabled, run the compiled function twice to make sure the
    # cached graph can be launched on the second run.
| if cuda_graph: |
| f(*inputs) |
| |
| return f(*inputs).numpy() |
| |
| |
| @pytest.mark.parametrize( |
| "data_shape, weight_shape, dtype, with_bias, activation", |
| [ |
| # Regular |
| ((16, 32, 32, 16), (32, 3, 3, 16), "float16", False, "none"), |
| ], |
| ) |
| def test_cudnn_partition_conv2d_without_bias( |
| data_shape, weight_shape, dtype, with_bias, activation |
| ): |
    activation = _activation_table[activation]
| mod = get_relax_conv2d_module( |
| data_shape, |
| weight_shape, |
| dtype, |
| with_bias=with_bias, |
| activation=activation, |
| ) |
| mod = partition_for_cudnn(mod) |
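    # Partitioning should produce a call to a single fused cuDNN conv2d function.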
| assert ( |
| mod["main"].body.blocks[0].bindings[0].value.op.name_hint == "fused_relax_nn_conv2d_cudnn" |
| ) |
| |
| |
| @pytest.mark.parametrize( |
| "data_shape, weight_shape, dtype, with_bias, activation", |
| [ |
| # Regular |
| ((16, 32, 32, 16), (32, 3, 3, 16), "float32", False, "none"), |
| # Bias |
| ((16, 32, 32, 16), (32, 3, 3, 16), "float32", True, "none"), |
| # Bias+ReLU |
| ((16, 32, 32, 16), (32, 3, 3, 16), "float32", True, "relu"), |
| # Bias+ReLU+half |
| ((16, 32, 32, 16), (32, 3, 3, 16), "float16", True, "relu"), |
| ], |
| ) |
| def test_conv2d_offload(data_shape, weight_shape, dtype, with_bias, activation): |
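    """Compare cuDNN-offloaded conv2d results against a legalized LLVM reference."""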
    data = np.random.randn(*data_shape).astype(dtype)
    weight = np.random.randn(*weight_shape).astype(dtype)

    if with_bias:
        oc = weight_shape[0]
        bias = np.random.randn(1, 1, 1, oc).astype(dtype)
        args = (data, weight, bias)
    else:
        args = (data, weight)
| |
| activation = _activation_table[activation] |
| |
| mod = get_relax_conv2d_module( |
| data_shape, |
| weight_shape, |
| dtype, |
| with_bias=with_bias, |
| activation=activation, |
| ) |
| |
| out = get_result_with_relax_cudnn_offload(mod, args) |
| ref = build_and_run(mod, args, "llvm", legalize=True) |
| if dtype == "float16": |
| tvm.testing.assert_allclose(out, ref, rtol=1e-1, atol=1e-1) |
| else: |
| tvm.testing.assert_allclose(out, ref, rtol=1e-2, atol=1e-2) |
| |
| |
| @pytest.mark.skip(reason="flaky test") |
| @pytest.mark.parametrize( |
| "data_shape, weight_shape, dtype, with_bias, activation", |
| [ |
| # Regular |
| ((16, 16, 32, 32), (32, 16, 3, 3), "float32", False, "none"), |
| # Bias |
| ((16, 16, 32, 32), (32, 16, 3, 3), "float32", True, "none"), |
| # Bias+ReLU |
| ((16, 16, 32, 32), (32, 16, 3, 3), "float32", True, "relu"), |
| # Bias+ReLU+half |
| ((16, 16, 32, 32), (32, 16, 3, 3), "float16", True, "relu"), |
| ], |
| ) |
| def test_conv2d_nchw_oihw_offload(data_shape, weight_shape, dtype, with_bias, activation): |
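    """Same as test_conv2d_offload, but with NCHW data and OIHW kernel layouts."""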
    data = np.random.randn(*data_shape).astype(dtype)
    weight = np.random.randn(*weight_shape).astype(dtype)

    if with_bias:
        oc = weight_shape[0]
        bias = np.random.randn(1, oc, 1, 1).astype(dtype)
        args = (data, weight, bias)
    else:
        args = (data, weight)
| |
| activation = _activation_table[activation] |
| |
| mod = get_relax_conv2d_module( |
| data_shape, |
| weight_shape, |
| dtype, |
| with_bias=with_bias, |
| activation=activation, |
| data_layout="NCHW", |
| kernel_layout="OIHW", |
| ) |
| |
| out = get_result_with_relax_cudnn_offload(mod, args) |
| ref = build_and_run(mod, args, "llvm", legalize=True) |
| if dtype == "float16": |
| tvm.testing.assert_allclose(out, ref, rtol=1e-1, atol=1e-1) |
| else: |
| tvm.testing.assert_allclose(out, ref, rtol=1e-2, atol=1e-2) |
| |
| |
| if __name__ == "__main__": |
| tvm.testing.main() |