| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
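"""Tests for the TVM cuDNN contrib bindings: conv2d, conv3d, and softmax forward ops."""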
| import tvm |
| from tvm import te |
| from tvm.contrib import cudnn |
| from tvm.contrib.nvcc import have_fp16 |
| import numpy as np |
| import tvm.topi.testing |
| import tvm.testing |
| |
| |
| def verify_conv2d(data_dtype, conv_dtype, tensor_format=0, groups=1): |
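    """Compile a cuDNN-backed conv2d, run it on the GPU, and check it against the topi reference."""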
| in_channel = 4 |
| out_channel = 16 |
| filter_h = 3 |
| filter_w = 3 |
| pad_h = 1 |
| pad_w = 1 |
| stride_h = 1 |
| stride_w = 1 |
| dilation_h = 1 |
| dilation_w = 1 |
| batch = 3 |
| height = 32 |
| width = 32 |
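    # With 3x3 kernels, pad 1, stride 1, and dilation 1, the usual conv arithmetic
    #   out = (in + 2 * pad - dilation * (kernel - 1) - 1) // stride + 1
    # gives (32 + 2 - 2 - 1) // 1 + 1 = 32, so the output spatial dims match the input.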
| |
| if not tvm.get_global_func("tvm.contrib.cudnn.conv.output_shape", True): |
| print("skip because cudnn is not enabled...") |
| return |
| if data_dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version): |
| print("Skip because gpu does not have fp16 support") |
| return |
| |
    # construct and schedule the cuDNN conv2d op
| if tensor_format == 0: |
| xshape = [batch, in_channel, height, width] |
| wshape = [out_channel, in_channel // groups, filter_h, filter_w] |
| else: |
| xshape = [batch, height, width, in_channel] |
| wshape = [out_channel, filter_h, filter_w, in_channel // groups] |
| |
| X = te.placeholder(xshape, name="X", dtype=data_dtype) |
| W = te.placeholder(wshape, name="W", dtype=data_dtype) |
| Y = cudnn.conv_forward( |
| X, |
| W, |
| [pad_h, pad_w], |
| [stride_h, stride_w], |
| [dilation_h, dilation_w], |
        conv_mode=1,  # 1: cross-correlation (the standard deep-learning convolution)
        tensor_format=tensor_format,  # 0: NCHW, 1: NHWC
        conv_dtype=conv_dtype,
        algo=-1,  # let cuDNN pick the best algorithm
| groups=groups, |
| ) |
    yshape = [x.value for x in Y.shape]  # output shape as computed by cuDNN's shape inference
| s = te.create_schedule(Y.op) |
| |
| # validation |
| ctx = tvm.gpu(0) |
| f = tvm.build(s, [X, W, Y], "cuda", target_host="llvm", name="conv2d") |
| x_np = np.random.uniform(-1, 1, xshape).astype(data_dtype) |
| w_np = np.random.uniform(-1, 1, wshape).astype(data_dtype) |
| y_np = np.zeros(yshape).astype(data_dtype) |
| x = tvm.nd.array(x_np, ctx) |
| w = tvm.nd.array(w_np, ctx) |
| y = tvm.nd.array(y_np, ctx) |
| if tensor_format == 0: |
| c_np = tvm.topi.testing.conv2d_nchw_python(x_np, w_np, 1, 1, groups=groups) |
| elif tensor_format == 1: |
| wt = w_np.transpose((1, 2, 3, 0)) # OHWI => HWIO |
| c_np = tvm.topi.testing.conv2d_nhwc_python(x_np, wt, 1, 1, groups=groups) |
| |
| f(x, w, y) |
| tvm.testing.assert_allclose(y.asnumpy(), c_np, atol=1e-2, rtol=1e-2) |
| |
| |
| @tvm.testing.requires_gpu |
| def test_conv2d(): |
| verify_conv2d("float32", "float32", tensor_format=0) |
| verify_conv2d("float16", "float32", tensor_format=1) |
| verify_conv2d("float16", "float16", tensor_format=0) |
| verify_conv2d("int8", "int32", tensor_format=1) |
| |
| verify_conv2d("float32", "float32", tensor_format=0, groups=2) |
| verify_conv2d("float16", "float32", tensor_format=1, groups=2) |
| verify_conv2d("float16", "float16", tensor_format=0, groups=2) |
| verify_conv2d("int8", "int32", tensor_format=1, groups=2) |
| |
| |
| def verify_conv3d(data_dtype, conv_dtype, tensor_format=0, groups=1): |
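    """Compile a cuDNN-backed conv3d, run it on the GPU, and check it against the topi reference."""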
| in_channel = 4 |
| out_channel = 16 |
| filter_d = 3 |
| filter_h = 3 |
| filter_w = 3 |
| pad_d = 1 |
| pad_h = 1 |
| pad_w = 1 |
| stride_d = 1 |
| stride_h = 1 |
| stride_w = 1 |
| dilation_d = 1 |
| dilation_h = 1 |
| dilation_w = 1 |
| batch = 3 |
| depth = 32 |
| height = 32 |
| width = 32 |
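    # Same conv arithmetic as in verify_conv2d: every output spatial dim stays at 32.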
| |
| if not tvm.get_global_func("tvm.contrib.cudnn.conv.output_shape", True): |
| print("skip because cudnn is not enabled...") |
| return |
| |
    # construct and schedule the cuDNN conv3d op
| xshape = [batch, in_channel, depth, height, width] |
| wshape = [out_channel, in_channel // groups, filter_d, filter_h, filter_w] |
| |
| X = te.placeholder(xshape, name="X", dtype=data_dtype) |
| W = te.placeholder(wshape, name="W", dtype=data_dtype) |
| Y = cudnn.conv_forward( |
| X, |
| W, |
| [pad_d, pad_h, pad_w], |
| [stride_d, stride_h, stride_w], |
| [dilation_d, dilation_h, dilation_w], |
| conv_mode=1, |
| tensor_format=tensor_format, |
| algo=-1, |
| conv_dtype=conv_dtype, |
| groups=groups, |
| ) |
    yshape = [x.value for x in Y.shape]  # output shape as computed by cuDNN's shape inference
| s = te.create_schedule(Y.op) |
| |
| # validation |
| ctx = tvm.gpu(0) |
| f = tvm.build(s, [X, W, Y], "cuda", target_host="llvm", name="conv3d") |
| x_np = np.random.uniform(-1, 1, xshape).astype(data_dtype) |
| w_np = np.random.uniform(-1, 1, wshape).astype(data_dtype) |
| y_np = np.zeros(yshape).astype(data_dtype) |
| x = tvm.nd.array(x_np, ctx) |
| w = tvm.nd.array(w_np, ctx) |
| y = tvm.nd.array(y_np, ctx) |
| if tensor_format == 0: |
| c_np = tvm.topi.testing.conv3d_ncdhw_python(x_np, w_np, 1, 1, groups) |
| else: |
        raise AssertionError("For now, conv3d tensor_format only supports 0 (NCDHW)")
| |
| f(x, w, y) |
| tvm.testing.assert_allclose(y.asnumpy(), c_np, atol=3e-5, rtol=1e-4) |
| |
| |
| @tvm.testing.requires_gpu |
| def test_conv3d(): |
| verify_conv3d("float32", "float32", tensor_format=0) |
| verify_conv3d("float32", "float32", tensor_format=0, groups=2) |
| |
| |
| def verify_softmax(shape, axis, dtype="float32"): |
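    """Run cuDNN softmax along `axis` and compare with the NumPy reference."""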
| A = te.placeholder(shape, dtype=dtype, name="A") |
| B = cudnn.softmax(A, axis) |
| s = te.create_schedule([B.op]) |
| |
| ctx = tvm.gpu(0) |
| a_np = np.random.uniform(size=shape).astype(dtype) |
    b_np = tvm.topi.testing.softmax_python(a_np)  # last-axis reference; all callers pass axis=-1
| a = tvm.nd.array(a_np, ctx) |
    b = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)  # zero output buffer so a no-op kernel cannot pass
| f = tvm.build(s, [A, B], "cuda", target_host="llvm", name="softmax") |
| f(a, b) |
| tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3) |
| |
| |
| def verify_softmax_4d(shape, dtype="float32"): |
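    """Run cuDNN softmax over the channel axis of an NCHW tensor and compare with NumPy."""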
| A = te.placeholder(shape, dtype=dtype, name="A") |
| B = cudnn.softmax(A, axis=1) |
| s = te.create_schedule([B.op]) |
| |
| ctx = tvm.gpu(0) |
| n, c, h, w = shape |
| a_np = np.random.uniform(size=shape).astype(dtype) |
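    # Reference: put channels last and flatten the spatial dims into rows (n == 1 in
    # these tests), let softmax_python normalize each row's channel vector, then
    # transpose the result back to NCHW.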
| b_np = tvm.topi.testing.softmax_python(a_np.transpose(0, 2, 3, 1).reshape(h * w, c)) |
| b_np = b_np.reshape(n, h, w, c).transpose(0, 3, 1, 2) |
| a = tvm.nd.array(a_np, ctx) |
    b = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)  # zero output buffer so a no-op kernel cannot pass
| f = tvm.build(s, [A, B], "cuda", target_host="llvm", name="softmax") |
| f(a, b) |
| tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3) |
| |
| |
| @tvm.testing.requires_gpu |
| def test_softmax(): |
| if not tvm.get_global_func("tvm.contrib.cudnn.conv.output_shape", True): |
| print("skip because cudnn is not enabled...") |
| return |
| |
| verify_softmax((32, 10), -1) |
| verify_softmax((3, 4), -1) |
| verify_softmax((1, 5), -1, "float64") |
| verify_softmax_4d((1, 16, 256, 256)) |
| verify_softmax_4d((1, 16, 256, 256), "float64") |
| |
| |
| if __name__ == "__main__": |
| test_conv2d() |
| test_conv3d() |
| test_softmax() |