# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name, unused-argument
"""Compute definition for conv3d with cuda backend"""
from tvm import te
from tvm import autotvm
from tvm.contrib import cudnn
from tvm.autotvm.task.space import OtherOptionEntity
from .. import nn, generic
from ..utils import get_const_tuple, traverse_inline
from .conv3d_direct import schedule_direct_conv3d_cuda


@autotvm.register_topi_compute("conv3d_ncdhw.cuda")
def conv3d_ncdhw(cfg, data, kernel, strides, padding, dilation, groups, out_dtype="float32"):
"""Conv3D operator in NCDHW layout for cuda backend.
Parameters
----------
cfg: ConfigEntity
The config for this template
data : tvm.te.Tensor
5-D with shape [batch, in_channel, in_depth, in_height, in_width]
kernel : tvm.te.Tensor
5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width]
strides : int or a list/tuple of three ints
stride size, or [stride_depth, stride_height, stride_width]
padding : int or a list/tuple of three ints
padding size, or [pad_depth, pad_height, pad_width]
dilation: int or a list/tuple of three ints
dilation size, or [dilation_depth, dilation_height, dilation_width]
groups: int
Number of groups
out_dtype: str
The output type. This is used for mixed precision.
Returns
-------
output : tvm.te.Tensor
5-D with shape [batch, out_channel, out_depth, out_height, out_width]
"""
    return nn.conv3d_ncdhw(data, kernel, strides, padding, dilation, groups, out_dtype)


@autotvm.register_topi_schedule("conv3d_ncdhw.cuda")
def schedule_conv3d_ncdhw(cfg, outs):
"""TOPI schedule callback of conv3d for cuda gpu
Parameters
----------
cfg: ConfigEntity
The config for this template
outs: Array of Tensor
        The computation graph description of conv3d
in the format of an array of tensors.
Returns
-------
s: Schedule
        The computation schedule for conv3d.
"""
outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
s = te.create_schedule([x.op for x in outs])
def _callback(op):
if "conv3d_ncdhw" in op.tag:
schedule_direct_conv3d_cuda(cfg, s, op.output(0), "NCDHW", "conv3d_ncdhw.cuda")
traverse_inline(s, outs[0].op, _callback)
return s
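
# Illustrative usage sketch (not part of this module's API; the shapes, names,
# and fallback-tuning setup below are assumptions for illustration only).  The
# registered compute and schedule are typically paired under a CUDA target,
# with AutoTVM falling back to a default config when no tuning records exist:
#
#     import tvm
#     from tvm import te, topi
#
#     data = te.placeholder((1, 16, 8, 32, 32), name="data")     # NCDHW
#     kernel = te.placeholder((32, 16, 3, 3, 3), name="kernel")  # OIDHW
#     with tvm.target.Target("cuda"):
#         out = topi.cuda.conv3d_ncdhw(data, kernel, 1, 1, 1, 1)
#         s = topi.cuda.schedule_conv3d_ncdhw([out])
#         mod = tvm.build(s, [data, kernel, out], target="cuda")
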
@autotvm.register_topi_compute("conv3d_ndhwc.cuda")
def conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, groups, out_dtype="float32"):
"""Conv3d operator in NDHWC layout for cuda backend.
Parameters
----------
    cfg: ConfigEntity
        The config for this template
    data : tvm.te.Tensor
        5-D with shape [batch, in_depth, in_height, in_width, in_channel]
    kernel : tvm.te.Tensor
        5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter]
    strides : int or a list/tuple of three ints
        stride size, or [stride_depth, stride_height, stride_width]
    padding : int, str, or a list/tuple of three ints
        padding size, 'VALID', 'SAME', or [pad_depth, pad_height, pad_width]
    dilation: int or a list/tuple of three ints
        dilation size, or [dilation_depth, dilation_height, dilation_width]
    groups: int
        Number of groups
    out_dtype: str
        The output type. This is used for mixed precision.
Returns
-------
Output : tvm.te.Tensor
5-D with shape [batch, out_depth, out_height, out_width, out_channel]
"""
    return nn.conv3d_ndhwc(data, kernel, strides, padding, dilation, groups, out_dtype)


@autotvm.register_topi_schedule("conv3d_ndhwc.cuda")
def schedule_conv3d_ndhwc(cfg, outs):
"""TOPI schedule callback of conv3d for cuda gpu
Parameters
----------
cfg: ConfigEntity
The config for this template
outs: Array of Tensor
The computation graph description of conv3d
in the format of an array of tensors.
Returns
-------
s: Schedule
        The computation schedule for conv3d.
"""
outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
s = te.create_schedule([x.op for x in outs])
def _callback(op):
if "conv3d_ndhwc" in op.tag:
schedule_direct_conv3d_cuda(cfg, s, op.output(0), "NDHWC", "conv3d_ndhwc.cuda")
traverse_inline(s, outs[0].op, _callback)
return s
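
# The NDHWC pair above follows the same pattern as the NCDHW sketch earlier,
# with the layout swapped (assumed shapes, for illustration only): data of
# shape (1, 8, 32, 32, 16) and kernel of shape (3, 3, 3, 16, 32), passed to
# topi.cuda.conv3d_ndhwc / topi.cuda.schedule_conv3d_ndhwc.
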
@autotvm.register_topi_compute("conv3d_cudnn.cuda")
def conv3d_cudnn(
cfg, data, kernel, strides, padding, dilation, groups, layout="NCDHW", out_dtype="float32"
):
"""Conv3D operator for cuda backend.
Parameters
----------
cfg: ConfigEntity
The config for this template
data : tvm.te.Tensor
5-D with shape [batch, in_channel, in_depth, in_height, in_width]
kernel : tvm.te.Tensor
5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width]
strides : int or a list/tuple of three ints
stride size, or [stride_depth, stride_height, stride_width]
padding : int or a list/tuple of three ints
padding size, or [pad_depth, pad_height, pad_width]
dilation: int or a list/tuple of three ints
dilation size, or [dilation_depth, dilation_height, dilation_width]
layout : str
        layout of data, either "NCDHW" or "NDHWC"
out_dtype: str
The output type. This is used for mixed precision.
Returns
-------
output : tvm.te.Tensor
5-D with shape [batch, out_channel, out_depth, out_height, out_width]
"""
if layout == "NCDHW":
tensor_format = 0 # CUDNN_TENSOR_NCHW
N, _, D, H, W = get_const_tuple(data.shape)
elif layout == "NDHWC":
tensor_format = 1 # CUDNN_TENSOR_NHWC
N, D, H, W, _ = get_const_tuple(data.shape)
else:
raise ValueError(f"Unsupported layout {layout} in cudnn")
CO, CI, KD, KH, KW = get_const_tuple(kernel.shape)
assert groups == 1, "conv3d_cudnn does not support groups"
# handle dilation
stride_d, stride_h, stride_w = (
(strides, strides, strides) if isinstance(strides, int) else strides
)
pad_d, pad_h, pad_w = (padding, padding, padding) if isinstance(padding, int) else padding
dilation_d, dilation_h, dilation_w = (
(dilation, dilation, dilation) if isinstance(dilation, int) else dilation
)
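    # Output spatial extents, used only for the FLOP estimate below; dilation
    # is not folded into these extents, so the estimate is approximate for
    # dilated kernels.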
OD = (D + 2 * pad_d - KD) // stride_d + 1
OH = (H + 2 * pad_h - KH) // stride_h + 1
OW = (W + 2 * pad_w - KW) // stride_w + 1
if isinstance(N, int):
cfg.add_flop(
2
* N
* OD
* OH
* OW
* CO
* CI
* ((KD - 1) * dilation_d + 1)
* ((KH - 1) * dilation_h + 1)
* ((KW - 1) * dilation_w + 1)
)
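    # Worked example of the estimate above (assumed sizes, for illustration):
    # N=1, CI=16, CO=32, D=H=W=16, a 3x3x3 kernel, stride=1, pad=1, dilation=1
    # give OD = OH = OW = (16 + 2*1 - 3) // 1 + 1 = 16, so the recorded count
    # is 2 * 1 * 16**3 * 32 * 16 * 3**3 = 113,246,208 FLOPs (~0.11 GFLOP).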
cfg.define_knob("algo", range(cudnn.algo_to_index("fwd", "CUDNN_CONVOLUTION_FWD_ALGO_COUNT")))
if cfg.is_fallback:
if cudnn.exists():
# Let CUDNN choose the best algo, based on benchmarks run
# on the local machine. In the future, this should be
# based on parameters stored in the Target.
cfg["algo"] = OtherOptionEntity(-1)
else:
cfg["algo"] = OtherOptionEntity(0)
return cudnn.conv_forward(
data,
kernel,
[pad_d, pad_h, pad_w],
[stride_d, stride_h, stride_w],
[dilation_d, dilation_h, dilation_w],
conv_mode=1,
tensor_format=tensor_format,
algo=cfg["algo"].val,
        conv_dtype=data.dtype,
)
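
# Illustrative sketch (an assumption about typical use, not taken from this
# file): the cuDNN compute is usually selected by the CUDA op strategy when
# the target enables the cudnn library, and it pairs with the generic extern
# schedule defined below.  Requires a TVM build with cuDNN support:
#
#     with tvm.target.Target("cuda -libs=cudnn"):
#         out = topi.cuda.conv3d_cudnn(data, kernel, 1, 1, 1, 1, layout="NCDHW")
#         s = topi.cuda.schedule_conv3d_cudnn([out])
#         mod = tvm.build(s, [data, kernel, out], target="cuda -libs=cudnn")
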
@autotvm.register_topi_schedule("conv3d_cudnn.cuda")
def schedule_conv3d_cudnn(_, outs):
"""TOPI schedule callback of conv3d for cuda gpu
Parameters
----------
cfg: ConfigEntity
        The config for this template (unused by this schedule)
outs: Array of Tensor
        The computation graph description of conv3d
in the format of an array of tensors.
Returns
-------
s: Schedule
        The computation schedule for conv3d.
"""
return generic.schedule_extern(outs)