# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name, unused-argument
"""Compute definition for conv3d with cuda backend"""
from tvm import te
from tvm import autotvm
from tvm.contrib import cudnn
from tvm.autotvm.task.space import OtherOptionEntity
from .. import nn, generic
from ..utils import get_const_tuple, traverse_inline
from .conv3d_direct import schedule_direct_conv3d_cuda


@autotvm.register_topi_compute("conv3d_ncdhw.cuda")
def conv3d_ncdhw(cfg, data, kernel, strides, padding, dilation, groups, out_dtype="float32"):
"""Conv3D operator in NCDHW layout for cuda backend.
Parameters
----------
cfg: ConfigEntity
The config for this template
data : tvm.te.Tensor
5-D with shape [batch, in_channel, in_depth, in_height, in_width]
kernel : tvm.te.Tensor
5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width]
strides : int or a list/tuple of three ints
stride size, or [stride_depth, stride_height, stride_width]
padding : int or a list/tuple of three ints
padding size, or [pad_depth, pad_height, pad_width]
dilation: int or a list/tuple of three ints
dilation size, or [dilation_depth, dilation_height, dilation_width]
groups: int
Number of groups
out_dtype: str
The output type. This is used for mixed precision.
Returns
-------
output : tvm.te.Tensor
5-D with shape [batch, out_channel, out_depth, out_height, out_width]
"""
    return nn.conv3d_ncdhw(data, kernel, strides, padding, dilation, groups, out_dtype)


@autotvm.register_topi_schedule("conv3d_ncdhw.cuda")
def schedule_conv3d_ncdhw(cfg, outs):
"""TOPI schedule callback of conv3d for cuda gpu
Parameters
----------
cfg: ConfigEntity
The config for this template
outs: Array of Tensor
        The computation graph description of conv3d
in the format of an array of tensors.
Returns
-------
s: Schedule
        The computation schedule for conv3d.
"""
outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
s = te.create_schedule([x.op for x in outs])
def _callback(op):
if "conv3d_ncdhw" in op.tag:
schedule_direct_conv3d_cuda(cfg, s, op.output(0), "NCDHW", "conv3d_ncdhw.cuda")
traverse_inline(s, outs[0].op, _callback)
return s
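
# Illustrative usage sketch (not part of this module's API; the shapes, names,
# and fallback-tuning setup below are assumptions for illustration only).  The
# registered compute and schedule are typically paired under a CUDA target,
# with AutoTVM falling back to a default config when no tuning records exist:
#
#     import tvm
#     from tvm import te, topi
#
#     data = te.placeholder((1, 16, 8, 32, 32), name="data")     # NCDHW
#     kernel = te.placeholder((32, 16, 3, 3, 3), name="kernel")  # OIDHW
#     with tvm.target.Target("cuda"):
#         out = topi.cuda.conv3d_ncdhw(data, kernel, 1, 1, 1, 1)
#         s = topi.cuda.schedule_conv3d_ncdhw([out])
#         mod = tvm.build(s, [data, kernel, out], target="cuda")
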
@autotvm.register_topi_compute("conv3d_ndhwc.cuda")
def conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, groups, out_dtype="float32"):
"""Conv3d operator in NDHWC layout for cuda backend.
Parameters
----------
    cfg: ConfigEntity
        The config for this template
    data : tvm.te.Tensor
        5-D with shape [batch, in_depth, in_height, in_width, in_channel]
    kernel : tvm.te.Tensor
        5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter]
    strides : int or a list/tuple of three ints
        stride size, or [stride_depth, stride_height, stride_width]
    padding : int, str, or a list/tuple of three ints
        padding size, 'VALID', 'SAME', or [pad_depth, pad_height, pad_width]
    dilation: int or a list/tuple of three ints
        dilation size, or [dilation_depth, dilation_height, dilation_width]
    groups: int
        Number of groups
    out_dtype: str
        The output type. This is used for mixed precision.
Returns
-------
Output : tvm.te.Tensor
5-D with shape [batch, out_depth, out_height, out_width, out_channel]
"""
    return nn.conv3d_ndhwc(data, kernel, strides, padding, dilation, groups, out_dtype)


@autotvm.register_topi_schedule("conv3d_ndhwc.cuda")
def schedule_conv3d_ndhwc(cfg, outs):
"""TOPI schedule callback of conv3d for cuda gpu
Parameters
----------
cfg: ConfigEntity
The config for this template
outs: Array of Tensor
The computation graph description of conv3d
in the format of an array of tensors.
Returns
-------
s: Schedule
        The computation schedule for conv3d.
"""
outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
s = te.create_schedule([x.op for x in outs])
def _callback(op):
if "conv3d_ndhwc" in op.tag:
schedule_direct_conv3d_cuda(cfg, s, op.output(0), "NDHWC", "conv3d_ndhwc.cuda")
traverse_inline(s, outs[0].op, _callback)
return s
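
# The NDHWC pair above follows the same pattern as the NCDHW sketch earlier,
# with the layout swapped (assumed shapes, for illustration only): data of
# shape (1, 8, 32, 32, 16) and kernel of shape (3, 3, 3, 16, 32), passed to
# topi.cuda.conv3d_ndhwc / topi.cuda.schedule_conv3d_ndhwc.
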
@autotvm.register_topi_compute("conv3d_cudnn.cuda")
def conv3d_cudnn(
cfg, data, kernel, strides, padding, dilation, groups, layout="NCDHW", out_dtype="float32"
):
"""Conv3D operator for cuda backend.
Parameters
----------
cfg: ConfigEntity
The config for this template
data : tvm.te.Tensor
5-D with shape [batch, in_channel, in_depth, in_height, in_width]
kernel : tvm.te.Tensor
5-D with shape [num_filter, in_channel, filter_depth, filter_height, filter_width]
strides : int or a list/tuple of three ints
stride size, or [stride_depth, stride_height, stride_width]
padding : int or a list/tuple of three ints
padding size, or [pad_depth, pad_height, pad_width]
dilation: int or a list/tuple of three ints
dilation size, or [dilation_depth, dilation_height, dilation_width]
layout : str
        layout of data, either "NCDHW" or "NDHWC"
out_dtype: str
The output type. This is used for mixed precision.
Returns
-------
output : tvm.te.Tensor
5-D with shape [batch, out_channel, out_depth, out_height, out_width]
"""
if layout == "NCDHW":
tensor_format = 0 # CUDNN_TENSOR_NCHW
N, _, D, H, W = get_const_tuple(data.shape)
elif layout == "NDHWC":
tensor_format = 1 # CUDNN_TENSOR_NHWC
N, D, H, W, _ = get_const_tuple(data.shape)
else:
raise ValueError(f"Unsupported layout {layout} in cudnn")
CO, CI, KD, KH, KW = get_const_tuple(kernel.shape)
assert groups == 1, "conv3d_cudnn does not support groups"
# handle dilation
stride_d, stride_h, stride_w = (
(strides, strides, strides) if isinstance(strides, int) else strides
)
pad_d, pad_h, pad_w = (padding, padding, padding) if isinstance(padding, int) else padding
dilation_d, dilation_h, dilation_w = (
(dilation, dilation, dilation) if isinstance(dilation, int) else dilation
)
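    # Output spatial extents, used only for the FLOP estimate below; dilation
    # is not folded into these extents, so the estimate is approximate for
    # dilated kernels.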
OD = (D + 2 * pad_d - KD) // stride_d + 1
OH = (H + 2 * pad_h - KH) // stride_h + 1
OW = (W + 2 * pad_w - KW) // stride_w + 1
if isinstance(N, int):
cfg.add_flop(
2
* N
* OD
* OH
* OW
* CO
* CI
* ((KD - 1) * dilation_d + 1)
* ((KH - 1) * dilation_h + 1)
* ((KW - 1) * dilation_w + 1)
)
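    # Worked example of the estimate above (assumed sizes, for illustration):
    # N=1, CI=16, CO=32, D=H=W=16, a 3x3x3 kernel, stride=1, pad=1, dilation=1
    # give OD = OH = OW = (16 + 2*1 - 3) // 1 + 1 = 16, so the recorded count
    # is 2 * 1 * 16**3 * 32 * 16 * 3**3 = 113,246,208 FLOPs (~0.11 GFLOP).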
cfg.define_knob("algo", range(cudnn.algo_to_index("fwd", "CUDNN_CONVOLUTION_FWD_ALGO_COUNT")))
if cfg.is_fallback:
if cudnn.exists():
# Let CUDNN choose the best algo, based on benchmarks run
# on the local machine. In the future, this should be
# based on parameters stored in the Target.
cfg["algo"] = OtherOptionEntity(-1)
else:
cfg["algo"] = OtherOptionEntity(0)
return cudnn.conv_forward(
data,
kernel,
[pad_d, pad_h, pad_w],
[stride_d, stride_h, stride_w],
[dilation_d, dilation_h, dilation_w],
conv_mode=1,
tensor_format=tensor_format,
algo=cfg["algo"].val,
        conv_dtype=data.dtype,
)
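
# Illustrative sketch (an assumption about typical use, not taken from this
# file): the cuDNN compute is usually selected by the CUDA op strategy when
# the target enables the cudnn library, and it pairs with the generic extern
# schedule defined below.  Requires a TVM build with cuDNN support:
#
#     with tvm.target.Target("cuda -libs=cudnn"):
#         out = topi.cuda.conv3d_cudnn(data, kernel, 1, 1, 1, 1, layout="NCDHW")
#         s = topi.cuda.schedule_conv3d_cudnn([out])
#         mod = tvm.build(s, [data, kernel, out], target="cuda -libs=cudnn")
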
@autotvm.register_topi_schedule("conv3d_cudnn.cuda")
def schedule_conv3d_cudnn(_, outs):
"""TOPI schedule callback of conv3d for cuda gpu
Parameters
----------
cfg: ConfigEntity
        The config for this template (unused by this schedule)
outs: Array of Tensor
        The computation graph description of conv3d
in the format of an array of tensors.
Returns
-------
s: Schedule
        The computation schedule for conv3d.
"""
return generic.schedule_extern(outs)