# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument, ungrouped-imports
"""Namespace for supporting Relay operators on VTA."""
from __future__ import absolute_import as _abs

import tvm
import topi

from tvm.relay.op import op as reg
from tvm.relay.op.op import OpPattern
from tvm.relay.op.nn import _nn

from .util import is_packed_layout
from ..environment import get_env

# override to force partition at copy
reg.register_pattern("copy", OpPattern.INJECTIVE, level=15)
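
# The registrations below use level=15 so they take precedence over the
# default registrations for these operators; the pattern override on "copy"
# above is what forces graph partitioning to happen at copy operations.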


@reg.register_compute("clip", level=15)
def compute_clip(attrs, inputs, output_type, target):
    """ Clip operator. """
    x = inputs[0]
    a_min = attrs.a_min
    a_max = attrs.a_max
    const_min = tvm.const(a_min, x.dtype)
    const_max = tvm.const(a_max, x.dtype)
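    # Clip is expressed as two chained elementwise stages -- clamp from above
    # with a_max, then from below with a_min -- so each stage can be lowered
    # to a simple vector min/max on the VTA ALU. Both stages carry the
    # ELEMWISE tag so the schedules treat them as fusable injective ops.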
    with tvm.tag_scope(topi.tag.ELEMWISE):
        x = tvm.compute(
            x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
        x = tvm.compute(
            x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
    return [x]


@reg.register_compute("nn.conv2d", level=15)
def compute_conv2d(attrs, inputs, output_type, target):
    """ Compute definition of conv2d """
    padding = topi.util.get_const_tuple(attrs.padding)
    strides = topi.util.get_const_tuple(attrs.strides)
    dilation = tuple([int(d) for d in attrs.dilation])
    groups = attrs.groups
    layout = attrs.data_layout
    out_dtype = attrs.out_dtype

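    # Dispatch: on VTA, a packed data layout produced by graph packing
    # (e.g. something like "NCHW1n16c", depending on the hardware
    # configuration) is lowered through topi.nn.conv2d /
    # topi.nn.group_conv2d_nchw, which are expected to resolve to the
    # VTA-specific implementations. Unpacked layouts fall back to the ARM CPU
    # compute, and non-VTA targets keep the default _nn definition.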
    if target.device_name == "vta":
        assert dilation == (1, 1), "support for dilation limited to (1, 1)"
        if is_packed_layout(layout):
            if groups == 1:
                assert groups == 1
                env = get_env()
                assert env.LOG_INP_WIDTH == 3, "only support 8bit inp for now"
                assert env.LOG_WGT_WIDTH == 3, "only support 8bit wgt for now"
                inputs = list(inputs)
                assert inputs[1].dtype == "int8"
                return [topi.nn.conv2d(inputs[0],
                                       inputs[1],
                                       strides,
                                       padding,
                                       dilation,
                                       layout,
                                       out_dtype)]
            return [topi.nn.group_conv2d_nchw(inputs[0],
                                              inputs[1],
                                              strides,
                                              padding,
                                              dilation,
                                              groups,
                                              out_dtype)]
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.compute_conv2d(attrs, inputs, output_type, target)

    # If VTA is not the target, default to _nn def
    return _nn.compute_conv2d(attrs, inputs, output_type, target)


@reg.register_schedule("nn.conv2d", level=15)
def schedule_conv2d(attrs, outs, target):
    """ Schedule definition of conv2d """
    groups = attrs.groups
    layout = attrs.data_layout

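    # Mirrors the compute dispatch above: packed layouts on VTA go through the
    # generic TOPI schedules (which are expected to dispatch to the schedules
    # registered for the "vta" device), unpacked layouts are scheduled for the
    # ARM CPU, and non-VTA targets keep the default _nn schedule.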
    if target.device_name == "vta":
        if is_packed_layout(layout):
            target = tvm.target.create(target)
            assert target.device_name == "vta"
            if groups == 1:
                return topi.generic.schedule_conv2d_nchw(outs)
            return topi.generic.schedule_group_conv2d_nchw(outs)
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.schedule_conv2d(attrs, outs, tvm.target.current_target())

    # If VTA is not the target, default to _nn def
    return _nn.schedule_conv2d(attrs, outs, target)


@reg.register_compute("nn.conv2d_transpose", level=15)
def compute_conv2d_transpose(attrs, inputs, output_type, target):
    """ Compute definition of 2D transposed convolution """
    padding = topi.util.get_const_tuple(attrs.padding)
    strides = topi.util.get_const_tuple(attrs.strides)
    dilation = tuple([int(d) for d in attrs.dilation])
    layout = attrs.data_layout
    out_dtype = attrs.out_dtype

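    # Same dispatch pattern as conv2d: on VTA, dilation other than (1, 1) is
    # rejected and packed layouts are lowered with
    # topi.nn.conv2d_transpose_nchw; unpacked layouts fall back to the ARM
    # CPU, and other targets keep the default _nn compute.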
    if target.device_name == "vta":
        assert dilation == (1, 1), "support for dilation limited to (1, 1)"
        if is_packed_layout(layout):
            return [topi.nn.conv2d_transpose_nchw(
                inputs[0], inputs[1], strides, padding, out_dtype)]
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.compute_conv2d_transpose(attrs, inputs, output_type, target)

    # If VTA is not the target, default to _nn def
    return _nn.compute_conv2d_transpose(attrs, inputs, output_type, target)


@reg.register_schedule("nn.conv2d_transpose", level=15)
def schedule_conv2d_transpose(attrs, outputs, target):
    """ Schedule definition of 2D transposed convolution """
    layout = attrs.data_layout

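    # Packed layouts on VTA return the NCHW conv2d_transpose schedule;
    # everything else is scheduled with the ARM CPU / default _nn definitions,
    # following the same pattern as the other overrides in this file.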
    if target.device_name == "vta":
        if is_packed_layout(layout):
            return topi.nn.schedule_conv2d_transpose_nchw(outputs)
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.schedule_conv2d_transpose(attrs, outputs, tvm.target.current_target())

    # If VTA is not the target, default to _nn def
    return _nn.schedule_conv2d_transpose(attrs, outputs, tvm.target.current_target())


@reg.register_compute("nn.dense", level=15)
def compute_dense(attrs, inputs, out_type, target):
    """Compute definition of dense"""
    out_dtype = attrs.out_dtype
    out_dtype = inputs[0].dtype if out_dtype == "" else out_dtype

    if target.device_name == "vta":
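        # A rank-4 input implies the data has already been packed into VTA's
        # tensorized layout, so the topi dense compute is used (expected to
        # dispatch to the VTA dense implementation); unpacked inputs fall
        # back to the ARM CPU path below.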
        if len(inputs[0].shape) == 4:  # this implies the layout is packed
            target = tvm.target.create(target)
            return [topi.nn.dense(inputs[0], inputs[1], None, out_dtype)]
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.compute_dense(attrs, inputs, out_type, target)

    # If VTA is not the target, default to _nn def
    return _nn.compute_dense(attrs, inputs, out_type, target)


@reg.register_schedule("nn.dense", level=15)
def schedule_dense(attrs, outs, target):
    """Schedule definition of dense"""
    if target.device_name == "vta":
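        # As in compute_dense, a rank-4 output signals the packed layout:
        # schedule with the generic dense schedule (expected to dispatch to
        # the VTA implementation); otherwise fall back to the ARM CPU /
        # default _nn schedules.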
        if len(outs[0].shape) == 4:  # this implies the layout is packed
            target = tvm.target.create(target)
            assert target.device_name == "vta"
            return topi.generic.schedule_dense(outs)
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.schedule_dense(attrs, outs, tvm.target.current_target())

    # If VTA is not the target, default to _nn def
    return _nn.schedule_dense(attrs, outs, target)