# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument, ungrouped-imports
"""Namespace for supporting Relay operators on VTA."""
from __future__ import absolute_import as _abs
import tvm
from tvm import te
import topi
from tvm.relay.op import op as reg
from tvm.relay.op import strategy as _strategy
from tvm.relay.op.op import OpPattern, OpStrategy
from .util import is_packed_layout
from .vta_conv2d import conv2d_packed, schedule_conv2d_packed
from .vta_conv2d_transpose import conv2d_transpose_packed, schedule_conv2d_transpose_packed
from .vta_group_conv2d import group_conv2d_packed, schedule_group_conv2d_packed
from .vta_dense import dense_packed, schedule_dense_packed
from ..environment import get_env
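
# The registrations below plug VTA-specific implementations into Relay's
# op strategy machinery: packed-layout workloads are dispatched to the
# VTA computes/schedules imported above, and everything else falls back
# to the stock CPU strategies.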
# Override the op pattern of "copy" (level=15 wins over the default
# registration) to force graph partitioning at copy.
reg.register_pattern("copy", OpPattern.INJECTIVE, level=15)

# add clip vta strategy
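# clip(x, a_min, a_max) is computed as two elementwise stages,
# min(x, a_max) followed by max(result, a_min), so that each stage
# lowers to a single vector min/max operation on VTA's ALU.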
def compute_clip_vta(attrs, inputs, output_type):
    """Clip operator."""
    x = inputs[0]
    a_min = attrs.a_min
    a_max = attrs.a_max
    const_min = tvm.tir.const(a_min, x.dtype)
    const_max = tvm.tir.const(a_max, x.dtype)
    with tvm.te.tag_scope(topi.tag.ELEMWISE):
        # Stage 1: clamp from above against a_max.
        x = te.compute(
            x.shape, lambda *i: tvm.te.min(x(*i), const_max), name="clipA")
        # Stage 2: clamp from below against a_min.
        x = te.compute(
            x.shape, lambda *i: tvm.te.max(x(*i), const_min), name="clipB")
    return [x]
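
# The strategy pairs compute_clip_vta with the generic injective schedule;
# TVM selects this implementation when compiling clip for the "vta" target.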
def clip_strategy_vta(attrs, inputs, out_type, target):
    """clip vta strategy"""
    strategy = OpStrategy()
    strategy.add_implementation(
        compute_clip_vta,
        _strategy.wrap_topi_schedule(topi.generic.schedule_injective),
        name="clip.vta")
    return strategy

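# clip has no dedicated strategy functor in _strategy to decorate, so the
# VTA specialization is registered on the op's FTVMStrategy GenericFunc
# directly.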
reg.get("clip").get_attr("FTVMStrategy").register(clip_strategy_vta, "vta")

@_strategy.conv2d_strategy.register("vta")
def conv2d_strategy_vta(attrs, inputs, out_type, target):
    """conv2d vta strategy"""
    strategy = OpStrategy()
    kernel = inputs[1]
    dilation = topi.util.get_const_tuple(attrs.dilation)
    groups = attrs.groups
    layout = attrs.data_layout

    assert dilation == (1, 1), "support for dilation limited to (1, 1)"
    if is_packed_layout(layout):
        if groups == 1:
            env = get_env()
            # LOG_INP_WIDTH/LOG_WGT_WIDTH of 3 mean 2**3 = 8-bit operands.
            assert env.LOG_INP_WIDTH == 3, "only support 8bit inp for now"
            assert env.LOG_WGT_WIDTH == 3, "only support 8bit wgt for now"
            assert kernel.dtype == "int8"
            strategy.add_implementation(
                _strategy.wrap_compute_conv2d(conv2d_packed, True),
                _strategy.wrap_topi_schedule(schedule_conv2d_packed),
                name="conv2d_packed.vta")
        else:  # group_conv2d
            strategy.add_implementation(
                _strategy.wrap_compute_conv2d(group_conv2d_packed, has_groups=True),
                _strategy.wrap_topi_schedule(schedule_group_conv2d_packed),
                name="group_conv2d_packed.vta")
        return strategy

    # If it's not packed, run on ARM CPU
    arm_tgt = tvm.target.arm_cpu(target.model)
    return _strategy.arm_cpu.conv2d_strategy_arm_cpu(attrs, inputs, out_type, arm_tgt)

@_strategy.conv2d_transpose_strategy.register("vta")
def conv2d_transpose_strategy_vta(attrs, inputs, out_type, target):
    """conv2d_transpose vta strategy"""
    dilation = topi.util.get_const_tuple(attrs.dilation)
    layout = attrs.data_layout
    assert dilation == (1, 1), "support for dilation limited to (1, 1)"
    if is_packed_layout(layout):
        strategy = OpStrategy()
        strategy.add_implementation(
            _strategy.wrap_compute_conv2d_transpose(conv2d_transpose_packed),
            _strategy.wrap_topi_schedule(schedule_conv2d_transpose_packed),
            name="conv2d_transpose_packed.vta")
        return strategy

    # If it's not packed, run on ARM CPU
    arm_tgt = tvm.target.arm_cpu(target.model)
    return _strategy.arm_cpu.conv2d_transpose_strategy_arm_cpu(attrs, inputs, out_type, arm_tgt)

@_strategy.dense_strategy.register("vta")
def dense_strategy_vta(attrs, inputs, out_type, target):
    """dense vta strategy"""
    if len(inputs[0].shape) == 4:  # a 4-D input implies the packed layout
        strategy = OpStrategy()
        strategy.add_implementation(
            _strategy.wrap_compute_dense(dense_packed),
            _strategy.wrap_topi_schedule(schedule_dense_packed),
            name="dense_packed.vta")
        return strategy

    # If it's not packed, run on the CPU; the x86 dense strategy is
    # generic enough to schedule for the ARM target as well.
    arm_tgt = tvm.target.arm_cpu(target.model)
    return _strategy.x86.dense_strategy_cpu(attrs, inputs, out_type, arm_tgt)
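
# A rough sketch of how these registrations get exercised during
# compilation (illustrative only; `relay_prog` and `params` are
# hypothetical, and the exact build incantation depends on the
# TVM/VTA version):
#
#   import tvm
#   from tvm import relay
#   import vta
#
#   env = vta.get_env()
#   with vta.build_config():
#       graph, lib, params = relay.build(
#           relay_prog, target=env.target, params=params)
#
# relay.build consults the "vta" FTVMStrategy registrations above when
# lowering clip/conv2d/dense calls whose layouts have been packed by
# the graph_pack pass.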