# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument, ungrouped-imports
"""Namespace for supporting Relay operators on VTA."""
from __future__ import absolute_import as _abs

import tvm
import topi

from tvm.relay.op import op as reg
from tvm.relay.op.op import OpPattern
from tvm.relay.op.nn import _nn

from .util import is_packed_layout
from ..environment import get_env

# override to force partition at copy
reg.register_pattern("copy", OpPattern.INJECTIVE, level=15)
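
# The registrations below use level=15 so they take precedence over the
# default registrations for these operators; the pattern override on "copy"
# above is what forces graph partitioning to happen at copy operations.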


@reg.register_compute("clip", level=15)
def compute_clip(attrs, inputs, output_type, target):
    """ Clip operator. """
    x = inputs[0]
    a_min = attrs.a_min
    a_max = attrs.a_max
    const_min = tvm.const(a_min, x.dtype)
    const_max = tvm.const(a_max, x.dtype)
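    # Clip is expressed as two chained elementwise stages -- clamp from above
    # with a_max, then from below with a_min -- so each stage can be lowered
    # to a simple vector min/max on the VTA ALU. Both stages carry the
    # ELEMWISE tag so the schedules treat them as fusable injective ops.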
    with tvm.tag_scope(topi.tag.ELEMWISE):
        x = tvm.compute(
            x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
        x = tvm.compute(
            x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
    return [x]


@reg.register_compute("nn.conv2d", level=15)
def compute_conv2d(attrs, inputs, output_type, target):
    """ Compute definition of conv2d """
    padding = topi.util.get_const_tuple(attrs.padding)
    strides = topi.util.get_const_tuple(attrs.strides)
    dilation = tuple([int(d) for d in attrs.dilation])
    groups = attrs.groups
    layout = attrs.data_layout
    out_dtype = attrs.out_dtype

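    # Dispatch: on VTA, a packed data layout produced by graph packing
    # (e.g. something like "NCHW1n16c", depending on the hardware
    # configuration) is lowered through topi.nn.conv2d /
    # topi.nn.group_conv2d_nchw, which are expected to resolve to the
    # VTA-specific implementations. Unpacked layouts fall back to the ARM CPU
    # compute, and non-VTA targets keep the default _nn definition.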
    if target.device_name == "vta":
        assert dilation == (1, 1), "support for dilation limited to (1, 1)"
        if is_packed_layout(layout):
            if groups == 1:
                assert groups == 1
                env = get_env()
                assert env.LOG_INP_WIDTH == 3, "only support 8bit inp for now"
                assert env.LOG_WGT_WIDTH == 3, "only support 8bit wgt for now"
                inputs = list(inputs)
                assert inputs[1].dtype == "int8"
                return [topi.nn.conv2d(inputs[0],
                                       inputs[1],
                                       strides,
                                       padding,
                                       dilation,
                                       layout,
                                       out_dtype)]
            return [topi.nn.group_conv2d_nchw(inputs[0],
                                              inputs[1],
                                              strides,
                                              padding,
                                              dilation,
                                              groups,
                                              out_dtype)]
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.compute_conv2d(attrs, inputs, output_type, target)

    # If VTA is not the target, default to _nn def
    return _nn.compute_conv2d(attrs, inputs, output_type, target)


@reg.register_schedule("nn.conv2d", level=15)
def schedule_conv2d(attrs, outs, target):
    """ Schedule definition of conv2d """
    groups = attrs.groups
    layout = attrs.data_layout

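    # Mirrors the compute dispatch above: packed layouts on VTA go through the
    # generic TOPI schedules (which are expected to dispatch to the schedules
    # registered for the "vta" device), unpacked layouts are scheduled for the
    # ARM CPU, and non-VTA targets keep the default _nn schedule.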
    if target.device_name == "vta":
        if is_packed_layout(layout):
            target = tvm.target.create(target)
            assert target.device_name == "vta"
            if groups == 1:
                return topi.generic.schedule_conv2d_nchw(outs)
            return topi.generic.schedule_group_conv2d_nchw(outs)
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.schedule_conv2d(attrs, outs, tvm.target.current_target())

    # If VTA is not the target, default to _nn def
    return _nn.schedule_conv2d(attrs, outs, target)


@reg.register_compute("nn.conv2d_transpose", level=15)
def compute_conv2d_transpose(attrs, inputs, output_type, target):
    """ Compute definition of 2D transposed convolution """
    padding = topi.util.get_const_tuple(attrs.padding)
    strides = topi.util.get_const_tuple(attrs.strides)
    dilation = tuple([int(d) for d in attrs.dilation])
    layout = attrs.data_layout
    out_dtype = attrs.out_dtype

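    # Same dispatch pattern as conv2d: on VTA, dilation other than (1, 1) is
    # rejected and packed layouts are lowered with
    # topi.nn.conv2d_transpose_nchw; unpacked layouts fall back to the ARM
    # CPU, and other targets keep the default _nn compute.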
    if target.device_name == "vta":
        assert dilation == (1, 1), "support for dilation limited to (1, 1)"
        if is_packed_layout(layout):
            return [topi.nn.conv2d_transpose_nchw(
                inputs[0], inputs[1], strides, padding, out_dtype)]
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.compute_conv2d_transpose(attrs, inputs, output_type, target)

    # If VTA is not the target, default to _nn def
    return _nn.compute_conv2d_transpose(attrs, inputs, output_type, target)


@reg.register_schedule("nn.conv2d_transpose", level=15)
def schedule_conv2d_transpose(attrs, outputs, target):
    """ Schedule definition of 2D transposed convolution """
    layout = attrs.data_layout

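    # Packed layouts on VTA return the NCHW conv2d_transpose schedule;
    # everything else is scheduled with the ARM CPU / default _nn definitions,
    # following the same pattern as the other overrides in this file.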
    if target.device_name == "vta":
        if is_packed_layout(layout):
            return topi.nn.schedule_conv2d_transpose_nchw(outputs)
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.schedule_conv2d_transpose(attrs, outputs, tvm.target.current_target())

    # If VTA is not the target, default to _nn def
    return _nn.schedule_conv2d_transpose(attrs, outputs, tvm.target.current_target())


@reg.register_compute("nn.dense", level=15)
def compute_dense(attrs, inputs, out_type, target):
    """Compute definition of dense"""
    out_dtype = attrs.out_dtype
    out_dtype = inputs[0].dtype if out_dtype == "" else out_dtype

    if target.device_name == "vta":
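        # A rank-4 input implies the data has already been packed into VTA's
        # tensorized layout, so the topi dense compute is used (expected to
        # dispatch to the VTA dense implementation); unpacked inputs fall
        # back to the ARM CPU path below.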
        if len(inputs[0].shape) == 4:  # this implies the layout is packed
            target = tvm.target.create(target)
            return [topi.nn.dense(inputs[0], inputs[1], None, out_dtype)]
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.compute_dense(attrs, inputs, out_type, target)

    # If VTA is not the target, default to _nn def
    return _nn.compute_dense(attrs, inputs, out_type, target)


@reg.register_schedule("nn.dense", level=15)
def schedule_dense(attrs, outs, target):
    """Schedule definition of dense"""
    if target.device_name == "vta":
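        # As in compute_dense, a rank-4 output signals the packed layout:
        # schedule with the generic dense schedule (expected to dispatch to
        # the VTA implementation); otherwise fall back to the ARM CPU /
        # default _nn schedules.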
        if len(outs[0].shape) == 4:  # this implies the layout is packed
            target = tvm.target.create(target)
            assert target.device_name == "vta"
            return topi.generic.schedule_dense(outs)
        # If it's not packed, run on ARM CPU
        with tvm.target.arm_cpu(tvm.target.current_target().model):
            return _nn.schedule_dense(attrs, outs, tvm.target.current_target())

    # If VTA is not the target, default to _nn def
    return _nn.schedule_dense(attrs, outs, target)