| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # pylint: disable=invalid-name, unused-argument |
| """Arm Compute Library supported operators.""" |
| import tvm |
| from tvm import relay |
| from tvm._ffi import register_func |
| from tvm.relay import transform |
| from tvm.relay.build_module import bind_params_by_name |
| from tvm.relay.expr import const |
| |
| from ...dataflow_pattern import is_constant, is_expr, is_op, wildcard |
| from ..strategy.generic import is_depthwise_conv2d |
| from .register import register_pattern_table |
| |
| |
| def is_arm_compute_runtime_enabled(): |
| """Check if the ACL graph executor is present. |
| |
| Returns |
| ------- |
| ret: bool |
| True if present, False if not. |
| """ |
| check_enabled = tvm.get_global_func("relay.op.is_arm_compute_runtime_enabled", True) |
| if check_enabled: |
| return check_enabled() |
| return False |
| |
| |
| def partition_for_arm_compute_lib(mod, params=None, **opts): |
| """Partition the graph greedily offloading supported |
| operators to Arm Compute Library. |
| |
| Parameters |
| ---------- |
| mod : Module |
| The module to run passes on. |
| params : Optional[Dict[str, NDArray]] |
| Constant input parameters. |
| |
| Returns |
| ------- |
    ret : Module
        The annotated and partitioned module.
| """ |
| if params: |
| mod["main"] = bind_params_by_name(mod["main"], params) |
| |
| seq = tvm.transform.Sequential( |
| [ |
| transform.InferType(), |
| transform.MergeComposite(arm_compute_lib_pattern_table()), |
| transform.AnnotateTarget("arm_compute_lib", False), |
| transform.PartitionGraph(), |
| ] |
| ) |
| |
| return seq(mod) |
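
# A minimal usage sketch (the frontend call and target string below are
# illustrative assumptions, not part of this module): partition a Relay
# module for ACL, then build as usual.
#
#   mod, params = relay.frontend.from_tflite(tflite_model, shape_dict, dtype_dict)
#   mod = partition_for_arm_compute_lib(mod, params)
#   lib = relay.build(mod, target="llvm -mtriple=aarch64-linux-gnu", params=params)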
| |
| |
| @register_func("relay.ext.arm_compute_lib.optimize") |
| def preprocess_module(mod): |
| """ |
| Pre-process a module containing functions ready for ACL codegen. For now we enforce OHWI |
| kernel layout and fold the transforms away. |
| |
| Parameters |
| ---------- |
| mod : Module |
| The module to run passes on. |
| |
| Returns |
| ------- |
    preprocessed_mod : Module
        The processed module.
| """ |
| |
| def convert_layout_conv2d(conv2d_function): |
| def convert_conv(attrs, inputs, tinfos, desired_layouts): |
| new_attrs = dict(attrs) |
| data_info = tinfos[0] |
| weight_info = tinfos[1] |
| desired_data_layout, desired_kernel_layout = map(str, desired_layouts) |
| new_attrs["data_layout"] = desired_data_layout |
| new_attrs["kernel_layout"] = desired_kernel_layout |
| |
| if is_depthwise_conv2d( |
| data_info.shape, |
| attrs["data_layout"], |
| weight_info.shape, |
| attrs["kernel_layout"], |
| attrs["groups"], |
| ): |
| dkl = desired_kernel_layout |
| new_attrs["kernel_layout"] = dkl[3] + dkl[1:3] + dkl[0] |
| return conv2d_function(*inputs, **new_attrs) |
| |
| return convert_conv |
| |
| with OpAttrContext( |
| "nn.conv2d", "FTVMConvertOpLayout", convert_layout_conv2d(tvm.relay.nn.conv2d) |
| ), OpAttrContext( |
| "qnn.conv2d", "FTVMConvertOpLayout", convert_layout_conv2d(tvm.relay.qnn.op.conv2d) |
| ): |
| seq = tvm.transform.Sequential( |
| [ |
| transform.ConvertLayout( |
| {"nn.conv2d": ["NHWC", "OHWI"], "qnn.conv2d": ["NHWC", "OHWI"]} |
| ), |
| transform.FoldConstant(), |
| ] |
| ) |
| preprocessed_mod = seq(mod) |
| return preprocessed_mod |
| |
| |
| @register_pattern_table("arm_compute_lib") |
| def arm_compute_lib_pattern_table(): |
| """Get the ACL pattern table.""" |
| |
| def conv_pattern(): |
| """Create a convolution pattern. |
| |
| Returns |
| ------- |
| pattern : dataflow_pattern.AltPattern |
| Denotes the convolution pattern. |
| """ |
| pattern = is_op("nn.pad")(wildcard(), wildcard()) | wildcard() |
| pattern = is_op("nn.conv2d")(pattern, is_constant()) |
| pattern = pattern.optional(lambda x: is_op("nn.bias_add")(x, is_constant())) |
| pattern = pattern.optional(is_op("nn.relu")) |
| return pattern |
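
    # For reference, the pattern above matches sequences such as (optional ops
    # in brackets):
    #   [nn.pad] -> nn.conv2d -> [nn.bias_add] -> [nn.relu]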
| |
| def qnn_conv_pattern(): |
| """Create a quantized convolution pattern. |
| |
| Returns |
| ------- |
| pattern : dataflow_pattern.AltPattern |
            Denotes the quantized convolution pattern.
| """ |
| pattern = is_op("nn.pad")(wildcard(), wildcard()) | wildcard() |
| pattern = is_op("qnn.conv2d")( |
| pattern, is_constant(), is_constant(), is_constant(), is_constant(), is_constant() |
| ) |
| pattern = pattern.optional(lambda x: is_op("nn.bias_add")(x, is_constant())) |
| pattern = pattern.optional(is_op("nn.relu")) |
| pattern = is_op("qnn.requantize")( |
| pattern, wildcard(), wildcard(), is_constant(), is_constant() |
| ) |
| return pattern |
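
    # For reference, the quantized pattern matched above is (optional ops in
    # brackets; the trailing requantize is mandatory):
    #   [nn.pad] -> qnn.conv2d -> [nn.bias_add] -> [nn.relu] -> qnn.requantize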
| |
| def dense_pattern(): |
| """Create a dense (fully-connected) pattern. |
| |
| Returns |
| ------- |
| pattern : dataflow_pattern.AltPattern |
            Denotes the dense pattern.
| """ |
| pattern = is_op("nn.dense")(wildcard(), is_constant()) |
| pattern = pattern.optional(lambda x: is_op("nn.bias_add")(x, is_constant())) |
| return pattern |
| |
| def qnn_dense_pattern(): |
| """Create a quantized dense (fully-connected) pattern. |
| |
| Returns |
| ------- |
| pattern : dataflow_pattern.AltPattern |
            Denotes the quantized dense pattern.
| """ |
| pattern = is_op("qnn.dense")( |
| wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant() |
| ) |
| pattern = pattern.optional(lambda x: is_op("nn.bias_add")(x, is_constant())) |
| pattern = is_op("qnn.requantize")( |
| pattern, wildcard(), wildcard(), is_constant(), is_constant() |
| ) |
| return pattern |
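
    # For reference, the quantized pattern matched above is (optional ops in
    # brackets):
    #   qnn.dense -> [nn.bias_add] -> qnn.requantize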
| |
| def avg_pool2d_pattern(): |
| """Creates a pattern that matches either quantized |
| avg_pool2d or quantized global_avg_pool2d. |
| |
| Returns |
| ------- |
| pattern : dataflow_pattern.AltPattern |
            Denotes the average pooling pattern.
| """ |
| pattern = is_op("cast")(wildcard()) |
| pattern = is_op("nn.avg_pool2d")(pattern) | is_op("nn.global_avg_pool2d")(pattern) |
| pattern = is_op("cast")(pattern) |
| return pattern |
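
    # For reference, quantized (global_)avg_pool2d is expected in the composite
    # form frontends typically produce:
    #   cast(uint8 -> int32) -> (global_)avg_pool2d -> cast(int32 -> uint8)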
| |
| def l2_pool2d_pattern(): |
| """Create an l2 pooling pattern from equivalent relay operators. |
| |
| Returns |
| ------- |
| pattern : dataflow_pattern.AltPattern |
            Denotes the l2 pooling pattern.
| """ |
| pattern = is_op("power")(wildcard(), is_expr(const(2.0))) |
| pattern = is_op("nn.avg_pool2d")(pattern) |
| pattern = is_op("sqrt")(pattern) |
| return pattern |
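
    # For reference, the equivalent Relay decomposition being matched is:
    #   l2_pool2d(x) = sqrt(avg_pool2d(power(x, 2.0)))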
| |
| def check_conv(extract): |
| """Check conv pattern is supported by ACL.""" |
| call = extract |
| while call.op.name != "nn.conv2d": |
| call = call.args[0] |
| return conv2d(call) |
| |
| def check_qnn_conv(extract): |
| """Check qnn conv pattern is supported by ACL.""" |
| if extract.attrs.out_dtype != "uint8": |
| return False |
| call = extract |
| while call.op.name != "qnn.conv2d": |
| call = call.args[0] |
| return qnn_conv2d(call) |
| |
| def check_dense(extract): |
| """Check conv pattern is supported by ACL.""" |
| call = extract |
| while call.op.name != "nn.dense": |
| call = call.args[0] |
| return dense(call) |
| |
| def check_qnn_dense(extract): |
| """Check qnn conv pattern is supported by ACL.""" |
| if extract.attrs.out_dtype != "uint8": |
| return False |
| call = extract |
| while call.op.name != "qnn.dense": |
| call = call.args[0] |
| return qnn_dense(call) |
| |
| def check_avg_pool2d(extract): |
| """Check average pool2d pattern is supported by ACL.""" |
| if extract.attrs.dtype != "uint8": |
| return False |
| pool = extract.args[0] |
| if pool.args[0].attrs.dtype != "int32": |
| return False |
| return avg_pool2d(pool, from_quantized_composite=True) |
| |
| def check_l2_pool2d(extract): |
| """Check l2 pool2d pattern is supported by ACL.""" |
| pool = extract.args[0] |
| return avg_pool2d(pool) |
| |
| return [ |
| ("arm_compute_lib.conv2d", conv_pattern(), check_conv), |
| ("arm_compute_lib.qnn_conv2d", qnn_conv_pattern(), check_qnn_conv), |
| ("arm_compute_lib.dense", dense_pattern(), check_dense), |
| ("arm_compute_lib.qnn_dense", qnn_dense_pattern(), check_qnn_dense), |
| ("arm_compute_lib.qnn_conv2d", qnn_conv_pattern(), check_qnn_conv), |
| ("arm_compute_lib.avg_pool2d", avg_pool2d_pattern(), check_avg_pool2d), |
| ("arm_compute_lib.l2_pool2d", l2_pool2d_pattern(), check_l2_pool2d), |
| ] |
| |
| |
def _register_external_op_helper(op_name, supported=True):
    """Register an op as supported (or not supported) by the ACL codegen."""

    @tvm.ir.register_op_attr(op_name, "target.arm_compute_lib")
    def _func_wrapper(expr):
        return supported

    return _func_wrapper
| |
| |
| _register_external_op_helper("reshape") |
| |
| |
| @tvm.ir.register_op_attr("nn.conv2d", "target.arm_compute_lib") |
| def conv2d(expr): |
| """Check if the external ACL codegen for conv2d should be used.""" |
| attrs, args = expr.attrs, expr.args |
| if attrs.data_layout != "NHWC": |
| return False |
| if attrs.out_dtype != "float32" and attrs.out_dtype != "": |
| return False |
| data_typ = args[0].checked_type |
| if len(data_typ.shape) != 4 or data_typ.shape[0] != 1 or data_typ.dtype != "float32": |
| return False |
| kernel_typ = args[1].checked_type |
| if len(kernel_typ.shape) != 4 or kernel_typ.dtype != "float32": |
| return False |
| is_depthwise = is_depthwise_conv2d( |
| data_typ.shape, |
| attrs["data_layout"], |
| kernel_typ.shape, |
| attrs["kernel_layout"], |
| attrs["groups"], |
| ) |
    if is_depthwise:
        return depthwise_conv2d(attrs, args)
    # ACL doesn't support grouped convolution
    if attrs.groups != 1:
        return False
| return True |
| |
| |
| def qnn_conv2d(expr): |
| """Check if the external ACL codegen for qnn.conv2d should be used.""" |
| attrs, args = expr.attrs, expr.args |
| |
| if attrs.data_layout != "NHWC": |
| return False |
| if attrs.out_dtype != "int32" and attrs.out_dtype != "": |
| return False |
| data_typ = args[0].checked_type |
| if len(data_typ.shape) != 4 or data_typ.shape[0] != 1 or data_typ.dtype != "uint8": |
| return False |
| kernel_typ = args[1].checked_type |
| if len(kernel_typ.shape) != 4 or kernel_typ.dtype != "uint8": |
| return False |
| is_depthwise = is_depthwise_conv2d( |
| data_typ.shape, |
| attrs["data_layout"], |
| kernel_typ.shape, |
| attrs["kernel_layout"], |
| attrs["groups"], |
| ) |
    if is_depthwise:
        return depthwise_conv2d(attrs, args)
    # ACL doesn't support grouped convolution
    if attrs.groups != 1:
        return False
| return True |
| |
| |
| def depthwise_conv2d(attrs, args): |
| """Check if the external ACL codegen for depthwise convolution should be used. |
| |
| Note |
| ---- |
| Relay does not have a depthwise conv2d operator whilst ACL does. We simply |
| separate the checks for depthwise for clarity. |
| """ |
| kernel_typ = args[1].checked_type |
    # ACL only supports 3x3 and 5x5 square depthwise kernels
| if ( |
| kernel_typ.shape[0] not in [3, 5] |
| or kernel_typ.shape[1] not in [3, 5] |
| or kernel_typ.shape[0] != kernel_typ.shape[1] |
| ): |
| return False |
| # Stride must be (1, 1) or (2, 2) |
| if (attrs.strides[0], attrs.strides[1]) not in [(1, 1), (2, 2)]: |
| return False |
| return True |
| |
| |
| @tvm.ir.register_op_attr("nn.dense", "target.arm_compute_lib") |
| def dense(expr): |
| """Check if the external ACL codegen for dense should be used.""" |
| attrs, args = expr.attrs, expr.args |
| data_typ = args[0].checked_type |
| if data_typ.dtype != "float32": |
| return False |
| kernel_typ = args[1].checked_type |
| if len(kernel_typ.shape) != 2 or kernel_typ.dtype != "float32": |
| return False |
| if attrs.out_dtype != "float32" and attrs.out_dtype != "": |
| return False |
| return True |
| |
| |
| def qnn_dense(expr): |
| """Check if the external ACL codegen for qnn.dense should be used.""" |
| attrs, args = expr.attrs, expr.args |
| data_typ = args[0].checked_type |
| if data_typ.dtype != "uint8": |
| return False |
| kernel_typ = args[1].checked_type |
| if len(kernel_typ.shape) != 2 or kernel_typ.dtype != "uint8": |
| return False |
| if attrs.out_dtype != "int32": |
| return False |
| return True |
| |
| |
| @tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib") |
| def max_pool2d(expr): |
| """Check if the external ACL codegen for maxpool2d should be used.""" |
| attrs, args = expr.attrs, expr.args |
| if attrs.layout != "NHWC": |
| return False |
| typ = args[0].checked_type |
| if typ.dtype not in ["float32", "uint8"]: |
| return False |
| return True |
| |
| |
| @tvm.ir.register_op_attr("nn.avg_pool2d", "target.arm_compute_lib") |
| def avg_pool2d(expr, from_quantized_composite=False): |
| """Check if the external ACL codegen for avgpool2d should be used.""" |
| attrs, args = expr.attrs, expr.args |
| typ = args[0].checked_type |
| |
| if from_quantized_composite: |
| if typ.dtype != "int32": |
| return False |
| else: |
| if typ.dtype not in ["float32"]: |
| return False |
| if attrs.layout != "NHWC": |
| return False |
| |
| return True |
| |
| |
| @tvm.ir.register_op_attr("nn.global_max_pool2d", "target.arm_compute_lib") |
| def global_max_pool2d(expr): |
| """Check if the external ACL codegen for gloval_maxpool2d should be used.""" |
| attrs, args = expr.attrs, expr.args |
| typ = args[0].checked_type |
| if typ.dtype not in ["float32", "uint8"]: |
| return False |
| if attrs.layout != "NHWC": |
| return False |
| return True |
| |
| |
| @tvm.ir.register_op_attr("nn.global_avg_pool2d", "target.arm_compute_lib") |
| def global_avg_pool2d(expr): |
| """Check if the external ACL codegen for global_avgpool2d should be used.""" |
| attrs, args = expr.attrs, expr.args |
| typ = args[0].checked_type |
| if typ.dtype not in ["float32"]: |
| return False |
| if attrs.layout != "NHWC": |
| return False |
| return True |
| |
| |
| @tvm.ir.register_op_attr("maximum", "target.arm_compute_lib") |
| def maximum(expr): |
| """Check if the external ACL codegen for maximum should be used.""" |
| args = expr.args |
| type_a = args[0].checked_type |
    type_b = args[1].checked_type
| return (type_a.dtype == "float32") and (type_b.dtype == "float32") |
| |
| |
| @tvm.ir.register_op_attr("add", "target.arm_compute_lib") |
| def add(expr): |
| """Check if the external ACL codegen for add should be used.""" |
| args = expr.args |
| for typ in [args[0].checked_type, args[1].checked_type]: |
| if typ.dtype != "float32": |
| return False |
| |
| return True |
| |
| |
| @tvm.ir.register_op_attr("qnn.add", "target.arm_compute_lib") |
| def qnn_add(expr): |
| """Check if the external ACL codegen for add should be used.""" |
| args = expr.args |
| for typ in [args[0].checked_type, args[1].checked_type]: |
| if typ.dtype != "uint8": |
| return False |
| |
| return True |
| |
| |
| class OpAttrContext(object): |
| """ Temporarily changes the attr of an op. """ |
| |
| def __init__(self, op_name, attr_key, attr_value): |
| """Saves the required info for RAII pattern usage. |
| |
| Parameters |
| ---------- |
| op_name : str |
| The op name. |
| |
| attr_key : str |
| The attribute name. |
| |
| attr_value : object |
| The attribute value. |
| """ |
| self.op = relay.op.get(op_name) |
| self.attr_key = attr_key |
| self.attr_value = attr_value |
| |
| def __enter__(self): |
| self.older_attr = self.op.get_attr(self.attr_key) |
| self.op.reset_attr(self.attr_key) |
| self.op.set_attr(self.attr_key, self.attr_value) |
| return self |
| |
| def __exit__(self, ptype, value, trace): |
| self.op.reset_attr(self.attr_key) |
| if self.older_attr: |
| self.op.set_attr(self.attr_key, self.older_attr) |
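
# A minimal usage sketch (my_convert_fn is a hypothetical placeholder):
# temporarily override an op attribute, restoring the previous value, if any,
# on exit.
#
#   with OpAttrContext("nn.conv2d", "FTVMConvertOpLayout", my_convert_fn):
#       mod = transform.ConvertLayout({"nn.conv2d": ["NHWC", "OHWI"]})(mod)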