| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # pylint: disable=invalid-name, unused-argument, import-outside-toplevel, no-value-for-parameter |
| """A set of passes to legalize some of operations for the NPU""" |
| from typing import List, Type, Callable |
| import math |
| |
| import numpy as np # type: ignore |
| from ethosu.vela import scaling, fp_math |
| |
| import tvm # type: ignore |
| from tvm import relay |
| from tvm.relay.dataflow_pattern import DFPatternCallback # type: ignore |
| from tvm.relay.dataflow_pattern import wildcard |
| from tvm.relay.dataflow_pattern import is_op |
| from tvm.relay.dataflow_pattern import rewrite |
| from tvm.relay.dataflow_pattern import CallPattern |
| from tvm.relay.backend.contrib.ethosu import op as ethosu_ops # type: ignore |
| from tvm.relay.backend.contrib.ethosu import vela_api |
| from tvm.relay.backend.contrib.ethosu import util |
| from tvm.relay.op.contrib import ethosu as ethosu_patterns # type: ignore |
| |
| |
| class SplitRewriter(DFPatternCallback): |
| """This rewriting converts split operations into a sequence of |
| strided_slice operations, because codegen is going to be based |
| on strided_slices that will define the slice of the tensor that |
| will be fed to the consumer. |
| """ |
| |
| def __init__(self): |
| super().__init__(require_type=True) |
| self.split_in = wildcard() |
| self.pattern = is_op("split")(self.split_in) |
| |
| @staticmethod |
| def get_section_begin_coords(split: tvm.relay.Expr) -> List[int]: |
| """Currently, the split operator takes an array of indices or an integer |
| indicating the number of splits. However, its an array of indices could |
| represent both cases, therefore this function just make it an array of |
| indices where each index represent the co-ordinate of beginning of each |
| section -- defines as section begins. |
| |
| Parameters |
| ---------- |
| split : tvm.relay.Expr |
| The Relay Call expression for a split operator |
| |
| Returns |
| ------- |
| section_begins : List[int] |
| A list containing integers corresponding to section |
| begins |
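
        Examples
        --------
        Illustrative: for an input whose split axis has length 9,
        ``indices_or_sections=3`` yields section begins ``[0, 3, 6]``,
        while ``indices_or_sections=[3, 7]`` yields ``[0, 3, 7]``.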
| """ |
| indices_or_sections = split.attrs.indices_or_sections |
| input_shape = split.args[0].checked_type.shape |
| split_axis = split.attrs.axis |
| |
| if isinstance(indices_or_sections, tvm.ir.container.Array): |
| # 0 is the beginning of the first section. |
| return [0] + list(indices_or_sections) |
| split_axis_len = input_shape[split_axis].value |
| section_length = split_axis_len // indices_or_sections.value |
| return list(range(0, split_axis_len, section_length)) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| split_input = post.args[0] |
| split_begins = list() |
| split_ends = list() |
| section_begins_in_split_axis = self.get_section_begin_coords(post) |
        for split_coord in section_begins_in_split_axis:
            # first begin is [0, 0, ... , 0]
            begin_shape = [0 for i in range(len(split_input.checked_type.shape))]
            begin_shape[post.attrs.axis] = split_coord
            split_begins.append(begin_shape)

            end_shape = list(split_input.checked_type.shape)
            # Only the split axis coordinate changes
            end_shape[post.attrs.axis] = split_coord
            split_ends.append(end_shape)
| |
        # Coordinates need to be shifted left because the beginning
        # of the next section is the end of the previous one
| split_ends = split_ends[1:] |
| # Last section end is the shape of the tensor itself. |
| split_ends.append(list(split_input.checked_type.shape)) |
| |
| strided_slices = list() |
| for sb, se in zip(split_begins, split_ends): |
| strided_slices.append(relay.strided_slice(split_input, sb, se)) |
| |
| return relay.Tuple(strided_slices) |
| |
| |
| class PartitionedSplitRewriter(DFPatternCallback): |
| """This pass brings the split out of the partitioned function""" |
| |
| def __init__(self): |
| super().__init__(require_type=True, rewrite_once=True) |
| self.pattern = ( |
| wildcard().has_attr({"Composite": ethosu_patterns.SplitParams.composite_name}) |
| )(wildcard()) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| split_input = post.args[0] |
| split_params = ethosu_patterns.SplitParams(post.op.body) |
| indices_or_sections = split_params.indices_or_sections |
| axis = split_params.axis |
| return relay.op.split(split_input, indices_or_sections, axis=axis).astuple() |
| |
| |
| def get_lut_from_func( |
| ifm_scale: float, |
| ifm_zp: int, |
| ofm_scale: float, |
| ofm_zp: int, |
| func: Callable[[float], float], |
| ) -> List[int]: |
| """Calculates the values of the lookup table based on the calculation function""" |
| |
| lut_values = list() |
| # Only int8 is currently supported |
| dtype = np.int8 |
| qmin, qmax = np.iinfo(dtype).min, np.iinfo(dtype).max |
| for x in range(qmin, qmax + 1): |
| x_real = ifm_scale * (x - ifm_zp) |
| out_real = func(x_real) |
| lut_result = int(util.round_away_zero(ofm_zp + out_real / ofm_scale)) |
| lut_result = min(qmax, max(qmin, lut_result)) |
| lut_values.append(lut_result) |
| |
| return lut_values |
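
# Illustrative usage of get_lut_from_func (hypothetical quantization
# parameters): build a 256-entry int8 tanh table.
#
#   tanh_lut = get_lut_from_func(0.02, 0, 0.02, 0, math.tanh)
#   assert len(tanh_lut) == 256
#   assert all(-128 <= v <= 127 for v in tanh_lut)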
| |
| |
| class LutActivationRewriter(DFPatternCallback): |
| """A class to create an identity operator with the LUT""" |
| |
| def __init__( |
| self, |
| params_class: Type, |
| activation_type: str, |
| calc_func: Callable[[float], float], |
| ): |
| super().__init__(require_type=True, rewrite_once=True) |
| self.params_class = params_class |
| self.pattern = (wildcard().has_attr({"Composite": params_class.composite_name}))(wildcard()) |
| self.activation_type = activation_type |
| self.calc_func = calc_func |
| |
| def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map): |
| params = self.params_class(post.op.body) |
| params.ifm.tensor = post.args[0] |
| |
| input_scale = float(params.ifm.q_params.scale_f32) |
| input_zp = int(params.ifm.q_params.zero_point) |
| output_scale = float(params.ofm.q_params.scale_f32) |
| output_zp = int(params.ofm.q_params.zero_point) |
| |
| lut_values = get_lut_from_func( |
| input_scale, |
| input_zp, |
| output_scale, |
| output_zp, |
| self.calc_func, |
| ) |
| lut = relay.const(lut_values, dtype=params.ifm.dtype) |
| |
| # We baked the requantization into the LUT, so we don't requantize the identity operator |
| identity = ethosu_ops.ethosu_identity( |
| ifm=params.ifm.tensor, |
| lut=lut, |
| ifm_scale=input_scale, |
| ifm_zero_point=input_zp, |
| ofm_scale=input_scale, |
| ofm_zero_point=input_zp, |
| activation=self.activation_type, |
| ) |
| |
| return identity |
| |
| |
| class TanhRewriter(LutActivationRewriter): |
| """This pass adds tanh as a LUT to the identity operator""" |
| |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.TanhParams, activation_type="TANH", calc_func=math.tanh |
| ) |
| |
| |
| def sigmoid_calc_func(x: float) -> float: |
| """Function to calculate the values for sigmoid""" |
| # These limits are inherited from TFLite |
| upper_limit = 8.0 |
| lower_limit = -8.0 |
| |
| if x <= lower_limit: |
| y = 0.0 |
| elif x >= upper_limit: |
| y = 1.0 |
| else: |
| y = 1 / (1 + math.exp(-x)) |
| return y |
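
# Illustrative values: sigmoid_calc_func(0.0) == 0.5, while inputs at or
# beyond the TFLite limits saturate, e.g. sigmoid_calc_func(-8.0) == 0.0
# and sigmoid_calc_func(8.0) == 1.0.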
| |
| |
| class SigmoidRewriter(LutActivationRewriter): |
| """This pass adds sigmoid as a LUT for identity op""" |
| |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.SigmoidParams, |
| activation_type="SIGMOID", |
| calc_func=sigmoid_calc_func, |
| ) |
| |
| |
| def leaky_relu_calc_func(x: float, alpha: float) -> float: |
| """Function to calculate the values for leaky relu.""" |
| return x if x >= 0 else x * alpha |
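
# Illustrative values: leaky_relu_calc_func(-2.0, alpha=0.1) == -0.2 and
# leaky_relu_calc_func(3.0, alpha=0.1) == 3.0.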
| |
| |
| class LeakyReLURewriter(DFPatternCallback): |
| """This pass adds leaky relu as a LUT for identity op.""" |
| |
| def __init__(self): |
| super().__init__(require_type=True, rewrite_once=True) |
| self.params_class = ethosu_patterns.LeakyReLUParams |
| self.pattern = wildcard().has_attr({"Composite": self.params_class.composite_name})( |
| wildcard() |
| ) |
| |
| def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map): |
| params = self.params_class(post.op.body) |
| params.ifm.tensor = post.args[0] |
| |
| input_scale = np.double(float(params.ifm.q_params.scale_f32)) |
| input_zp = int(params.ifm.q_params.zero_point) |
| output_scale = np.double(float(params.ofm.q_params.scale_f32)) |
| output_zp = int(params.ofm.q_params.zero_point) |
| |
| alpha = params.alpha |
| |
| # The calculation of the LUT values is similar to that in Vela |
| # convert_lrelu_to_lut(op, arch) |
| # (https://review.mlplatform.org/plugins/gitiles/ml/ethos-u/ethos-u-vela/+/refs/tags/3.2.0/ethosu/vela/tflite_graph_optimiser.py#864) # pylint: disable=line-too-long |
| alpha_scalar = 1 |
| alpha_scale, alpha_shift = scaling.elementwise_mul_scale(input_scale, alpha, output_scale) |
| identity_scale, identity_shift = scaling.elementwise_mul_scale(input_scale, 1, output_scale) |
| |
| dtype = params.ifm.dtype |
| qmin, qmax = np.iinfo(dtype).min, np.iinfo(dtype).max |
| |
| def calculate_lut_value(i): |
| zp_shift = ( |
| fp_math.multiply_by_quantized_multiplier( |
| alpha_scalar * (i - input_zp), alpha_scale, alpha_shift |
| ) |
| if i < input_zp |
| else fp_math.multiply_by_quantized_multiplier( |
| i - input_zp, identity_scale, identity_shift |
| ) |
| ) |
| |
| return min(qmax, max(qmin, output_zp + zp_shift)) |
| |
| values = list(map(calculate_lut_value, range(qmin, qmax + 1))) |
| lut = relay.const(values, dtype=dtype) |
| |
| # We baked the requantization into the LUT, so we don't requantize the identity operator |
| identity = ethosu_ops.ethosu_identity( |
| ifm=params.ifm.tensor, |
| lut=lut, |
| ifm_scale=input_scale, |
| ifm_zero_point=input_zp, |
| ofm_scale=input_scale, |
| ofm_zero_point=input_zp, |
| activation="LUT", |
| ) |
| |
| return identity |
| |
| |
| class HardSwishRewriter(DFPatternCallback): |
| """Convert ethosu.hard_swish composite function to add operation with LUT.""" |
| |
| def __init__(self): |
| super().__init__(require_type=True, rewrite_once=True) |
| self.params_class = ethosu_patterns.HardSwishParams |
| self.pattern = wildcard().has_attr({"Composite": self.params_class.composite_name})( |
| wildcard() |
| ) |
| |
| def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map): |
| params = self.params_class(post.op.body) |
| params.ifm.tensor = post.args[0] |
| |
| # The calculation of the LUT values is similar to that in Vela |
| # convert_hardswish_to_lut(op, arch, nng) |
| # (https://review.mlplatform.org/plugins/gitiles/ml/ethos-u/ethos-u-vela/+/refs/tags/3.2.0/ethosu/vela/tflite_graph_optimiser.py#719) # pylint: disable=line-too-long |
| input_scale = np.double(params.ifm.q_params.scale_f32) |
| input_zp = int(params.ifm.q_params.zero_point) |
| hires_input_scale = (1 / 128) * input_scale |
| |
| output_scale = np.double(params.ofm.q_params.scale_f32) |
| output_zp = int(params.ofm.q_params.zero_point) |
| output_scale, output_shift = scaling.quantise_scale(hires_input_scale / output_scale) |
| output_scale_16 = fp_math.downscale_multiplier_int32_to_int16(output_scale) |
| output_shift = 31 - output_shift |
| output_shift = -output_shift if output_shift < 0 else 0 |
| |
| dtype = params.ifm.dtype |
| qmin, qmax = np.iinfo(dtype).min, np.iinfo(dtype).max |
| |
| def calculate_relu_multiplier(inp, input_scale): |
| rmultiplier = np.double(3 / 32768) |
| rscale, rshift = scaling.quantise_scale(input_scale / rmultiplier) |
| rscale_16 = fp_math.downscale_multiplier_int32_to_int16(rscale) |
| |
| rvalue = np.int16(inp) |
| if rshift < 31: |
| rvalue = fp_math.shift_left16(rvalue, 30 - rshift) |
| rvalue = fp_math.saturating_rounding_mul16(rvalue, rscale_16) |
| rvalue = fp_math.shift_left16(rvalue, 1) |
| elif rshift > 31: |
| rvalue = fp_math.saturating_rounding_mul16(rvalue, rscale_16) |
| rvalue = fp_math.rounding_divide_by_pot(rvalue, rshift - 31) |
| else: |
| rvalue = fp_math.saturating_rounding_mul16(rvalue, rscale_16) |
| |
| rvalue = (rvalue + (1 << 15)) >> 1 |
| return rvalue |
| |
| def calculate_lut_values(i): |
| hires_input_value = (i - input_zp) * 128 |
| preshift_input_value = fp_math.saturating_rounding_mul16( |
| hires_input_value, output_scale_16 |
| ) |
| relu_value = calculate_relu_multiplier(hires_input_value, hires_input_scale) |
| lut_result = fp_math.saturating_mul16(relu_value, preshift_input_value) |
| lut_result = fp_math.rounding_divide_by_pot(lut_result, output_shift) + output_zp |
| return min(qmax, max(qmin, lut_result)) |
| |
| values = list(map(calculate_lut_values, range(-128, 128))) |
| lut = relay.const(values, dtype=dtype) |
| |
| # We baked the requantization into the LUT, so we don't requantize the identity operator |
| identity = ethosu_ops.ethosu_identity( |
| ifm=params.ifm.tensor, |
| lut=lut, |
| ifm_scale=input_scale, |
| ifm_zero_point=input_zp, |
| ofm_scale=input_scale, |
| ofm_zero_point=input_zp, |
| activation="LUT", |
| ) |
| |
| return identity |
| |
| |
| class Conv2DRewriter(DFPatternCallback): |
| """Convert conv2d related composite functions into ethosu_conv2d operators""" |
| |
| def __init__(self): |
| super().__init__(require_type=True) |
| self.pattern = (wildcard().has_attr({"Composite": "ethos-u.qnn_conv2d"}))(wildcard()) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = ethosu_patterns.QnnConv2DParams(post.op.body) |
| params.ifm.tensor = post.args[0] |
| channels_map = { |
| "NHWC": 3, |
| } |
| kernel_size_map = { |
| "HWIO": params.weights.shape[0:2], |
| "OHWI": params.weights.shape[1:3], |
| "HWOI": params.weights.shape[0:2], |
| } |
| activation_map = {"clip": "CLIP"} |
| weight_to_ohwi_transform_map = {"HWIO": [3, 0, 1, 2]} |
| weights_values = params.weights.values |
| weights_values_ohwi = np.transpose( |
| weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)] |
| ) |
| if params.activation: |
| activation = activation_map[params.activation.op.name] |
| clip_min = int(params.activation.attrs.a_min) |
| clip_max = int(params.activation.attrs.a_max) |
| else: |
| activation = "NONE" |
| clip_min = 0 |
| clip_max = 0 |
| scale_bias = vela_api.pack_biases( |
| biases=params.biases.tensor.data.asnumpy(), |
| ifm_scale=params.ifm.q_params.scale_f32, |
| ifm_dtype=np.dtype(params.ifm.dtype), |
| weight_scales=params.weights.q_params.scale_f32, |
| ofm_scale=params.ofm.q_params.scale_f32, |
| is_activation_tanh_or_sigmoid=activation in ["TANH", "SIGMOID"], |
| ) |
| ethosu_conv2d = ethosu_ops.ethosu_conv2d( |
| ifm=post.args[0], |
| weight=relay.const(weights_values_ohwi, params.weights.values.dtype), |
| scale_bias=relay.const(scale_bias, "uint8"), |
| lut=relay.const([], dtype="int8"), |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=int(params.ifm.q_params.zero_point), |
| weight_zero_point=int(params.weights.q_params.zero_point), |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| kernel_shape=kernel_size_map[str(params.weights.layout)], |
| ofm_channels=params.ofm.shape[channels_map[str(params.ofm.layout)]], |
| strides=params.strides, |
| padding=params.padding, |
| dilation=params.dilation, |
| activation=activation, |
| clip_min=clip_min, |
| clip_max=clip_max, |
| upscale="NONE", |
| ifm_layout=str(params.ifm.layout), |
| ofm_layout=str(params.ofm.layout), |
| ) |
| return ethosu_conv2d |
| |
| |
| class Conv2DTransposeRewriter(DFPatternCallback): |
| """Convert conv2d_transpose related composite functions into |
| ethosu_conv2d_transpose operators.""" |
| |
| def __init__(self): |
| super().__init__(require_type=True) |
| self.pattern = (wildcard().has_attr({"Composite": "ethos-u.qnn_conv2d_transpose"}))( |
| wildcard() |
| ) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = ethosu_patterns.QnnConv2DTransposeParams(post.op.body) |
| params.ifm.tensor = post.args[0] |
| |
| ofm_shape = params.ofm.shape |
| legalize_padding = params.legalize_padding |
| |
| weight_to_ohwi_transform_map = {"IOHW": [1, 2, 3, 0]} |
| weights_values = params.weights.values |
| weights_values_ohwi = np.transpose( |
| weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)] |
| ) |
| weights_values_ohwi = np.flip(weights_values_ohwi, (1, 2)) |
| weights = relay.const(weights_values_ohwi, dtype=params.weights.values.dtype) |
| |
| bias_values = ( |
| params.biases.tensor.data.asnumpy() |
| if params.biases |
| else np.zeros((params.ifm.shape[-1])) |
| ) |
| scale_bias = vela_api.pack_biases( |
| biases=bias_values, |
| ifm_scale=params.ifm.q_params.scale_f32, |
| ifm_dtype=np.dtype(params.ifm.dtype), |
| weight_scales=params.weights.q_params.scale_f32, |
| ofm_scale=params.ofm.q_params.scale_f32, |
| is_activation_tanh_or_sigmoid=False, |
| ) |
| |
| reduced_op = ethosu_ops.ethosu_conv2d( |
| ifm=post.args[0], |
| weight=weights, |
| scale_bias=relay.const(scale_bias, "uint8"), |
| lut=relay.const([], dtype="int8"), |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=int(params.ifm.q_params.zero_point), |
| weight_zero_point=int(params.weights.q_params.zero_point), |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| kernel_shape=params.kernel_shape, |
| ofm_channels=int(ofm_shape[-1]), |
| strides=(1, 1), |
| padding=legalize_padding, |
| dilation=params.dilation, |
| ifm_layout=str(params.ifm.layout), |
| ofm_layout=str(params.ofm.layout), |
| upscale="ZEROS", |
| ) |
| |
| # Remove additional padding by 'cropping' back to expected size |
| return relay.strided_slice(reduced_op, (0, 0, 0, 0), ofm_shape) |
| |
| |
| class DepthwiseConv2DRewriter(DFPatternCallback): |
| """Convert ethosu.qnn_depthwise_conv2d composite functions to ethosu_depthwise_conv2d |
| operators""" |
| |
| def __init__(self): |
| super().__init__(require_type=True) |
| self.pattern = ( |
| wildcard().has_attr( |
| {"Composite": ethosu_patterns.QnnDepthwiseConv2DParams.composite_name} |
| ) |
| )(wildcard()) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = ethosu_patterns.QnnDepthwiseConv2DParams(post.op.body) |
| params.ifm.tensor = post.args[0] |
| channels_map = { |
| "NHWC": 3, |
| } |
| kernel_shape_map = { |
| "HWOI": params.weights.shape[0:2], |
| } |
| |
| weights_values = params.weights.values |
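        # Reorder the HWOI-layout weights to OHWI: H -> axis 1, W -> axis 2,
        # O -> axis 0, I stays at axis 3.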
| weights_values_ohwi = np.moveaxis(weights_values, [0, 1, 2, 3], [1, 2, 0, 3]) |
| |
| activation = "NONE" |
        # Activations requiring a LUT are not yet supported, so it is set to an empty constant
| lut = relay.const([], "int8") |
| clip_min = 0 |
| clip_max = 0 |
| if params.activation: |
| activation = ethosu_patterns.QnnDepthwiseConv2DParams.activation_map[ |
| params.activation.op.name |
| ] |
| if activation == "CLIP": |
| clip_min = int(params.activation.attrs.a_min) |
| clip_max = int(params.activation.attrs.a_max) |
| scale_bias = vela_api.pack_biases( |
| biases=params.biases.tensor.data.asnumpy(), |
| ifm_scale=params.ifm.q_params.scale_f32, |
| ifm_dtype=np.dtype(params.ifm.dtype), |
| weight_scales=params.weights.q_params.scale_f32, |
| ofm_scale=params.ofm.q_params.scale_f32, |
| is_activation_tanh_or_sigmoid=activation in ["TANH", "SIGMOID"], |
| ) |
| |
| ethosu_depthwise_conv2d = ethosu_ops.ethosu_depthwise_conv2d( |
| post.args[0], # IFM |
| relay.const(weights_values_ohwi, params.weights.values.dtype), |
| relay.const(scale_bias, "uint8"), |
| lut, |
| float(params.ifm.q_params.scale_f32), |
| int(params.ifm.q_params.zero_point), |
| int(params.weights.q_params.zero_point), |
| float(params.ofm.q_params.scale_f32), |
| int(params.ofm.q_params.zero_point), |
| kernel_shape_map[str(params.weights.layout)], |
| params.ofm.shape[channels_map[str(params.ofm.layout)]], |
| strides=params.strides, |
| padding=params.padding, |
| dilation=params.dilation, |
| activation=activation, |
| clip_min=clip_min, |
| clip_max=clip_max, |
| upscale="NONE", |
| ifm_layout=str(params.ifm.layout), |
| ofm_layout=str(params.ofm.layout), |
| ofm_dtype=str(params.ofm.dtype), |
| ) |
| return ethosu_depthwise_conv2d |
| |
| |
| class PoolingRewriter(DFPatternCallback): |
| """Convert ethosu.avgpool2d and ethosu.maxpool2d composite functions to |
| ethosu_pooling operators""" |
| |
| def __init__( |
| self, |
| params_class: Type, |
| pattern: CallPattern, |
| ): |
| super().__init__(require_type=True) |
| self.params_class = params_class |
| self.pattern = pattern |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = self.params_class(post.op.body) |
| params.ifm.tensor = post.args[0] |
| channels_map = { |
| "NHWC": 3, |
| } |
| |
| activation_map = {"clip": "CLIP"} |
| if params.activation: |
| activation = activation_map[params.activation.op.name] |
| clip_min = int(params.activation.attrs.a_min) |
| clip_max = int(params.activation.attrs.a_max) |
| else: |
| activation = "NONE" |
| clip_min = 0 |
| clip_max = 0 |
| |
        # Activations requiring a LUT are not yet supported, so it is set to an empty constant
| lut = relay.const([], dtype="int8") |
| |
| return ethosu_ops.ethosu_pooling( |
| ifm=post.args[0], |
| lut=lut, |
| pooling_type=params.pooling_type, |
| ifm_scale=params.ifm.q_params.scale_f32, |
| ifm_zero_point=params.ifm.q_params.zero_point, |
| ofm_scale=params.ofm.q_params.scale_f32, |
| ofm_zero_point=params.ofm.q_params.zero_point, |
| pool_shape=params.pool_shape, |
| ofm_channels=params.ofm.shape[channels_map[str(params.ofm.layout)]], |
| strides=params.strides, |
| padding=params.padding, |
| activation=activation, |
| clip_min=clip_min, |
| clip_max=clip_max, |
| upscale="NONE", |
| ifm_layout=str(params.ifm.layout), |
| ofm_layout=str(params.ofm.layout), |
| ) |
| |
| |
| class MaxPoolingRewriter(PoolingRewriter): |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.MaxPool2DParams, |
| pattern=( |
| wildcard().has_attr({"Composite": ethosu_patterns.MaxPool2DParams.composite_name}) |
| )(wildcard()), |
| ) |
| |
| |
| class AvgPoolingRewriter(PoolingRewriter): |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.AvgPool2DParams, |
| pattern=( |
| wildcard().has_attr({"Composite": ethosu_patterns.AvgPool2DParams.composite_name}) |
| )(wildcard()), |
| ) |
| |
| |
| class BinaryElementwiseRewriter(DFPatternCallback): |
| """Convert ethosu binary elementwise composite functions to |
| ethosu_binary_elementwise operators""" |
| |
| def __init__( |
| self, |
| params_class: Type, |
| pattern: CallPattern, |
| ): |
| super().__init__(require_type=True) |
| self.params_class = params_class |
| self.pattern = pattern |
| |
| @staticmethod |
| def reshape_input( |
| inputs: List["TensorParams"], |
| ) -> List[tvm.relay.Expr]: |
| """Reshape the inputs so that the following binary elementwise |
| operator receives 4-dimensional inputs. |
| |
| Parameters |
| ---------- |
| inputs: List[TensorParams] |
| The inputs to reshape. |
| |
| Returns |
| ------- |
| reshaped_inputs: List[tvm.relay.Expr] |
| The new reshaped inputs. |
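
        Examples
        --------
        Illustrative: an input of shape ``[20, 4]`` is reshaped to
        ``[1, 1, 20, 4]``, while a 4-dimensional input is returned unchanged.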
| """ |
| reshaped_inputs = [] |
| for i in inputs: |
| in_shape = i.shape |
| if len(in_shape) < 4: |
| pad_size = 4 - len(in_shape) |
| new_shape = ([1] * pad_size) + in_shape |
| new_call = relay.reshape(i.tensor, new_shape) |
| reshaped_inputs.append(new_call) |
| else: |
| reshaped_inputs.append(i.tensor) |
| return reshaped_inputs |
| |
| @staticmethod |
| def reshape_output(output: tvm.relay.Expr, ifm_input_shape: List[int]) -> tvm.relay.Expr: |
| """Reshape the output back to the original dimensionality. |
        Since the NPU must have the broadcastable tensor as the
| second operand, the original shape of the first ifm must |
| be the output shape. |
| |
| Parameters |
| ---------- |
| output: tvm.relay.Expr |
| The output to reshape. |
| |
| ifm_input_shape: List[int] |
| The shape of the non-reshaped ifm tensor. |
| |
| Returns |
| ------- |
| reshaped_output: tvm.relay.Expr |
| The reshaped output expression. |
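
        Examples
        --------
        Illustrative: with ``ifm_input_shape=[20, 4]`` the 4D elementwise
        output is reshaped back to ``[20, 4]``; a 4D ``ifm_input_shape``
        returns the output unchanged.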
| """ |
| if len(ifm_input_shape) == 4: |
| return output |
| reshaped_output = relay.reshape(output, ifm_input_shape) |
| return reshaped_output |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = self.params_class(post.op.body) |
| params.ifm.tensor = post.args[1] if params.reversed_operands else post.args[0] |
| params.ifm2.tensor = post.args[0] if params.reversed_operands else post.args[1] |
| |
| activation_map = {"clip": "CLIP"} |
| if params.activation: |
| activation = activation_map[params.activation.op.name] |
| clip_min = int(params.activation.attrs.a_min) |
| clip_max = int(params.activation.attrs.a_max) |
| else: |
| activation = "NONE" |
| clip_min = 0 |
| clip_max = 0 |
| |
| # We don't yet support activation functions that need to get legalized to LUTs. |
| lut = relay.const([], dtype="int8") |
| |
| inputs = [params.ifm, params.ifm2] |
| inputs = self.reshape_input(inputs) |
| |
| ethosu_binary_elementwise = ethosu_ops.ethosu_binary_elementwise( |
| ifm=inputs[0], |
| ifm2=inputs[1], |
| lut=lut, |
| operator_type=params.operator_type, |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=int(params.ifm.q_params.zero_point), |
| ifm2_scale=float(params.ifm2.q_params.scale_f32), |
| ifm2_zero_point=int(params.ifm2.q_params.zero_point), |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| ifm_channels=params.ifm.shape[-1] if params.ifm.shape else 1, |
| ifm2_channels=params.ifm2.shape[-1] if params.ifm2.shape else 1, |
| reversed_operands=params.reversed_operands, |
| ofm_dtype=params.ofm.dtype, |
| activation=activation, |
| clip_min=clip_min, |
| clip_max=clip_max, |
| ifm_layout=str(params.ifm.layout), |
| ifm2_layout=str(params.ifm2.layout), |
| ofm_layout=str(params.ofm.layout), |
| ) |
| output = self.reshape_output(ethosu_binary_elementwise, params.ifm.shape) |
| return output |
| |
| |
| class AddRewriter(BinaryElementwiseRewriter): |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.AddParams, |
| pattern=(wildcard().has_attr({"Composite": ethosu_patterns.AddParams.composite_name}))( |
| wildcard(), wildcard() |
| ), |
| ) |
| |
| |
| class SubRewriter(BinaryElementwiseRewriter): |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.SubParams, |
| pattern=(wildcard().has_attr({"Composite": ethosu_patterns.SubParams.composite_name}))( |
| wildcard(), wildcard() |
| ), |
| ) |
| |
| |
| class MulRewriter(BinaryElementwiseRewriter): |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.MulParams, |
| pattern=(wildcard().has_attr({"Composite": ethosu_patterns.MulParams.composite_name}))( |
| wildcard(), wildcard() |
| ), |
| ) |
| |
| |
| class MinRewriter(BinaryElementwiseRewriter): |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.MinParams, |
| pattern=(wildcard().has_attr({"Composite": ethosu_patterns.MinParams.composite_name}))( |
| wildcard(), wildcard() |
| ), |
| ) |
| |
| |
| class MaxRewriter(BinaryElementwiseRewriter): |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.MaxParams, |
| pattern=(wildcard().has_attr({"Composite": ethosu_patterns.MaxParams.composite_name}))( |
| wildcard(), wildcard() |
| ), |
| ) |
| |
| |
| class ShlRewriter(BinaryElementwiseRewriter): |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.ShlParams, |
| pattern=(wildcard().has_attr({"Composite": ethosu_patterns.ShlParams.composite_name}))( |
| wildcard(), wildcard() |
| ), |
| ) |
| |
| |
| class StridedSliceRewriter(DFPatternCallback): |
| """This pass brings the strided slice out of the partitioned function""" |
| |
| def __init__(self): |
| super().__init__(require_type=True, rewrite_once=True) |
| self.pattern = ( |
| wildcard().has_attr({"Composite": ethosu_patterns.StridedSliceParams.composite_name}) |
| )(wildcard()) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| |
| slice_input = post.args[0] |
| |
| # TODO(lhutton1) For an unknown reason compilation will fail for strides of 4 |
| # dimensions, so we cannot use params.strides as this will sometimes give |
| # strides as [1, 1, 1, 1]. Since we only support strides of 1, hardcoding this |
| # value for now. |
| strides = [1] |
| |
| params = ethosu_patterns.StridedSliceParams(post.op.body) |
| strided_slice = relay.op.strided_slice( |
| slice_input, |
| params.begin, |
| params.end, |
| strides=strides, |
| axes=params.axes, |
| slice_mode=params.slice_mode, |
| ) |
| return strided_slice |
| |
| |
| class ReshapeRewriter(DFPatternCallback): |
| """This pass brings the reshape out of the partitioned function""" |
| |
| def __init__(self): |
| super().__init__(require_type=True, rewrite_once=True) |
| self.pattern = ( |
| wildcard().has_attr({"Composite": ethosu_patterns.ReshapeParams.composite_name}) |
| )(wildcard()) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| reshape_input = post.args[0] |
| reshape_params = ethosu_patterns.ReshapeParams(post.op.body) |
| new_shape = reshape_params.new_shape |
| return relay.op.reshape(reshape_input, newshape=new_shape) |
| |
| |
| class NoOpRewriter(DFPatternCallback): |
| """This pass adds an idenity operator to reshape and strided slice to avoid a no op |
| without a consumer""" |
| |
| def __init__(self): |
| super().__init__(require_type=True, rewrite_once=True) |
| self.reshape = is_op("reshape")(wildcard()) |
| self.strided_slice = is_op("strided_slice")(wildcard()) |
| self.pattern = self.reshape | self.strided_slice |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| if pre.checked_type.dtype == "int32": |
| return post |
| return ethosu_ops.ethosu_identity(ifm=post, lut=relay.const([], dtype="int8")) |
| |
| |
| class UnaryElementwiseRewriter(DFPatternCallback): |
| """ |
| Convert ethosu unary elementwise composite function to |
| ethosu_unary_elementwise operators |
| """ |
| |
| def __init__(self, params_class: Type, pattern: CallPattern): |
| super().__init__(require_type=True) |
| self.params_class = params_class |
| self.pattern = pattern |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = self.params_class(post.op.body) |
| params.ifm.tensor = post.args[0] |
| |
| activation_map = {"clip": "CLIP"} |
| if params.activation: |
| activation = activation_map[params.activation.op.name] |
| clip_min = int(params.activation.attrs.a_min) |
| clip_max = int(params.activation.attrs.a_max) |
| else: |
| activation = "NONE" |
| clip_min = 0 |
| clip_max = 0 |
| |
| # We don't yet support activation functions that use LUT. |
| lut = relay.const([], dtype="int8") |
| |
| unary_input_shape = params.ifm.shape |
        # If the input tensor is not 4D, insert reshapes before and after the unary operator
| if len(params.ifm.shape) == 4: |
| unary_input = params.ifm.tensor |
| else: |
| pad_size = 4 - len(unary_input_shape) |
| unary_input_shape = ([1] * pad_size) + unary_input_shape |
| unary_input = relay.op.reshape(params.ifm.tensor, newshape=unary_input_shape) |
| |
| ethosu_unary_elementwise = ethosu_ops.ethosu_unary_elementwise( |
| ifm=unary_input, |
| lut=lut, |
| operator_type=params.operator_type, |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=int(params.ifm.q_params.zero_point), |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| ofm_channels=unary_input_shape[3], |
| activation=activation, |
| clip_min=clip_min, |
| clip_max=clip_max, |
| ifm_layout=str(params.ifm.layout), |
| ofm_layout=str(params.ofm.layout), |
| ) |
| if len(params.ifm.shape) == 4: |
| op = ethosu_unary_elementwise |
| else: |
| op = relay.op.reshape(ethosu_unary_elementwise, newshape=params.ifm.shape) |
| return op |
| |
| |
| class AbsRewriter(UnaryElementwiseRewriter): |
| def __init__(self): |
| super().__init__( |
| params_class=ethosu_patterns.AbsParams, |
| pattern=(wildcard().has_attr({"Composite": ethosu_patterns.AbsParams.composite_name}))( |
| wildcard() |
| ), |
| ) |
| |
| |
| class MeanRewriter(DFPatternCallback): |
| """Convert ethosu.mean composite functions to an equivalent legalization: |
    - Case 1 (axis == [1, 2] and keepdims == True):
| ethosu_depthwise_conv2d + ethosu_binary_elementwise |
| - Case 2 (ifm qparams == ofm qparams): ethosu_pooling |
| - Case 3 (else): ethosu_depthwise_conv2d |
| """ |
| |
| def __init__(self): |
| super().__init__(require_type=True) |
| self.pattern = ( |
| wildcard().has_attr({"Composite": ethosu_patterns.MeanParams.composite_name}) |
| )(wildcard()) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = ethosu_patterns.MeanParams(post.op.body) |
| params.ifm.tensor = post.args[0] |
| |
| ifm_shape = params.ifm.shape |
| ofm_shape = params.ofm.shape |
| lut = relay.const([], "int8") |
| axis = params.axis |
| reduced_op = params.ifm.tensor |
| |
| # Enforce 4d input |
| if len(ifm_shape) < 4: |
| axis = [x + 1 for x in axis] |
| if len(ifm_shape) == 3: |
| ifm_shape = [1, params.height, params.width, ifm_shape[2]] |
| else: |
| ifm_shape = [1, params.height, params.width, 1] |
| reduced_op = relay.reshape(reduced_op, ifm_shape) |
| |
| filter_height = ifm_shape[1] if 1 in axis else 1 |
| filter_width = ifm_shape[2] if 2 in axis else 1 |
| in_channels = out_channels = ifm_shape[-1] |
| |
        # If the height is greater than the max kernel height, reshape the input
        # from [filter_height, filter_width] to [1, (filter_height*filter_width)],
        # but only when the axis is [1, 2].
| if axis == [1, 2] and filter_height > 64: |
| ifm_shape = (ifm_shape[0], 1, filter_height * filter_width, in_channels) |
| filter_width = filter_height * filter_width |
| filter_height = 1 |
| reduced_op = relay.reshape(reduced_op, ifm_shape) |
| |
| if axis == [1, 2] and params.keepdims: |
| weight_scale = 1 |
| weight_values = np.ones([out_channels, filter_height, filter_width, 1]) |
| scale_bias = vela_api.pack_biases( |
| biases=np.zeros(ifm_shape[-1]), |
| ifm_scale=params.ifm.q_params.scale_f32, |
| ifm_dtype=np.dtype(params.ifm.dtype), |
                weight_scales=np.array([weight_scale], dtype=np.float64),
| ofm_scale=params.ofm.q_params.scale_f32, |
| is_activation_tanh_or_sigmoid=False, |
| ) |
| |
| reduced_op = ethosu_ops.ethosu_depthwise_conv2d( |
| ifm=reduced_op, |
| weight=relay.const(weight_values, params.ifm.dtype), |
| scale_bias=relay.const(scale_bias, "uint8"), |
| lut=lut, |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=int(params.ifm.q_params.zero_point), |
| weight_zero_point=0, |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| kernel_shape=(filter_height, filter_width), |
| ofm_channels=out_channels, |
| ofm_dtype="int16", |
| ) |
| |
| n = int(filter_height * filter_width) |
| eps = 1 / (256 * (n + 1)) if n % 2 == 0 else 0 |
| |
| scalar_tensor = relay.const(np.ones([1, 1, 1, 1], dtype="int16"), dtype="int16") |
| |
| reduced_op = ethosu_ops.ethosu_binary_elementwise( |
| ifm=reduced_op, |
| ifm2=scalar_tensor, |
| lut=lut, |
| operator_type="MUL", |
| ifm_scale=float(params.ofm.q_params.scale_f32), |
| ifm_zero_point=int(params.ofm.q_params.zero_point), |
| ifm2_scale=1 / (n - eps), |
| ifm2_zero_point=0, |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| ifm_channels=out_channels, |
| ifm2_channels=out_channels, |
| reversed_operands=False, |
| ofm_dtype="int8", |
| rounding_mode="NATURAL", |
| ) |
| elif ( |
| params.ifm.q_params.scale_f32 == params.ofm.q_params.scale_f32 |
| and params.ifm.q_params.zero_point == params.ofm.q_params.zero_point |
| ): |
| reduced_op = ethosu_ops.ethosu_pooling( |
| ifm=reduced_op, |
| lut=lut, |
| pooling_type="AVG", |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=0, |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=0, |
| pool_shape=(filter_height, filter_width), |
| ofm_channels=out_channels, |
| rounding_mode="TRUNCATE", |
| ) |
| else: |
| weight_scale = 1 / (filter_height * filter_width) |
| weight_values = np.ones([out_channels, filter_height, filter_width, 1]) |
| bias = -1 * int(params.ifm.q_params.zero_point) * filter_height * filter_width |
| |
| scale_bias = vela_api.pack_biases( |
| biases=np.ones([ifm_shape[-1]]) * bias, |
| ifm_scale=params.ifm.q_params.scale_f32, |
| ifm_dtype=np.dtype(params.ifm.dtype), |
                weight_scales=np.array([weight_scale], dtype=np.float64),
| ofm_scale=params.ofm.q_params.scale_f32, |
| is_activation_tanh_or_sigmoid=False, |
| ) |
| reduced_op = ethosu_ops.ethosu_depthwise_conv2d( |
| ifm=reduced_op, |
| weight=relay.const(weight_values, params.ifm.dtype), |
| scale_bias=relay.const(scale_bias, "uint8"), |
| lut=lut, |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=0, |
| weight_zero_point=0, |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| kernel_shape=(filter_height, filter_width), |
| ofm_channels=out_channels, |
| rounding_mode="NATURAL", |
| ) |
| |
| # Reshape to original ofm shape |
| if len(ofm_shape) < 4: |
| reduced_op = relay.reshape(reduced_op, ofm_shape) |
| |
| return reduced_op |
| |
| |
| class ConcatRewriter(DFPatternCallback): |
| """The newer versions of TFLite converters return a concatenate operator that concatenates |
| tensors with same QNN params (if the QNN params of tensors were initially different, |
| the converter adds a requantize node), so this rewriter replaces the QNN concatenate with |
| "normal" concatenate""" |
| |
| def __init__(self): |
| super().__init__(require_type=True, rewrite_once=True) |
| self.pattern = ( |
| wildcard().has_attr({"Composite": ethosu_patterns.ConcatParams.composite_name}) |
| )(None) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
        # Find the tensors that are inputs to the concat
| concat_args = list() |
| for arg in post.args: |
| if isinstance(arg, tvm.relay.expr.Call): |
| concat_args.append(arg) |
| |
| axis = post.op.body.attrs.axis |
| concat = relay.op.concatenate(relay.Tuple(concat_args), axis=axis) |
| return concat |
| |
| |
| class RequantizeRewriter(DFPatternCallback): |
| """Convert ethos-u.requantize composite function to an identity operation.""" |
| |
| def __init__(self): |
| super().__init__(require_type=True) |
| self.pattern = ( |
| wildcard().has_attr({"Composite": ethosu_patterns.RequantizeParams.composite_name}) |
| )(wildcard()) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = ethosu_patterns.RequantizeParams(post.op.body) |
| params.ifm.tensor = post.args[0] |
| |
| lut = relay.const([], "int8") |
| |
| return ethosu_ops.ethosu_identity( |
| ifm=params.ifm.tensor, |
| lut=lut, |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=int(params.ifm.q_params.zero_point), |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| ) |
| |
| |
| class Resize2dRewriter(DFPatternCallback): |
| """ |
| Convert ethos-u.resize2d composite function to an equivalent operation that |
| performs the relevant upsampling operation. |
| |
    Case 1: No upsampling (upscale factor of 1):
        Identity.
    Case 2: Nearest neighbor upsampling:
        1x1 pooling with 2x2 nearest neighbor upsampling.
    Case 3: Bilinear upsampling:
        2x2 average pool with 2x2 nearest neighbor upsampling.
| """ |
| |
| def __init__(self): |
| super().__init__(require_type=True) |
| self.pattern = ( |
| wildcard().has_attr({"Composite": ethosu_patterns.Resize2dParams.composite_name}) |
| )(wildcard()) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = ethosu_patterns.Resize2dParams(post.op.body) |
| params.ifm.tensor = post.args[0] |
| |
| lut = relay.const([], "int8") |
| ifm_shape = params.ifm.shape |
| in_channels = ifm_shape[-1] |
| reduced_op = params.ifm.tensor |
| current_size = np.array(ifm_shape[1:3]) |
| output_size = np.array(params.size) |
| |
| if (current_size == output_size).all(): |
| return ethosu_ops.ethosu_identity( |
| reduced_op, |
| lut, |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=int(params.ifm.q_params.zero_point), |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| ) |
| |
| padding = [0, 0, 0, 0] |
| rounding_mode = "TFL" |
| pool_shape = [1, 1] |
| if params.method == "linear": |
| pool_shape = [2, 2] |
| rounding_mode = "NATURAL" |
| if params.coordinate_transformation_mode == "asymmetric": |
| # Use SAME padding. |
| ypad = Resize2dRewriter.get_required_padding(ifm_shape[1]) |
| xpad = Resize2dRewriter.get_required_padding(ifm_shape[2]) |
| padding = [ypad // 2, xpad // 2, (ypad + 1) // 2, (xpad + 1) // 2] |
| |
| return ethosu_ops.ethosu_pooling( |
| ifm=reduced_op, |
| lut=lut, |
| pooling_type="AVG", |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=int(params.ifm.q_params.zero_point), |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| pool_shape=pool_shape, |
| ofm_channels=in_channels, |
| strides=[1, 1], |
| padding=padding, |
| upscale="NEAREST", |
| rounding_mode=rounding_mode, |
| ) |
| |
| @staticmethod |
| def get_required_padding(input_size: int, pool_size: int = 2) -> int: |
| """Gets the amount of padding required needed to achieve |
| 'SAME' padding for a given axis.""" |
| needed_input = (input_size - 1) + pool_size |
| total_padding = max(0, needed_input - input_size) |
| return total_padding |
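
    # Illustrative: with the default pool_size of 2, get_required_padding(5)
    # returns 1, which the callback splits as ypad // 2 = 0 before and
    # (ypad + 1) // 2 = 1 after the axis.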
| |
| |
| class ExpandDimsRewriter(DFPatternCallback): |
| """Legalize expand dims to a reshape operator.""" |
| |
| def __init__(self): |
| super().__init__(require_type=True, rewrite_once=True) |
| self.pattern = ( |
| wildcard().has_attr({"Composite": ethosu_patterns.ExpandDimsParams.composite_name}) |
| )(None) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = ethosu_patterns.ExpandDimsParams(post.op.body) |
| return relay.op.reshape(post.args[0], newshape=params.output.shape) |
| |
| |
| class SqueezeRewriter(DFPatternCallback): |
| """Legalize squeeze to a reshape operator.""" |
| |
| def __init__(self): |
| super().__init__(require_type=True, rewrite_once=True) |
| self.pattern = ( |
| wildcard().has_attr({"Composite": ethosu_patterns.SqueezeParams.composite_name}) |
| )(None) |
| |
| def callback( |
| self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map |
| ) -> tvm.relay.Expr: |
| params = ethosu_patterns.SqueezeParams(post.op.body) |
| return relay.op.reshape(post.args[0], newshape=params.output.shape) |
| |
| |
| class FullyConnectedRewriter(DFPatternCallback): |
| """Legalize Fully Connected (with bias and clip) to an NPU operator""" |
| |
| def __init__(self): |
| super().__init__(require_type=True) |
| self.pattern = ( |
| wildcard().has_attr({"Composite": ethosu_patterns.FullyConnectedParams.composite_name}) |
| )(wildcard()) |
| |
| def callback(self, pre, post, node_map): |
| params = ethosu_patterns.FullyConnectedParams(post.op.body) |
| params.ifm.tensor = post.args[0] |
| |
| # IFM reshapes |
| ifm = post.args[0] |
| if len(params.ifm.shape) != 4 or not params.ifm.shape[1] == params.ifm.shape[2] == 1: |
| ifm = relay.reshape(ifm, (1, 1, 1, params.ifm.shape[-1])) |
| |
| # Weight transformations |
| weights_values = params.weights.values |
| weights_values_ohwi = np.expand_dims(weights_values, axis=(1, 2)) |
| if params.activation: |
| activation = "CLIP" |
| clip_min = int(params.activation.attrs.a_min) |
| clip_max = int(params.activation.attrs.a_max) |
| else: |
| activation = "NONE" |
| clip_min = 0 |
| clip_max = 0 |
| bias_values = ( |
| params.biases.tensor.data.asnumpy() |
| if params.biases |
| else np.zeros((params.ofm.shape[-1])) |
| ) |
| scale_bias = vela_api.pack_biases( |
| biases=bias_values, |
| ifm_scale=params.ifm.q_params.scale_f32, |
| ifm_dtype=np.dtype(params.ifm.dtype), |
| weight_scales=params.weights.q_params.scale_f32, |
| ofm_scale=params.ofm.q_params.scale_f32, |
| is_activation_tanh_or_sigmoid=False, |
| ) |
| ethosu_fc = ethosu_ops.ethosu_conv2d( |
| ifm=ifm, |
| weight=relay.const(weights_values_ohwi, params.weights.values.dtype), |
| scale_bias=relay.const(scale_bias, "uint8"), |
| lut=relay.const([], dtype="int8"), |
| ifm_scale=float(params.ifm.q_params.scale_f32), |
| ifm_zero_point=int(params.ifm.q_params.zero_point), |
| weight_zero_point=int(params.weights.q_params.zero_point), |
| ofm_scale=float(params.ofm.q_params.scale_f32), |
| ofm_zero_point=int(params.ofm.q_params.zero_point), |
| kernel_shape=[1, 1], |
| ofm_channels=params.weights.shape[0], |
| strides=(1, 1), |
| padding=(0, 0, 0, 0), |
| dilation=(1, 1), |
| activation=activation, |
| clip_min=clip_min, |
| clip_max=clip_max, |
| upscale="NONE", |
| ifm_layout="NHWC", |
| ofm_layout="NHWC", |
| ) |
| |
| if len(params.ofm.shape) != 4 or not params.ofm.shape[1] == params.ofm.shape[2] == 1: |
| ethosu_fc = relay.reshape(ethosu_fc, params.ofm.shape) |
| return ethosu_fc |
| |
| |
| @util.create_npu_function_pass(opt_level=1) |
| class LegalizeEthosU: |
| """This is the pass to call graph-rewrites to perform graph transformation |
| in a way such that the operations are replaced with hardware/codegen supported |
| operations. |
| """ |
| |
| def transform_npu_function(self, _, func: relay.Function) -> relay.Function: |
| """This is the method that replaces the operations with hardware/codegen supported |
| operations. |
| """ |
| rewriters = [ |
| PartitionedSplitRewriter(), |
| SplitRewriter(), |
| Conv2DRewriter(), |
| Conv2DTransposeRewriter(), |
| DepthwiseConv2DRewriter(), |
| FullyConnectedRewriter(), |
| MaxPoolingRewriter(), |
| AvgPoolingRewriter(), |
| AddRewriter(), |
| SubRewriter(), |
| MulRewriter(), |
| MinRewriter(), |
| MaxRewriter(), |
| ShlRewriter(), |
| AbsRewriter(), |
| TanhRewriter(), |
| HardSwishRewriter(), |
| LeakyReLURewriter(), |
| MeanRewriter(), |
| ConcatRewriter(), |
| SigmoidRewriter(), |
| RequantizeRewriter(), |
| Resize2dRewriter(), |
| ExpandDimsRewriter(), |
| SqueezeRewriter(), |
| ReshapeRewriter(), |
| StridedSliceRewriter(), |
| NoOpRewriter(), |
| ] |
| for rewriter in rewriters: |
| func = rewrite(rewriter, func) |
| |
| return func |
| |
| def __call__(self, *args, **kwargs): |
        # pylint is unable to figure out the decorated
| # class is callable, thus adding this to |
| # suppress the warning. |
| pass |