| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| """External function interface to NNPACK libraries.""" |
| import tvm |
| from tvm import te |
| import tvm._ffi |
| |
| |
| def is_available(): |
| """Check whether NNPACK is available, that is, `nnp_initialize()` |
| returns `nnp_status_success`. |
| """ |
| return _initialize() == 0 |
| |
| |
| def fully_connected_inference(lhs, rhs, nthreads=1): |
| """Create an extern op that compute fully connected of 1D tensor lhs and |
| 2D tensor rhs with nnpack. |
| |
| Parameters |
| ---------- |
| lhs : Tensor |
| lhs 1D array input[input_channels] of FP32 elements |
| rhs : Tensor |
| lhs 2D matrix kernel[output_channels][input_channels] of FP32 elements |
| |
| Returns |
| ------- |
| C : Tensor |
| lhs 1D array out[output_channels] of FP32 elements. |
| """ |
| m = rhs.shape[0] |
| return te.extern( |
| (m,), |
| [lhs, rhs], |
| lambda ins, outs: tvm.tir.call_packed( |
| "tvm.contrib.nnpack.fully_connected_inference", ins[0], ins[1], outs[0], nthreads |
| ), |
| name="C", |
| ) |
| |
| |
| class ConvolutionAlgorithm: |
| AUTO = 0 |
| FFT_8x8 = 1 |
| FFT_16x16 = 2 |
| WT_8x8 = 3 |
| IMPLICIT_GEMM = 4 |
| DIRECT = 5 |
| WT_8x8_FP16 = 6 |
| |
| |
| class ConvolutionTransformStrategy: |
| COMPUTE = 1 |
| PRECOMPUTE = 2 |
| |
| |
| def convolution_inference( |
| data, kernel, bias, padding, stride, nthreads=1, algorithm=ConvolutionAlgorithm.AUTO |
| ): |
| """Create an extern op to do inference convolution of 4D tensor data and |
| 4D tensor kernel and 1D tensor bias with nnpack. |
| |
| Parameters |
| ---------- |
| data : Tensor |
| data 4D tensor input[batch][input_channels][input_height][input_width] of |
| FP32 elements. |
| kernel : Tensor |
| kernel 4D tensor kernel[output_channels][input_channels][kernel_height] |
| [kernel_width] of FP32 elements. |
| bias : Tensor |
| bias 1D array bias[output_channels][input_channels][kernel_height] |
| [kernel_width] of FP32 elements. |
| padding : list |
| padding A 4-dim list of [pad_top, pad_bottom, pad_left, pad_right], |
| which indicates the padding around the feature map. |
| stride : list |
| stride A 2-dim list of [stride_height, stride_width], which indicates |
| the stride. |
| |
| Returns |
| ------- |
| output : Tensor |
| output 4D tensor output[batch][output_channels][output_height][output_width] |
| of FP32 elements. |
| """ |
| |
| assert isinstance(padding, list) and len(padding) == 4 |
| assert isinstance(stride, list) and len(stride) == 2 |
| batch, _, input_height, input_width = data.shape |
| output_channels, _, kernel_height, kernel_width = kernel.shape |
| idxdiv = te.indexdiv |
| output_height = idxdiv(input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1 |
| output_width = idxdiv(input_width + padding[0] + padding[1] - kernel_width, stride[1]) + 1 |
| |
| return te.extern( |
| (batch, output_channels, output_height, output_width), |
| [data, kernel, bias] if bias is not None else [data, kernel], |
| lambda ins, outs: tvm.tir.call_packed( |
| "tvm.contrib.nnpack.convolution_inference", |
| ins[0], |
| ins[1], |
| ins[2] if bias is not None else 0, |
| outs[0], |
| padding[0], |
| padding[1], |
| padding[2], |
| padding[3], |
| stride[0], |
| stride[1], |
| nthreads, |
| algorithm, |
| ), |
| name="C", |
| ) |
| |
| |
| def convolution_inference_without_weight_transform( |
| data, transformed_kernel, bias, padding, stride, nthreads=1, algorithm=ConvolutionAlgorithm.AUTO |
| ): |
| """Create an extern op to do inference convolution of 4D tensor data and |
| 4D pre-transformed tensor kernel and 1D tensor bias with nnpack. |
| |
| Parameters |
| ---------- |
| data : Tensor |
| data 4D tensor input[batch][input_channels][input_height][input_width] of |
| FP32 elements. |
| transformed_kernel : Tensor |
| transformed_kernel 4D tensor kernel[output_channels][input_channels][tile] |
| [tile] of FP32 elements. |
| bias : Tensor |
| bias 1D array bias[output_channels][input_channels][kernel_height] |
| [kernel_width] of FP32 elements. |
| padding : list |
| padding A 4-dim list of [pad_top, pad_bottom, pad_left, pad_right], |
| which indicates the padding around the feature map. |
| stride : list |
| stride A 2-dim list of [stride_height, stride_width], which indicates |
| the stride. |
| |
| Returns |
| ------- |
| output : Tensor |
| output 4D tensor output[batch][output_channels][output_height][output_width] |
| of FP32 elements. |
| """ |
| |
| assert algorithm in (ConvolutionAlgorithm.WT_8x8, ConvolutionAlgorithm.WT_8x8_FP16) |
| assert isinstance(padding, list) and len(padding) == 4 |
| assert isinstance(stride, list) and len(stride) == 2 |
| batch, _, input_height, input_width = data.shape |
| output_channels, _, _, _ = transformed_kernel.shape |
| kernel_height, kernel_width = (3, 3) |
| idxdiv = te.indexdiv |
| output_height = idxdiv(input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1 |
| output_width = idxdiv(input_width + padding[0] + padding[1] - kernel_width, stride[1]) + 1 |
| |
| return te.extern( |
| (batch, output_channels, output_height, output_width), |
| [data, transformed_kernel, bias] if bias is not None else [data, transformed_kernel], |
| lambda ins, outs: tvm.tir.call_packed( |
| "tvm.contrib.nnpack.convolution_inference_without_weight_transform", |
| ins[0], |
| ins[1], |
| ins[2] if bias is not None else 0, |
| outs[0], |
| padding[0], |
| padding[1], |
| padding[2], |
| padding[3], |
| stride[0], |
| stride[1], |
| nthreads, |
| algorithm, |
| ), |
| name="C", |
| dtype="float32", |
| ) |
| |
| |
| def convolution_inference_weight_transform( |
| kernel, nthreads=1, algorithm=ConvolutionAlgorithm.AUTO, dtype="float32" |
| ): |
| """Create an extern op to do inference convolution of 3D tensor data and |
| 4D tensor kernel and 1D tensor bias with nnpack. |
| |
| Parameters |
| ---------- |
| kernel : Tensor |
| kernel 4D tensor kernel[output_channels][input_channels][kernel_height] |
| [kernel_width] of FP32 elements. |
| |
| Returns |
| ------- |
| output : Tensor |
| output 4D tensor output[output_channels][input_channels][tile][tile] |
| of FP32 elements. |
| """ |
| assert algorithm in (ConvolutionAlgorithm.WT_8x8, ConvolutionAlgorithm.WT_8x8_FP16) |
| output_channels, input_channels, _, _ = kernel.shape |
| transform_tile_size = 8 |
| if not isinstance(dtype, str): |
| dtype = dtype.dtype |
| return te.extern( |
| (output_channels, input_channels, transform_tile_size, transform_tile_size), |
| [kernel], |
| lambda ins, outs: tvm.tir.call_packed( |
| "tvm.contrib.nnpack.convolution_inference_weight_transform", |
| ins[0], |
| outs[0], |
| nthreads, |
| algorithm, |
| ), |
| name="transform_kernel", |
| dtype=dtype, |
| ) |
| |
| |
| tvm._ffi._init_api("tvm.contrib.nnpack") |