| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import subprocess |
| import tempfile |
| import re |
| |
| import pytest |
| import numpy as np |
| |
| import tvm |
| from tvm.script import tir as T |
| from tvm.target import codegen |
| |
# Table of target cases handed to ``tvm.testing.parameters``. The columns of each
# row follow the order of the names bound below: llvm_version, arm_target,
# input_dtype, kernel_dtype, is_supported.
# NOTE(review): no test visible in this file requests these fixtures -- confirm
# they are still needed.
_ARM_TARGET_CASES = (
    # Testing mcpu type
    (8, "c -mcpu=cortex-m4", "int8", "int8", False),
    (8, "c -mcpu=cortex-m7", "int8", "int8", False),
    (8, "c -mcpu=cortex-m33", "int8", "int8", False),
    (8, "c -mcpu=cortex-m55", "int8", "int8", False),
    (8, "c -mcpu=cortex-m3", "int8", "int8", False),
    (7, "llvm -mtriple=arm-linux-gnueabi -mattr=+neon", "int8", "int8", False),
    (8, "llvm -mtriple=arm-linux-gnueabi -mattr=+neon", "int8", "int8", True),
    (9, "llvm -mtriple=arm-linux-gnueabi -mattr=+neon", "int8", "int8", True),
    (8, "llvm -mtriple=arm-linux-gnueabi", "int8", "int8", False),
    (7, "llvm -mtriple=aarch64-linux-gnu -mattr=+v8.4a,+dotprod", "int8", "int8", False),
    (8, "llvm -mtriple=aarch64-linux-gnu -mattr=+v8.4a,+dotprod", "int8", "int8", True),
    # NOTE(review): this row repeats the (9, arm-linux-gnueabi +neon) row above;
    # possibly an aarch64 +dotprod case was intended -- confirm before changing.
    (9, "llvm -mtriple=arm-linux-gnueabi -mattr=+neon", "int8", "int8", True),
    (8, "llvm -mtriple=aarch64-linux-gnu", "int8", "int8", True),
    # Testing dtype
    (8, "llvm -mtriple=aarch64-linux-gnu -mattr=+neon", "int16", "int8", False),
    (8, "llvm -mtriple=aarch64-linux-gnu -mattr=+neon", "int8", "int16", False),
    (8, "llvm -mtriple=aarch64-linux-gnu -mattr=+neon", "int16", "int16", False),
)

llvm_version, arm_target, input_dtype, kernel_dtype, is_supported = tvm.testing.parameters(
    *_ARM_TARGET_CASES
)
| |
| |
@pytest.fixture(scope="session")
def sve_device_vector_length():
    """Return the integer printed by a small SVE probe program run on this machine.

    A C program that prints ``svcntb() * 8`` is written to a temporary
    directory, built with ``tvm.contrib.cc.create_executable`` using
    ``-march=native``, and executed. Its standard output is parsed with
    ``int``. The value is presumably the SVE vector length in bits (bytes per
    vector times 8); the tests below divide it by 32 to size float32 buffers.

    Returns
    -------
    int
        The number printed by the probe program.

    Raises
    ------
    subprocess.CalledProcessError
        If the probe executable exits with a non-zero status.
    """
    # Imported explicitly so the fixture does not depend on ``import tvm``
    # having loaded the ``tvm.contrib.cc`` submodule as a side effect.
    from tvm.contrib import cc

    c_code = r"""
    #include <stdio.h>
    #include <arm_sve.h>

    int main() {
        printf("%ld\n", svcntb() * 8);
    }
    """

    with tempfile.TemporaryDirectory() as tmp_dir:
        c_path = f"{tmp_dir}/vl.c"
        o_path = f"{tmp_dir}/out.o"
        with open(c_path, "w", encoding="utf-8") as f:
            f.write(c_code)
        cc.create_executable(o_path, c_path, ["-march=native"])
        # Run the binary directly with an argv list instead of through a
        # shell, so a temporary path containing spaces or shell
        # metacharacters is not re-interpreted.
        out = subprocess.check_output([o_path]).strip().decode()

    return int(out)
| |
| |
@tvm.testing.requires_aarch64_sve
def test_scalable_div(sve_device_vector_length):
    """Divide a constant by a ``T.vscale()``-dependent value on an SVE target.

    The kernel stores ``T.Div(10000, 4 * T.vscale())`` into a one-element
    int32 buffer. The stored value is compared with the quotient computed in
    Python from the ``sve_device_vector_length`` fixture.
    """
    np.random.seed(0)

    @T.prim_func
    def my_func(a: T.handle):
        A = T.match_buffer(a, (1,), "int32")
        T.func_attr({"global_symbol": "my_module", "tir.noalias": True})
        A[0] = T.Div(10000, 4 * T.vscale())

    built = tvm.compile(my_func, target="llvm -mtriple=aarch64-linux-gnu -mattr=+sve")

    out_nd = tvm.runtime.tensor(np.empty((1,), dtype="int32"), device=tvm.cpu(0))
    built(out_nd)

    # ``sve_device_vector_length // 32`` stands in for ``4 * T.vscale()`` on
    # the Python side of the comparison.
    expected = 10000 // (sve_device_vector_length // 32)
    tvm.testing.assert_allclose(out_nd.numpy()[0], expected)
| |
| |
@tvm.testing.requires_aarch64_sve
def test_scalable_buffer_load_store(sve_device_vector_length):
    """Copy one buffer to another through ``T.ramp`` indices of scalable length.

    Both buffers hold ``sve_device_vector_length // 32`` float32 values. The
    kernel performs one load and one store indexed by
    ``T.ramp(0, 1, 4 * T.vscale())``, and the destination is expected to equal
    the random source data.
    """
    np.random.seed(0)
    num_elements = sve_device_vector_length // 32

    @T.prim_func
    def my_func(a: T.handle, b: T.handle):
        A = T.match_buffer(a, (num_elements,), "float32")
        B = T.match_buffer(b, (num_elements,), "float32")
        T.func_attr({"global_symbol": "my_module", "tir.noalias": True})
        B[T.ramp(0, 1, 4 * T.vscale())] = A[T.ramp(0, 1, 4 * T.vscale())]

    built = tvm.compile(my_func, target="llvm -mtriple=aarch64-linux-gnu -mattr=+sve")

    device = tvm.cpu(0)
    src_np = np.random.uniform(size=(num_elements,)).astype("float32")
    dst_np = np.zeros((num_elements,)).astype("float32")
    src_nd = tvm.runtime.tensor(src_np, device=device)
    dst_nd = tvm.runtime.tensor(dst_np, device=device)
    built(src_nd, dst_nd)

    tvm.testing.assert_allclose(dst_nd.numpy(), src_np)
| |
| |
@tvm.testing.requires_aarch64_sve
def test_scalable_loop_bound(sve_device_vector_length):
    """Copy a buffer element by element in a loop bounded by ``4 * T.vscale()``.

    Both buffers hold ``sve_device_vector_length // 32`` float32 values. The
    kernel is a serial loop ``B[i] = A[i]``, and the destination is expected to
    equal the random source data.
    """
    np.random.seed(0)

    dtype = "float32"
    num_elements = sve_device_vector_length // 32

    @T.prim_func
    def my_func(a: T.handle, b: T.handle):
        A = T.match_buffer(a, (num_elements,), "float32")
        B = T.match_buffer(b, (num_elements,), "float32")
        T.func_attr({"global_symbol": "my_module", "tir.noalias": True})
        for i in T.serial(0, 4 * T.vscale()):
            B[i] = A[i]

    built = tvm.compile(my_func, target="llvm -mtriple=aarch64-linux-gnu -mattr=+sve")

    device = tvm.cpu(0)
    src_np = np.random.uniform(size=(num_elements,)).astype(dtype)
    dst_np = np.zeros((num_elements,)).astype(dtype)
    src_nd = tvm.runtime.tensor(src_np, device=device)
    dst_nd = tvm.runtime.tensor(dst_np, device=device)
    built(src_nd, dst_nd)

    tvm.testing.assert_allclose(dst_nd.numpy(), src_np)
| |
| |
@tvm.testing.requires_aarch64_sve
def test_scalable_broadcast(sve_device_vector_length):
    """Fill a buffer with ones using a ``T.broadcast`` of scalable length.

    The buffer holds ``sve_device_vector_length // 32`` float32 zeros. The
    kernel stores ``T.broadcast(1, 4 * T.vscale())`` through a ``T.ramp``
    index, and the buffer is then compared with ``np.ones`` of the same shape.
    """
    num_elements = sve_device_vector_length // 32

    @T.prim_func
    def my_func(a: T.handle):
        A = T.match_buffer(a, (num_elements,), "float32")
        T.func_attr({"global_symbol": "my_module", "tir.noalias": True})
        A[T.ramp(0, 1, 4 * T.vscale())] = T.broadcast(1, 4 * T.vscale())

    built = tvm.compile(my_func, target="llvm -mtriple=aarch64-linux-gnu -mattr=+sve")

    zeros_np = np.zeros((num_elements,)).astype("float32")
    out_nd = tvm.runtime.tensor(zeros_np, device=tvm.cpu(0))
    built(out_nd)

    expected = np.ones((num_elements,))
    tvm.testing.assert_allclose(out_nd.numpy(), expected)