blob: 428f21abdf25e07cd740bc699c0abf7dc8feff4e [file]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ruff: noqa: E501, F401, F841
"""CLML integration operator tests."""
import inspect
import json
import os
import numpy as np
import pytest
from mod_utils import (
get_avgpool_expected_codegen,
get_batchnorm_mod,
get_binary_op_mod,
get_clml_conv2d_codegen,
get_conv2d_transpose_expected_codegen,
get_dequant_matmul_module,
get_dequant_vec_matmul_module,
get_global_avgpool_expected_codegen,
get_global_maxpool_expected_codegen,
get_maxpool_expected_codegen,
get_relax_avgpool_mod,
get_relax_conv2d_mod,
get_relax_conv2d_transpose_mod,
get_relax_global_avgpool_mod,
get_relax_global_maxpool_mod,
get_relax_maxpool_mod,
get_relax_reshape_codegen,
get_relax_reshape_mod,
get_unary_op_mod,
)
from utils import skip_unless_adreno_clml, verify_results
import tvm
import tvm.testing
from tvm import relax, rpc
from tvm.relax.backend.adreno import clml
from tvm.relax.backend.adreno.clml import OpenCLMLOffLoad, OpenCLMLOffLoadForLLM
from tvm.script import ir as I
from tvm.script import relax as R
from tvm.script import tirx as T
from tvm.script.ir_builder import IRBuilder
from tvm.script.ir_builder import relax as relax_builder
CLML_VERSION = clml.clml_sdk_version()
TARGET_CLML_VERSION = int(os.environ.get("ADRENO_TARGET_CLML_VERSION", 4))
clml_target = tvm.target.Target("qcom/adreno-opencl-clml")
ref_target = tvm.target.Target("opencl")
def verify_clml_codegen(clml_mod, clml_codegen):
clml_mod = OpenCLMLOffLoadForLLM(clml_target)(clml_mod)
clml_mod = OpenCLMLOffLoad()(clml_mod)
source = clml_mod.attrs["external_mods"][0].inspect_source()
codegen = json.loads(source)["nodes"]
for node in range(len(codegen)):
if codegen[node]["op"] == "input" or codegen[node]["op"] == "const":
codegen[node]["name"] = ""
if codegen[node]["op"] == "kernel":
codegen[node]["name"] = ""
codegen_str = json.dumps(codegen, sort_keys=True, indent=2)
known_good_codegen_str = json.dumps(clml_codegen, sort_keys=True, indent=2)
assert codegen_str == known_good_codegen_str, (
f"The JSON produced by codegen does not match the expected result. \n"
f"Actual={codegen_str} \n"
f"Expected={known_good_codegen_str}"
)
def verify(
mod, clml_codegen, inputs_np, params_np, target_minimum_clml_version=None, target_test=True
):
mod = tvm.relax.transform.BindParams("main", params_np)(mod)
codegen_mod, clml_mod = mod.clone(), mod
verify_clml_codegen(codegen_mod, clml_codegen)
if (
target_minimum_clml_version is not None
and TARGET_CLML_VERSION < target_minimum_clml_version
):
print(f"Skipped Eval Tests for {inspect.stack()[1].function} function", flush=True)
return
if "ADRENO_TARGET" not in os.environ:
return
if target_test:
verify_results(clml_mod, target=clml_target, ref_target=ref_target)
@pytest.mark.gpu
@skip_unless_adreno_clml
@pytest.mark.parametrize("dtype", ["float32"])
@pytest.mark.parametrize(
"kernel_h, kernel_w, padding, stride, dilation, out_channels, shape, has_bias, has_bn, has_activation, has_pad, is_depthwise",
[
(3, 3, (1, 1), (1, 1), (1, 1), 64, (3, 224, 224), False, True, False, True, False),
(3, 3, (1, 1), (1, 1), (1, 1), 64, (3, 224, 224), False, True, False, False, False),
# (5, 5, (2, 2), (1, 1), (1, 1), 16, (16, 64, 64), False, True, True, False, False),
# (7, 7, (3, 3), (2, 2), (1, 1), 32, (3, 224, 224), True, False, True, True, False),
(3, 3, (0, 0), (1, 1), (1, 1), 512, (256, 14, 14), True, False, True, False, False),
(1, 1, (0, 0), (1, 1), (1, 1), 1024, (512, 7, 7), True, False, True, False, False),
(1, 3, (0, 0), (1, 1), (1, 1), 64, (64, 7, 7), True, False, True, False, False),
(3, 1, (0, 0), (1, 1), (1, 1), 64, (64, 7, 7), False, True, True, True, False),
],
)
def test_conv2d_offload(
kernel_h,
kernel_w,
padding,
stride,
dilation,
out_channels,
shape,
has_bias,
has_bn,
has_activation,
has_pad,
is_depthwise,
dtype,
):
low, high = -0.01, 0.01
rtol, atol = 1e-3, 1e-3
if CLML_VERSION > 3:
rtol, atol = 1e-2, 1e-2 # @clml precision
data_shape = (1, *shape)
if is_depthwise:
groups = data_shape[1] // out_channels
else:
groups = 1
padding = (padding[0], padding[1], padding[0], padding[1])
weight_format = "IOHW" if is_depthwise else "OIHW"
weight_shape = (out_channels, data_shape[1] // groups, kernel_h, kernel_w)
data = np.random.uniform(low, high, size=data_shape).astype(dtype)
weight = np.random.uniform(low, high, size=weight_shape).astype(dtype)
bias = np.random.uniform(low, high, size=(1, weight_shape[0], 1, 1)).astype(dtype)
gamma = np.random.uniform(low, high, size=(weight_shape[0],)).astype(dtype)
beta = np.random.uniform(low, high, size=(weight_shape[0],)).astype(dtype)
mean = np.random.uniform(low, high, size=(weight_shape[0],)).astype(dtype)
variance = np.random.uniform(low, high, size=(weight_shape[0],)).astype(dtype)
inputs_np = [data]
params_np = {"weight": weight}
if has_bias:
params_np["bias"] = bias
if has_bn:
params_np.update({"gamma": gamma, "beta": beta, "mean": mean, "variance": variance})
mod = get_relax_conv2d_mod(
data_shape,
weight_shape,
stride=stride,
dilation=dilation,
padding=padding,
weight_layout=weight_format,
groups=groups,
dtype=dtype,
has_bias=has_bias,
has_bn=has_bn,
has_activation=has_activation,
has_pad=has_pad,
is_depthwise=is_depthwise,
)
clml_codegen = get_clml_conv2d_codegen(
data_shape,
weight_shape,
stride=stride,
dilation=dilation,
padding=padding,
weight_layout=weight_format,
groups=groups,
dtype=dtype,
has_bias=has_bias,
has_bn=has_bn,
has_activation=has_activation,
has_pad=has_pad,
is_depthwise=is_depthwise,
)
verify(mod, clml_codegen, inputs_np, params_np)
@pytest.mark.gpu
@skip_unless_adreno_clml
@pytest.mark.parametrize("dtype", ["float32"])
@pytest.mark.parametrize(
"dshape, kshape, channels, kernel_size, strides, padding, out_shape",
[
((1, 256, 100, 100), (64, 256, 4, 4), 64, (4, 4), (2, 2), (0, 0, 0, 0), (1, 64, 202, 202)),
((1, 64, 200, 200), (64, 64, 4, 4), 64, (4, 4), (2, 2), (1, 1, 1, 1), (1, 64, 400, 400)),
((1, 64, 200, 200), (64, 64, 4, 4), 64, (4, 4), (2, 2), (1, 1, 1, 1), (1, 64, 400, 400)),
((1, 64, 400, 400), (16, 64, 4, 4), 16, (4, 4), (2, 2), (1, 1, 1, 1), (1, 16, 800, 800)),
],
)
def test_conv2d_transpose(
dshape, kshape, channels, kernel_size, strides, padding, dtype, out_shape
):
low, high = -1, 1
data = np.random.uniform(low, high, size=dshape).astype(dtype)
weight = np.random.uniform(low, high, size=kshape).astype(dtype)
inputs_np = [data]
params_np = {"weight": weight}
mod = get_relax_conv2d_transpose_mod(
dshape,
kshape,
channels=channels,
stride=strides,
padding=padding,
dtype=dtype,
)
clml_codegen = get_conv2d_transpose_expected_codegen(
dshape=dshape,
kshape=kshape,
channels=channels,
kernel_size=kernel_size,
strides=strides,
padding=padding,
dilation=(1, 1),
dtype=dtype,
output_shape=out_shape,
)
verify(mod, clml_codegen, inputs_np, params_np, target_test=False)
@pytest.mark.gpu
@skip_unless_adreno_clml
@pytest.mark.skipif(
CLML_VERSION < 3,
reason="Requires compiler supporting CLML v5 or above",
)
@pytest.mark.parametrize("dtype", ["float32"])
@pytest.mark.parametrize(
"trials",
[
[(1, 64, 14, 14), 1, 3e-4],
[(1, 14, 256, 256), 1, 3e-4],
[(1, 14, 256, 256), 1, 3e-4],
[(1, 256, 1, 1), 1, 3e-4],
],
)
def test_batchnorm(dtype, trials):
low, high = 0, 1
(input_shape, axis, epsilon) = trials
channels = input_shape[axis]
def _get_axis_tuple(axis):
if axis == 0:
return (1, 2, 3)
elif axis == 1:
return (0, 2, 3)
elif axis == 2:
return (0, 1, 3)
else:
return (0, 1, 2)
data = np.random.uniform(low, high, size=(input_shape)).astype(dtype)
gamma = np.random.uniform(low, high, size=(channels)).astype(dtype)
beta = np.random.uniform(low, high, size=(channels)).astype(dtype)
mean = np.mean(data, _get_axis_tuple(axis), keepdims=False)
variance = np.var(data, _get_axis_tuple(axis), keepdims=False)
inputs_np = [data]
params_np = {"gamma": gamma, "beta": beta, "moving_mean": mean, "moving_var": variance}
mod = get_batchnorm_mod(input_shape, channels, axis, epsilon, dtype)
clml_codegen = [
{
"attrs": {"dtype": [dtype], "shape": [input_shape]},
"name": "",
"op": "input",
},
{"attrs": {"dtype": [dtype], "shape": [[channels]]}, "name": "", "op": "const"},
{"attrs": {"dtype": [dtype], "shape": [[channels]]}, "name": "", "op": "const"},
{"attrs": {"dtype": [dtype], "shape": [[channels]]}, "name": "", "op": "const"},
{"attrs": {"dtype": [dtype], "shape": [[channels]]}, "name": "", "op": "const"},
{
"attrs": {
"axis": axis,
"center": 1,
"dtype": [dtype],
"momentum": 0.10000000000000001,
"epsilon": 0.00029999999999999997,
"num_inputs": 5,
"num_outputs": 1,
"scale": 1,
"training": 1,
"shape": [input_shape],
},
"inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0], [3, 0, 0], [4, 0, 0]],
"name": "",
"op": "kernel",
},
]
verify(mod, clml_codegen, inputs_np, params_np)
@pytest.mark.gpu
@skip_unless_adreno_clml
@pytest.mark.parametrize("dtype", ["float32"])
@pytest.mark.parametrize(
"a_shape, b_shape, op",
[
((1, 64, 14, 14), (1, 64, 14, 14), R.add),
((1, 256), (1, 256), R.add),
((1, 64, 14, 14), (1, 64, 14, 14), R.subtract),
((1, 256), (1, 256), R.subtract),
((1, 64, 14, 14), (1, 64, 14, 14), R.multiply),
((1, 256), (1, 256), R.multiply),
((1, 64, 14, 14), (1, 64, 14, 14), R.divide),
((1, 256), (1, 256), R.divide),
((1, 64, 14, 14), (1, 64, 14, 14), R.minimum),
((1, 256), (1, 256), R.minimum),
((1, 64, 14, 14), (1, 64, 14, 14), R.maximum),
((1, 256), (1, 256), R.maximum),
],
)
@pytest.mark.gpu
@skip_unless_adreno_clml
def test_binary_ops(a_shape, b_shape, op, dtype):
(mod, inputs_np) = get_binary_op_mod(a_shape, b_shape, op, dtype)
clml_codegen = [
{
"attrs": {
"dtype": [dtype],
"shape": [a_shape],
},
"name": "",
"op": "input",
},
{
"attrs": {
"dtype": [dtype],
"shape": [b_shape],
},
"name": "",
"op": "input",
},
{
"attrs": {
"dtype": [dtype],
"num_inputs": 2,
"num_outputs": 1,
"shape": [a_shape],
},
"inputs": [[0, 0, 0], [1, 0, 0]],
"name": "",
"op": "kernel",
},
]
verify(mod, clml_codegen, inputs_np, {})
@pytest.mark.gpu
@skip_unless_adreno_clml
@pytest.mark.parametrize(
"dtype",
[
"float32",
],
)
@pytest.mark.parametrize(
"a_shape, op",
[
((1, 64, 14, 14), R.nn.relu),
((1, 256, 1, 1), R.nn.relu),
((1, 14, 256, 256), R.nn.relu),
((1, 14, 14, 256), R.nn.relu),
],
)
@pytest.mark.gpu
@skip_unless_adreno_clml
def test_unary_ops(a_shape, op, dtype):
(mod, inputs_np) = get_unary_op_mod(a_shape, op, dtype)
clml_codegen = [
{
"attrs": {
"dtype": [dtype],
"shape": [a_shape],
},
"name": "",
"op": "input",
},
{
"attrs": {
"activation_type": "relu",
"dtype": [dtype],
"num_inputs": 1,
"num_outputs": 1,
"shape": [a_shape],
},
"inputs": [[0, 0, 0]],
"name": "",
"op": "kernel",
},
]
verify(mod, clml_codegen, inputs_np, {})
@pytest.mark.gpu
@skip_unless_adreno_clml
@pytest.mark.parametrize("dtype", ["float32"])
@pytest.mark.parametrize(
"trials",
[
[(1, 64, 147, 147), (3, 3), (2, 2), (1, 1), (0, 0, 0, 0), False],
[(1, 256, 17, 17), (3, 3), (1, 1), (1, 1), (0, 0, 0, 0), False],
[(1, 1024, 14, 14), (3, 3), (1, 1), (1, 1), (0, 0, 0, 0), False],
# With padding is realized as nn.pad + pool
# [(1, 32, 256, 256), (3, 3), (2, 2), (1, 1), (1, 1, 1, 1), True],
# [(1, 32, 256, 256), (3, 3), (2, 2), (1, 1), (0, 1, 0, 1), True],
# [(1, 32, 256, 256), (2, 2), (2, 2), (1, 1), (1, 1, 1, 1), True],
# [(1, 32, 256, 256), (2, 2), (2, 2), (1, 1), (1, 0, 1, 0), True],
],
)
def test_max_pool(dtype, trials):
low, high = -1, 1
(input_shape, pool_size, stride, dilation, padding, has_pad) = trials
mod = get_relax_maxpool_mod(input_shape, dtype, pool_size, stride, dilation, padding, has_pad)
clml_codegen = get_maxpool_expected_codegen(
input_shape, pool_size, stride, padding, "maxpool2d", dtype
)
inputs_np = [np.random.uniform(low, high, size=input_shape).astype(dtype)]
verify(mod, clml_codegen, inputs_np, {})
@pytest.mark.gpu
@skip_unless_adreno_clml
@pytest.mark.parametrize("dtype", ["float32"])
@pytest.mark.parametrize(
"trials",
[
[(1, 64, 147, 147), (3, 3), (2, 2), (1, 1), (0, 0, 0, 0), False],
[(1, 256, 17, 17), (3, 3), (1, 1), (1, 1), (0, 0, 0, 0), False],
[(1, 1024, 14, 14), (3, 3), (1, 1), (1, 1), (0, 0, 0, 0), False],
# With padding is realized as nn.pad + pool
# [(1, 32, 256, 256), (3, 3), (2, 2), (1, 1), (1, 1, 1, 1), True],
# [(1, 32, 256, 256), (3, 3), (2, 2), (1, 1), (0, 1, 0, 1), True],
# [(1, 32, 256, 256), (2, 2), (2, 2), (1, 1), (1, 1, 1, 1), True],
# [(1, 32, 256, 256), (2, 2), (2, 2), (1, 1), (1, 0, 1, 0), True],
],
)
def test_avg_pool(dtype, trials):
low, high = -1, 1
(input_shape, pool_size, stride, dilation, padding, has_pad) = trials
mod = get_relax_avgpool_mod(input_shape, dtype, pool_size, stride, dilation, padding, has_pad)
clml_codegen = get_avgpool_expected_codegen(
input_shape, pool_size, stride, padding, "avg_pool2d", dtype
)
inputs_np = [np.random.uniform(low, high, size=input_shape).astype(dtype)]
params_np = {}
verify(mod, clml_codegen, inputs_np, {})
@pytest.mark.gpu
@skip_unless_adreno_clml
@pytest.mark.parametrize("dtype", ["float32"])
@pytest.mark.parametrize(
"trials",
[
[(1, 3, 32, 32), (1, 4, -1, 32)],
[(1, 4, 8, 32), (1, 4, -1, 16)],
[(1, 64, 3, 3), (1, 32, 3, -1)],
],
)
def test_reshape(dtype, trials):
low, high = -1, 1
(input_shape, output_shape) = trials
mod = get_relax_reshape_mod(input_shape, output_shape, dtype)
clml_codegen = get_relax_reshape_codegen(input_shape, output_shape, dtype)
inputs_np = [np.random.uniform(low, high, size=input_shape).astype(dtype)]
verify(mod, clml_codegen, inputs_np, {})
@pytest.mark.skip(reason="Codegen Comparision Failing")
@pytest.mark.gpu
@skip_unless_adreno_clml
@pytest.mark.parametrize("dtype", ["float32"])
@pytest.mark.parametrize(
"trials",
[
[(1, 64, 147, 147), True],
[(1, 256, 17, 17), False],
[(1, 1024, 14, 14), True],
[(1, 32, 256, 256), False],
],
)
def test_global_avg_pool(dtype, trials):
"""Test function for global average pooling."""
low, high = -1, 1
(input_shape, keep_dims) = trials
N, C, H, W = input_shape
pool_size, stride, padding = (H, W), (1, 1), (0, 0, 0, 0)
mod = get_relax_global_avgpool_mod(input_shape, keep_dims, dtype)
clml_codegen = get_global_maxpool_expected_codegen(
input_shape, pool_size, stride, padding, "global_max", dtype
)
inputs_np = [np.random.uniform(low, high, size=input_shape).astype(dtype)]
verify(mod, clml_codegen, inputs_np, {})
@pytest.mark.gpu
@skip_unless_adreno_clml
@pytest.mark.parametrize("dtype", ["float32"])
@pytest.mark.parametrize(
"trials",
[
[(1, 64, 147, 147), True],
[(1, 256, 17, 17), False],
[(1, 1024, 14, 14), True],
[(1, 32, 256, 256), False],
],
)
def test_global_max_pool(dtype, trials):
"""Test function for global average pooling."""
low, high = -1, 1
(input_shape, keep_dims) = trials
N, C, H, W = input_shape
pool_size, stride, padding = (H, W), (1, 1), (0, 0, 0, 0)
mod = get_relax_global_maxpool_mod(input_shape, keep_dims, dtype)
clml_codegen = get_global_maxpool_expected_codegen(
input_shape, pool_size, stride, padding, "global_max", dtype
)
inputs_np = [np.random.uniform(low, high, size=input_shape).astype(dtype)]
verify(mod, clml_codegen, inputs_np, {})
@pytest.mark.skipif(
CLML_VERSION < 5,
reason="Requires target device with CLML v5 or above",
)
@pytest.mark.parametrize(
"K, N, M",
[
(4096, 11008, 256),
(2048, 32768, 128),
(4096, 4096, 512),
(4096, 22016, 64),
(16384, 2048, 128),
(2048, 2560, 1024),
(3072, 9216, 256),
(14336, 4096, 128),
(1536, 17920, 128),
(8960, 1536, 1024),
],
)
def test_dequant_matmul(K, N, M):
x_data = np.random.uniform(-0.1, 0.1, size=(1, M, K)).astype("float16")
weight = np.random.randint(0, 100, size=(K // 8, N)).astype("uint32")
scale = np.random.uniform(-0.1, 0.1, size=(K // 32, N)).astype("float16")
mod = get_dequant_matmul_module(K, N)
clml_codegen = [
{
"op": "input",
"name": "",
"attrs": {"shape": [[[K // 8, N]]], "dtype": [["uint32"]]},
},
{
"op": "input",
"name": "",
"attrs": {"shape": [[[K // 32, N]]], "dtype": [["float16"]]},
},
{
"op": "input",
"name": "",
"attrs": {"shape": [[[1, -1, K]]], "dtype": [["float16"]]},
},
{
"op": "kernel",
"name": "",
"inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]],
"attrs": {
"dtype": ["float16"],
"num_inputs": 3,
"num_outputs": 1,
"out_dtype": ["float16"],
"shape": [[1, -1, N]],
},
},
]
inputs_np = [x_data, weight, scale]
verify(mod, clml_codegen, inputs_np, {}, target_minimum_clml_version=5)
@pytest.mark.skipif(
CLML_VERSION < 5,
reason="Requires compiler supporting CLML v5 or above",
)
@pytest.mark.parametrize(
"K, N",
[
(4096, 11008),
(2048, 32768),
(4096, 4096),
(4096, 22016),
(16384, 2048),
(2048, 2560),
(3072, 9216),
(4096, 28672),
(14336, 4096),
(1536, 17920),
(8960, 1536),
],
)
def test_dequant_vec_matmul(K, N):
x_data = np.random.uniform(-0.1, 0.1, size=(1, 1, K)).astype("float16")
weight = np.random.randint(0, 100, size=(K // 8, N)).astype("uint32")
scale = np.random.uniform(-0.1, 0.1, size=(K // 32, N)).astype("float16")
mod = get_dequant_vec_matmul_module(K, N)
clml_codegen = [
{
"op": "input",
"name": "",
"attrs": {"shape": [[[K // 8, -1]]], "dtype": [["uint32"]]},
},
{
"op": "input",
"name": "",
"attrs": {"shape": [[[K // 32, -1]]], "dtype": [["float16"]]},
},
{
"op": "input",
"name": "",
"attrs": {"shape": [[[1, 1, K]]], "dtype": [["float16"]]},
},
{
"op": "kernel",
"name": "",
"inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]],
"attrs": {
"dtype": ["float16"],
"num_inputs": 3,
"num_outputs": 1,
"out_dtype": ["float16"],
"shape": [[1, 1, -1]],
},
},
]
inputs_np = (x_data, weight, scale)
verify(mod, clml_codegen, inputs_np, {}, target_minimum_clml_version=5)
if __name__ == "__main__":
tvm.testing.main()