blob: 91bbeed56b66481dafac9a0f55aeb5263ebf51cb [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import tvm
import tvm.testing
import numpy as np
from tvm.script import tir as T, ir as I
@tvm.testing.requires_rocm
def test_rocm_inf_nan():
def check_inf_nan(dev, n, value, dtype):
@I.ir_module
class Module:
@T.prim_func
def main(A: T.Buffer((1,), dtype), C: T.Buffer((1,), dtype)):
T.func_attr({"tir.noalias": True})
for i_0 in T.thread_binding(1, thread="blockIdx.x"):
for i_1 in T.thread_binding(128, thread="threadIdx.x"):
with T.sblock("C"):
v_i = T.axis.spatial(1, i_0 * 128 + i_1)
T.where(i_0 * 128 + i_1 < 1)
T.reads()
T.writes(C[v_i])
C[v_i] = T.Cast(dtype, value)
fun = tvm.compile(Module, "rocm")
a = tvm.runtime.empty((n,), dtype, dev)
c = tvm.runtime.empty((n,), dtype, dev)
# Only need to test compiling here
fun(a, c)
dev = tvm.rocm(0)
check_inf_nan(dev, 1, -float("inf"), "float32")
check_inf_nan(dev, 1, -float("inf"), "float64")
check_inf_nan(dev, 1, float("inf"), "float32")
check_inf_nan(dev, 1, float("inf"), "float64")
check_inf_nan(dev, 1, float("nan"), "float32")
check_inf_nan(dev, 1, float("nan"), "float64")
@tvm.testing.requires_rocm
def test_rocm_copy():
def check_rocm(dtype, n):
dev = tvm.rocm(0)
a_np = np.random.uniform(size=(n,)).astype(dtype)
a = tvm.runtime.empty((n,), dtype, dev).copyfrom(a_np)
b_np = a.numpy()
tvm.testing.assert_allclose(a_np, b_np)
tvm.testing.assert_allclose(a_np, a.numpy())
for _ in range(100):
dtype = np.random.choice(["float32", "float16", "int8", "int32"])
logN = np.random.randint(1, 15)
peturb = np.random.uniform(low=0.5, high=1.5)
check_rocm(dtype, int(peturb * (2**logN)))
@tvm.testing.requires_rocm
def test_rocm_vectorize_add():
def check_rocm(dtype, n, lanes):
vec_dtype = "%sx%d" % (dtype, lanes)
num_blocks = n // 4
@I.ir_module
class Module:
@T.prim_func
def main(A: T.Buffer((n,), vec_dtype), B: T.Buffer((n,), vec_dtype)):
T.func_attr({"tir.noalias": True})
for i_0 in T.thread_binding(num_blocks, thread="blockIdx.x"):
for i_1 in T.thread_binding(4, thread="threadIdx.x"):
with T.sblock("B"):
v_i = T.axis.spatial(n, i_0 * 4 + i_1)
T.reads(A[v_i])
T.writes(B[v_i])
B[v_i] = A[v_i] + T.Broadcast(T.Cast(dtype, 1), lanes)
fun = tvm.compile(Module, target="rocm")
dev = tvm.rocm(0)
a = tvm.runtime.empty((n,), vec_dtype, dev).copyfrom(np.random.uniform(size=(n, lanes)))
c = tvm.runtime.empty((n,), vec_dtype, dev)
fun(a, c)
tvm.testing.assert_allclose(c.numpy(), a.numpy() + 1)
check_rocm("float32", 64, 2)
check_rocm("float16", 64, 2)
@tvm.testing.requires_rocm
def test_rocm_warp_shuffle():
@T.prim_func
def func(
A_handle: T.handle,
):
A = T.match_buffer(A_handle, (32,), dtype="float32")
for bx in T.thread_binding(1, thread="blockIdx.x"):
for tx in T.thread_binding(32, thread="threadIdx.x"):
with T.sblock("test"):
A_local = T.alloc_buffer((1,), "float32", scope="local")
mask = T.alloc_buffer((1,), "uint32", scope="local")
t0 = T.alloc_buffer((1,), "float32", scope="local")
A_local[0] = A[tx]
A_local[0] = T.tvm_warp_shuffle(mask[0], A_local[0], 0, 32, 32)
A[tx] = A_local[0]
mod = tvm.compile(func, target="rocm")
dev = tvm.rocm(0)
a = tvm.runtime.tensor(np.random.uniform(size=(32,)).astype("float32"), dev)
mod(a)
tvm.testing.assert_allclose(a.numpy(), np.ones((32,)) * a.numpy()[0])
@tvm.testing.requires_rocm
def test_rocm_vectorized_exp():
@T.prim_func
def func(
A_handle: T.handle,
B_handle: T.handle,
):
A = T.match_buffer(A_handle, (4,), dtype="float32")
B = T.match_buffer(B_handle, (4,), dtype="float32")
for bx in T.thread_binding(1, thread="blockIdx.x"):
for tx in T.thread_binding(1, thread="threadIdx.x"):
with T.sblock("test"):
for i in T.vectorized(0, 4):
B[i] = T.exp2(A[i])
mod = tvm.compile(func, target="rocm")
dev = tvm.rocm(0)
a = tvm.runtime.tensor(np.ones((4,)).astype("float32"), dev)
b = tvm.runtime.tensor(np.zeros((4,)).astype("float32"), dev)
mod(a, b)
tvm.testing.assert_allclose(b.numpy(), np.exp2(a.numpy()))