# blob: d95379d80817872bee76a5b2c2521885f3c7df38 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""codegen related to bool types"""
import numpy as np
import tvm
import tvm.testing
from tvm.script import ir as I
from tvm.script import tir as T
@tvm.testing.uses_gpu
def test_cmp_load_store(target, dev):
    """Check that a bool buffer can be stored to and loaded from in generated code.

    The kernel first stores the comparison ``B < A`` into an intermediate
    ``bool`` buffer ``C``, then reads ``C`` back, combines it with
    ``1.0 < A`` and writes the result, cast to ``float32``, into ``D``.
    The output is compared against the equivalent NumPy expression.

    Parameters
    ----------
    target
        Target to compile for; any target whose kind is not ``"llvm"`` is
        compiled from the thread-bound ``GPUModule``.
    dev
        Device on which the input and output tensors are allocated.
    """

    # Thread-bound variant: the 32 iterations are split 8 x 4.
    @I.ir_module
    class GPUModule:
        @T.prim_func
        def main(
            A: T.Buffer((32,), "float32"),
            B: T.Buffer((32,), "float32"),
            D: T.Buffer((32,), "float32"),
        ):
            T.func_attr({"tir.noalias": True})
            C = T.alloc_buffer((32,), "bool")
            # The outer and inner loops must use different thread axes:
            # binding both to blockIdx.x would give that axis two
            # conflicting extents (8 and 4).
            for i0_0 in T.thread_binding(8, thread="blockIdx.x"):
                for i0_1 in T.thread_binding(4, thread="threadIdx.x"):
                    with T.sblock("C"):
                        v_i0 = T.axis.spatial(32, i0_0 * 4 + i0_1)
                        T.reads(B[v_i0], A[v_i0])
                        T.writes(C[v_i0])
                        C[v_i0] = B[v_i0] < A[v_i0]
            for i0_0 in T.thread_binding(8, thread="blockIdx.x"):
                for i0_1 in T.thread_binding(4, thread="threadIdx.x"):
                    with T.sblock("D"):
                        v_i0 = T.axis.spatial(32, i0_0 * 4 + i0_1)
                        T.reads(C[v_i0], A[v_i0])
                        T.writes(D[v_i0])
                        D[v_i0] = T.Cast("float32", C[v_i0] and T.float32(1.0) < A[v_i0])

    # Plain serial-loop variant, used when the target kind is "llvm".
    @I.ir_module
    class CPUModule:
        @T.prim_func
        def main(
            A: T.Buffer((32,), "float32"),
            B: T.Buffer((32,), "float32"),
            D: T.Buffer((32,), "float32"),
        ):
            T.func_attr({"tir.noalias": True})
            C = T.alloc_buffer((32,), "bool")
            for i0 in range(32):
                with T.sblock("C"):
                    v_i0 = T.axis.spatial(32, i0)
                    T.reads(B[v_i0], A[v_i0])
                    T.writes(C[v_i0])
                    C[v_i0] = B[v_i0] < A[v_i0]
            for i0 in range(32):
                with T.sblock("D"):
                    v_i0 = T.axis.spatial(32, i0)
                    T.reads(C[v_i0], A[v_i0])
                    T.writes(D[v_i0])
                    D[v_i0] = T.Cast("float32", C[v_i0] and T.float32(1.0) < A[v_i0])

    # Must match the (32,) buffer shapes declared in both modules above.
    arr_size = 32
    is_gpu = tvm.target.Target(target).kind.name != "llvm"
    mod = GPUModule if is_gpu else CPUModule
    f = tvm.compile(mod, target=target)

    # Sample from [0, 2) so that `A > 1` holds for some elements. With the
    # default [0, 1) range the expected output is always all zeros, which is
    # also the initial content of `d`, so a kernel that wrote nothing would
    # still pass.
    a_np = np.random.uniform(0.0, 2.0, size=arr_size).astype("float32")
    b_np = np.random.uniform(0.0, 2.0, size=arr_size).astype("float32")
    a = tvm.runtime.tensor(a_np, dev)
    b = tvm.runtime.tensor(b_np, dev)
    d = tvm.runtime.tensor(np.zeros(arr_size, dtype="float32"), dev)
    f(a, b, d)

    # NumPy reference: (B < A) and (1.0 < A), cast to float32.
    np.testing.assert_equal(
        d.numpy(),
        np.logical_and(a_np > b_np, a_np > 1).astype("float32"),
    )
# When executed as a script, hand control to tvm.testing.main().
if __name__ == "__main__":
    tvm.testing.main()