tests/python/codegen/test_target_codegen_bool.py - tvm - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 """codegen related to bool types"""

 import numpy as np

 import tvm
 import tvm.testing
 from tvm.script import ir as I
 from tvm.script import tir as T


 @tvm.testing.uses_gpu
 def test_cmp_load_store(target, dev):
     @I.ir_module
     class GPUModule:
         @T.prim_func
         def main(
             A: T.Buffer((32,), "float32"),
             B: T.Buffer((32,), "float32"),
             D: T.Buffer((32,), "float32"),
         ):
             T.func_attr({"tir.noalias": True})
             C = T.alloc_buffer((32,), "bool")
             for i0_0 in T.thread_binding(8, thread="blockIdx.x"):
                 for i0_1 in T.thread_binding(4, thread="blockIdx.x"):
                     with T.sblock("C"):
                         v_i0 = T.axis.spatial(32, i0_0 * 4 + i0_1)
                         T.reads(B[v_i0], A[v_i0])
                         T.writes(C[v_i0])
                         C[v_i0] = B[v_i0] < A[v_i0]
             for i0_0 in T.thread_binding(8, thread="blockIdx.x"):
                 for i0_1 in T.thread_binding(4, thread="blockIdx.x"):
                     with T.sblock("D"):
                         v_i0 = T.axis.spatial(32, i0_0 * 4 + i0_1)
                         T.reads(C[v_i0], A[v_i0])
                         T.writes(D[v_i0])
                         D[v_i0] = T.Cast("float32", C[v_i0] and T.float32(1.0) < A[v_i0])

     @I.ir_module
     class CPUModule:
         @T.prim_func
         def main(
             A: T.Buffer((32,), "float32"),
             B: T.Buffer((32,), "float32"),
             D: T.Buffer((32,), "float32"),
         ):
             T.func_attr({"tir.noalias": True})
             C = T.alloc_buffer((32,), "bool")
             for i0 in range(32):
                 with T.sblock("C"):
                     v_i0 = T.axis.spatial(32, i0)
                     T.reads(B[v_i0], A[v_i0])
                     T.writes(C[v_i0])
                     C[v_i0] = B[v_i0] < A[v_i0]
             for i0 in range(32):
                 with T.sblock("D"):
                     v_i0 = T.axis.spatial(32, i0)
                     T.reads(C[v_i0], A[v_i0])
                     T.writes(D[v_i0])
                     D[v_i0] = T.Cast("float32", C[v_i0] and T.float32(1.0) < A[v_i0])

     arr_size = 32
     is_gpu = tvm.target.Target(target).kind.name != "llvm"
     mod = GPUModule if is_gpu else CPUModule

     f = tvm.compile(mod, target=target)

     a_np = np.random.uniform(size=arr_size).astype("float32")
     b_np = np.random.uniform(size=arr_size).astype("float32")
     a = tvm.runtime.tensor(a_np, dev)
     b = tvm.runtime.tensor(b_np, dev)
     d = tvm.runtime.tensor(np.zeros(arr_size, dtype="float32"), dev)
     f(a, b, d)
     np.testing.assert_equal(
         d.numpy(),
         np.logical_and(a_np > b_np, a_np > 1).astype("float32"),
     )


 if __name__ == "__main__":
     tvm.testing.main()
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	"""codegen related to bool types"""

	import numpy as np

	import tvm
	import tvm.testing
	from tvm.script import ir as I
	from tvm.script import tir as T


	@tvm.testing.uses_gpu
	def test_cmp_load_store(target, dev):
	@I.ir_module
	class GPUModule:
	@T.prim_func
	def main(
	A: T.Buffer((32,), "float32"),
	B: T.Buffer((32,), "float32"),
	D: T.Buffer((32,), "float32"),
	):
	T.func_attr({"tir.noalias": True})
	C = T.alloc_buffer((32,), "bool")
	for i0_0 in T.thread_binding(8, thread="blockIdx.x"):
	for i0_1 in T.thread_binding(4, thread="blockIdx.x"):
	with T.sblock("C"):
	v_i0 = T.axis.spatial(32, i0_0 * 4 + i0_1)
	T.reads(B[v_i0], A[v_i0])
	T.writes(C[v_i0])
	C[v_i0] = B[v_i0] < A[v_i0]
	for i0_0 in T.thread_binding(8, thread="blockIdx.x"):
	for i0_1 in T.thread_binding(4, thread="blockIdx.x"):
	with T.sblock("D"):
	v_i0 = T.axis.spatial(32, i0_0 * 4 + i0_1)
	T.reads(C[v_i0], A[v_i0])
	T.writes(D[v_i0])
	D[v_i0] = T.Cast("float32", C[v_i0] and T.float32(1.0) < A[v_i0])

	@I.ir_module
	class CPUModule:
	@T.prim_func
	def main(
	A: T.Buffer((32,), "float32"),
	B: T.Buffer((32,), "float32"),
	D: T.Buffer((32,), "float32"),
	):
	T.func_attr({"tir.noalias": True})
	C = T.alloc_buffer((32,), "bool")
	for i0 in range(32):
	with T.sblock("C"):
	v_i0 = T.axis.spatial(32, i0)
	T.reads(B[v_i0], A[v_i0])
	T.writes(C[v_i0])
	C[v_i0] = B[v_i0] < A[v_i0]
	for i0 in range(32):
	with T.sblock("D"):
	v_i0 = T.axis.spatial(32, i0)
	T.reads(C[v_i0], A[v_i0])
	T.writes(D[v_i0])
	D[v_i0] = T.Cast("float32", C[v_i0] and T.float32(1.0) < A[v_i0])

	arr_size = 32
	is_gpu = tvm.target.Target(target).kind.name != "llvm"
	mod = GPUModule if is_gpu else CPUModule

	f = tvm.compile(mod, target=target)

	a_np = np.random.uniform(size=arr_size).astype("float32")
	b_np = np.random.uniform(size=arr_size).astype("float32")
	a = tvm.runtime.tensor(a_np, dev)
	b = tvm.runtime.tensor(b_np, dev)
	d = tvm.runtime.tensor(np.zeros(arr_size, dtype="float32"), dev)
	f(a, b, d)
	np.testing.assert_equal(
	d.numpy(),
	np.logical_and(a_np > b_np, a_np > 1).astype("float32"),
	)


	if __name__ == "__main__":
	tvm.testing.main()