# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
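"""Tests for the relax.transform.IPCAllReduceRewrite pass, which rewrites
"runtime.disco.allreduce" calls into the CUDA IPC custom all-reduce."""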
import tvm
import tvm.testing
from tvm import relax
from tvm.script import ir as I
from tvm.script import relax as R
from tvm.script import tir as T
def test_ipc_allreduce_rewrite():
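    """A sum all-reduce is rewritten to the CUDA IPC custom all-reduce, with the
    all-reduce input re-allocated in the "ipc_memory" storage scope.

    The rewrite only applies when the packed function
    "runtime.disco.cuda_ipc.custom_allreduce" is registered; otherwise the
    module is expected to remain unchanged.
    """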
    @I.ir_module
    class Module:
        @R.function(pure=False)
        def main(shape: R.Shape(["m", "n"])):  # type: ignore
            m = T.int64()
            n = T.int64()
            alloc: R.Tensor((m, n), dtype="float16") = R.builtin.alloc_tensor(  # type: ignore
                R.shape([m, n]), R.dtype("float16"), R.prim_value(0), R.str("global")
            )
            lv1: R.Tensor((m, n), dtype="float16") = alloc  # type: ignore
            alloc1: R.Tensor((m, n), dtype="float16") = R.builtin.alloc_tensor(  # type: ignore
                R.shape([m, n]), R.dtype("float16"), R.prim_value(0), R.str("global")
            )
            _: R.Object = R.call_packed(
                "runtime.disco.allreduce", lv1, R.shape([0]), R.prim_value(True), alloc1
            )
            return alloc1

    @I.ir_module
    class Expected:
        @R.function(pure=False)
        def main(shape: R.Shape(["m", "n"])):  # type: ignore
            m = T.int64()
            n = T.int64()
            alloc: R.Tensor((m, n), dtype="float16") = R.builtin.alloc_tensor(  # type: ignore
                R.shape([m, n]), R.dtype("float16"), R.prim_value(0), R.str("ipc_memory")
            )
            lv1: R.Tensor((m, n), dtype="float16") = alloc  # type: ignore
            alloc1: R.Tensor((m, n), dtype="float16") = R.builtin.alloc_tensor(  # type: ignore
                R.shape([m, n]), R.dtype("float16"), R.prim_value(0), R.str("global")
            )
            _: R.Object = R.call_packed(
                "runtime.disco.cuda_ipc.custom_allreduce", lv1, R.prim_value(1), alloc1
            )
            return alloc1

    allreduce_strategy = 1
    mod = relax.transform.IPCAllReduceRewrite(allreduce_strategy)(Module)
    tvm.ir.assert_structural_equal(
        mod,
        (
            Expected
            if tvm.get_global_func("runtime.disco.cuda_ipc.custom_allreduce", allow_missing=True)
            is not None
            else Module
        ),
    )

def test_ipc_allreduce_spread_along_reshape():
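    """The "ipc_memory" storage scope is propagated backwards through a reshape,
    so that the allocation feeding the reshaped all-reduce input is rewritten."""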
    @I.ir_module
    class Module:
        @R.function(pure=False)
        def main(shape: R.Shape(["m", "n"])):  # type: ignore
            m = T.int64()
            n = T.int64()
            alloc: R.Tensor((m, n), dtype="float16") = R.builtin.alloc_tensor(  # type: ignore
                R.shape([m, n]), R.dtype("float16"), R.prim_value(0), R.str("global")
            )
            lv1: R.Tensor((m * n,), dtype="float16") = R.reshape(alloc, (m * n,))  # type: ignore
            alloc1: R.Tensor((m * n,), dtype="float16") = R.builtin.alloc_tensor(  # type: ignore
                R.shape([m * n]), R.dtype("float16"), R.prim_value(0), R.str("global")
            )
            _: R.Object = R.call_packed(
                "runtime.disco.allreduce", lv1, R.shape([0]), R.prim_value(False), alloc1
            )
            return alloc1

    @I.ir_module
    class Expected:
        @R.function(pure=False)
        def main(
            shape: R.Shape(["m", "n"]),  # type: ignore
        ) -> R.Tensor(("m * n",), dtype="float16"):  # type: ignore
            m = T.int64()
            n = T.int64()
            alloc: R.Tensor((m, n), dtype="float16") = R.builtin.alloc_tensor(  # type: ignore
                R.shape([m, n]), R.dtype("float16"), R.prim_value(0), R.str("ipc_memory")
            )
            lv1: R.Tensor((m * n,), dtype="float16") = R.reshape(  # type: ignore
                alloc, R.shape([m * n])
            )
            alloc1: R.Tensor((m * n,), dtype="float16") = R.builtin.alloc_tensor(  # type: ignore
                R.shape([m * n]), R.dtype("float16"), R.prim_value(0), R.str("global")
            )
            _: R.Object = R.call_packed(
                "runtime.disco.cuda_ipc.custom_allreduce", lv1, R.prim_value(1), alloc1
            )
            return alloc1

    allreduce_strategy = 1
    mod = relax.transform.IPCAllReduceRewrite(allreduce_strategy)(Module)
    tvm.ir.assert_structural_equal(
        mod,
        (
            Expected
            if tvm.get_global_func("runtime.disco.cuda_ipc.custom_allreduce", allow_missing=True)
            is not None
            else Module
        ),
    )

def test_ipc_allreduce_skip_reducer_other_than_sum():
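    """An all-reduce whose reducer is not sum is left untouched by the pass."""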
    @I.ir_module
    class Module:
        @R.function(pure=False)
        def main(shape: R.Shape(["m", "n"])):  # type: ignore
            m = T.int64()
            n = T.int64()
            alloc: R.Tensor((m, n), dtype="float16") = R.builtin.alloc_tensor(  # type: ignore
                R.shape([m, n]), R.dtype("float16"), R.prim_value(0), R.str("global")
            )
            lv1: R.Tensor((m, n), dtype="float16") = alloc  # type: ignore
            alloc1: R.Tensor((m, n), dtype="float16") = R.builtin.alloc_tensor(  # type: ignore
                R.shape([m, n]), R.dtype("float16"), R.prim_value(0), R.str("global")
            )
            _: R.Object = R.call_packed(
                "runtime.disco.allreduce", lv1, R.shape([1]), R.prim_value(True), alloc1
            )
            return alloc1

    allreduce_strategy = 1
    mod = relax.transform.IPCAllReduceRewrite(allreduce_strategy)(Module)
    tvm.ir.assert_structural_equal(mod, Module)

if __name__ == "__main__":
    tvm.testing.main()