| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| """ |
| .. _ir_module: |
| |
| IRModule |
| ======== |
This tutorial presents the core abstraction of Apache TVM Unity, the IRModule.
An IRModule encompasses the **entirety** of an ML model, incorporating the
computational graph, tensor programs, and potential calls to external libraries.
| |
| .. contents:: Table of Contents |
| :local: |
| :depth: 1 |
| """ |
| |
| import numpy as np |
| import tvm |
| from tvm import relax |
| |
| ###################################################################### |
| # Create IRModule |
| # --------------- |
| # IRModules can be initialized in various ways. We demonstrate a few of them |
| # below. |
| |
| import torch |
| from torch import nn |
| from torch.export import export |
| from tvm.relax.frontend.torch import from_exported_program |
| |
| ###################################################################### |
| # Import from existing models |
| # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
# The most common way to initialize an IRModule is to import from an existing
# model. Apache TVM Unity accommodates imports from a range of frameworks,
# such as PyTorch and ONNX. This tutorial demonstrates only the PyTorch import
# process in detail (a brief ONNX sketch follows the PyTorch example).
| |
| |
| # Create a dummy model |
| class TorchModel(nn.Module): |
| def __init__(self): |
| super(TorchModel, self).__init__() |
| self.fc1 = nn.Linear(784, 256) |
| self.relu1 = nn.ReLU() |
| self.fc2 = nn.Linear(256, 10) |
| |
| def forward(self, x): |
| x = self.fc1(x) |
| x = self.relu1(x) |
| x = self.fc2(x) |
| return x |
| |
| |
| # Give an example argument to torch.export |
| example_args = (torch.randn(1, 784, dtype=torch.float32),) |
| |
| # Convert the model to IRModule |
| with torch.no_grad(): |
| exported_program = export(TorchModel().eval(), example_args) |
| mod_from_torch = from_exported_program( |
| exported_program, keep_params_as_input=True, unwrap_unit_return_tuple=True |
| ) |
| |
| mod_from_torch, params_from_torch = relax.frontend.detach_params(mod_from_torch) |
| # Print the IRModule |
| mod_from_torch.show() |
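
######################################################################
# Other frontends follow the same pattern. As a reference, below is a
# minimal sketch of the ONNX path; the file name ``model.onnx`` is a
# hypothetical placeholder, so the sketch is guarded to keep this
# tutorial runnable without it.

import os

if os.path.exists("model.onnx"):
    import onnx
    from tvm.relax.frontend.onnx import from_onnx

    # onnx.load reads the serialized model; from_onnx translates it
    # into a Relax IRModule.
    onnx_model = onnx.load("model.onnx")
    mod_from_onnx = from_onnx(onnx_model)
    mod_from_onnx.show()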
| |
| ###################################################################### |
| # Write with Relax NN Module |
| # ~~~~~~~~~~~~~~~~~~~~~~~~~~ |
# Apache TVM Unity also provides a set of PyTorch-like APIs to help users
# write the IRModule directly.
| |
| from tvm.relax.frontend import nn |
| |
| |
| class RelaxModel(nn.Module): |
| def __init__(self): |
| super(RelaxModel, self).__init__() |
| self.fc1 = nn.Linear(784, 256) |
| self.relu1 = nn.ReLU() |
| self.fc2 = nn.Linear(256, 10) |
| |
| def forward(self, x): |
| x = self.fc1(x) |
| x = self.relu1(x) |
| x = self.fc2(x) |
| return x |
| |
| |
| mod_from_relax, params_from_relax = RelaxModel().export_tvm( |
| {"forward": {"x": nn.spec.Tensor((1, 784), "float32")}} |
| ) |
| mod_from_relax.show() |
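
######################################################################
# The input spec is not limited to static shapes. As a small sketch, a
# dimension can be given as a string (here the illustrative name
# ``"batch"``), which becomes a symbolic shape variable in the exported
# IRModule.

mod_dyn_batch, _ = RelaxModel().export_tvm(
    {"forward": {"x": nn.spec.Tensor(("batch", 784), "float32")}}
)
mod_dyn_batch.show()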
| |
| ###################################################################### |
| # Create via TVMScript |
| # ~~~~~~~~~~~~~~~~~~~~ |
# TVMScript is a Python-based DSL for IRModules. We can print an IRModule
# directly in TVMScript syntax, or alternatively, parse TVMScript source to
# obtain an IRModule.
| |
| from tvm.script import ir as I |
| from tvm.script import relax as R |
| |
| |
| @I.ir_module |
| class TVMScriptModule: |
| @R.function |
| def main( |
| x: R.Tensor((1, 784), dtype="float32"), |
| fc1_weight: R.Tensor((256, 784), dtype="float32"), |
| fc1_bias: R.Tensor((256,), dtype="float32"), |
| fc2_weight: R.Tensor((10, 256), dtype="float32"), |
| fc2_bias: R.Tensor((10,), dtype="float32"), |
| ) -> R.Tensor((1, 10), dtype="float32"): |
| R.func_attr({"num_input": 1}) |
| with R.dataflow(): |
| permute_dims = R.permute_dims(fc1_weight, axes=None) |
| matmul = R.matmul(x, permute_dims, out_dtype="void") |
| add = R.add(matmul, fc1_bias) |
| relu = R.nn.relu(add) |
| permute_dims1 = R.permute_dims(fc2_weight, axes=None) |
| matmul1 = R.matmul(relu, permute_dims1, out_dtype="void") |
| add1 = R.add(matmul1, fc2_bias) |
| gv = add1 |
| R.output(gv) |
| return gv |
| |
| |
| mod_from_script = TVMScriptModule |
| mod_from_script.show() |
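
######################################################################
# As a small sketch of the parsing direction: ``mod.script()`` renders
# an IRModule as TVMScript text, and ``tvm.script.from_source`` parses
# such text back into an IRModule, giving a round trip.

script_text = mod_from_script.script()
mod_roundtrip = tvm.script.from_source(script_text)
assert isinstance(mod_roundtrip, tvm.IRModule)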
| |
| ###################################################################### |
| # Attributes of an IRModule |
| # ------------------------- |
| # An IRModule is a collection of functions, indexed by GlobalVars. |
| |
| mod = mod_from_torch |
| print(mod.get_global_vars()) |
| |
| ###################################################################### |
# We can access the functions in the IRModule by indexing with either the
# GlobalVars or their names.
| |
| # index by global var name |
| print(mod["main"]) |
# index by global var, and check that they are the same function
| (gv,) = mod.get_global_vars() |
| assert mod[gv] == mod["main"] |
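
######################################################################
# The retrieved object is a Relax function, which we can inspect
# further, e.g., to list its parameters.

main_func = mod["main"]
print(type(main_func))
# List the parameter names of the Relax function.
print([param.name_hint for param in main_func.params])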
| |
| ###################################################################### |
| # Transformations on IRModules |
| # ---------------------------- |
# Transformations are a key component of Apache TVM Unity. A transformation
# takes an IRModule as input and outputs another IRModule. We can apply a
# sequence of transformations to an IRModule to obtain a new IRModule. That is
# the common way to optimize a model.
| # |
| # In this getting started tutorial, we only demonstrate how to apply transformations |
| # to an IRModule. For details of each transformation, please refer to the |
| # :ref:`Transformation API Reference <api-relax-transformation>` |
| |
| ###################################################################### |
# We first apply the **LegalizeOps** transformation to the IRModule. This
# transformation converts the Relax module into a mixed module, with both
# Relax and TensorIR functions within the same module. Meanwhile, the Relax
# operators are converted into ``call_tir`` calls.
| |
| mod = mod_from_torch |
| mod = relax.transform.LegalizeOps()(mod) |
| mod.show() |
| |
| ###################################################################### |
# After the transformation, there are many more functions inside the module.
# Let's print the global vars again.
| |
| print(mod.get_global_vars()) |
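
######################################################################
# The newly added functions are TensorIR primitive functions
# (PrimFuncs). A small sketch that separates the two kinds of functions
# now living in the same module:

from tvm import tir

# Walk the module and classify each function by its IR kind.
for gv, func in mod.functions.items():
    kind = "TensorIR" if isinstance(func, tir.PrimFunc) else "Relax"
    print(f"{kind} function: {gv.name_hint}")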
| |
| ###################################################################### |
# Next, we apply one of the default transformation pipelines that Apache TVM
# Unity provides to simplify the transformation process. The default **zero**
# pipeline, applied to the module below, contains the following fundamental
# transformations:
| # |
# - **LegalizeOps**: This transform converts the Relax operators into ``call_tir`` functions
| # with the corresponding TensorIR Functions. After this transform, the IRModule will |
| # contain both Relax functions and TensorIR functions. |
| # - **AnnotateTIROpPattern**: This transform annotates the pattern of the TensorIR functions, |
| # preparing them for subsequent operator fusion. |
| # - **FoldConstant**: This pass performs constant folding, optimizing operations |
| # involving constants. |
| # - **FuseOps and FuseTIR**: These two passes work together to fuse operators based on the |
| # patterns annotated in the previous step (AnnotateTIROpPattern). These passes transform |
| # both Relax functions and TensorIR functions. |
| # |
| # .. note:: |
| # |
#   Here, we have applied **LegalizeOps** twice in the flow. The second
#   application is redundant but harmless.
#
#   Every pass can appear multiple times in a flow, since we ensure that each
#   pass can handle all legal IRModule inputs. This design helps users
#   construct their own pipelines (see the sketch after the pipeline run
#   below).
| |
| mod = relax.get_pipeline("zero")(mod) |
| mod.show() |
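
######################################################################
# As noted above, passes can be freely composed and repeated. Below is
# a minimal sketch of building a custom pipeline from individual passes
# with ``tvm.transform.Sequential``; the pass selection here is purely
# illustrative.

custom_pipeline = tvm.transform.Sequential(
    [
        relax.transform.LegalizeOps(),
        relax.transform.FoldConstant(),
    ]
)
# Reapplying LegalizeOps on an already-legalized module is harmless.
mod_custom = custom_pipeline(mod)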
| |
| ###################################################################### |
| # Deploy the IRModule Universally |
| # ------------------------------- |
| # After the optimization, we can compile the model into a TVM runtime module. |
# Notably, Apache TVM Unity provides universal deployment, which means we can
# deploy the same IRModule on different backends, including CPU, GPU, and
# other emerging backends.
| # |
| # Deploy on CPU |
| # ~~~~~~~~~~~~~ |
| # We can deploy the IRModule on CPU by specifying the target as ``llvm``. |
| |
| exec = tvm.compile(mod, target="llvm") |
| dev = tvm.cpu() |
| vm = relax.VirtualMachine(exec, dev) |
| |
| raw_data = np.random.rand(1, 784).astype("float32") |
| data = tvm.runtime.tensor(raw_data, dev) |
| cpu_out = vm["main"](data, *params_from_torch["main"]).numpy() |
| print(cpu_out) |
| |
| ###################################################################### |
| # Deploy on GPU |
| # ~~~~~~~~~~~~~ |
# Besides the CPU backend, we can also deploy the IRModule on GPU. GPU
# programs require extra information, such as the thread bindings and shared
# memory allocations, so a further transformation is needed to generate them.
#
# We use ``DLight`` to generate the GPU programs. In this tutorial, we won't
# go into the details of ``DLight``.
| # |
| |
from tvm import dlight as dl
| |
| with tvm.target.Target("cuda"): |
| gpu_mod = dl.ApplyDefaultSchedule( |
| dl.gpu.Matmul(), |
| dl.gpu.Fallback(), |
| )(mod) |
| |
| ###################################################################### |
# Now we can compile the IRModule for GPU, in the same way as we did for CPU.
| |
| exec = tvm.compile(gpu_mod, target="cuda") |
| dev = tvm.device("cuda", 0) |
| vm = relax.VirtualMachine(exec, dev) |
| # Need to allocate data and params on GPU device |
| data = tvm.runtime.tensor(raw_data, dev) |
| gpu_params = [tvm.runtime.tensor(p, dev) for p in params_from_torch["main"]] |
| gpu_out = vm["main"](data, *gpu_params).numpy() |
| print(gpu_out) |
| |
| # Check the correctness of the results |
| assert np.allclose(cpu_out, gpu_out, atol=1e-3) |
| |
| ###################################################################### |
| # Deploy on Other Backends |
| # ~~~~~~~~~~~~~~~~~~~~~~~~ |
| # Apache TVM Unity also supports other backends, such as different kinds of GPUs |
| # (Metal, ROCm, Vulkan and OpenCL), different kinds of CPUs (x86, ARM), and other |
| # emerging backends (e.g., WebAssembly). The deployment process is similar to the |
| # GPU backend. |
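
######################################################################
# As a hedged sketch, targeting Vulkan follows the same recipe as CUDA
# above. It assumes a Vulkan-capable device and driver, so the run is
# guarded on device availability.

vulkan_dev = tvm.device("vulkan", 0)
if vulkan_dev.exist:
    with tvm.target.Target("vulkan"):
        vulkan_mod = dl.ApplyDefaultSchedule(
            dl.gpu.Matmul(),
            dl.gpu.Fallback(),
        )(mod)
    vulkan_exec = tvm.compile(vulkan_mod, target="vulkan")
    vulkan_vm = relax.VirtualMachine(vulkan_exec, vulkan_dev)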