| """ |
| Get Started with NNVM |
| ===================== |
| **Author**: `Tianqi Chen <https://tqchen.github.io/>`_ |
| |
This article is an introductory tutorial to the compilation workflow in NNVM.
| """ |
| import nnvm.compiler |
| import nnvm.symbol as sym |
| |
| ###################################################################### |
| # Declare Computation |
| # ------------------- |
# We start by describing our computation with a computational graph.
# Most deep learning frameworks use computational graphs to describe
# their computation. In this example, we directly use
# NNVM's API to construct the computational graph.
| # |
| # .. note:: |
| # |
| # In a typical deep learning compilation workflow, |
| # we can get the models from :any:`nnvm.frontend` |
| # |
# The following code snippet describes :math:`z = x + \sqrt{y}`
# and creates an NNVM graph from the description.
# We can print out the graph IR to check the graph content.
| |
| x = sym.Variable("x") |
| y = sym.Variable("y") |
| z = sym.elemwise_add(x, sym.sqrt(y)) |
| compute_graph = nnvm.graph.create(z) |
| print("-------compute graph-------") |
| print(compute_graph.ir()) |
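
######################################################################
# As a side note, the frontend path mentioned above looks roughly like
# the sketch below. It assumes MXNet is installed and that
# ``model-symbol.json`` is a hypothetical saved MXNet symbol file, so
# it is shown here without being executed.
#
# .. code-block:: python
#
#    import mxnet as mx
#    # load a hypothetical pre-trained MXNet symbol from disk
#    mx_sym = mx.sym.load("model-symbol.json")
#    # convert it into an NNVM symbol plus a parameter dict
#    net, params = nnvm.frontend.from_mxnet(mx_sym)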
| |
| ###################################################################### |
| # Compile |
| # ------- |
| # We can call :any:`nnvm.compiler.build` to compile the graph. |
| # The build function takes a shape parameter which specifies the |
# input shape requirement. Here we only need to pass in the shape
# of ``x``; the shape of the other input will be inferred automatically by NNVM.
| # |
| # The function returns three values. ``deploy_graph`` contains |
| # the final compiled graph structure. ``lib`` is a :any:`tvm.module.Module` |
| # that contains compiled CUDA functions. We do not need the ``params`` |
| # in this case. |
| shape = (4,) |
| deploy_graph, lib, params = nnvm.compiler.build( |
| compute_graph, target="cuda", shape={"x": shape}, dtype="float32") |
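
######################################################################
# Equivalently, we could pass the shapes of both inputs explicitly;
# the result would be the same because NNVM infers the shape of ``y``
# from ``x`` through the elementwise operators anyway:
#
# .. code-block:: python
#
#    deploy_graph, lib, params = nnvm.compiler.build(
#        compute_graph, target="cuda",
#        shape={"x": shape, "y": shape}, dtype="float32")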
| |
| ###################################################################### |
| # We can print out the IR of ``deploy_graph`` to understand what just |
# happened under the hood. We can see that ``deploy_graph``
# contains only a single operator, ``tvm_op``. This is because NNVM
# automatically fused the operators together into one kernel.
| # |
| print("-------deploy graph-------") |
| print(deploy_graph.ir()) |
| |
| ###################################################################### |
# Let us also peek into the content of ``lib``.
# Typically, a compiled TVM CUDA module contains a host module (``lib``)
# and a device module (``lib.imported_modules[0]``) that contains the CUDA code.
# We print out the generated device code here.
# This is exactly the fused CUDA kernel that the graph points to.
| # |
| print("-------deploy library-------") |
| print(lib.imported_modules[0].get_source()) |
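
######################################################################
# A quick way to confirm the host/device split is to look at the
# ``type_key`` of each module, a property TVM modules expose.
# We would expect the device module to report ``cuda`` here.
print(lib.type_key, lib.imported_modules[0].type_key)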
| |
| ###################################################################### |
| # Deploy and Run |
| # -------------- |
# Now that we have the compiled module, let us run it.
| # We can use :any:`graph_runtime <tvm.contrib.graph_runtime.create>` |
| # in tvm to create a deployable :any:`GraphModule <tvm.contrib.graph_runtime.GraphModule>`. |
| # We can use the :any:`set_input <tvm.contrib.graph_runtime.GraphModule.set_input>`, |
| # :any:`run <tvm.contrib.graph_runtime.GraphModule.run>` and |
# :any:`get_output <tvm.contrib.graph_runtime.GraphModule.get_output>` functions
# to set the inputs, execute the graph and get the outputs we need.
| # |
| import tvm |
| import numpy as np |
| from tvm.contrib import graph_runtime, util |
| |
| module = graph_runtime.create(deploy_graph, lib, tvm.gpu(0)) |
| x_np = np.array([1, 2, 3, 4]).astype("float32") |
| y_np = np.array([4, 4, 4, 4]).astype("float32") |
| # set input to the graph module |
| module.set_input(x=x_np, y=y_np) |
| # run forward computation |
| module.run() |
| # get the first output |
| out = module.get_output(0, out=tvm.nd.empty(shape)) |
| print(out.asnumpy()) |
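
######################################################################
# We can verify the result against a plain numpy computation of
# :math:`x + \sqrt{y}` on the host.
np.testing.assert_allclose(out.asnumpy(), x_np + np.sqrt(y_np), rtol=1e-5)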
| |
| ###################################################################### |
| # Provide Model Parameters |
| # ------------------------ |
# Most deep learning models contain two types of inputs: parameters
# that remain fixed during inference, and data inputs that change
# with each inference task. It is helpful to provide this
# information to NNVM. Let us assume that ``y`` is the parameter
# in our example. We can provide the model parameter information
# via the ``params`` argument to :any:`nnvm.compiler.build`.
| # |
| deploy_graph, lib, params = nnvm.compiler.build( |
| compute_graph, target="cuda", shape={"x": shape}, params={"y": y_np}) |
| |
| ###################################################################### |
# This time we will need the ``params`` value returned by :any:`nnvm.compiler.build`.
# NNVM applies optimizations to pre-compute the intermediate values in
# the graph that can be determined by parameters. In this case,
# :math:`\sqrt{y}` can be pre-computed. The pre-computed values
# are returned as the new params. We can print out the new compiled library
# to confirm that the fused kernel now only contains the add operation.
| # |
| print("-----optimized params-----") |
| print(params) |
| print("-------deploy library-------") |
| print(lib.imported_modules[0].get_source()) |
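
######################################################################
# To run this parameter-bound module, we can create a runtime module
# as before and feed the pre-computed params through ``set_input``;
# only ``x`` needs to be supplied at inference time.
module = graph_runtime.create(deploy_graph, lib, tvm.gpu(0))
# load the pre-computed parameters into the module
module.set_input(**params)
# supply only the data input
module.set_input(x=x_np)
module.run()
out = module.get_output(0, out=tvm.nd.empty(shape))
print(out.asnumpy())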
| |
| ###################################################################### |
| # Save the Deployed Module |
| # ------------------------ |
| # We can save the ``deploy_graph``, ``lib`` and ``params`` separately |
| # and load them back later. We can use :any:`tvm.module.Module` to export |
# the compiled library. ``deploy_graph`` is saved in JSON format and ``params``
| # is serialized into a bytearray. |
| # |
| temp = util.tempdir() |
| path_lib = temp.relpath("deploy.so") |
| lib.export_library(path_lib) |
| with open(temp.relpath("deploy.json"), "w") as fo: |
| fo.write(deploy_graph.json()) |
| with open(temp.relpath("deploy.params"), "wb") as fo: |
| fo.write(nnvm.compiler.save_param_dict(params)) |
| print(temp.listdir()) |
| |
| ###################################################################### |
| # We can load the module back. |
| loaded_lib = tvm.module.load(path_lib) |
| loaded_json = open(temp.relpath("deploy.json")).read() |
| loaded_params = bytearray(open(temp.relpath("deploy.params"), "rb").read()) |
| module = graph_runtime.create(loaded_json, loaded_lib, tvm.gpu(0)) |
# deserialize the params into a dict (for inspection; the runtime below
# can also consume the serialized byte array directly)
params = nnvm.compiler.load_param_dict(loaded_params)
# load the parameters into the graph module directly from the byte array
module.load_params(loaded_params)
| module.run(x=x_np) |
| # get the first output |
| out = module.get_output(0, out=tvm.nd.empty(shape)) |
| print(out.asnumpy()) |
| |
| ###################################################################### |
| # Deploy using Another Language |
| # ----------------------------- |
# We use Python in this example for demonstration.
# We can also deploy the compiled modules with other languages
# supported by TVM, such as C++, Java and JavaScript.
# The graph module itself is fully embedded in the TVM runtime.
#
# The following block demonstrates how we can directly use TVM's
# runtime API to execute the compiled module.
# You can find similar runtime APIs in the TVM runtimes of other languages.
| # |
| fcreate = tvm.get_global_func("tvm.graph_runtime.create") |
| ctx = tvm.gpu(0) |
| gmodule = fcreate(loaded_json, loaded_lib, ctx.device_type, ctx.device_id) |
| set_input, get_output, run = gmodule["set_input"], gmodule["get_output"], gmodule["run"] |
| set_input("x", tvm.nd.array(x_np)) |
| gmodule["load_params"](loaded_params) |
| run() |
| out = tvm.nd.empty(shape) |
| get_output(0, out) |
| print(out.asnumpy()) |