| """ |
| .. _tutorial-nnvm-quick-start: |
| |
| Quick Start Tutorial for Compiling Deep Learning Models |
| ======================================================= |
| **Author**: `Yao Wang <https://github.com/kevinthesun>`_ |
| |
| This example shows how to build a neural network with NNVM python frontend and |
generate a runtime library for an Nvidia GPU with TVM.
| Notice that you need to build TVM with cuda and llvm enabled. |
| """ |
| |
| ###################################################################### |
| # Overview for Supported Hardware Backend of TVM |
| # ---------------------------------------------- |
# The image below shows the hardware backends currently supported by TVM:
| # |
| # .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tvm_support_list.png |
| # :align: center |
| # :scale: 100% |
| # |
| # In this tutorial, we'll choose cuda and llvm as target backends. |
| # To begin with, let's import NNVM and TVM. |
| |
| import numpy as np |
| |
| import nnvm.compiler |
| import nnvm.testing |
| import tvm |
| from tvm.contrib import graph_runtime |
| |
| ###################################################################### |
| # Define Neural Network in NNVM |
| # ----------------------------- |
| # First, let's define a neural network with nnvm python frontend. |
| # For simplicity, we'll use pre-defined resnet-18 network in NNVM. |
| # Parameters are initialized with Xavier initializer. |
| # NNVM also supports other model formats such as MXNet, CoreML, ONNX and |
| # Tensorflow. |
| # |
| # In this tutorial, we assume we will do inference on our device |
| # and the batch size is set to be 1. Input images are RGB color |
| # images of size 224 * 224. We can call the :any:`nnvm.symbol.debug_str` |
| # to show the network structure. |
| |
# Model configuration: single-image inference (batch size 1) on
# 224x224 RGB inputs, classifying over the 1000 ImageNet classes.
batch_size = 1
num_class = 1000
image_shape = (3, 224, 224)
# NCHW layout: (batch, channel, height, width).
data_shape = (batch_size,) + image_shape
out_shape = (batch_size, num_class)

# Fetch the pre-defined ResNet-18 workload from NNVM together with its
# Xavier-initialized parameter dict.
net, params = nnvm.testing.resnet.get_workload(
    num_layers=18,
    batch_size=batch_size,
    image_shape=image_shape,
)
# Dump a textual description of the symbolic network structure.
print(net.debug_str())
| |
| ###################################################################### |
| # Compilation |
| # ----------- |
| # Next step is to compile the model using the NNVM/TVM pipeline. |
| # Users can specify the optimization level of the compilation. |
| # Currently this value can be 0 to 3. The optimization passes include |
| # operator fusion, pre-computation, layout transformation and so on. |
| # |
| # :any:`nnvm.compiler.build` returns three components: the execution graph in |
| # json format, the TVM module library of compiled functions specifically |
| # for this graph on the target hardware, and the parameter blobs of |
| # the model. During the compilation, NNVM does the graph-level |
| # optimization while TVM does the tensor-level optimization, resulting |
| # in an optimized runtime module for model serving. |
| # |
| # We'll first compile for Nvidia GPU. Behind the scene, `nnvm.compiler.build` |
| # first does a number of graph-level optimizations, e.g. pruning, fusing, etc., |
| # then registers the operators (i.e. the nodes of the optimized graphs) to |
| # TVM implementations to generate a `tvm.module`. |
# To generate the module library, TVM will first lower the high-level IR
# into the low-level intrinsic IR of the specified target backend, which is CUDA
# in this example. Then the machine code will be generated as the module library.
| |
# Compile the network for an Nvidia GPU at the highest optimization
# level (0-3).  The build returns the execution graph (JSON), the
# compiled module library, and the (possibly transformed) parameters.
opt_level = 3
target = tvm.target.cuda()
with nnvm.compiler.build_config(opt_level=opt_level):
    graph, lib, params = nnvm.compiler.build(
        net,
        target,
        shape={"data": data_shape},
        params=params,
    )
| |
| ##################################################################### |
# Run the generated library
# -------------------------
| # Now we can create graph runtime and run the module on Nvidia GPU. |
| |
# Create a random input image on the host.
ctx = tvm.gpu()
data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
# Create the graph runtime module on the GPU context.
module = graph_runtime.create(graph, lib, ctx)
# Set the named input and the model parameters.
module.set_input("data", data)
module.set_input(**params)
# Execute the graph.
module.run()
# Fetch the output tensor into a host-side NDArray.
out = module.get_output(0, tvm.nd.empty(out_shape))
# Convert to numpy exactly once.  The original code called asnumpy()
# twice: once with the result discarded, which only performed a
# redundant device-to-host copy.
out_np = out.asnumpy()

# Print first 10 elements of output
print(out_np.flatten()[0:10])
| |
| ###################################################################### |
| # Save and Load Compiled Module |
| # ----------------------------- |
| # We can also save the graph, lib and parameters into files and load them |
# back in a deployment environment.
| |
| #################################################### |
| |
| # save the graph, lib and params into separate files |
| from tvm.contrib import util |
| |
| temp = util.tempdir() |
| path_lib = temp.relpath("deploy_lib.tar") |
| lib.export_library(path_lib) |
| with open(temp.relpath("deploy_graph.json"), "w") as fo: |
| fo.write(graph.json()) |
| with open(temp.relpath("deploy_param.params"), "wb") as fo: |
| fo.write(nnvm.compiler.save_param_dict(params)) |
| print(temp.listdir()) |
| |
| #################################################### |
| |
# Load the serialized artifacts back, as a deployment environment would.
# The original code opened both files without closing them; using `with`
# guarantees the file handles are released.
with open(temp.relpath("deploy_graph.json")) as f:
    loaded_json = f.read()
loaded_lib = tvm.module.load(path_lib)
with open(temp.relpath("deploy_param.params"), "rb") as f:
    loaded_params = bytearray(f.read())
input_data = tvm.nd.array(np.random.uniform(size=data_shape).astype("float32"))

# Recreate the runtime from the loaded artifacts and run it.
module = graph_runtime.create(loaded_json, loaded_lib, ctx)
module.load_params(loaded_params)
# run() accepts keyword arguments that set the named inputs before executing.
module.run(data=input_data)
out = module.get_output(0).asnumpy()