| """ |
| ResNet Inference Example |
| ======================== |
| **Author**: `Thierry Moreau <https://homes.cs.washington.edu/~moreau/>`_ |
| |
This tutorial provides an end-to-end demo of running ResNet-18 inference
on the VTA accelerator design to perform ImageNet classification tasks.
| |
| """ |
| |
| ###################################################################### |
| # Import Libraries |
| # ---------------- |
# We start by importing the tvm, vta, and nnvm libraries needed to run this example.
| |
| from __future__ import absolute_import, print_function |
| |
import ast
import os
| import time |
| from io import BytesIO |
| |
| import numpy as np |
| import requests |
| from matplotlib import pyplot as plt |
| from PIL import Image |
| |
| import tvm |
| from tvm import rpc, autotvm |
| from tvm.contrib import graph_runtime, util |
| from tvm.contrib.download import download |
| import nnvm.compiler |
| import vta |
| import vta.testing |
| |
| # Load VTA parameters from the vta/config/vta_config.json file |
| env = vta.get_env() |
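
# As a quick sanity check, print a few of the VTA hardware parameters
# that this tutorial relies on below: the compilation target, the
# batching factor, and the tensorization block sizes.
print("VTA target: {}, BATCH: {}, BLOCK_IN: {}, BLOCK_OUT: {}".format(
    env.TARGET, env.BATCH, env.BLOCK_IN, env.BLOCK_OUT))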
| |
# Helper to center-crop an image to a square and resize it to (224, 224)
| # Takes in an Image object, returns an Image object |
| def thumbnailify(image, pad=15): |
| w, h = image.size |
| crop = ((w-h)//2+pad, pad, h+(w-h)//2-pad, h-pad) |
| image = image.crop(crop) |
| image = image.resize((224, 224)) |
| return image |
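
# A quick sanity check of the helper on a synthetic non-square frame
# (a hypothetical 640x480 image, used purely for illustration): the
# center crop plus resize should always yield a 224x224 thumbnail.
assert thumbnailify(Image.new("RGB", (640, 480))).size == (224, 224)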
| |
| # Helper function to read in image |
| # Takes in Image object, returns an ND array |
| def process_image(image): |
    # Convert to neural network input format:
    # subtract the per-channel ImageNet mean, divide by the per-channel
    # standard deviation, then reorder from HWC to NCHW layout
    image = np.array(image) - np.array([123., 117., 104.])
    image /= np.array([58.395, 57.12, 57.375])
    image = image.transpose((2, 0, 1))
    image = image[np.newaxis, :]
| |
| return tvm.nd.array(image.astype("float32")) |
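
# A minimal shape check on a synthetic image (an illustrative addition,
# not part of the original tutorial): the helper should produce a
# (1, 3, 224, 224) NCHW tensor, matching the "data" input shape the
# compiled graph expects below.
assert process_image(Image.new("RGB", (224, 224))).shape == (1, 3, 224, 224)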
| |
# Classification helper function
# Takes in the graph runtime module and an image, and returns the top
# result and inference time (relies on the global ``ctx`` and ``synset``
# defined later in this script)
| def classify(m, image): |
| m.set_input('data', image) |
| timer = m.module.time_evaluator("run", ctx, number=1) |
| tcost = timer() |
| tvm_output = m.get_output(0) |
| top = np.argmax(tvm_output.asnumpy()[0]) |
| tcost = "t={0:.2f}s".format(tcost.mean) |
| return tcost + " {}".format(synset[top]) |
| |
| # Helper function to compile the NNVM graph |
| # Takes in a path to a graph file, params file, and device target |
| # Returns the NNVM graph object, a compiled library object, and the params dict |
| def generate_graph(graph_fn, params_fn, device="vta"): |
| # Measure build start time |
| build_start = time.time() |
| |
| # Derive the TVM target |
| target = tvm.target.create("llvm -device={}".format(device)) |
| |
| # Derive the LLVM compiler flags |
    # When targeting the Pynq, cross-compile to the ARMv7 ISA
| if env.TARGET == "sim": |
| target_host = "llvm" |
| elif env.TARGET == "pynq": |
| target_host = "llvm -mtriple=armv7-none-linux-gnueabihf -mcpu=cortex-a9 -mattr=+neon" |
| |
| # Load the ResNet-18 graph and parameters |
    with open(graph_fn) as f:
        sym = nnvm.graph.load_json(f.read())
    with open(params_fn, 'rb') as f:
        params = nnvm.compiler.load_param_dict(f.read())
| |
| # Populate the shape and data type dictionary |
| shape_dict = {"data": (1, 3, 224, 224)} |
| dtype_dict = {"data": 'float32'} |
| shape_dict.update({k: v.shape for k, v in params.items()}) |
| dtype_dict.update({k: str(v.dtype) for k, v in params.items()}) |
| |
| # Apply NNVM graph optimization passes |
| sym = vta.graph.clean_cast(sym) |
| sym = vta.graph.clean_conv_fuse(sym) |
| if target.device_name == "vta": |
| assert env.BLOCK_IN == env.BLOCK_OUT |
| sym = vta.graph.pack(sym, shape_dict, env.BATCH, env.BLOCK_OUT) |
| |
| # Compile NNVM graph |
| with nnvm.compiler.build_config(opt_level=3): |
| if target.device_name != "vta": |
| graph, lib, params = nnvm.compiler.build( |
| sym, target, shape_dict, dtype_dict, |
| params=params, target_host=target_host) |
| else: |
| with vta.build_config(): |
| graph, lib, params = nnvm.compiler.build( |
| sym, target, shape_dict, dtype_dict, |
| params=params, target_host=target_host) |
| |
| # Save the compiled inference graph library |
| assert tvm.module.enabled("rpc") |
| temp = util.tempdir() |
| lib.save(temp.relpath("graphlib.o")) |
| |
| # Send the inference library over to the remote RPC server |
| remote.upload(temp.relpath("graphlib.o")) |
| lib = remote.load_module("graphlib.o") |
| |
| # Measure build time |
| build_time = time.time() - build_start |
| print("ResNet-18 inference graph built in {0:.2f}s!".format(build_time)) |
| |
| return graph, lib, params |
| |
| |
| ###################################################################### |
| # Download ResNet Model |
| # -------------------------------------------- |
| # Download the necessary files to run ResNet-18. |
| # |
| |
# Obtain the ResNet model files and download them into the _data dir
| url = "https://github.com/uwsaml/web-data/raw/master/vta/models/" |
| categ_fn = 'synset.txt' |
| graph_fn = 'resnet18_qt8.json' |
| params_fn = 'resnet18_qt8.params' |
| |
| # Create data dir |
| data_dir = "_data/" |
| if not os.path.exists(data_dir): |
| os.makedirs(data_dir) |
| |
| # Download files |
for fn in [categ_fn, graph_fn, params_fn]:
    download(os.path.join(url, fn), os.path.join(data_dir, fn))
| |
# Read in the ImageNet categories; the synset file stores a Python
# literal mapping class indices to labels, so parse it with
# ast.literal_eval rather than a bare eval
synset = ast.literal_eval(open(os.path.join(data_dir, categ_fn)).read())
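
# Peek at the mapping to confirm the categories loaded correctly;
# the synset maps ImageNet class indices to human-readable labels
# (index 0 is printed purely as an example).
print("Loaded {} ImageNet categories, e.g. category 0: {}".format(
    len(synset), synset[0]))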
| |
# Download pre-tuned conv2d operator schedules for the ARM CPU used by VTA
| autotvm.tophub.check_backend('vta') |
| |
| |
| ###################################################################### |
# Set up the Pynq Board's RPC Server
# ----------------------------------
| # Build the RPC server's VTA runtime and program the Pynq FPGA. |
| |
| # Measure build start time |
| reconfig_start = time.time() |
| |
| # We read the Pynq RPC host IP address and port number from the OS environment |
| host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") |
| port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) |
| |
| # We configure both the bitstream and the runtime system on the Pynq |
| # to match the VTA configuration specified by the vta_config.json file. |
| if env.TARGET == "pynq": |
| # Make sure that TVM was compiled with RPC=1 |
| assert tvm.module.enabled("rpc") |
| remote = rpc.connect(host, port) |
| |
| # Reconfigure the JIT runtime |
| vta.reconfig_runtime(remote) |
| |
| # Program the FPGA with a pre-compiled VTA bitstream. |
| # You can program the FPGA with your own custom bitstream |
| # by passing the path to the bitstream file instead of None. |
| vta.program_fpga(remote, bitstream=None) |
| |
| # Report on reconfiguration time |
| reconfig_time = time.time() - reconfig_start |
| print("Reconfigured FPGA and RPC runtime in {0:.2f}s!".format(reconfig_time)) |
| |
| # In simulation mode, host the RPC server locally. |
| elif env.TARGET == "sim": |
| remote = rpc.LocalSession() |
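
# Optional sanity check (an illustrative addition): query a CPU context
# on the remote session to confirm that the RPC link is usable.
print("Remote CPU context available:", remote.cpu(0).exist)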
| |
| |
| ###################################################################### |
| # Build the ResNet Runtime |
| # ------------------------ |
| # Build the ResNet graph runtime, and configure the parameters. |
| |
| # Set ``device=vtacpu`` to run inference on the CPU |
| # or ``device=vta`` to run inference on the FPGA. |
| device = "vta" |
| |
| # Device context |
| ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0) |
| |
| # Build the graph runtime |
| graph, lib, params = generate_graph(os.path.join(data_dir, graph_fn), |
| os.path.join(data_dir, params_fn), |
| device) |
| m = graph_runtime.create(graph, lib, ctx) |
| |
| # Set the parameters |
| m.set_input(**params) |
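
# As a smoke test (an addition for illustration, not part of the
# original tutorial), run the graph once on an all-zero input to
# verify that the runtime executes end to end before timing a real image.
m.set_input('data', tvm.nd.array(np.zeros((1, 3, 224, 224), dtype="float32")))
m.run()
print("Warm-up run complete, output shape:", m.get_output(0).shape)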
| |
| ###################################################################### |
| # Run ResNet-18 inference on a sample image |
| # ----------------------------------------- |
# Perform image classification on a test image.
| # You can change the test image URL to any image of your choosing. |
| |
| # Read in test image |
| image_url = 'https://homes.cs.washington.edu/~moreau/media/vta/cat.jpg' |
| response = requests.get(image_url) |
| image = Image.open(BytesIO(response.content)).resize((224, 224)) |
| # Show Image |
| plt.imshow(image) |
| plt.show() |
| # Set the input |
| image = process_image(image) |
| m.set_input('data', image) |
| |
| # Perform inference |
| timer = m.module.time_evaluator("run", ctx, number=1) |
| tcost = timer() |
| |
| # Get classification results |
| tvm_output = m.get_output(0) |
| top_categories = np.argsort(tvm_output.asnumpy()[0]) |
| |
| # Report top-5 classification results |
| print("ResNet-18 Prediction #1:", synset[top_categories[-1]]) |
| print(" #2:", synset[top_categories[-2]]) |
| print(" #3:", synset[top_categories[-3]]) |
| print(" #4:", synset[top_categories[-4]]) |
| print(" #5:", synset[top_categories[-5]]) |
| print("Performed inference in {0:.2f}s".format(tcost.mean)) |
| |
| |
| ###################################################################### |
# Run a YouTube Video Image Classifier
# ------------------------------------
# Perform image classification on a video stream, classifying one frame
# out of every 48. Change ``if False:`` to ``if True:`` below to run the demo.
| |
# Early exit guard - change the condition to True to run the demo
if False:
| |
| import cv2 |
| import pafy |
| from IPython.display import clear_output |
| |
    # Set the figure size to 16x16 inches
| plt.rcParams['figure.figsize'] = [16, 16] |
| |
| # Stream the video in |
| url = "https://www.youtube.com/watch?v=PJlmYh27MHg&t=2s" |
| video = pafy.new(url) |
| best = video.getbest(preftype="mp4") |
| cap = cv2.VideoCapture(best.url) |
| |
| # Process one frame out of every 48 for variety |
| count = 0 |
| guess = "" |
    while count < 2400:
| |
| # Capture frame-by-frame |
| ret, frame = cap.read() |
| |
        # Process one frame out of every 48
| if count % 48 == 1: |
| frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) |
| frame = Image.fromarray(frame) |
| # Crop and resize |
| thumb = np.array(thumbnailify(frame)) |
| image = process_image(thumb) |
| guess = classify(m, image) |
| |
            # Insert the classification result into the frame
            frame = cv2.rectangle(thumb, (0, 0), (200, 0), (0, 0, 0), 50)
            cv2.putText(frame, guess, (5, 15), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (255, 255, 255), 1, cv2.LINE_AA)
| |
| plt.imshow(thumb) |
| plt.axis('off') |
| plt.show() |
| if cv2.waitKey(1) & 0xFF == ord('q'): |
| break |
| clear_output(wait=True) |
| |
| count += 1 |
| |
    # When everything is done, release the capture
| cap.release() |
| cv2.destroyAllWindows() |