tutorials/frontend/deploy_detection.py - tvm-vta - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 """
 Deploy Pretrained Vision Detection Model from Darknet on VTA
 ============================================================
 **Author**: `Hua Jiang <https://github.com/huajsj>`_

 This tutorial provides an end-to-end demo, on how to run Darknet YoloV3-tiny
 inference onto the VTA accelerator design to perform Image detection tasks.
 It showcases Relay as a front end compiler that can perform quantization (VTA
 only supports int8/32 inference) as well as graph packing (in order to enable
 tensorization in the core) to massage the compute graph for the hardware target.
 """

 ######################################################################
 # Install dependencies
 # --------------------
 # To use the autotvm package in tvm, we need to install some extra dependencies.
 # (change "3" to "2" if you use python2):
 #
 # .. code-block:: bash
 #
 # pip3 install "Pillow<7"
 #
 # YOLO-V3-tiny Model with Darknet parsing have dependancy with CFFI and CV2 library,
 # we need to install CFFI and CV2 before executing this script.
 #
 # pip3 install "Pillow<7"
 #
 # pip3 install cffi
 # pip3 install opencv-python
 #
 # Now return to the python code. Import packages.

 from __future__ import absolute_import, print_function

 import sys
 import os
 import time
 import matplotlib.pyplot as plt
 import numpy as np
 import tvm
 import vta
 from tvm import rpc, autotvm, relay
 from tvm.relay.testing import yolo_detection, darknet
 from tvm.relay.testing.darknet import __darknetffi__
 from tvm.contrib import graph_runtime, graph_runtime, util
 from tvm.contrib.download import download_testdata
 from vta.testing import simulator
 from vta.top import graph_pack
 # Make sure that TVM was compiled with RPC=1
 assert tvm.runtime.enabled("rpc")

 ##############################################################################
 # Download yolo net configure file, weight file, darknet library file based on
 # Model Name
 # ----------------------------------------------------------------------------
 MODEL_NAME = 'yolov3-tiny'
 REPO_URL = 'https://github.com/dmlc/web-data/blob/master/darknet/'

 cfg_path = download_testdata('https://github.com/pjreddie/darknet/blob/master/cfg/'
                              + MODEL_NAME + '.cfg' + '?raw=true',
                              MODEL_NAME + '.cfg',
                              module="darknet")
 weights_path = download_testdata('https://pjreddie.com/media/files/'
                                  + MODEL_NAME + '.weights' + '?raw=true',
                                  MODEL_NAME + '.weights',
                                  module="darknet")

 if sys.platform in ['linux', 'linux2']:
     darknet_lib_path = download_testdata(REPO_URL + 'lib/' + 'libdarknet2.0.so' + '?raw=true',
                                          'libdarknet2.0.so',
                                          module="darknet")
 elif sys.platform == 'darwin':
     darknet_lib_path = download_testdata(REPO_URL+'lib_osx/'+'libdarknet_mac2.0.so'+'?raw=true',
                                          'libdarknet_mac2.0.so',
                                          module="darknet")
 else:
     raise NotImplementedError("Darknet lib is not supported on {} platform"
                               .format(sys.platform))

 ##################################################
 # Download yolo categories and illustration front.
 # ------------------------------------------------
 coco_path = download_testdata(REPO_URL + 'data/' + 'coco.names' + '?raw=true',
                               'coco.names',
                               module='data')
 font_path = download_testdata(REPO_URL + 'data/' + 'arial.ttf' + '?raw=true',
                               'arial.ttf',
                               module='data')
 with open(coco_path) as f:
     content = f.readlines()
 names = [x.strip() for x in content]

 ########################################
 # Define the platform and model targets.
 # --------------------------------------
 # Execute on CPU vs. VTA, and define the model.

 # Load VTA parameters from the vta/config/vta_config.json file
 env = vta.get_env()
 # Set ``device=arm_cpu`` to run inference on the CPU
 # or ``device=vta`` to run inference on the FPGA.
 device = "vta"
 target = env.target if device == "vta" else env.target_vta_cpu

 pack_dict = {
     "yolov3-tiny": ["nn.max_pool2d", "cast", 4, 185],
 }

 # Name of Darknet model to compile
 # The ``start_pack`` and ``stop_pack`` labels indicate where
 # to start and end the graph packing relay pass: in other words
 # where to start and finish offloading to VTA.
 # the number 4 indicate the the ``start_pack`` index is 4, the
 # number 185 indicate the ``stop_pack index`` is 185, by using
 # name and index number, here we can located to correct place
 # where to start/end when there are multiple ``nn.max_pool2d``
 # or ``cast``, print(mod.astext(show_meta_data=False)) can help
 # to find operator name and index information.
 assert MODEL_NAME in pack_dict

 #############################
 # Obtain an execution remote.
 # ---------------------------
 # When target is 'pynq' or other FPGA backend, reconfigure FPGA and runtime.
 # Otherwise, if target is 'sim', execute locally.

 if env.TARGET not in ["sim", "tsim"]:
     # Get remote from tracker node if environment variable is set.
     # To set up the tracker, you'll need to follow the "Auto-tuning
     # a convolutional network for VTA" tutorial.
     tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
     tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
     # Otherwise if you have a device you want to program directly from
     # the host, make sure you've set the variables below to the IP of
     # your board.
     device_host = os.environ.get("VTA_RPC_HOST", "192.168.2.99")
     device_port = os.environ.get("VTA_RPC_PORT", "9091")
     if not tracker_host or not tracker_port:
         remote = rpc.connect(device_host, int(device_port))
     else:
         remote = autotvm.measure.request_remote(env.TARGET,
                                                 tracker_host,
                                                 int(tracker_port),
                                                 timeout=10000)
     # Reconfigure the JIT runtime and FPGA.
     # You can program the FPGA with your own custom bitstream
     # by passing the path to the bitstream file instead of None.
     reconfig_start = time.time()
     vta.reconfig_runtime(remote)
     vta.program_fpga(remote, bitstream=None)
     reconfig_time = time.time() - reconfig_start
     print("Reconfigured FPGA and RPC runtime in {0:.2f}s!".format(reconfig_time))

 # In simulation mode, host the RPC server locally.
 else:
     remote = rpc.LocalSession()

 # Get execution context from remote
 ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)

 ####################################
 # Build the inference graph runtime.
 # ----------------------------------
 # Using Darknet library load downloaded vision model and compile with Relay.
 # The compilation steps are:
 #
 # 1. Front end translation from Darknet into Relay module.
 # 2. Apply 8-bit quantization: here we skip the first conv layer,
 #    and dense layer which will both be executed in fp32 on the CPU.
 # 3. Perform graph packing to alter the data layout for tensorization.
 # 4. Perform constant folding to reduce number of operators (e.g. eliminate batch norm multiply).
 # 5. Perform relay build to object file.
 # 6. Load the object file onto remote (FPGA device).
 # 7. Generate graph runtime, `m`.
 #

 # Load pre-configured AutoTVM schedules
 with autotvm.tophub.context(target):
     net = __darknetffi__.dlopen(darknet_lib_path).load_network(cfg_path.encode('utf-8'),
                                                                weights_path.encode('utf-8'),
                                                                0)
     dshape = (env.BATCH, net.c, net.h, net.w)
     dtype = 'float32'

     # Measure build start time
     build_start = time.time()

     # Start front end compilation
     mod, params = relay.frontend.from_darknet(net, dtype=dtype, shape=dshape)

     if target.device_name == "vta":
     # Perform quantization in Relay
     # Note: We set opt_level to 3 in order to fold batch norm
         with relay.build_config(opt_level=3):
             with relay.quantize.qconfig(global_scale=33.0,
                                         skip_conv_layers=[0],
                                         store_lowbit_output=True,
                                         round_for_shift=True):
                 mod = relay.quantize.quantize(mod, params=params)
             # Perform graph packing and constant folding for VTA target
             mod = graph_pack(
                 mod["main"],
                 env.BATCH,
                 env.BLOCK_OUT,
                 env.WGT_WIDTH,
                 start_name=pack_dict[MODEL_NAME][0],
                 stop_name=pack_dict[MODEL_NAME][1],
                 start_name_idx=pack_dict[MODEL_NAME][2],
                 stop_name_idx=pack_dict[MODEL_NAME][3])
     else:
         mod = mod["main"]

     # Compile Relay program with AlterOpLayout disabled
     with vta.build_config(disabled_pass={"AlterOpLayout"}):
         graph, lib, params = relay.build(
             mod,
             target=target,
             params=params,
             target_host=env.target_host)

     # Measure Relay build time
     build_time = time.time() - build_start
     print(MODEL_NAME + " inference graph built in {0:.2f}s!".format(build_time))

     # Send the inference library over to the remote RPC server
     temp = util.tempdir()
     lib.save(temp.relpath("graphlib.o"))
     remote.upload(temp.relpath("graphlib.o"))
     lib = remote.load_module("graphlib.o")

     # Graph runtime
     m = graph_runtime.create(graph, lib, ctx)

 ####################################
 # Perform image detection inference.
 # ----------------------------------
 # We run detect on an downloaded image
 # Download test image
 [neth, netw] = dshape[2:]
 test_image = 'person.jpg'
 img_url = REPO_URL + 'data/' + test_image + '?raw=true'
 img_path = download_testdata(img_url, test_image, "data")
 data = darknet.load_image(img_path, neth, netw).transpose(1, 2, 0)

 # Prepare test image for inference
 plt.imshow(data)
 plt.show()
 data = data.transpose((2, 0, 1))
 data = data[np.newaxis, :]
 data = np.repeat(data, env.BATCH, axis=0)

 # Set the network parameters and inputs
 m.set_input('data', data)
 m.set_input(**params)

 # Perform inference and gather execution statistics
 # More on: https://docs.tvm.ai/api/python/module.html#tvm.runtime.Module.time_evaluator
 num = 4 # number of times we run module for a single measurement
 rep = 3 # number of measurements (we derive std dev from this)
 timer = m.module.time_evaluator("run", ctx, number=num, repeat=rep)

 if env.TARGET in ["sim", "tsim"]:
     simulator.clear_stats()
     timer()
     sim_stats = simulator.stats()
     print("\nExecution statistics:")
     for k, v in sim_stats.items():
         # Since we execute the workload many times, we need to normalize stats
         # Note that there is always one warm up run
         # Therefore we divide the overall stats by (num * rep + 1)
         print("\t{:<16}: {:>16}".format(k, v // (num * rep + 1)))
 else:
     tcost = timer()
     std = np.std(tcost.results) * 1000
     mean = tcost.mean * 1000
     print("\nPerformed inference in %.2fms (std = %.2f) for %d samples" % (mean, std, env.BATCH))
     print("Average per sample inference time: %.2fms" % (mean/env.BATCH))

 # Get detection results from out
 thresh = 0.5
 nms_thresh = 0.45
 tvm_out = []
 for i in range(2):
     layer_out = {}
     layer_out['type'] = 'Yolo'
     # Get the yolo layer attributes (n, out_c, out_h, out_w, classes, total)
     layer_attr = m.get_output(i*4+3).asnumpy()
     layer_out['biases'] = m.get_output(i*4+2).asnumpy()
     layer_out['mask'] = m.get_output(i*4+1).asnumpy()
     out_shape = (layer_attr[0], layer_attr[1]//layer_attr[0],
                  layer_attr[2], layer_attr[3])
     layer_out['output'] = m.get_output(i*4).asnumpy().reshape(out_shape)
     layer_out['classes'] = layer_attr[4]
     tvm_out.append(layer_out)
     thresh = 0.560

 # Show detection results
 img = darknet.load_image_color(img_path)
 _, im_h, im_w = img.shape
 dets = yolo_detection.fill_network_boxes((netw, neth),
                                          (im_w, im_h),
                                          thresh,
                                          1,
                                          tvm_out)
 last_layer = net.layers[net.n - 1]
 yolo_detection.do_nms_sort(dets, last_layer.classes, nms_thresh)
 yolo_detection.draw_detections(font_path,
                                img,
                                dets,
                                thresh,
                                names,
                                last_layer.classes)
 plt.imshow(img.transpose(1, 2, 0))
 plt.show()
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	"""
	Deploy Pretrained Vision Detection Model from Darknet on VTA
	============================================================
	Author: `Hua Jiang <https://github.com/huajsj>`_

	This tutorial provides an end-to-end demo, on how to run Darknet YoloV3-tiny
	inference onto the VTA accelerator design to perform Image detection tasks.
	It showcases Relay as a front end compiler that can perform quantization (VTA
	only supports int8/32 inference) as well as graph packing (in order to enable
	tensorization in the core) to massage the compute graph for the hardware target.
	"""

	######################################################################
	# Install dependencies
	# --------------------
	# To use the autotvm package in tvm, we need to install some extra dependencies.
	# (change "3" to "2" if you use python2):
	#
	# .. code-block:: bash
	#
	# pip3 install "Pillow<7"
	#
	# YOLO-V3-tiny Model with Darknet parsing have dependancy with CFFI and CV2 library,
	# we need to install CFFI and CV2 before executing this script.
	#
	# pip3 install "Pillow<7"
	#
	# pip3 install cffi
	# pip3 install opencv-python
	#
	# Now return to the python code. Import packages.

	from __future__ import absolute_import, print_function

	import sys
	import os
	import time
	import matplotlib.pyplot as plt
	import numpy as np
	import tvm
	import vta
	from tvm import rpc, autotvm, relay
	from tvm.relay.testing import yolo_detection, darknet
	from tvm.relay.testing.darknet import __darknetffi__
	from tvm.contrib import graph_runtime, graph_runtime, util
	from tvm.contrib.download import download_testdata
	from vta.testing import simulator
	from vta.top import graph_pack
	# Make sure that TVM was compiled with RPC=1
	assert tvm.runtime.enabled("rpc")

	##############################################################################
	# Download yolo net configure file, weight file, darknet library file based on
	# Model Name
	# ----------------------------------------------------------------------------
	MODEL_NAME = 'yolov3-tiny'
	REPO_URL = 'https://github.com/dmlc/web-data/blob/master/darknet/'

	cfg_path = download_testdata('https://github.com/pjreddie/darknet/blob/master/cfg/'
	+ MODEL_NAME + '.cfg' + '?raw=true',
	MODEL_NAME + '.cfg',
	module="darknet")
	weights_path = download_testdata('https://pjreddie.com/media/files/'
	+ MODEL_NAME + '.weights' + '?raw=true',
	MODEL_NAME + '.weights',
	module="darknet")

	if sys.platform in ['linux', 'linux2']:
	darknet_lib_path = download_testdata(REPO_URL + 'lib/' + 'libdarknet2.0.so' + '?raw=true',
	'libdarknet2.0.so',
	module="darknet")
	elif sys.platform == 'darwin':
	darknet_lib_path = download_testdata(REPO_URL+'lib_osx/'+'libdarknet_mac2.0.so'+'?raw=true',
	'libdarknet_mac2.0.so',
	module="darknet")
	else:
	raise NotImplementedError("Darknet lib is not supported on {} platform"
	.format(sys.platform))

	##################################################
	# Download yolo categories and illustration front.
	# ------------------------------------------------
	coco_path = download_testdata(REPO_URL + 'data/' + 'coco.names' + '?raw=true',
	'coco.names',
	module='data')
	font_path = download_testdata(REPO_URL + 'data/' + 'arial.ttf' + '?raw=true',
	'arial.ttf',
	module='data')
	with open(coco_path) as f:
	content = f.readlines()
	names = [x.strip() for x in content]

	########################################
	# Define the platform and model targets.
	# --------------------------------------
	# Execute on CPU vs. VTA, and define the model.

	# Load VTA parameters from the vta/config/vta_config.json file
	env = vta.get_env()
	# Set ``device=arm_cpu`` to run inference on the CPU
	# or ``device=vta`` to run inference on the FPGA.
	device = "vta"
	target = env.target if device == "vta" else env.target_vta_cpu

	pack_dict = {
	"yolov3-tiny": ["nn.max_pool2d", "cast", 4, 185],
	}

	# Name of Darknet model to compile
	# The ``start_pack`` and ``stop_pack`` labels indicate where
	# to start and end the graph packing relay pass: in other words
	# where to start and finish offloading to VTA.
	# the number 4 indicate the the ``start_pack`` index is 4, the
	# number 185 indicate the ``stop_pack index`` is 185, by using
	# name and index number, here we can located to correct place
	# where to start/end when there are multiple ``nn.max_pool2d``
	# or ``cast``, print(mod.astext(show_meta_data=False)) can help
	# to find operator name and index information.
	assert MODEL_NAME in pack_dict

	#############################
	# Obtain an execution remote.
	# ---------------------------
	# When target is 'pynq' or other FPGA backend, reconfigure FPGA and runtime.
	# Otherwise, if target is 'sim', execute locally.

	if env.TARGET not in ["sim", "tsim"]:
	# Get remote from tracker node if environment variable is set.
	# To set up the tracker, you'll need to follow the "Auto-tuning
	# a convolutional network for VTA" tutorial.
	tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
	tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
	# Otherwise if you have a device you want to program directly from
	# the host, make sure you've set the variables below to the IP of
	# your board.
	device_host = os.environ.get("VTA_RPC_HOST", "192.168.2.99")
	device_port = os.environ.get("VTA_RPC_PORT", "9091")
	if not tracker_host or not tracker_port:
	remote = rpc.connect(device_host, int(device_port))
	else:
	remote = autotvm.measure.request_remote(env.TARGET,
	tracker_host,
	int(tracker_port),
	timeout=10000)
	# Reconfigure the JIT runtime and FPGA.
	# You can program the FPGA with your own custom bitstream
	# by passing the path to the bitstream file instead of None.
	reconfig_start = time.time()
	vta.reconfig_runtime(remote)
	vta.program_fpga(remote, bitstream=None)
	reconfig_time = time.time() - reconfig_start
	print("Reconfigured FPGA and RPC runtime in {0:.2f}s!".format(reconfig_time))

	# In simulation mode, host the RPC server locally.
	else:
	remote = rpc.LocalSession()

	# Get execution context from remote
	ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)

	####################################
	# Build the inference graph runtime.
	# ----------------------------------
	# Using Darknet library load downloaded vision model and compile with Relay.
	# The compilation steps are:
	#
	# 1. Front end translation from Darknet into Relay module.
	# 2. Apply 8-bit quantization: here we skip the first conv layer,
	# and dense layer which will both be executed in fp32 on the CPU.
	# 3. Perform graph packing to alter the data layout for tensorization.
	# 4. Perform constant folding to reduce number of operators (e.g. eliminate batch norm multiply).
	# 5. Perform relay build to object file.
	# 6. Load the object file onto remote (FPGA device).
	# 7. Generate graph runtime, `m`.
	#

	# Load pre-configured AutoTVM schedules
	with autotvm.tophub.context(target):
	net = __darknetffi__.dlopen(darknet_lib_path).load_network(cfg_path.encode('utf-8'),
	weights_path.encode('utf-8'),
	0)
	dshape = (env.BATCH, net.c, net.h, net.w)
	dtype = 'float32'

	# Measure build start time
	build_start = time.time()

	# Start front end compilation
	mod, params = relay.frontend.from_darknet(net, dtype=dtype, shape=dshape)

	if target.device_name == "vta":
	# Perform quantization in Relay
	# Note: We set opt_level to 3 in order to fold batch norm
	with relay.build_config(opt_level=3):
	with relay.quantize.qconfig(global_scale=33.0,
	skip_conv_layers=[0],
	store_lowbit_output=True,
	round_for_shift=True):
	mod = relay.quantize.quantize(mod, params=params)
	# Perform graph packing and constant folding for VTA target
	mod = graph_pack(
	mod["main"],
	env.BATCH,
	env.BLOCK_OUT,
	env.WGT_WIDTH,
	start_name=pack_dict[MODEL_NAME][0],
	stop_name=pack_dict[MODEL_NAME][1],
	start_name_idx=pack_dict[MODEL_NAME][2],
	stop_name_idx=pack_dict[MODEL_NAME][3])
	else:
	mod = mod["main"]

	# Compile Relay program with AlterOpLayout disabled
	with vta.build_config(disabled_pass={"AlterOpLayout"}):
	graph, lib, params = relay.build(
	mod,
	target=target,
	params=params,
	target_host=env.target_host)

	# Measure Relay build time
	build_time = time.time() - build_start
	print(MODEL_NAME + " inference graph built in {0:.2f}s!".format(build_time))

	# Send the inference library over to the remote RPC server
	temp = util.tempdir()
	lib.save(temp.relpath("graphlib.o"))
	remote.upload(temp.relpath("graphlib.o"))
	lib = remote.load_module("graphlib.o")

	# Graph runtime
	m = graph_runtime.create(graph, lib, ctx)

	####################################
	# Perform image detection inference.
	# ----------------------------------
	# We run detect on an downloaded image
	# Download test image
	[neth, netw] = dshape[2:]
	test_image = 'person.jpg'
	img_url = REPO_URL + 'data/' + test_image + '?raw=true'
	img_path = download_testdata(img_url, test_image, "data")
	data = darknet.load_image(img_path, neth, netw).transpose(1, 2, 0)

	# Prepare test image for inference
	plt.imshow(data)
	plt.show()
	data = data.transpose((2, 0, 1))
	data = data[np.newaxis, :]
	data = np.repeat(data, env.BATCH, axis=0)

	# Set the network parameters and inputs
	m.set_input('data', data)
	m.set_input(**params)

	# Perform inference and gather execution statistics
	# More on: https://docs.tvm.ai/api/python/module.html#tvm.runtime.Module.time_evaluator
	num = 4 # number of times we run module for a single measurement
	rep = 3 # number of measurements (we derive std dev from this)
	timer = m.module.time_evaluator("run", ctx, number=num, repeat=rep)

	if env.TARGET in ["sim", "tsim"]:
	simulator.clear_stats()
	timer()
	sim_stats = simulator.stats()
	print("\nExecution statistics:")
	for k, v in sim_stats.items():
	# Since we execute the workload many times, we need to normalize stats
	# Note that there is always one warm up run
	# Therefore we divide the overall stats by (num * rep + 1)
	print("\t{:<16}: {:>16}".format(k, v // (num * rep + 1)))
	else:
	tcost = timer()
	std = np.std(tcost.results) * 1000
	mean = tcost.mean * 1000
	print("\nPerformed inference in %.2fms (std = %.2f) for %d samples" % (mean, std, env.BATCH))
	print("Average per sample inference time: %.2fms" % (mean/env.BATCH))

	# Get detection results from out
	thresh = 0.5
	nms_thresh = 0.45
	tvm_out = []
	for i in range(2):
	layer_out = {}
	layer_out['type'] = 'Yolo'
	# Get the yolo layer attributes (n, out_c, out_h, out_w, classes, total)
	layer_attr = m.get_output(i*4+3).asnumpy()
	layer_out['biases'] = m.get_output(i*4+2).asnumpy()
	layer_out['mask'] = m.get_output(i*4+1).asnumpy()
	out_shape = (layer_attr[0], layer_attr[1]//layer_attr[0],
	layer_attr[2], layer_attr[3])
	layer_out['output'] = m.get_output(i*4).asnumpy().reshape(out_shape)
	layer_out['classes'] = layer_attr[4]
	tvm_out.append(layer_out)
	thresh = 0.560

	# Show detection results
	img = darknet.load_image_color(img_path)
	_, im_h, im_w = img.shape
	dets = yolo_detection.fill_network_boxes((netw, neth),
	(im_w, im_h),
	thresh,
	1,
	tvm_out)
	last_layer = net.layers[net.n - 1]
	yolo_detection.do_nms_sort(dets, last_layer.classes, nms_thresh)
	yolo_detection.draw_detections(font_path,
	img,
	dets,
	thresh,
	names,
	last_layer.classes)
	plt.imshow(img.transpose(1, 2, 0))
	plt.show()