# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Perform ResNet autoTVM tuning on VTA using NNVM."""
import os

import numpy as np

import tvm
from tvm import autotvm
from tvm.autotvm.measure.measure_methods import request_remote
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.contrib import graph_runtime, util
from tvm.contrib.download import download
import topi
import nnvm.compiler

import vta
import vta.testing

env = vta.get_env()
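

# AutoTVM only tunes operators registered as tuning templates. This registers
# a conv2d template that runs topi.nn.conv2d under the VTA target and appends
# VTA's fixed-point requantization steps (right shift, clip to [0, 127], cast
# to int8), so the extracted tasks match what the compiled graph executes.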
def register_vta_tuning_tasks():
    from tvm.autotvm.task.topi_integration import TaskExtractEnv, deserialize_args

    @tvm.tag_scope(tag=topi.tag.ELEMWISE)
    def my_clip(x, a_min, a_max):
        """Unlike topi's current clip, put min and max into two stages."""
        const_min = tvm.const(a_min, x.dtype)
        const_max = tvm.const(a_max, x.dtype)
        x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
        x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
        return x

    # init autotvm env to register VTA operator
    TaskExtractEnv()

    @autotvm.task.register("topi_nn_conv2d", override=True)
    def _topi_nn_conv2d(*args, **kwargs):
        assert not kwargs, "Do not support kwargs in template function call"
        args = deserialize_args(args)
        A, W = args[:2]

        with tvm.target.vta():
            res = topi.nn.conv2d(*args, **kwargs)
            res = topi.right_shift(res, 8)
            res = my_clip(res, 0, 127)
            res = topi.cast(res, "int8")

        if tvm.target.current_target().device_name == 'vta':
            s = topi.generic.schedule_conv2d_nchw([res])
        else:
            s = tvm.create_schedule([res.op])
        return s, [A, W, res]
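

# Build the full NNVM graph for deployment: normalize casts and conv2d fuse
# patterns for the VTA backend, pack tensors into VTA's (BATCH, BLOCK_OUT)
# blocked layout, then compile under the VTA build config.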
def generate_graph(sym, params, target, target_host):
    # Populate the shape and data type dictionary
    shape_dict = {"data": (1, 3, 224, 224)}
    dtype_dict = {"data": 'float32'}
    shape_dict.update({k: v.shape for k, v in params.items()})
    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

    # Apply NNVM graph optimization passes
    sym = vta.graph.clean_cast(sym)
    sym = vta.graph.clean_conv_fuse(sym)
    assert env.BLOCK_IN == env.BLOCK_OUT
    sym = vta.graph.pack(sym, shape_dict, env.BATCH, env.BLOCK_OUT)

    # Compile NNVM graph
    with nnvm.compiler.build_config(opt_level=3):
        with vta.build_config():
            graph, lib, params = nnvm.compiler.build(
                sym, target, shape_dict, dtype_dict,
                params=params, target_host=target_host)

    return graph, lib, params
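

# Apply the same graph-level preprocessing as generate_graph(), then have
# AutoTVM traverse the graph and emit one tuning task per conv2d workload.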
def extract_tasks(sym, params, target, target_host):
    # Populate the shape and data type dictionary
    shape_dict = {"data": (1, 3, 224, 224)}
    dtype_dict = {"data": 'float32'}
    shape_dict.update({k: v.shape for k, v in params.items()})
    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

    # Apply NNVM graph optimization passes
    sym = vta.graph.clean_cast(sym)
    sym = vta.graph.clean_conv_fuse(sym)
    assert env.BLOCK_IN == env.BLOCK_OUT
    sym = vta.graph.pack(sym, shape_dict, env.BATCH, env.BLOCK_OUT)

    with vta.build_config():
        tasks = autotvm.task.extract_from_graph(
            graph=sym, shape=shape_dict, dtype=dtype_dict, target=target,
            params=params, symbols=(nnvm.sym.conv2d,), target_host=target_host)
    return tasks
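

# Fetch the quantized ResNet-18 model (graph JSON, parameter blob, and
# category file) into a local cache directory and load it as an NNVM graph.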
def download_model():
    url = "https://github.com/uwsaml/web-data/raw/master/vta/models/"
    categ_fn = 'synset.txt'
    graph_fn = 'resnet18_qt8.json'
    params_fn = 'resnet18_qt8.params'
    data_dir = '_data'
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    for file in [categ_fn, graph_fn, params_fn]:
        # Check the cache directory, not the working directory, so files
        # are only downloaded once
        if not os.path.isfile(os.path.join(data_dir, file)):
            download(os.path.join(url, file), os.path.join(data_dir, file))

    sym = nnvm.graph.load_json(open(os.path.join(data_dir, graph_fn)).read())
    params = nnvm.compiler.load_param_dict(open(os.path.join(data_dir, params_fn), 'rb').read())
    return sym, params
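

# Tune every extracted task with the requested tuner, optionally seeding each
# tuner with records from earlier tasks (transfer learning). All measurements
# go to a temporary log; only the best record per workload is kept in
# log_filename.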
def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True,
               try_winograd=True):
    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        n_trial_ = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial_,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(n_trial_, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
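

# Driver: download the model, extract conv2d tasks, tune them against the
# remote device, then compile with the best schedules found and benchmark the
# end-to-end network.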
if __name__ == '__main__':
    # Get tracker info from env
    tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
    tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
    if not tracker_host or not tracker_port:
        print("Set your AutoTVM tracker node host and port variables to run the autotuner")
        exit()
    tracker_port = int(tracker_port)

    # Download model
    sym, params = download_model()

    # Register VTA tuning tasks
    register_vta_tuning_tasks()

    # Extract tasks
    print("Extracting tasks...")
    target = tvm.target.vta()
    target_host = env.target_host
    tasks = extract_tasks(sym, params, target, target_host)

    # Perform autotuning
    print("Tuning...")
    tuning_opt = {
        'log_filename': 'resnet-18.log',
        'tuner': 'random',
        'n_trial': int(1e9),  # effectively unbounded; each task stops at its config space size
        'early_stopping': None,
        'measure_option': autotvm.measure_option(
            builder=autotvm.LocalBuilder(build_func=vta.vta_autotvm_build_func),
            runner=autotvm.RPCRunner(env.TARGET, tracker_host, tracker_port,
                                     number=4, repeat=3, timeout=60,
                                     check_correctness=True))
    }
    tune_tasks(tasks, **tuning_opt)
    # Compile kernels with history best records
    with autotvm.tophub.context(target, extra_files=[tuning_opt['log_filename']]):
        # ResNet parameters
        input_shape = (1, 3, 224, 224)
        dtype = 'float32'

        # Compile network
        print("Compiling network with best tuning parameters...")
        graph, lib, params = generate_graph(sym, params, target, target_host)

        # Export library
        tmp = util.tempdir()
        filename = "net.tar"
        lib.export_library(tmp.relpath(filename))

        # Upload module to device
        print("Upload...")
        remote = request_remote(env.TARGET, tracker_host, tracker_port, timeout=10000)
        remote.upload(tmp.relpath(filename))
        rlib = remote.load_module(filename)

        # Upload parameters to device
        ctx = remote.context(str(target), 0)
        rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
        module = graph_runtime.create(graph, rlib, ctx)
        module.set_input('data', data_tvm)
        module.set_input(**rparams)

        # Evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=3, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))