tests/python/tensorrt/test_cvnets.py - mxnet - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import gc
 import gluoncv
 import mxnet as mx
 import numpy as np

 from mxnet import gluon
 from time import time

 from mxnet.gluon.data.vision import transforms


 def get_classif_model(model_name, use_tensorrt, ctx=mx.gpu(0), batch_size=128):
     mx.contrib.tensorrt.set_use_fp16(False)
     h, w = 32, 32
     net = gluoncv.model_zoo.get_model(model_name, pretrained=True)
     net.hybridize()
     net.forward(mx.nd.zeros((batch_size, 3, h, w)))
     net.export(model_name)
     _sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, 0)
     if use_tensorrt:
         sym = _sym.get_backend_symbol('TensorRT')
         mx.contrib.tensorrt.init_tensorrt_params(sym, arg_params, aux_params)
     else:
         sym = _sym
     executor = sym.simple_bind(ctx=ctx, data=(batch_size, 3, h, w),
                                softmax_label=(batch_size,),
 			       grad_req='null', force_rebind=True)
     executor.copy_params_from(arg_params, aux_params)
     return executor


 def cifar10_infer(model_name, use_tensorrt, num_workers, ctx=mx.gpu(0), batch_size=128):
     executor = get_classif_model(model_name, use_tensorrt, ctx, batch_size)

     num_ex = 10000
     all_preds = np.zeros([num_ex, 10])

     all_label_test = np.zeros(num_ex)

     transform_test = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
     ])

     data_loader = lambda: gluon.data.DataLoader(
         gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
         batch_size=batch_size, shuffle=False, num_workers=num_workers)

     val_data = data_loader()

     for idx, (data, label) in enumerate(val_data):
         # Skip last batch if it's undersized.
         if data.shape[0] < batch_size:
             continue
         offset = idx * batch_size
         all_label_test[offset:offset + batch_size] = label.asnumpy()

         # warm-up, but don't use result
         executor.forward(is_train=False, data=data)
         executor.outputs[0].wait_to_read()

     gc.collect()
     val_data = data_loader()
     example_ct = 0
     start = time()

     # if use_tensorrt:
     for idx, (data, label) in enumerate(val_data):
         # Skip last batch if it's undersized.
         if data.shape[0] < batch_size:
             continue
         executor.forward(is_train=False, data=data)
         preds = executor.outputs[0].asnumpy()
         offset = idx * batch_size
         all_preds[offset:offset + batch_size, :] = preds[:batch_size]
         example_ct += batch_size

     all_preds = np.argmax(all_preds, axis=1)
     matches = (all_preds[:example_ct] == all_label_test[:example_ct]).sum()
     duration = time() - start

     return duration, 100.0 * matches / example_ct


 def run_experiment_for(model_name, batch_size, num_workers):
     print("\n===========================================")
     print("Model: %s" % model_name)
     print("===========================================")
     print("*** Running inference using pure MXNet ***\n")
     mx_duration, mx_pct = cifar10_infer(model_name=model_name, batch_size=batch_size,
                                         num_workers=num_workers, use_tensorrt=False)
     print("\nMXNet: time elapsed: %.3fs, accuracy: %.2f%%" % (mx_duration, mx_pct))
     print("\n*** Running inference using MXNet + TensorRT ***\n")
     trt_duration, trt_pct = cifar10_infer(model_name=model_name, batch_size=batch_size,
                                           num_workers=num_workers, use_tensorrt=True)
     print("TensorRT: time elapsed: %.3fs, accuracy: %.2f%%" % (trt_duration, trt_pct))
     speedup = mx_duration / trt_duration
     print("TensorRT speed-up (not counting compilation): %.2fx" % speedup)

     acc_diff = abs(mx_pct - trt_pct)
     print("Absolute accuracy difference: %f" % acc_diff)
     return speedup, acc_diff


 def test_tensorrt_on_cifar_resnets(batch_size=32, tolerance=0.1, num_workers=1):
     original_use_fp16 = mx.contrib.tensorrt.get_use_fp16()
     try:
         models = [
             'cifar_resnet20_v1',
             'cifar_resnet56_v1',
             'cifar_resnet110_v1',
             'cifar_resnet20_v2',
             'cifar_resnet56_v2',
             'cifar_resnet110_v2',
             'cifar_wideresnet16_10',
             'cifar_wideresnet28_10',
             'cifar_wideresnet40_8',
             'cifar_resnext29_16x64d'
         ]

         num_models = len(models)

         speedups = np.zeros(num_models, dtype=np.float32)
         acc_diffs = np.zeros(num_models, dtype=np.float32)

         test_start = time()

         for idx, model in enumerate(models):
             speedup, acc_diff = run_experiment_for(model, batch_size, num_workers)
             speedups[idx] = speedup
             acc_diffs[idx] = acc_diff
             assert acc_diff < tolerance, "Accuracy difference between MXNet and TensorRT > %.2f%% for model %s" % (
                 tolerance, model)

         print("Perf and correctness checks run on the following models:")
         print(models)
         mean_speedup = np.mean(speedups)
         std_speedup = np.std(speedups)
         print("\nSpeedups:")
         print(speedups)
         print("Speedup range: [%.2f, %.2f]" % (np.min(speedups), np.max(speedups)))
         print("Mean speedup: %.2f" % mean_speedup)
         print("St. dev. of speedups: %.2f" % std_speedup)
         print("\nAcc. differences: %s" % str(acc_diffs))

         test_duration = time() - test_start

         print("Test duration: %.2f seconds" % test_duration)
     finally:
         mx.contrib.tensorrt.set_use_fp16(original_use_fp16)


 if __name__ == '__main__':
     import nose

     nose.runmodule()
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import gc
	import gluoncv
	import mxnet as mx
	import numpy as np

	from mxnet import gluon
	from time import time

	from mxnet.gluon.data.vision import transforms


	def get_classif_model(model_name, use_tensorrt, ctx=mx.gpu(0), batch_size=128):
	mx.contrib.tensorrt.set_use_fp16(False)
	h, w = 32, 32
	net = gluoncv.model_zoo.get_model(model_name, pretrained=True)
	net.hybridize()
	net.forward(mx.nd.zeros((batch_size, 3, h, w)))
	net.export(model_name)
	_sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, 0)
	if use_tensorrt:
	sym = _sym.get_backend_symbol('TensorRT')
	mx.contrib.tensorrt.init_tensorrt_params(sym, arg_params, aux_params)
	else:
	sym = _sym
	executor = sym.simple_bind(ctx=ctx, data=(batch_size, 3, h, w),
	softmax_label=(batch_size,),
	grad_req='null', force_rebind=True)
	executor.copy_params_from(arg_params, aux_params)
	return executor


	def cifar10_infer(model_name, use_tensorrt, num_workers, ctx=mx.gpu(0), batch_size=128):
	executor = get_classif_model(model_name, use_tensorrt, ctx, batch_size)

	num_ex = 10000
	all_preds = np.zeros([num_ex, 10])

	all_label_test = np.zeros(num_ex)

	transform_test = transforms.Compose([
	transforms.ToTensor(),
	transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
	])

	data_loader = lambda: gluon.data.DataLoader(
	gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
	batch_size=batch_size, shuffle=False, num_workers=num_workers)

	val_data = data_loader()

	for idx, (data, label) in enumerate(val_data):
	# Skip last batch if it's undersized.
	if data.shape[0] < batch_size:
	continue
	offset = idx * batch_size
	all_label_test[offset:offset + batch_size] = label.asnumpy()

	# warm-up, but don't use result
	executor.forward(is_train=False, data=data)
	executor.outputs[0].wait_to_read()

	gc.collect()
	val_data = data_loader()
	example_ct = 0
	start = time()

	# if use_tensorrt:
	for idx, (data, label) in enumerate(val_data):
	# Skip last batch if it's undersized.
	if data.shape[0] < batch_size:
	continue
	executor.forward(is_train=False, data=data)
	preds = executor.outputs[0].asnumpy()
	offset = idx * batch_size
	all_preds[offset:offset + batch_size, :] = preds[:batch_size]
	example_ct += batch_size

	all_preds = np.argmax(all_preds, axis=1)
	matches = (all_preds[:example_ct] == all_label_test[:example_ct]).sum()
	duration = time() - start

	return duration, 100.0 * matches / example_ct


	def run_experiment_for(model_name, batch_size, num_workers):
	print("\n===========================================")
	print("Model: %s" % model_name)
	print("===========================================")
	print("* Running inference using pure MXNet *\n")
	mx_duration, mx_pct = cifar10_infer(model_name=model_name, batch_size=batch_size,
	num_workers=num_workers, use_tensorrt=False)
	print("\nMXNet: time elapsed: %.3fs, accuracy: %.2f%%" % (mx_duration, mx_pct))
	print("\n* Running inference using MXNet + TensorRT *\n")
	trt_duration, trt_pct = cifar10_infer(model_name=model_name, batch_size=batch_size,
	num_workers=num_workers, use_tensorrt=True)
	print("TensorRT: time elapsed: %.3fs, accuracy: %.2f%%" % (trt_duration, trt_pct))
	speedup = mx_duration / trt_duration
	print("TensorRT speed-up (not counting compilation): %.2fx" % speedup)

	acc_diff = abs(mx_pct - trt_pct)
	print("Absolute accuracy difference: %f" % acc_diff)
	return speedup, acc_diff


	def test_tensorrt_on_cifar_resnets(batch_size=32, tolerance=0.1, num_workers=1):
	original_use_fp16 = mx.contrib.tensorrt.get_use_fp16()
	try:
	models = [
	'cifar_resnet20_v1',
	'cifar_resnet56_v1',
	'cifar_resnet110_v1',
	'cifar_resnet20_v2',
	'cifar_resnet56_v2',
	'cifar_resnet110_v2',
	'cifar_wideresnet16_10',
	'cifar_wideresnet28_10',
	'cifar_wideresnet40_8',
	'cifar_resnext29_16x64d'
	]

	num_models = len(models)

	speedups = np.zeros(num_models, dtype=np.float32)
	acc_diffs = np.zeros(num_models, dtype=np.float32)

	test_start = time()

	for idx, model in enumerate(models):
	speedup, acc_diff = run_experiment_for(model, batch_size, num_workers)
	speedups[idx] = speedup
	acc_diffs[idx] = acc_diff
	assert acc_diff < tolerance, "Accuracy difference between MXNet and TensorRT > %.2f%% for model %s" % (
	tolerance, model)

	print("Perf and correctness checks run on the following models:")
	print(models)
	mean_speedup = np.mean(speedups)
	std_speedup = np.std(speedups)
	print("\nSpeedups:")
	print(speedups)
	print("Speedup range: [%.2f, %.2f]" % (np.min(speedups), np.max(speedups)))
	print("Mean speedup: %.2f" % mean_speedup)
	print("St. dev. of speedups: %.2f" % std_speedup)
	print("\nAcc. differences: %s" % str(acc_diffs))

	test_duration = time() - test_start

	print("Test duration: %.2f seconds" % test_duration)
	finally:
	mx.contrib.tensorrt.set_use_fp16(original_use_fp16)


	if __name__ == '__main__':
	import nose

	nose.runmodule()