# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
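"""Correctness and performance checks for MXNet-TensorRT inference on
GluonCV CIFAR-10 models, compared against plain MXNet inference."""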
import gc
from time import time

import gluoncv
import mxnet as mx
import numpy as np
from mxnet import gluon
from mxnet.gluon.data.vision import transforms


def get_classif_model(model_name, use_tensorrt, ctx=mx.gpu(0), batch_size=128):
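    """Return an inference-only executor for a pretrained GluonCV CIFAR model.

    The Gluon network is hybridized, exported to a symbol/params checkpoint and
    reloaded; when use_tensorrt is True the symbol is partitioned with the
    TensorRT backend and the TensorRT params are initialized before binding.
    """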
    mx.contrib.tensorrt.set_use_fp16(False)
    h, w = 32, 32
    net = gluoncv.model_zoo.get_model(model_name, pretrained=True)
    net.hybridize()
    net.forward(mx.nd.zeros((batch_size, 3, h, w)))
    net.export(model_name)
    _sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, 0)
    if use_tensorrt:
        sym = _sym.get_backend_symbol('TensorRT')
        mx.contrib.tensorrt.init_tensorrt_params(sym, arg_params, aux_params)
    else:
        sym = _sym
    executor = sym.simple_bind(ctx=ctx, data=(batch_size, 3, h, w),
                               softmax_label=(batch_size,),
                               grad_req='null', force_rebind=True)
    executor.copy_params_from(arg_params, aux_params)
    return executor


def cifar10_infer(model_name, use_tensorrt, num_workers, ctx=mx.gpu(0), batch_size=128):
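    """Run CIFAR-10 test-set inference for a model, with or without TensorRT.

    The test set is traversed once as a warm-up pass (collecting the labels),
    then a second, timed pass produces the predictions. Returns the elapsed
    time of the timed pass and the top-1 accuracy in percent.
    """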
    executor = get_classif_model(model_name, use_tensorrt, ctx, batch_size)
    num_ex = 10000
    all_preds = np.zeros([num_ex, 10])
    all_label_test = np.zeros(num_ex)
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])
    data_loader = lambda: gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Warm-up pass: collect the labels and run forward without using the results.
    val_data = data_loader()
    for idx, (data, label) in enumerate(val_data):
        # Skip last batch if it's undersized.
        if data.shape[0] < batch_size:
            continue
        offset = idx * batch_size
        all_label_test[offset:offset + batch_size] = label.asnumpy()
        executor.forward(is_train=False, data=data)
        executor.outputs[0].wait_to_read()

    gc.collect()

    # Timed pass: run forward again, this time recording the predictions.
    val_data = data_loader()
    example_ct = 0
    start = time()
    for idx, (data, label) in enumerate(val_data):
        # Skip last batch if it's undersized.
        if data.shape[0] < batch_size:
            continue
        executor.forward(is_train=False, data=data)
        preds = executor.outputs[0].asnumpy()
        offset = idx * batch_size
        all_preds[offset:offset + batch_size, :] = preds[:batch_size]
        example_ct += batch_size

    all_preds = np.argmax(all_preds, axis=1)
    matches = (all_preds[:example_ct] == all_label_test[:example_ct]).sum()
    duration = time() - start
    return duration, 100.0 * matches / example_ct


def run_experiment_for(model_name, batch_size, num_workers):
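    """Time CIFAR-10 inference for one model with and without TensorRT.

    Prints elapsed time and accuracy for both runs, and returns the TensorRT
    speed-up factor together with the absolute accuracy difference.
    """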
print("\n===========================================")
print("Model: %s" % model_name)
print("===========================================")
print("*** Running inference using pure MXNet ***\n")
mx_duration, mx_pct = cifar10_infer(model_name=model_name, batch_size=batch_size,
num_workers=num_workers, use_tensorrt=False)
print("\nMXNet: time elapsed: %.3fs, accuracy: %.2f%%" % (mx_duration, mx_pct))
print("\n*** Running inference using MXNet + TensorRT ***\n")
trt_duration, trt_pct = cifar10_infer(model_name=model_name, batch_size=batch_size,
num_workers=num_workers, use_tensorrt=True)
print("TensorRT: time elapsed: %.3fs, accuracy: %.2f%%" % (trt_duration, trt_pct))
speedup = mx_duration / trt_duration
print("TensorRT speed-up (not counting compilation): %.2fx" % speedup)
acc_diff = abs(mx_pct - trt_pct)
print("Absolute accuracy difference: %f" % acc_diff)
return speedup, acc_diff
def test_tensorrt_on_cifar_resnets(batch_size=32, tolerance=0.1, num_workers=1):
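    """Compare MXNet and MXNet-TensorRT inference across GluonCV CIFAR models.

    For each model the accuracy difference must stay below `tolerance`
    (in percentage points); speed-up statistics are printed at the end. The
    original TensorRT FP16 setting is restored when the test finishes.
    """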
    original_use_fp16 = mx.contrib.tensorrt.get_use_fp16()
    try:
        models = [
            'cifar_resnet20_v1',
            'cifar_resnet56_v1',
            'cifar_resnet110_v1',
            'cifar_resnet20_v2',
            'cifar_resnet56_v2',
            'cifar_resnet110_v2',
            'cifar_wideresnet16_10',
            'cifar_wideresnet28_10',
            'cifar_wideresnet40_8',
            'cifar_resnext29_16x64d'
        ]
        num_models = len(models)
        speedups = np.zeros(num_models, dtype=np.float32)
        acc_diffs = np.zeros(num_models, dtype=np.float32)
        test_start = time()
        for idx, model in enumerate(models):
            speedup, acc_diff = run_experiment_for(model, batch_size, num_workers)
            speedups[idx] = speedup
            acc_diffs[idx] = acc_diff
            assert acc_diff < tolerance, "Accuracy difference between MXNet and TensorRT > %.2f%% for model %s" % (
                tolerance, model)
        print("Perf and correctness checks run on the following models:")
        print(models)
        mean_speedup = np.mean(speedups)
        std_speedup = np.std(speedups)
        print("\nSpeedups:")
        print(speedups)
        print("Speedup range: [%.2f, %.2f]" % (np.min(speedups), np.max(speedups)))
        print("Mean speedup: %.2f" % mean_speedup)
        print("St. dev. of speedups: %.2f" % std_speedup)
        print("\nAcc. differences: %s" % str(acc_diffs))
        test_duration = time() - test_start
        print("Test duration: %.2f seconds" % test_duration)
    finally:
        mx.contrib.tensorrt.set_use_fp16(original_use_fp16)


if __name__ == '__main__':
    import nose
    nose.runmodule()