 # tests/python/contrib/test_tensorrt_int8_exp.py - tvm - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 import pytest
 import os
 import numpy as np

 try:
     # See issue #9362.
     import torch
 except:
     pass

 import tvm
 import tvm.testing
 from tvm import relay
 from tvm.contrib.download import download_testdata
 from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
 from tvm.relay.op.contrib import tensorrt


 def skip_codegen_test():
     """Report whether the TensorRT codegen test has to be skipped.

     Returns
     -------
     bool
         True when ``tvm.runtime.enabled("cuda")`` is false, ``tvm.cuda(0)``
         does not exist, or the TensorRT compiler is reported as disabled;
         False otherwise.  A message describing the outcome is printed.
     """
     if not (tvm.runtime.enabled("cuda") and tvm.cuda(0).exist):
         print("Skip because CUDA is not enabled.")
         return True

     compiler_missing = not tensorrt.is_tensorrt_compiler_enabled()
     if compiler_missing:
         print("Skip because TensorRT compiler is not available.")
     else:
         print("TensorRT compiler is available!")
     return compiler_missing


 def skip_runtime_test():
     """Report whether the TensorRT runtime test has to be skipped.

     Returns
     -------
     bool
         True when ``tvm.runtime.enabled("cuda")`` is false, ``tvm.cuda(0)``
         does not exist, or the TensorRT runtime is reported as disabled;
         False otherwise.  A message describing the outcome is printed.
     """
     if not (tvm.runtime.enabled("cuda") and tvm.cuda(0).exist):
         print("Skip because CUDA is not enabled.")
         return True

     runtime_missing = not tensorrt.is_tensorrt_runtime_enabled()
     if runtime_missing:
         print("Skip because TensorRT runtime is not available.")
     else:
         print("TensorRT runtime is available!")
     return runtime_missing


 def test_trt_int8():
     """Compile ResNet-34 with TensorRT INT8 and compare it against PyTorch.

     The test traces a pretrained torchvision ``resnet34``, imports it into
     Relay, partitions it for TensorRT and builds it for the ``cuda`` target.
     It then executes the module ``TENSORRT_NUM_CALI_INT8`` times for
     calibration, runs it once more, and asserts that the cosine distance
     between the TVM output and the PyTorch output is at most 0.01.  Finally
     the mean and standard deviation of the measured inference time are
     printed.

     The test returns early (after printing a message) when the CUDA/TensorRT
     checks fail or when PIL, scipy, torch or torchvision cannot be imported.
     """
     if skip_codegen_test() or skip_runtime_test():
         return

     # Only a missing package should skip the test; interrupts and unrelated
     # failures must not be swallowed.
     try:
         from PIL import Image
         from scipy.spatial import distance
     except ImportError:
         print("please install scipy and Image python packages")
         return

     try:
         import torch
         import torchvision
         from torchvision import transforms
     except ImportError:
         print("please install pytorch python package")
         return

     # Remember the previous values so the INT8 switches do not leak into
     # whatever else runs in this process after the test.
     # NOTE(review): presumably these variables are read by the TensorRT
     # runtime when the engine is built — confirm.
     int8_env = {"TVM_TENSORRT_USE_INT8": "1", "TENSORRT_NUM_CALI_INT8": "10"}
     previous_env = {key: os.environ.get(key) for key in int8_env}
     os.environ.update(int8_env)
     try:
         model_name = "resnet34"
         model = getattr(torchvision.models, model_name)(pretrained=True)
         model = model.eval()

         # We grab the TorchScripted model via tracing
         input_shape = [1, 3, 224, 224]
         input_data = torch.randn(input_shape)
         scripted_model = torch.jit.trace(model, input_data).eval()

         img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
         img_path = download_testdata(img_url, "cat.png", module="data")
         img = Image.open(img_path).resize((224, 224))
         my_preprocess = transforms.Compose(
             [
                 transforms.Resize(256),
                 transforms.CenterCrop(224),
                 transforms.ToTensor(),
                 transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
             ]
         )
         img = my_preprocess(img)
         # Add the leading batch axis.
         img = np.expand_dims(img, 0)

         input_name = "input0"
         shape_list = [(input_name, img.shape)]
         mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

         # compile the model
         target = "cuda"
         dev = tvm.cuda()
         mod = partition_for_tensorrt(mod, params)
         with tvm.transform.PassContext(opt_level=3):
             lib = relay.build(mod, target=target, params=params)

         gen_module = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))

         # The same image is fed on every run, so convert it only once.
         tvm_data = tvm.nd.array(img)

         num_cali_int8 = int(os.environ["TENSORRT_NUM_CALI_INT8"])
         if num_cali_int8 != 0:
             print("start calibrating data ... ")
             for _ in range(num_cali_int8):
                 # The input is bound by name through set_input, so run()
                 # needs no additional keyword inputs.
                 gen_module.set_input(input_name, tvm_data)
                 gen_module.run()
             print("finished calibrating data ... ")

         # get output of tvm model
         print("rebuild engine and test to run ... ")
         gen_module.set_input(input_name, tvm_data)
         gen_module.run()
         out = gen_module.get_output(0)

         # check that the output of tvm and the output of the pytorch model are close
         torch_data = torch.from_numpy(img)
         model = scripted_model.eval()
         torch_output = model(torch_data)

         # scipy's cosine distance is defined on 1-D vectors, so flatten both
         # outputs before comparing them.
         tvm_vector = out.numpy().ravel()
         torch_vector = torch_output.detach().cpu().numpy().ravel()
         cosine_distance_res = distance.cosine(tvm_vector, torch_vector)
         assert cosine_distance_res <= 0.01

         # Evaluate
         print("Evaluate inference time cost...")
         ftimer = gen_module.module.time_evaluator("run", dev, repeat=10, min_repeat_ms=500)
         prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
         message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % (
             np.mean(prof_res),
             np.std(prof_res),
         )
         print(message)
     finally:
         # Put the environment back exactly as it was before the test.
         for key, old_value in previous_env.items():
             if old_value is None:
                 os.environ.pop(key, None)
             else:
                 os.environ[key] = old_value


 # Script entry point: when this file is executed directly, hand control to
 # tvm.testing.main() (presumably it runs the tests in this file — confirm).
 if __name__ == "__main__":
     tvm.testing.main()
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	import pytest
	import os
	import numpy as np

	try:
	# See issue #9362.
	import torch
	except:
	pass

	import tvm
	import tvm.testing
	from tvm import relay
	from tvm.contrib.download import download_testdata
	from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
	from tvm.relay.op.contrib import tensorrt


	def skip_codegen_test():
	    """Report whether the TensorRT codegen test has to be skipped.

	    Returns
	    -------
	    bool
	        True when ``tvm.runtime.enabled("cuda")`` is false, ``tvm.cuda(0)``
	        does not exist, or the TensorRT compiler is reported as disabled;
	        False otherwise.  A message describing the outcome is printed.
	    """
	    if not tvm.runtime.enabled("cuda") or not tvm.cuda(0).exist:
	        print("Skip because CUDA is not enabled.")
	        return True
	    if not tensorrt.is_tensorrt_compiler_enabled():
	        print("Skip because TensorRT compiler is not available.")
	        return True
	    print("TensorRT compiler is available!")
	    return False


	def skip_runtime_test():
	    """Report whether the TensorRT runtime test has to be skipped.

	    Returns
	    -------
	    bool
	        True when ``tvm.runtime.enabled("cuda")`` is false, ``tvm.cuda(0)``
	        does not exist, or the TensorRT runtime is reported as disabled;
	        False otherwise.  A message describing the outcome is printed.
	    """
	    if not tvm.runtime.enabled("cuda") or not tvm.cuda(0).exist:
	        print("Skip because CUDA is not enabled.")
	        return True
	    if not tensorrt.is_tensorrt_runtime_enabled():
	        print("Skip because TensorRT runtime is not available.")
	        return True
	    print("TensorRT runtime is available!")
	    return False


	def test_trt_int8():
	    """Compile ResNet-34 with TensorRT INT8 and compare it against PyTorch.

	    The test traces a pretrained torchvision ``resnet34``, imports it into
	    Relay, partitions it for TensorRT and builds it for the ``cuda`` target.
	    It then executes the module ``TENSORRT_NUM_CALI_INT8`` times for
	    calibration, runs it once more, and asserts that the cosine distance
	    between the TVM output and the PyTorch output is at most 0.01.  Finally
	    the mean and standard deviation of the measured inference time are
	    printed.

	    The test returns early (after printing a message) when the CUDA/TensorRT
	    checks fail or when PIL, scipy, torch or torchvision cannot be imported.
	    """
	    if skip_codegen_test() or skip_runtime_test():
	        return

	    # Only a missing package should skip the test; interrupts and unrelated
	    # failures must not be swallowed.
	    try:
	        from PIL import Image
	        from scipy.spatial import distance
	    except ImportError:
	        print("please install scipy and Image python packages")
	        return

	    try:
	        import torch
	        import torchvision
	        from torchvision import transforms
	    except ImportError:
	        print("please install pytorch python package")
	        return

	    # Remember the previous values so the INT8 switches do not leak into
	    # whatever else runs in this process after the test.
	    # NOTE(review): presumably these variables are read by the TensorRT
	    # runtime when the engine is built — confirm.
	    int8_env = {"TVM_TENSORRT_USE_INT8": "1", "TENSORRT_NUM_CALI_INT8": "10"}
	    previous_env = {key: os.environ.get(key) for key in int8_env}
	    os.environ.update(int8_env)
	    try:
	        model_name = "resnet34"
	        model = getattr(torchvision.models, model_name)(pretrained=True)
	        model = model.eval()

	        # We grab the TorchScripted model via tracing
	        input_shape = [1, 3, 224, 224]
	        input_data = torch.randn(input_shape)
	        scripted_model = torch.jit.trace(model, input_data).eval()

	        img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
	        img_path = download_testdata(img_url, "cat.png", module="data")
	        img = Image.open(img_path).resize((224, 224))
	        my_preprocess = transforms.Compose(
	            [
	                transforms.Resize(256),
	                transforms.CenterCrop(224),
	                transforms.ToTensor(),
	                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
	            ]
	        )
	        img = my_preprocess(img)
	        # Add the leading batch axis.
	        img = np.expand_dims(img, 0)

	        input_name = "input0"
	        shape_list = [(input_name, img.shape)]
	        mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

	        # compile the model
	        target = "cuda"
	        dev = tvm.cuda()
	        mod = partition_for_tensorrt(mod, params)
	        with tvm.transform.PassContext(opt_level=3):
	            lib = relay.build(mod, target=target, params=params)

	        gen_module = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))

	        # The same image is fed on every run, so convert it only once.
	        tvm_data = tvm.nd.array(img)

	        num_cali_int8 = int(os.environ["TENSORRT_NUM_CALI_INT8"])
	        if num_cali_int8 != 0:
	            print("start calibrating data ... ")
	            for _ in range(num_cali_int8):
	                # The input is bound by name through set_input, so run()
	                # needs no additional keyword inputs.
	                gen_module.set_input(input_name, tvm_data)
	                gen_module.run()
	            print("finished calibrating data ... ")

	        # get output of tvm model
	        print("rebuild engine and test to run ... ")
	        gen_module.set_input(input_name, tvm_data)
	        gen_module.run()
	        out = gen_module.get_output(0)

	        # check that the output of tvm and the output of the pytorch model are close
	        torch_data = torch.from_numpy(img)
	        model = scripted_model.eval()
	        torch_output = model(torch_data)

	        # scipy's cosine distance is defined on 1-D vectors, so flatten both
	        # outputs before comparing them.
	        tvm_vector = out.numpy().ravel()
	        torch_vector = torch_output.detach().cpu().numpy().ravel()
	        cosine_distance_res = distance.cosine(tvm_vector, torch_vector)
	        assert cosine_distance_res <= 0.01

	        # Evaluate
	        print("Evaluate inference time cost...")
	        ftimer = gen_module.module.time_evaluator("run", dev, repeat=10, min_repeat_ms=500)
	        prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
	        message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % (
	            np.mean(prof_res),
	            np.std(prof_res),
	        )
	        print(message)
	    finally:
	        # Put the environment back exactly as it was before the test.
	        for key, old_value in previous_env.items():
	            if old_value is None:
	                os.environ.pop(key, None)
	            else:
	                os.environ[key] = old_value


	# Script entry point: when this file is executed directly, hand control to
	# tvm.testing.main() (presumably it runs the tests in this file — confirm).
	if __name__ == "__main__":
	    tvm.testing.main()