# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import tvm
from tvm import te
import numpy as np
from tvm import relay
from tvm.contrib import graph_executor
from tvm.relay.testing.temp_op_attr import TempOpAttr

# We use the llvm target to test functionality. The bare `llvm` target maps to
# an older Intel machine generation that legalizes to a simple lowering.
# The legalization is therefore overridden so that it is skipped, and the
# QNNCanonicalizeOps lowering is used for the tests.
def legalize_qnn_dense(attrs, inputs, types):
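    # Returning None leaves qnn.dense unlegalized so that CanonicalizeOps handles it.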
return None
def make_requantize_params(input_scale, output_scale, output_zero_point, out_dtype):
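    """Bundle the requantize parameters into a dict for qnn_dense_driver."""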
config = {
"input_scale": input_scale,
"output_scale": output_scale,
"output_zero_point": output_zero_point,
"out_dtype": out_dtype,
}
return config
def make_configuration(
quantized_data,
quantized_kernel,
dtype,
input_shape,
kernel_shape,
input_zero_point,
kernel_zero_point,
input_scale,
kernel_scale,
units,
output,
out_dtype="int32",
bias=None,
requantize=None,
):
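    """Assemble the complete test configuration consumed by qnn_dense_driver."""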
    if requantize is not None:
        assert bias is not None, "requantized test cases must also provide a bias"
config = {
"quantized_data": quantized_data,
"quantized_kernel": quantized_kernel,
"dtype": dtype,
"input_shape": input_shape,
"kernel_shape": kernel_shape,
"input_zero_point": input_zero_point,
"kernel_zero_point": kernel_zero_point,
"input_scale": input_scale,
"kernel_scale": kernel_scale,
"units": units,
"output": output,
"out_dtype": out_dtype,
"bias": bias,
"requantize": requantize,
}
return config
def make_int_configuration(use_bias=False, requantize_output=False, per_channel=False):
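    """Build an int8 qnn.dense test case along with its expected output."""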
input_shape, kernel_shape, output_shape = (2, 10), (3, 10), (2, 3)
input_zero_point, kernel_zero_point = -1, -1
in_dtype = "int8"
    out_dtype = "int8" if requantize_output else "int32"
units = 3
quantized_data_np = (
np.array([1, 3, 5, 7, 9, 11, 13, 15, -19, -21, 1, 3, 5, 7, 9, 11, 13, -17, 17, -21])
.astype(in_dtype)
.reshape(input_shape)
)
    quantized_kernel_np = (
        np.array(
            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19,
             1, 3, 5, 7, 9, 11, 13, 15, 17, 19,
             1, 3, 5, 7, 9, 11, 13, 15, 17, 19]
        )
        .astype(in_dtype)
        .reshape(kernel_shape)
    )
input_scale = 0.5
kernel_scale = 0.5
output_scale = 1.0
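    # Both zero points are -1, so qnn.dense computes (data + 1) @ (kernel + 1)^T;
    # since the three kernel rows are identical, every unit of a given data row
    # produces the same int32 value (92 for the first row, 228 for the second).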
bias = np.array([4, 8, 12]).astype(out_dtype).reshape((units,)) if use_bias else None
    if per_channel:
        assert use_bias and requantize_output
        kernel_scale = np.array([0.5, 0.3, 0.4], dtype=np.float32)
        # Per-channel requantize scales are [0.25, 0.15, 0.2]; for example, the
        # third unit of the first row is round(104 * 0.2) - 1 = 20.
        output = np.array([23, 14, 20, 57, 34, 47])
    elif requantize_output:
        assert use_bias
        # Requantized with scale 0.25 and output zero point -1, e.g. 96 * 0.25 - 1 = 23.
        output = np.array([23, 24, 25, 57, 58, 59])
    elif use_bias:
        # The int32 dense result with the bias [4, 8, 12] added across the units.
        output = np.array([96, 100, 104, 232, 236, 240])
    else:
        # The plain int32 dense result; every unit of the first row is 92.
        output = np.array([92, 92, 92, 228, 228, 228])
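    # The requantize input scale is the product input_scale * kernel_scale,
    # which becomes a per-output-channel vector in the per_channel case.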
requant_params = (
make_requantize_params(input_scale * kernel_scale, output_scale, -1, "int8")
if requantize_output
else None
)
output = output.astype(out_dtype).reshape(output_shape)
return make_configuration(
quantized_data=quantized_data_np,
quantized_kernel=quantized_kernel_np,
dtype=in_dtype,
input_shape=input_shape,
kernel_shape=kernel_shape,
input_zero_point=input_zero_point,
kernel_zero_point=kernel_zero_point,
input_scale=input_scale,
kernel_scale=kernel_scale,
units=units,
output=output,
bias=bias,
requantize=requant_params,
)
def qnn_dense_driver(test_configuration):
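    """Construct, compile, and run a qnn.dense graph, then check values and dtype."""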
in_dtype = test_configuration["dtype"]
out_dtype = test_configuration["out_dtype"]
quantized_data_name = "quantized_data"
quantized_kernel_name = "quantized_kernel"
expected_out_dtype = test_configuration["out_dtype"]
bias_name = "bias"
quantized_data = relay.var(
quantized_data_name, shape=test_configuration["input_shape"], dtype=in_dtype
)
quantized_kernel = relay.var(
quantized_kernel_name, shape=test_configuration["kernel_shape"], dtype=in_dtype
)
mod = relay.qnn.op.dense(
quantized_data,
quantized_kernel,
relay.const(test_configuration["input_zero_point"], "int32"),
relay.const(test_configuration["kernel_zero_point"], "int32"),
relay.const(test_configuration["input_scale"], "float32"),
relay.const(test_configuration["kernel_scale"], "float32"),
test_configuration["units"],
)
if test_configuration[bias_name] is not None:
bias = relay.var(bias_name, shape=test_configuration["bias"].shape, dtype=out_dtype)
mod = relay.nn.bias_add(mod, bias)
if test_configuration["requantize"] is not None:
requantize_config = test_configuration["requantize"]
mod = relay.qnn.op.requantize(
mod,
input_scale=relay.const(requantize_config["input_scale"], "float32"),
input_zero_point=relay.const(0, "int32"),
output_scale=relay.const(requantize_config["output_scale"], "float32"),
output_zero_point=relay.const(requantize_config["output_zero_point"], "int32"),
out_dtype=requantize_config["out_dtype"],
)
expected_out_dtype = requantize_config["out_dtype"]
mod = relay.Function(relay.analysis.free_vars(mod), mod)
mod = tvm.IRModule.from_expr(mod)
mod = relay.transform.InferType()(mod)
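    # Canonicalize lowers the QNN ops to standard Relay ops before compilation.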
mod = relay.qnn.transform.CanonicalizeOps()(mod)
with tvm.transform.PassContext(opt_level=2):
graph, lib, params = relay.build(mod, "llvm", params=None)
mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
mod.set_input(quantized_data_name, test_configuration[quantized_data_name])
mod.set_input(quantized_kernel_name, test_configuration[quantized_kernel_name])
if test_configuration[bias_name] is not None:
mod.set_input(bias_name, test_configuration[bias_name])
mod.set_input(**params)
mod.run()
res = mod.get_output(0).numpy()
np.testing.assert_equal(res, test_configuration["output"])
assert res.dtype == expected_out_dtype
def test_qnn_dense_without_bias():
with TempOpAttr("qnn.dense", "FTVMQnnLegalize", legalize_qnn_dense):
int32_output_without_bias_params = make_int_configuration(use_bias=False)
qnn_dense_driver(int32_output_without_bias_params)
def test_qnn_dense_with_bias():
with TempOpAttr("qnn.dense", "FTVMQnnLegalize", legalize_qnn_dense):
int32_output_with_bias_params = make_int_configuration(use_bias=True)
qnn_dense_driver(int32_output_with_bias_params)
def test_qnn_dense_with_requantized_output():
with TempOpAttr("qnn.dense", "FTVMQnnLegalize", legalize_qnn_dense):
int8_requantized_output_with_bias_params = make_int_configuration(
use_bias=True, requantize_output=True
)
qnn_dense_driver(int8_requantized_output_with_bias_params)
def test_per_channel_weight_scale():
with TempOpAttr("qnn.dense", "FTVMQnnLegalize", legalize_qnn_dense):
config = make_int_configuration(use_bias=True, requantize_output=True, per_channel=True)
qnn_dense_driver(config)
if __name__ == "__main__":
test_qnn_dense_without_bias()
test_qnn_dense_with_bias()
test_qnn_dense_with_requantized_output()
test_per_channel_weight_scale()