tests/python/topi/test_topi_qnn.py - tvm - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 """Test code for QNN operators."""
 import numpy as np
 import tvm
 from tvm import topi, relay, te
 from tvm.contrib import graph_executor
 import tvm.topi.testing


 def verify_simulated_quantize(data_shape, out_dtype, channels, axis):
     """Check ``topi.nn.simulated_quantize`` against ``relay.qnn.quantize``.

     Random float32 data is run through both operators on every enabled
     target; the two outputs may differ in at most three elements.

     Parameters
     ----------
     data_shape : list of int
         Shape of the float32 tensor to quantize.
     out_dtype : str
         Quantized dtype name; must be a key of ``topi.nn.SQNN_DTYPE_TO_CODE``.
     channels : list of int
         Shape of the random scale and zero-point arrays. When its first
         entry is 1 the relay reference receives scalar parameters.
     axis : int
         Axis argument forwarded to both operators.
     """
     # Symbolic inputs of the simulated op; scale and zero point are 1-D
     # tensors whose length is a size variable.
     value_ph = te.placeholder(data_shape, name="value", dtype="float32")
     dtype_ph = te.placeholder([], name="dtype", dtype="int32")
     scale_ph = te.placeholder([te.size_var("scale_dim")], name="scale", dtype="float32")
     zero_point_ph = te.placeholder([te.size_var("zp_dim")], name="zp", dtype="int32")
     sim_out = topi.nn.simulated_quantize(
         value_ph, dtype_ph, output_scale=scale_ph, output_zero_point=zero_point_ph, axis=axis
     )

     # Host-side values bound to those inputs, plus a zeroed output buffer.
     value_np = np.random.uniform(size=data_shape).astype("float32")
     dtype_code_np = np.int32(topi.nn.SQNN_DTYPE_TO_CODE[out_dtype])
     scale_np = np.random.uniform(low=1e-4, high=0.1, size=channels).astype("float32")
     zero_point_np = np.random.uniform(low=-10, high=10, size=channels).astype("int32")
     output_np = np.zeros(shape=data_shape, dtype="float32")

     for target, dev in tvm.testing.enabled_targets():
         # Copy every input (and the output buffer) onto the device under test.
         value_nd, dtype_nd, scale_nd, zero_point_nd, output_nd = (
             tvm.nd.array(host_value, dev)
             for host_value in (value_np, dtype_code_np, scale_np, zero_point_np, output_np)
         )

         # Reference graph: the real relay quantize op with constant
         # parameters, given as whole arrays or as scalars.
         per_channel = channels[0] != 1
         data_var = relay.var("a", shape=data_shape, dtype="float32")
         scale_const = relay.const(scale_np if per_channel else scale_np[0])
         zero_point_const = relay.const(zero_point_np if per_channel else zero_point_np[0])
         reference_op = relay.qnn.quantize(
             data_var, scale_const, zero_point_const, axis=axis, out_dtype=out_dtype
         )
         with tvm.transform.PassContext(opt_level=3):
             lib = relay.build(tvm.IRModule.from_expr(reference_op), target=target)

         # Execute the reference graph on the same random data.
         executor = graph_executor.GraphModule(lib["default"](dev))
         executor.set_input("a", value_np)

         executor.run()
         reference_nd = executor.get_output(0)

         # Schedule, build and run the simulated quantize kernel.
         with tvm.target.Target(target):
             schedule = tvm.topi.testing.get_injective_schedule(target)(sim_out)
         sim_kernel = tvm.build(
             schedule,
             [value_ph, dtype_ph, scale_ph, zero_point_ph, sim_out],
             target,
             name="sim_quantize",
         )
         sim_kernel(value_nd, dtype_nd, scale_nd, zero_point_nd, output_nd)

         # Count element-wise disagreements with the true qnn output; a few
         # are tolerated because of rounding errors in GPU fp32 arithmetic.
         mismatched = output_nd.numpy() != reference_nd.numpy().astype("float32")
         assert np.sum(mismatched) <= 3


 def test_simulated_quantize():
     """Run ``verify_simulated_quantize`` over a fixed table of configurations."""
     # Each row is (data_shape, out_dtype, channels, axis).
     configurations = (
         ([1], "int8", [1], -1),
         ([2, 5], "int8", [5], 1),
         ([1, 32, 32, 32], "int8", [32], -1),
         ([1, 32, 32, 32], "uint8", [32], -2),
         ([2, 5], "int32", [5], 1),
     )
     for data_shape, out_dtype, channels, axis in configurations:
         verify_simulated_quantize(data_shape, out_dtype, channels, axis)


 def verify_simulated_dequantize(data_shape, in_dtype, channels, axis):
     """Check ``topi.nn.simulated_dequantize`` against ``relay.qnn.dequantize``.

     Random integer data of type ``in_dtype`` is dequantized by the relay op,
     while a float32 copy of the same values goes through the simulated op.
     On every enabled target the two outputs must agree within ``rtol=1e-5``.

     Parameters
     ----------
     data_shape : list of int
         Shape of the tensor to dequantize.
     in_dtype : str
         Integer dtype name of the quantized data; must be a key of
         ``topi.nn.SQNN_DTYPE_TO_CODE``.
     channels : list of int
         Shape of the random scale and zero-point arrays. When its first
         entry is 1 the relay reference receives scalar parameters.
     axis : int
         Axis argument forwarded to both operators.
     """
     # Create placeholder variables for all qnn inputs.
     A = te.placeholder(data_shape, name="value", dtype="float32")
     D = te.placeholder([], name="dtype", dtype="int32")
     S = te.placeholder([te.size_var("scale_dim")], name="scale", dtype="float32")
     Z = te.placeholder([te.size_var("zp_dim")], name="zp", dtype="int32")
     SIM_DQ = topi.nn.simulated_dequantize(A, D, input_scale=S, input_zero_point=Z, axis=axis)

     # Create random numpy values to assign to inputs.
     # Sample from a 255-wide window that starts inside in_dtype's range:
     # [-128, 127) for the signed types and [0, 255) for uint8. Converting a
     # negative float to an unsigned integer type is undefined, so the lower
     # bound must never fall below the dtype's minimum.
     low = max(np.iinfo(in_dtype).min, -128)
     a_np = np.random.uniform(low=low, high=low + 255, size=data_shape).astype(in_dtype)
     # Float32 copy of the integer data, fed to the simulated op.
     a_np_f = a_np.astype("float32")
     d_np = np.int32(topi.nn.SQNN_DTYPE_TO_CODE[in_dtype])
     s_np = np.random.uniform(low=1e-4, high=0.1, size=channels).astype("float32")
     z_np = np.random.uniform(low=-10, high=10, size=channels).astype("int32")
     dq_np = np.zeros(shape=data_shape, dtype="float32")

     def check_target(target, dev):
         """Run both implementations on one target/device and compare them."""
         # Wrap the numpy arrays in nd arrays.
         a = tvm.nd.array(a_np_f, dev)
         d = tvm.nd.array(d_np, dev)
         s = tvm.nd.array(s_np, dev)
         z = tvm.nd.array(z_np, dev)
         dq = tvm.nd.array(dq_np, dev)

         # Construct equivalent relay graph.
         per_channel = channels[0] != 1
         a_var = relay.var("a", shape=data_shape, dtype=in_dtype)
         if per_channel:
             s_var = relay.const(s_np)
             z_var = relay.const(z_np)
         else:
             s_var = relay.const(s_np[0])
             z_var = relay.const(z_np[0])
         real_dq_op = relay.qnn.dequantize(a_var, s_var, z_var, axis=axis)
         with tvm.transform.PassContext(opt_level=3):
             lib = relay.build(tvm.IRModule.from_expr(real_dq_op), target=target)

         # Get real qnn dequantize output; relay consumes the integer data.
         m = graph_executor.GraphModule(lib["default"](dev))
         m.set_input("a", a_np)

         m.run()
         real_dq_out = m.get_output(0)

         # Compile the simulated dequantize function.
         # NOTE(review): the kernel is still labelled "sim_quantize"; func is
         # invoked directly below, so this test never looks it up by name.
         with tvm.target.Target(target):
             sched = tvm.topi.testing.get_injective_schedule(target)(SIM_DQ)
         func = tvm.build(sched, [A, D, S, Z, SIM_DQ], target, name="sim_quantize")
         func(a, d, s, z, dq)

         # Check correctness against the true qnn output.
         tvm.testing.assert_allclose(dq.numpy(), real_dq_out.numpy().astype("float32"), rtol=1e-5)

     for target, dev in tvm.testing.enabled_targets():
         check_target(target, dev)


 def test_simulated_dequantize():
     """Run ``verify_simulated_dequantize`` over a fixed table of configurations."""
     # Each row is (data_shape, in_dtype, channels, axis).
     configurations = (
         ([1], "int8", [1], -1),
         ([2, 5], "int8", [5], 1),
         ([2, 5], "int8", [2], 0),
         ([1, 32, 32, 32], "int8", [32], -1),
         ([1, 32, 32, 32], "uint8", [32], -2),
         ([2, 5], "int32", [5], 1),
     )
     for data_shape, in_dtype, channels, axis in configurations:
         verify_simulated_dequantize(data_shape, in_dtype, channels, axis)


 if __name__ == "__main__":
     # When executed as a script, run both tests in their original order.
     for run_case in (test_simulated_quantize, test_simulated_dequantize):
         run_case()
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	"""Test code for QNN operators."""
	import numpy as np
	import tvm
	from tvm import topi, relay, te
	from tvm.contrib import graph_executor
	import tvm.topi.testing


	def verify_simulated_quantize(data_shape, out_dtype, channels, axis):
	    """Check ``topi.nn.simulated_quantize`` against ``relay.qnn.quantize``.

	    Random float32 data is run through both operators on every enabled
	    target; the two outputs may differ in at most three elements.

	    Parameters
	    ----------
	    data_shape : list of int
	        Shape of the float32 tensor to quantize.
	    out_dtype : str
	        Quantized dtype name; must be a key of ``topi.nn.SQNN_DTYPE_TO_CODE``.
	    channels : list of int
	        Shape of the random scale and zero-point arrays. When its first
	        entry is 1 the relay reference receives scalar parameters.
	    axis : int
	        Axis argument forwarded to both operators.
	    """
	    # Symbolic inputs of the simulated op; scale and zero point are 1-D
	    # tensors whose length is a size variable.
	    value_ph = te.placeholder(data_shape, name="value", dtype="float32")
	    dtype_ph = te.placeholder([], name="dtype", dtype="int32")
	    scale_ph = te.placeholder([te.size_var("scale_dim")], name="scale", dtype="float32")
	    zero_point_ph = te.placeholder([te.size_var("zp_dim")], name="zp", dtype="int32")
	    sim_out = topi.nn.simulated_quantize(
	        value_ph, dtype_ph, output_scale=scale_ph, output_zero_point=zero_point_ph, axis=axis
	    )

	    # Host-side values bound to those inputs, plus a zeroed output buffer.
	    value_np = np.random.uniform(size=data_shape).astype("float32")
	    dtype_code_np = np.int32(topi.nn.SQNN_DTYPE_TO_CODE[out_dtype])
	    scale_np = np.random.uniform(low=1e-4, high=0.1, size=channels).astype("float32")
	    zero_point_np = np.random.uniform(low=-10, high=10, size=channels).astype("int32")
	    output_np = np.zeros(shape=data_shape, dtype="float32")

	    for target, dev in tvm.testing.enabled_targets():
	        # Copy every input (and the output buffer) onto the device under test.
	        value_nd, dtype_nd, scale_nd, zero_point_nd, output_nd = (
	            tvm.nd.array(host_value, dev)
	            for host_value in (value_np, dtype_code_np, scale_np, zero_point_np, output_np)
	        )

	        # Reference graph: the real relay quantize op with constant
	        # parameters, given as whole arrays or as scalars.
	        per_channel = channels[0] != 1
	        data_var = relay.var("a", shape=data_shape, dtype="float32")
	        scale_const = relay.const(scale_np if per_channel else scale_np[0])
	        zero_point_const = relay.const(zero_point_np if per_channel else zero_point_np[0])
	        reference_op = relay.qnn.quantize(
	            data_var, scale_const, zero_point_const, axis=axis, out_dtype=out_dtype
	        )
	        with tvm.transform.PassContext(opt_level=3):
	            lib = relay.build(tvm.IRModule.from_expr(reference_op), target=target)

	        # Execute the reference graph on the same random data.
	        executor = graph_executor.GraphModule(lib["default"](dev))
	        executor.set_input("a", value_np)

	        executor.run()
	        reference_nd = executor.get_output(0)

	        # Schedule, build and run the simulated quantize kernel.
	        with tvm.target.Target(target):
	            schedule = tvm.topi.testing.get_injective_schedule(target)(sim_out)
	        sim_kernel = tvm.build(
	            schedule,
	            [value_ph, dtype_ph, scale_ph, zero_point_ph, sim_out],
	            target,
	            name="sim_quantize",
	        )
	        sim_kernel(value_nd, dtype_nd, scale_nd, zero_point_nd, output_nd)

	        # Count element-wise disagreements with the true qnn output; a few
	        # are tolerated because of rounding errors in GPU fp32 arithmetic.
	        mismatched = output_nd.numpy() != reference_nd.numpy().astype("float32")
	        assert np.sum(mismatched) <= 3


	def test_simulated_quantize():
	    """Run ``verify_simulated_quantize`` over a fixed table of configurations."""
	    # Each row is (data_shape, out_dtype, channels, axis).
	    configurations = (
	        ([1], "int8", [1], -1),
	        ([2, 5], "int8", [5], 1),
	        ([1, 32, 32, 32], "int8", [32], -1),
	        ([1, 32, 32, 32], "uint8", [32], -2),
	        ([2, 5], "int32", [5], 1),
	    )
	    for data_shape, out_dtype, channels, axis in configurations:
	        verify_simulated_quantize(data_shape, out_dtype, channels, axis)


	def verify_simulated_dequantize(data_shape, in_dtype, channels, axis):
	    """Check ``topi.nn.simulated_dequantize`` against ``relay.qnn.dequantize``.

	    Random integer data of type ``in_dtype`` is dequantized by the relay op,
	    while a float32 copy of the same values goes through the simulated op.
	    On every enabled target the two outputs must agree within ``rtol=1e-5``.

	    Parameters
	    ----------
	    data_shape : list of int
	        Shape of the tensor to dequantize.
	    in_dtype : str
	        Integer dtype name of the quantized data; must be a key of
	        ``topi.nn.SQNN_DTYPE_TO_CODE``.
	    channels : list of int
	        Shape of the random scale and zero-point arrays. When its first
	        entry is 1 the relay reference receives scalar parameters.
	    axis : int
	        Axis argument forwarded to both operators.
	    """
	    # Create placeholder variables for all qnn inputs.
	    A = te.placeholder(data_shape, name="value", dtype="float32")
	    D = te.placeholder([], name="dtype", dtype="int32")
	    S = te.placeholder([te.size_var("scale_dim")], name="scale", dtype="float32")
	    Z = te.placeholder([te.size_var("zp_dim")], name="zp", dtype="int32")
	    SIM_DQ = topi.nn.simulated_dequantize(A, D, input_scale=S, input_zero_point=Z, axis=axis)

	    # Create random numpy values to assign to inputs.
	    # Sample from a 255-wide window that starts inside in_dtype's range:
	    # [-128, 127) for the signed types and [0, 255) for uint8. Converting a
	    # negative float to an unsigned integer type is undefined, so the lower
	    # bound must never fall below the dtype's minimum.
	    low = max(np.iinfo(in_dtype).min, -128)
	    a_np = np.random.uniform(low=low, high=low + 255, size=data_shape).astype(in_dtype)
	    # Float32 copy of the integer data, fed to the simulated op.
	    a_np_f = a_np.astype("float32")
	    d_np = np.int32(topi.nn.SQNN_DTYPE_TO_CODE[in_dtype])
	    s_np = np.random.uniform(low=1e-4, high=0.1, size=channels).astype("float32")
	    z_np = np.random.uniform(low=-10, high=10, size=channels).astype("int32")
	    dq_np = np.zeros(shape=data_shape, dtype="float32")

	    def check_target(target, dev):
	        """Run both implementations on one target/device and compare them."""
	        # Wrap the numpy arrays in nd arrays.
	        a = tvm.nd.array(a_np_f, dev)
	        d = tvm.nd.array(d_np, dev)
	        s = tvm.nd.array(s_np, dev)
	        z = tvm.nd.array(z_np, dev)
	        dq = tvm.nd.array(dq_np, dev)

	        # Construct equivalent relay graph.
	        per_channel = channels[0] != 1
	        a_var = relay.var("a", shape=data_shape, dtype=in_dtype)
	        if per_channel:
	            s_var = relay.const(s_np)
	            z_var = relay.const(z_np)
	        else:
	            s_var = relay.const(s_np[0])
	            z_var = relay.const(z_np[0])
	        real_dq_op = relay.qnn.dequantize(a_var, s_var, z_var, axis=axis)
	        with tvm.transform.PassContext(opt_level=3):
	            lib = relay.build(tvm.IRModule.from_expr(real_dq_op), target=target)

	        # Get real qnn dequantize output; relay consumes the integer data.
	        m = graph_executor.GraphModule(lib["default"](dev))
	        m.set_input("a", a_np)

	        m.run()
	        real_dq_out = m.get_output(0)

	        # Compile the simulated dequantize function.
	        # NOTE(review): the kernel is still labelled "sim_quantize"; func is
	        # invoked directly below, so this test never looks it up by name.
	        with tvm.target.Target(target):
	            sched = tvm.topi.testing.get_injective_schedule(target)(SIM_DQ)
	        func = tvm.build(sched, [A, D, S, Z, SIM_DQ], target, name="sim_quantize")
	        func(a, d, s, z, dq)

	        # Check correctness against the true qnn output.
	        tvm.testing.assert_allclose(dq.numpy(), real_dq_out.numpy().astype("float32"), rtol=1e-5)

	    for target, dev in tvm.testing.enabled_targets():
	        check_target(target, dev)


	def test_simulated_dequantize():
	    """Run ``verify_simulated_dequantize`` over a fixed table of configurations."""
	    # Each row is (data_shape, in_dtype, channels, axis).
	    configurations = (
	        ([1], "int8", [1], -1),
	        ([2, 5], "int8", [5], 1),
	        ([2, 5], "int8", [2], 0),
	        ([1, 32, 32, 32], "int8", [32], -1),
	        ([1, 32, 32, 32], "uint8", [32], -2),
	        ([2, 5], "int32", [5], 1),
	    )
	    for data_shape, in_dtype, channels, axis in configurations:
	        verify_simulated_dequantize(data_shape, in_dtype, channels, axis)


	if __name__ == "__main__":
	    # When executed as a script, run both tests in their original order.
	    for run_case in (test_simulated_quantize, test_simulated_dequantize):
	        run_case()