blob: 5ba7740cc91828ad146a36db19fbef31ebe37fef [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import time
import mxnet as mx
from mxnet.test_utils import check_speed
def quantize_int8_helper(data):
    """Quantize an NDArray to int8 over its observed value range.

    Uses the tensor's own min/max as the quantization range and returns
    the triple produced by ``mx.nd.contrib.quantize``:
    (quantized int8 data, min_range, max_range).
    """
    return mx.nd.contrib.quantize(data, mx.nd.min(data), mx.nd.max(data),
                                  out_type='int8')
def benchmark_convolution(data_shape, kernel, num_filter, pad, stride, no_bias=True, layout='NCHW', repeats=20):
    """Benchmark an FP32 cuDNN convolution against its INT8 quantized
    counterpart on GPU 0 and print per-op forward time plus the speedup.

    Parameters
    ----------
    data_shape : tuple of int
        Input shape, e.g. (batch, channel, height, width) for 'NCHW'.
    kernel : tuple of int
        Convolution kernel size (kh, kw).
    num_filter : int
        Number of output filters.
    pad, stride : tuple of int
        Spatial padding and stride.
    no_bias : bool
        Whether the convolution omits the bias term (default True).
    layout : str
        Data layout passed to the convolution ops (default 'NCHW').
    repeats : int
        Number of timed forward passes handed to ``check_speed``.
    """
    ctx_gpu = mx.gpu(0)
    data = mx.sym.Variable(name="data", shape=data_shape, dtype='float32')
    # --- FP32 cuDNN convolution baseline ---
    conv_cudnn = mx.sym.Convolution(data=data, kernel=kernel, num_filter=num_filter, pad=pad, stride=stride,
                                    no_bias=no_bias, layout=layout, cudnn_off=False, name="conv_cudnn")
    arg_shapes, _, _ = conv_cudnn.infer_shape(data=data_shape)
    input_data = mx.nd.random.normal(0, 0.2, shape=data_shape, ctx=ctx_gpu)
    # list_arguments() orders inputs first, so index 1 is the weight.
    conv_weight_name = conv_cudnn.list_arguments()[1]
    # Use mx.nd.random.normal here too (the original mixed in the legacy
    # mx.random.normal API for the weight).
    args = {data.name: input_data,
            conv_weight_name: mx.nd.random.normal(0, 1, shape=arg_shapes[1], ctx=ctx_gpu)}
    conv_cudnn_time = check_speed(sym=conv_cudnn, location=args, ctx=ctx_gpu, N=repeats,
                                  grad_req='null', typ='forward') * 1000
    # --- INT8 quantized convolution ---
    qdata = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
    weight = mx.sym.Variable(name='weight', shape=arg_shapes[1], dtype='int8')
    min_data = mx.sym.Variable(name='min_data', shape=(1,), dtype='float32')
    max_data = mx.sym.Variable(name='max_data', shape=(1,), dtype='float32')
    min_weight = mx.sym.Variable(name='min_weight', shape=(1,), dtype='float32')
    max_weight = mx.sym.Variable(name='max_weight', shape=(1,), dtype='float32')
    quantized_conv2d = mx.sym.contrib.quantized_conv(data=qdata, weight=weight, min_data=min_data, max_data=max_data,
                                                     min_weight=min_weight, max_weight=max_weight,
                                                     kernel=kernel, num_filter=num_filter, pad=pad, stride=stride,
                                                     no_bias=no_bias, layout=layout, cudnn_off=False,
                                                     name='quantized_conv2d')
    # Quantize each tensor exactly once and unpack (qvalue, min, max);
    # the original recomputed the full quantization three times per tensor.
    qinput, input_min, input_max = quantize_int8_helper(input_data)
    qweight, weight_min, weight_max = quantize_int8_helper(args[conv_weight_name])
    qargs = {qdata.name: qinput,
             min_data.name: input_min,
             max_data.name: input_max,
             weight.name: qweight,
             min_weight.name: weight_min,
             max_weight.name: weight_max}
    qconv_time = check_speed(sym=quantized_conv2d, location=qargs, ctx=ctx_gpu, N=repeats,
                             grad_req='null', typ='forward') * 1000
    # --- report ---
    print('==================================================================================================')
    print('data=%s, kernel=%s, num_filter=%s, pad=%s, stride=%s, no_bias=%s, layout=%s, repeats=%s'
          % (data_shape, kernel, num_filter, pad, stride, no_bias, layout, repeats))
    print('%s , ctx=%s, time=%.2f ms' % (conv_cudnn.name + '-FP32', ctx_gpu, conv_cudnn_time))
    print('%s, ctx=%s, time=%.2f ms' % (quantized_conv2d.name, ctx_gpu, qconv_time))
    print('quantization speedup: %.1fX' % (conv_cudnn_time / qconv_time))
    print('\n')
if __name__ == '__main__':
    # Layer configurations (channels/height/width, kernel, num_filter, pad,
    # stride); each is benchmarked at several batch sizes.
    layer_configs = [
        ((64, 56, 56), (1, 1), 256, (0, 0), (1, 1)),
        ((256, 56, 56), (1, 1), 64, (0, 0), (1, 1)),
        ((256, 56, 56), (1, 1), 128, (0, 0), (2, 2)),
        ((128, 28, 28), (3, 3), 128, (1, 1), (1, 1)),
        ((1024, 14, 14), (1, 1), 256, (0, 0), (1, 1)),
        ((2048, 7, 7), (1, 1), 512, (0, 0), (1, 1)),
    ]
    for batch_size in (32, 64, 128):
        for chw, kernel, num_filter, pad, stride in layer_configs:
            benchmark_convolution(data_shape=(batch_size,) + chw, kernel=kernel,
                                  num_filter=num_filter, pad=pad, stride=stride,
                                  layout='NCHW', repeats=20)