# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
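"""Benchmark INT8 quantized convolution (mx.sym.contrib.quantized_conv) against the
FP32 cuDNN convolution (mx.sym.Convolution) on GPU across common convolution shapes."""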
import mxnet as mx
from mxnet.test_utils import check_speed


def quantize_int8_helper(data):
    """Quantize an FP32 NDArray to int8; returns (quantized_data, min_range, max_range)."""
    min_data = mx.nd.min(data)
    max_data = mx.nd.max(data)
    return mx.nd.contrib.quantize(data, min_data, max_data, out_type='int8')
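# A minimal usage sketch for the helper above (shape and values are illustrative only):
#     arr = mx.nd.random.normal(0, 0.2, shape=(4, 4), ctx=mx.gpu(0))
#     qarr, qmin, qmax = quantize_int8_helper(arr)  # int8 data plus its float32 range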


def benchmark_convolution(data_shape, kernel, num_filter, pad, stride, no_bias=True, layout='NCHW', repeats=20):
    """Time one convolution shape in FP32 (cuDNN) and INT8 (quantized_conv) and print the speedup."""
    ctx_gpu = mx.gpu(0)
    data = mx.sym.Variable(name="data", shape=data_shape, dtype='float32')
    # FP32 baseline: cuDNN convolution
    conv_cudnn = mx.sym.Convolution(data=data, kernel=kernel, num_filter=num_filter, pad=pad, stride=stride,
                                    no_bias=no_bias, layout=layout, cudnn_off=False, name="conv_cudnn")
    arg_shapes, _, _ = conv_cudnn.infer_shape(data=data_shape)
    input_data = mx.nd.random.normal(0, 0.2, shape=data_shape, ctx=ctx_gpu)
    conv_weight_name = conv_cudnn.list_arguments()[1]
    args = {data.name: input_data, conv_weight_name: mx.nd.random.normal(0, 1, shape=arg_shapes[1], ctx=ctx_gpu)}
    conv_cudnn_time = check_speed(sym=conv_cudnn, location=args, ctx=ctx_gpu, N=repeats,
                                  grad_req='null', typ='forward') * 1000
    # INT8 path: quantized_conv with explicit data/weight ranges
    qdata = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
    weight = mx.sym.Variable(name='weight', shape=arg_shapes[1], dtype='int8')
    min_data = mx.sym.Variable(name='min_data', shape=(1,), dtype='float32')
    max_data = mx.sym.Variable(name='max_data', shape=(1,), dtype='float32')
    min_weight = mx.sym.Variable(name='min_weight', shape=(1,), dtype='float32')
    max_weight = mx.sym.Variable(name='max_weight', shape=(1,), dtype='float32')
    quantized_conv2d = mx.sym.contrib.quantized_conv(data=qdata, weight=weight, min_data=min_data, max_data=max_data,
                                                     min_weight=min_weight, max_weight=max_weight,
                                                     kernel=kernel, num_filter=num_filter, pad=pad, stride=stride,
                                                     no_bias=no_bias, layout=layout, cudnn_off=False,
                                                     name='quantized_conv2d')
    # Quantize the input and the weight once each, then unpack the (data, min, max) triples
    qinput, qinput_min, qinput_max = quantize_int8_helper(input_data)
    qweight, qweight_min, qweight_max = quantize_int8_helper(args[conv_weight_name])
    qargs = {qdata.name: qinput,
             min_data.name: qinput_min,
             max_data.name: qinput_max,
             weight.name: qweight,
             min_weight.name: qweight_min,
             max_weight.name: qweight_max}
    qconv_time = check_speed(sym=quantized_conv2d, location=qargs, ctx=ctx_gpu, N=repeats,
                             grad_req='null', typ='forward') * 1000
    print('==================================================================================================')
    print(f'data={data_shape}, kernel={kernel}, num_filter={num_filter}, pad={pad}, stride={stride}, '
          f'no_bias={no_bias}, layout={layout}, repeats={repeats}')
    print(f'{conv_cudnn.name}-FP32, ctx={ctx_gpu}, time={conv_cudnn_time:.2f} ms')
    print(f'{quantized_conv2d.name}-INT8, ctx={ctx_gpu}, time={qconv_time:.2f} ms')
    print(f'quantization speedup: {conv_cudnn_time / qconv_time:.1f}X')
    print('\n')


if __name__ == '__main__':
    # Common CNN layer shapes (e.g. ResNet-style blocks) at several batch sizes
    for batch_size in [32, 64, 128]:
        benchmark_convolution(data_shape=(batch_size, 64, 56, 56), kernel=(1, 1), num_filter=256,
                              pad=(0, 0), stride=(1, 1), layout='NCHW', repeats=20)
        benchmark_convolution(data_shape=(batch_size, 256, 56, 56), kernel=(1, 1), num_filter=64,
                              pad=(0, 0), stride=(1, 1), layout='NCHW', repeats=20)
        benchmark_convolution(data_shape=(batch_size, 256, 56, 56), kernel=(1, 1), num_filter=128,
                              pad=(0, 0), stride=(2, 2), layout='NCHW', repeats=20)
        benchmark_convolution(data_shape=(batch_size, 128, 28, 28), kernel=(3, 3), num_filter=128,
                              pad=(1, 1), stride=(1, 1), layout='NCHW', repeats=20)
        benchmark_convolution(data_shape=(batch_size, 1024, 14, 14), kernel=(1, 1), num_filter=256,
                              pad=(0, 0), stride=(1, 1), layout='NCHW', repeats=20)
        benchmark_convolution(data_shape=(batch_size, 2048, 7, 7), kernel=(1, 1), num_filter=512,
                              pad=(0, 0), stride=(1, 1), layout='NCHW', repeats=20)
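
# Note: this benchmark assumes an NVIDIA GPU with cuDNN; MXNet's INT8 quantized_conv
# on GPU additionally requires DP4A support (CUDA compute capability 6.1 or higher).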