blob: 5ba7740cc91828ad146a36db19fbef31ebe37fef [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import time
import mxnet as mx
from mxnet.test_utils import check_speed
def quantize_int8_helper(data):
    """Quantize an NDArray to int8 over its observed value range.

    Uses the tensor's own min/max as the quantization range and returns
    the triple produced by ``mx.nd.contrib.quantize``:
    (quantized int8 data, min_range, max_range).
    """
    return mx.nd.contrib.quantize(data, mx.nd.min(data), mx.nd.max(data),
                                  out_type='int8')
def benchmark_convolution(data_shape, kernel, num_filter, pad, stride, no_bias=True, layout='NCHW', repeats=20):
    """Benchmark an FP32 cuDNN convolution against its INT8 quantized
    counterpart on GPU 0 and print per-op forward time plus the speedup.

    Parameters
    ----------
    data_shape : tuple of int
        Input shape, e.g. (batch, channel, height, width) for 'NCHW'.
    kernel : tuple of int
        Convolution kernel size (kh, kw).
    num_filter : int
        Number of output filters.
    pad, stride : tuple of int
        Spatial padding and stride.
    no_bias : bool
        Whether the convolution omits the bias term (default True).
    layout : str
        Data layout passed to the convolution ops (default 'NCHW').
    repeats : int
        Number of timed forward passes handed to ``check_speed``.
    """
    ctx_gpu = mx.gpu(0)
    data = mx.sym.Variable(name="data", shape=data_shape, dtype='float32')
    # --- FP32 cuDNN convolution baseline ---
    conv_cudnn = mx.sym.Convolution(data=data, kernel=kernel, num_filter=num_filter, pad=pad, stride=stride,
                                    no_bias=no_bias, layout=layout, cudnn_off=False, name="conv_cudnn")
    arg_shapes, _, _ = conv_cudnn.infer_shape(data=data_shape)
    input_data = mx.nd.random.normal(0, 0.2, shape=data_shape, ctx=ctx_gpu)
    # list_arguments() orders inputs first, so index 1 is the weight.
    conv_weight_name = conv_cudnn.list_arguments()[1]
    # Use mx.nd.random.normal here too (the original mixed in the legacy
    # mx.random.normal API for the weight).
    args = {data.name: input_data,
            conv_weight_name: mx.nd.random.normal(0, 1, shape=arg_shapes[1], ctx=ctx_gpu)}
    conv_cudnn_time = check_speed(sym=conv_cudnn, location=args, ctx=ctx_gpu, N=repeats,
                                  grad_req='null', typ='forward') * 1000
    # --- INT8 quantized convolution ---
    qdata = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
    weight = mx.sym.Variable(name='weight', shape=arg_shapes[1], dtype='int8')
    min_data = mx.sym.Variable(name='min_data', shape=(1,), dtype='float32')
    max_data = mx.sym.Variable(name='max_data', shape=(1,), dtype='float32')
    min_weight = mx.sym.Variable(name='min_weight', shape=(1,), dtype='float32')
    max_weight = mx.sym.Variable(name='max_weight', shape=(1,), dtype='float32')
    quantized_conv2d = mx.sym.contrib.quantized_conv(data=qdata, weight=weight, min_data=min_data, max_data=max_data,
                                                     min_weight=min_weight, max_weight=max_weight,
                                                     kernel=kernel, num_filter=num_filter, pad=pad, stride=stride,
                                                     no_bias=no_bias, layout=layout, cudnn_off=False,
                                                     name='quantized_conv2d')
    # Quantize each tensor exactly once and unpack (qvalue, min, max);
    # the original recomputed the full quantization three times per tensor.
    qinput, input_min, input_max = quantize_int8_helper(input_data)
    qweight, weight_min, weight_max = quantize_int8_helper(args[conv_weight_name])
    qargs = {qdata.name: qinput,
             min_data.name: input_min,
             max_data.name: input_max,
             weight.name: qweight,
             min_weight.name: weight_min,
             max_weight.name: weight_max}
    qconv_time = check_speed(sym=quantized_conv2d, location=qargs, ctx=ctx_gpu, N=repeats,
                             grad_req='null', typ='forward') * 1000
    # --- report ---
    print('==================================================================================================')
    print('data=%s, kernel=%s, num_filter=%s, pad=%s, stride=%s, no_bias=%s, layout=%s, repeats=%s'
          % (data_shape, kernel, num_filter, pad, stride, no_bias, layout, repeats))
    print('%s , ctx=%s, time=%.2f ms' % (conv_cudnn.name + '-FP32', ctx_gpu, conv_cudnn_time))
    print('%s, ctx=%s, time=%.2f ms' % (quantized_conv2d.name, ctx_gpu, qconv_time))
    print('quantization speedup: %.1fX' % (conv_cudnn_time / qconv_time))
    print('\n')
if __name__ == '__main__':
    # Layer configurations (channels/height/width, kernel, num_filter, pad,
    # stride); each is benchmarked at several batch sizes.
    layer_configs = [
        ((64, 56, 56), (1, 1), 256, (0, 0), (1, 1)),
        ((256, 56, 56), (1, 1), 64, (0, 0), (1, 1)),
        ((256, 56, 56), (1, 1), 128, (0, 0), (2, 2)),
        ((128, 28, 28), (3, 3), 128, (1, 1), (1, 1)),
        ((1024, 14, 14), (1, 1), 256, (0, 0), (1, 1)),
        ((2048, 7, 7), (1, 1), 512, (0, 0), (1, 1)),
    ]
    for batch_size in (32, 64, 128):
        for chw, kernel, num_filter, pad, stride in layer_configs:
            benchmark_convolution(data_shape=(batch_size,) + chw, kernel=kernel,
                                  num_filter=num_filter, pad=pad, stride=stride,
                                  layout='NCHW', repeats=20)