# blob: 41713c3c3f5156496eb019784175365f6b3fd351 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import argparse
import os
import logging
from common import modelzoo
import mxnet as mx
from mxnet.contrib.quantization import *
def download_calib_dataset(dataset_url, calib_dataset, logger=None):
    """Fetch the calibration dataset.

    Parameters
    ----------
    dataset_url : str
        URL handed to ``mx.test_utils.download`` as the download source.
    calib_dataset : str
        Second argument handed to ``mx.test_utils.download`` (the local
        destination, as reported in the log message).
    logger : optional
        Object with an ``info`` method; when given, one message announcing
        the download is emitted before it starts.
    """
    announce = logger is not None
    if announce:
        message = 'Downloading calibration dataset from %s to %s' % (dataset_url, calib_dataset)
        logger.info(message)
    mx.test_utils.download(dataset_url, calib_dataset)
def download_model(model_name, logger=None):
    """Download ``model_name`` into the ``model`` directory next to this script.

    Parameters
    ----------
    model_name : str
        Name of the model, forwarded to ``modelzoo.download_model``.
    logger : optional
        Object with an ``info`` method; when given, the model name and the
        destination directory are logged before downloading.

    Returns
    -------
    Whatever ``modelzoo.download_model`` returns; the caller in this script
    unpacks it as ``(prefix, epoch)``.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    model_path = os.path.join(dir_path, 'model')
    if logger is not None:
        logger.info('Downloading model %s... into path %s' % (model_name, model_path))
    # Use the ``model_name`` argument (not the script-level ``args`` namespace)
    # so the function also works when it is called from outside ``__main__``.
    return modelzoo.download_model(model_name, model_path)
def save_symbol(fname, sym, logger=None):
    """Write ``sym`` to ``fname`` by calling ``sym.save(fname)``.

    Parameters
    ----------
    fname : str
        Destination file name, passed straight to ``sym.save``.
    sym : object
        Anything exposing a ``save(fname)`` method.
    logger : optional
        Object with an ``info`` method; when given, the destination is
        logged before saving.
    """
    wants_log = logger is not None
    if wants_log:
        note = 'Saving symbol into file at %s' % fname
        logger.info(note)
    sym.save(fname)
def save_params(fname, arg_params, aux_params, logger=None):
    """Save argument and auxiliary parameters into a single file.

    Each entry of ``arg_params`` is stored under the key ``arg:<name>`` and
    each entry of ``aux_params`` under ``aux:<name>``. Every value is passed
    through ``as_in_context`` with a CPU context before ``mx.nd.save`` is
    called.

    Parameters
    ----------
    fname : str
        Destination file name passed to ``mx.nd.save``.
    arg_params : dict
        Mapping of parameter name to a value exposing ``as_in_context``.
    aux_params : dict
        Mapping of auxiliary-state name to a value exposing ``as_in_context``.
    logger : optional
        Object with an ``info`` method; when given, the destination is
        logged before saving.
    """
    if logger is not None:
        logger.info('Saving params into file at %s' % fname)
    # Build the context through ``mx`` explicitly: a bare ``cpu`` name is only
    # reachable in this file through a wildcard import, which is fragile.
    cpu_ctx = mx.cpu()
    save_dict = {('arg:%s' % k): v.as_in_context(cpu_ctx) for k, v in arg_params.items()}
    save_dict.update({('aux:%s' % k): v.as_in_context(cpu_ctx) for k, v in aux_params.items()})
    mx.nd.save(fname, save_dict)
def _output_layer_filter(*keywords):
    """Return a predicate selecting ``*_output`` layer names containing any of ``keywords``."""
    def calib_layer(name):
        return name.endswith('_output') and any(keyword in name for keyword in keywords)
    return calib_layer


if __name__ == '__main__':
    # Script flow: parse options -> fetch the calibration dataset and the FP32
    # model -> choose per-model calibration layers and excluded symbols ->
    # quantize -> save the quantized symbol and parameters next to the model.
    parser = argparse.ArgumentParser(description='Generate a calibrated quantized model from a FP32 model')
    parser.add_argument('--ctx', type=str, default='gpu')
    # Required: without a model name the script cannot get past the model
    # selection below, so fail at argument parsing instead.
    parser.add_argument('--model', type=str, required=True,
                        choices=['imagenet1k-resnet-152', 'imagenet1k-inception-bn'],
                        help='currently only supports imagenet1k-resnet-152 or imagenet1k-inception-bn')
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--label-name', type=str, default='softmax_label')
    parser.add_argument('--calib-dataset', type=str, default='data/val_256_q90.rec',
                        help='path of the calibration dataset')
    parser.add_argument('--image-shape', type=str, default='3,224,224')
    parser.add_argument('--data-nthreads', type=int, default=60,
                        help='number of threads for data decoding')
    parser.add_argument('--num-calib-batches', type=int, default=10,
                        help='number of batches for calibration')
    parser.add_argument('--exclude-first-conv', action='store_true', default=True,
                        help='excluding quantizing the first conv layer since the'
                             ' number of channels is usually not a multiple of 4 in that layer'
                             ' which does not satisfy the requirement of cuDNN')
    # ``store_true`` with ``default=True`` can never yield False, so provide an
    # explicit way to switch the option off while keeping the old flag valid.
    parser.add_argument('--no-exclude-first-conv', dest='exclude_first_conv', action='store_false',
                        help='do not exclude the first conv layer from quantization')
    parser.add_argument('--shuffle-dataset', action='store_true', default=True,
                        help='shuffle the calibration dataset')
    # Same reasoning as for --no-exclude-first-conv.
    parser.add_argument('--no-shuffle-dataset', dest='shuffle_dataset', action='store_false',
                        help='do not shuffle the calibration dataset')
    parser.add_argument('--shuffle-chunk-seed', type=int, default=3982304,
                        help='shuffling chunk seed, see'
                             ' https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=imager#mxnet.io.ImageRecordIter'
                             ' for more details')
    parser.add_argument('--shuffle-seed', type=int, default=48564309,
                        help='shuffling seed, see'
                             ' https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=imager#mxnet.io.ImageRecordIter'
                             ' for more details')
    # ``choices`` rejects an unsupported mode before any download or
    # quantization work is started.
    parser.add_argument('--calib-mode', type=str, default='entropy',
                        choices=['none', 'naive', 'entropy'],
                        help='calibration mode used for generating calibration table for the quantized symbol; supports'
                             ' 1. none: no calibration will be used. The thresholds for quantization will be calculated'
                             ' on the fly. This will result in inference speed slowdown and loss of accuracy'
                             ' in general.'
                             ' 2. naive: simply take min and max values of layer outputs as thresholds for'
                             ' quantization. In general, the inference accuracy worsens with more examples used in'
                             ' calibration. It is recommended to use `entropy` mode as it produces more accurate'
                             ' inference results.'
                             ' 3. entropy: calculate KL divergence of the fp32 output and quantized output for optimal'
                             ' thresholds. This mode is expected to produce the best inference accuracy of all three'
                             ' kinds of quantized models if the calibration dataset is representative enough of the'
                             ' inference dataset.')
    parser.add_argument('--quantized-dtype', type=str, default='int8',
                        choices=['int8', 'uint8'],
                        help='quantization destination data type for input data')
    # ``args`` intentionally stays a module-level name inside this guard.
    args = parser.parse_args()

    if args.ctx == 'gpu':
        ctx = mx.gpu(0)
    elif args.ctx == 'cpu':
        ctx = mx.cpu(0)
    else:
        raise ValueError('ctx %s is not supported in this script' % args.ctx)

    logging.basicConfig()
    logger = logging.getLogger('logger')
    logger.setLevel(logging.INFO)

    logger.info('shuffle_dataset=%s', args.shuffle_dataset)

    calib_mode = args.calib_mode
    logger.info('calibration mode set to %s', calib_mode)

    # download calibration dataset (not needed when no calibration is done)
    if calib_mode != 'none':
        download_calib_dataset('http://data.mxnet.io/data/val_256_q90.rec', args.calib_dataset, logger=logger)

    # download model
    prefix, epoch = download_model(model_name=args.model, logger=logger)
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)

    # get batch size
    batch_size = args.batch_size
    logger.info('batch size = %d for calibration', batch_size)

    # get number of batches for calibration
    num_calib_batches = args.num_calib_batches
    if calib_mode != 'none':
        logger.info('number of batches = %d for calibration', num_calib_batches)

    # get number of threads for decoding the dataset
    data_nthreads = args.data_nthreads

    # get image shape
    image_shape = args.image_shape

    # Per-model settings: mean pixel values, which layer outputs are collected
    # for calibration, and which symbols are left unquantized.
    exclude_first_conv = args.exclude_first_conv
    excluded_sym_names = []
    on_gpu = args.ctx == 'gpu'
    if args.model == 'imagenet1k-resnet-152':
        rgb_mean = '0,0,0'
        if on_gpu:
            calib_layer = _output_layer_filter('conv', 'sc', 'fc')
        else:
            calib_layer = _output_layer_filter('conv', 'sc')
            excluded_sym_names += ['flatten0', 'fc1']
        if exclude_first_conv:
            excluded_sym_names += ['conv0']
    elif args.model == 'imagenet1k-inception-bn':
        rgb_mean = '123.68,116.779,103.939'
        if on_gpu:
            calib_layer = _output_layer_filter('conv', 'fc')
            excluded_sym_names += ['ch_concat_3a_chconcat',
                                   'ch_concat_3b_chconcat',
                                   'ch_concat_3c_chconcat',
                                   'ch_concat_4a_chconcat',
                                   'ch_concat_4b_chconcat',
                                   'ch_concat_4c_chconcat',
                                   'ch_concat_4d_chconcat',
                                   'ch_concat_4e_chconcat',
                                   'ch_concat_5a_chconcat',
                                   'ch_concat_5b_chconcat']
        else:
            calib_layer = _output_layer_filter('conv')
            excluded_sym_names += ['flatten', 'fc1']
        if exclude_first_conv:
            excluded_sym_names += ['conv_1']
    else:
        raise ValueError('model %s is not supported in this script' % args.model)

    label_name = args.label_name
    logger.info('label_name = %s', label_name)

    data_shape = tuple(int(dim) for dim in image_shape.split(','))
    logger.info('Input data shape = %s', str(data_shape))

    logger.info('rgb_mean = %s', rgb_mean)
    rgb_mean = [float(channel) for channel in rgb_mean.split(',')]
    mean_args = {'mean_r': rgb_mean[0], 'mean_g': rgb_mean[1], 'mean_b': rgb_mean[2]}

    if calib_mode == 'none':
        logger.info('Quantizing FP32 model %s', args.model)
        qsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params,
                                                       ctx=ctx, excluded_sym_names=excluded_sym_names,
                                                       calib_mode=calib_mode, quantized_dtype=args.quantized_dtype,
                                                       logger=logger)
        sym_name = '%s-symbol.json' % (prefix + '-quantized')
        save_symbol(sym_name, qsym, logger)
    else:
        logger.info('Creating ImageRecordIter for reading calibration dataset')
        data = mx.io.ImageRecordIter(path_imgrec=args.calib_dataset,
                                     label_width=1,
                                     preprocess_threads=data_nthreads,
                                     batch_size=batch_size,
                                     data_shape=data_shape,
                                     label_name=label_name,
                                     rand_crop=False,
                                     rand_mirror=False,
                                     shuffle=args.shuffle_dataset,
                                     shuffle_chunk_seed=args.shuffle_chunk_seed,
                                     seed=args.shuffle_seed,
                                     **mean_args)

        cqsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params,
                                                        ctx=ctx, excluded_sym_names=excluded_sym_names,
                                                        calib_mode=calib_mode, calib_data=data,
                                                        num_calib_examples=num_calib_batches * batch_size,
                                                        calib_layer=calib_layer, quantized_dtype=args.quantized_dtype,
                                                        logger=logger)
        # argparse restricts calib_mode to none/naive/entropy, so in this
        # branch it is either 'naive' or 'entropy' and can be used verbatim.
        suffix = '-quantized-%dbatches-%s' % (num_calib_batches, calib_mode)
        sym_name = '%s-symbol.json' % (prefix + suffix)
        save_symbol(sym_name, cqsym, logger)

    # Parameters always go to the same '-quantized' file name, regardless of
    # the calibration mode used for the symbol.
    param_name = '%s-%04d.params' % (prefix + '-quantized', epoch)
    save_params(param_name, qarg_params, aux_params, logger)