example/quantization/imagenet_gen_qsym.py - mxnet - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import argparse
 import os
 import logging
 from common import modelzoo
 import mxnet as mx
 from mxnet.contrib.quantization import *


 def download_calib_dataset(dataset_url, calib_dataset, logger=None):
     if logger is not None:
         logger.info('Downloading calibration dataset from %s to %s' % (dataset_url, calib_dataset))
     mx.test_utils.download(dataset_url, calib_dataset)


 def download_model(model_name, logger=None):
     dir_path = os.path.dirname(os.path.realpath(__file__))
     model_path = os.path.join(dir_path, 'model')
     if logger is not None:
         logger.info('Downloading model %s... into path %s' % (model_name, model_path))
     return modelzoo.download_model(args.model, os.path.join(dir_path, 'model'))


 def save_symbol(fname, sym, logger=None):
     if logger is not None:
         logger.info('Saving symbol into file at %s' % fname)
     sym.save(fname)


 def save_params(fname, arg_params, aux_params, logger=None):
     if logger is not None:
         logger.info('Saving params into file at %s' % fname)
     save_dict = {('arg:%s' % k): v.as_in_context(cpu()) for k, v in arg_params.items()}
     save_dict.update({('aux:%s' % k): v.as_in_context(cpu()) for k, v in aux_params.items()})
     mx.nd.save(fname, save_dict)


 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Generate a calibrated quantized model from a FP32 model')
     parser.add_argument('--ctx', type=str, default='gpu')
     parser.add_argument('--model', type=str, choices=['imagenet1k-resnet-152', 'imagenet1k-inception-bn'],
                         help='currently only supports imagenet1k-resnet-152 or imagenet1k-inception-bn')
     parser.add_argument('--batch-size', type=int, default=32)
     parser.add_argument('--label-name', type=str, default='softmax_label')
     parser.add_argument('--calib-dataset', type=str, default='data/val_256_q90.rec',
                         help='path of the calibration dataset')
     parser.add_argument('--image-shape', type=str, default='3,224,224')
     parser.add_argument('--data-nthreads', type=int, default=60,
                         help='number of threads for data decoding')
     parser.add_argument('--num-calib-batches', type=int, default=10,
                         help='number of batches for calibration')
     parser.add_argument('--exclude-first-conv', action='store_true', default=True,
                         help='excluding quantizing the first conv layer since the'
                              ' number of channels is usually not a multiple of 4 in that layer'
                              ' which does not satisfy the requirement of cuDNN')
     parser.add_argument('--shuffle-dataset', action='store_true', default=True,
                         help='shuffle the calibration dataset')
     parser.add_argument('--shuffle-chunk-seed', type=int, default=3982304,
                         help='shuffling chunk seed, see'
                              ' https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=imager#mxnet.io.ImageRecordIter'
                              ' for more details')
     parser.add_argument('--shuffle-seed', type=int, default=48564309,
                         help='shuffling seed, see'
                              ' https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=imager#mxnet.io.ImageRecordIter'
                              ' for more details')
     parser.add_argument('--calib-mode', type=str, default='entropy',
                         help='calibration mode used for generating calibration table for the quantized symbol; supports'
                              ' 1. none: no calibration will be used. The thresholds for quantization will be calculated'
                              ' on the fly. This will result in inference speed slowdown and loss of accuracy'
                              ' in general.'
                              ' 2. naive: simply take min and max values of layer outputs as thresholds for'
                              ' quantization. In general, the inference accuracy worsens with more examples used in'
                              ' calibration. It is recommended to use `entropy` mode as it produces more accurate'
                              ' inference results.'
                              ' 3. entropy: calculate KL divergence of the fp32 output and quantized output for optimal'
                              ' thresholds. This mode is expected to produce the best inference accuracy of all three'
                              ' kinds of quantized models if the calibration dataset is representative enough of the'
                              ' inference dataset.')
     parser.add_argument('--quantized-dtype', type=str, default='int8',
                         choices=['int8', 'uint8'],
                         help='quantization destination data type for input data')
     args = parser.parse_args()

     if args.ctx == 'gpu':
         ctx = mx.gpu(0)
     elif args.ctx == 'cpu':
         ctx = mx.cpu(0)
     else:
         raise ValueError('ctx %s is not supported in this script' % args.ctx)

     logging.basicConfig()
     logger = logging.getLogger('logger')
     logger.setLevel(logging.INFO)

     logger.info('shuffle_dataset=%s' % args.shuffle_dataset)

     calib_mode = args.calib_mode
     logger.info('calibration mode set to %s' % calib_mode)

     # download calibration dataset
     if calib_mode != 'none':
         download_calib_dataset('http://data.mxnet.io/data/val_256_q90.rec', args.calib_dataset)

     # download model
     prefix, epoch = download_model(model_name=args.model, logger=logger)
     sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)

     # get batch size
     batch_size = args.batch_size
     logger.info('batch size = %d for calibration' % batch_size)

     # get number of batches for calibration
     num_calib_batches = args.num_calib_batches
     if calib_mode != 'none':
         logger.info('number of batches = %d for calibration' % num_calib_batches)

     # get number of threads for decoding the dataset
     data_nthreads = args.data_nthreads

     # get image shape
     image_shape = args.image_shape

     exclude_first_conv = args.exclude_first_conv
     excluded_sym_names = []
     if args.model == 'imagenet1k-resnet-152':
         rgb_mean = '0,0,0'
         if args.ctx == 'gpu':
             calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
                                                                      or name.find('sc') != -1
                                                                      or name.find('fc') != -1)
         else:
             calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
                                                                      or name.find('sc') != -1)
             excluded_sym_names += ['flatten0', 'fc1']
         if exclude_first_conv:
             excluded_sym_names += ['conv0']
     elif args.model == 'imagenet1k-inception-bn':
         rgb_mean = '123.68,116.779,103.939'
         if args.ctx == 'gpu':
             calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
                                                                      or name.find('fc') != -1)
             excluded_sym_names += ['ch_concat_3a_chconcat',
                                    'ch_concat_3b_chconcat',
                                    'ch_concat_3c_chconcat',
                                    'ch_concat_4a_chconcat',
                                    'ch_concat_4b_chconcat',
                                    'ch_concat_4c_chconcat',
                                    'ch_concat_4d_chconcat',
                                    'ch_concat_4e_chconcat',
                                    'ch_concat_5a_chconcat',
                                    'ch_concat_5b_chconcat']
         else:
             calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1)
             excluded_sym_names += ['flatten', 'fc1']
         if exclude_first_conv:
             excluded_sym_names += ['conv_1']
     else:
         raise ValueError('model %s is not supported in this script' % args.model)

     label_name = args.label_name
     logger.info('label_name = %s' % label_name)

     data_shape = tuple([int(i) for i in image_shape.split(',')])
     logger.info('Input data shape = %s' % str(data_shape))

     logger.info('rgb_mean = %s' % rgb_mean)
     rgb_mean = [float(i) for i in rgb_mean.split(',')]
     mean_args = {'mean_r': rgb_mean[0], 'mean_g': rgb_mean[1], 'mean_b': rgb_mean[2]}

     if calib_mode == 'none':
         logger.info('Quantizing FP32 model %s' % args.model)
         qsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params,
                                                        ctx=ctx, excluded_sym_names=excluded_sym_names,
                                                        calib_mode=calib_mode, quantized_dtype=args.quantized_dtype,
                                                        logger=logger)
         sym_name = '%s-symbol.json' % (prefix + '-quantized')
         save_symbol(sym_name, qsym, logger)
     else:
         logger.info('Creating ImageRecordIter for reading calibration dataset')
         data = mx.io.ImageRecordIter(path_imgrec=args.calib_dataset,
                                      label_width=1,
                                      preprocess_threads=data_nthreads,
                                      batch_size=batch_size,
                                      data_shape=data_shape,
                                      label_name=label_name,
                                      rand_crop=False,
                                      rand_mirror=False,
                                      shuffle=args.shuffle_dataset,
                                      shuffle_chunk_seed=args.shuffle_chunk_seed,
                                      seed=args.shuffle_seed,
                                      **mean_args)

         cqsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params,
                                                         ctx=ctx, excluded_sym_names=excluded_sym_names,
                                                         calib_mode=calib_mode, calib_data=data,
                                                         num_calib_examples=num_calib_batches * batch_size,
                                                         calib_layer=calib_layer, quantized_dtype=args.quantized_dtype,
                                                         logger=logger)
         if calib_mode == 'entropy':
             suffix = '-quantized-%dbatches-entropy' % num_calib_batches
         elif calib_mode == 'naive':
             suffix = '-quantized-%dbatches-naive' % num_calib_batches
         else:
             raise ValueError('unknow calibration mode %s received, only supports `none`, `naive`, and `entropy`'
                              % calib_mode)
         sym_name = '%s-symbol.json' % (prefix + suffix)
         save_symbol(sym_name, cqsym, logger)

     param_name = '%s-%04d.params' % (prefix + '-quantized', epoch)
     save_params(param_name, qarg_params, aux_params, logger)
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import argparse
	import os
	import logging
	from common import modelzoo
	import mxnet as mx
	from mxnet.contrib.quantization import *


	def download_calib_dataset(dataset_url, calib_dataset, logger=None):
	if logger is not None:
	logger.info('Downloading calibration dataset from %s to %s' % (dataset_url, calib_dataset))
	mx.test_utils.download(dataset_url, calib_dataset)


	def download_model(model_name, logger=None):
	dir_path = os.path.dirname(os.path.realpath(__file__))
	model_path = os.path.join(dir_path, 'model')
	if logger is not None:
	logger.info('Downloading model %s... into path %s' % (model_name, model_path))
	return modelzoo.download_model(args.model, os.path.join(dir_path, 'model'))


	def save_symbol(fname, sym, logger=None):
	if logger is not None:
	logger.info('Saving symbol into file at %s' % fname)
	sym.save(fname)


	def save_params(fname, arg_params, aux_params, logger=None):
	if logger is not None:
	logger.info('Saving params into file at %s' % fname)
	save_dict = {('arg:%s' % k): v.as_in_context(cpu()) for k, v in arg_params.items()}
	save_dict.update({('aux:%s' % k): v.as_in_context(cpu()) for k, v in aux_params.items()})
	mx.nd.save(fname, save_dict)


	if __name__ == '__main__':
	parser = argparse.ArgumentParser(description='Generate a calibrated quantized model from a FP32 model')
	parser.add_argument('--ctx', type=str, default='gpu')
	parser.add_argument('--model', type=str, choices=['imagenet1k-resnet-152', 'imagenet1k-inception-bn'],
	help='currently only supports imagenet1k-resnet-152 or imagenet1k-inception-bn')
	parser.add_argument('--batch-size', type=int, default=32)
	parser.add_argument('--label-name', type=str, default='softmax_label')
	parser.add_argument('--calib-dataset', type=str, default='data/val_256_q90.rec',
	help='path of the calibration dataset')
	parser.add_argument('--image-shape', type=str, default='3,224,224')
	parser.add_argument('--data-nthreads', type=int, default=60,
	help='number of threads for data decoding')
	parser.add_argument('--num-calib-batches', type=int, default=10,
	help='number of batches for calibration')
	parser.add_argument('--exclude-first-conv', action='store_true', default=True,
	help='excluding quantizing the first conv layer since the'
	' number of channels is usually not a multiple of 4 in that layer'
	' which does not satisfy the requirement of cuDNN')
	parser.add_argument('--shuffle-dataset', action='store_true', default=True,
	help='shuffle the calibration dataset')
	parser.add_argument('--shuffle-chunk-seed', type=int, default=3982304,
	help='shuffling chunk seed, see'
	' https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=imager#mxnet.io.ImageRecordIter'
	' for more details')
	parser.add_argument('--shuffle-seed', type=int, default=48564309,
	help='shuffling seed, see'
	' https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=imager#mxnet.io.ImageRecordIter'
	' for more details')
	parser.add_argument('--calib-mode', type=str, default='entropy',
	help='calibration mode used for generating calibration table for the quantized symbol; supports'
	' 1. none: no calibration will be used. The thresholds for quantization will be calculated'
	' on the fly. This will result in inference speed slowdown and loss of accuracy'
	' in general.'
	' 2. naive: simply take min and max values of layer outputs as thresholds for'
	' quantization. In general, the inference accuracy worsens with more examples used in'
	' calibration. It is recommended to use `entropy` mode as it produces more accurate'
	' inference results.'
	' 3. entropy: calculate KL divergence of the fp32 output and quantized output for optimal'
	' thresholds. This mode is expected to produce the best inference accuracy of all three'
	' kinds of quantized models if the calibration dataset is representative enough of the'
	' inference dataset.')
	parser.add_argument('--quantized-dtype', type=str, default='int8',
	choices=['int8', 'uint8'],
	help='quantization destination data type for input data')
	args = parser.parse_args()

	if args.ctx == 'gpu':
	ctx = mx.gpu(0)
	elif args.ctx == 'cpu':
	ctx = mx.cpu(0)
	else:
	raise ValueError('ctx %s is not supported in this script' % args.ctx)

	logging.basicConfig()
	logger = logging.getLogger('logger')
	logger.setLevel(logging.INFO)

	logger.info('shuffle_dataset=%s' % args.shuffle_dataset)

	calib_mode = args.calib_mode
	logger.info('calibration mode set to %s' % calib_mode)

	# download calibration dataset
	if calib_mode != 'none':
	download_calib_dataset('http://data.mxnet.io/data/val_256_q90.rec', args.calib_dataset)

	# download model
	prefix, epoch = download_model(model_name=args.model, logger=logger)
	sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)

	# get batch size
	batch_size = args.batch_size
	logger.info('batch size = %d for calibration' % batch_size)

	# get number of batches for calibration
	num_calib_batches = args.num_calib_batches
	if calib_mode != 'none':
	logger.info('number of batches = %d for calibration' % num_calib_batches)

	# get number of threads for decoding the dataset
	data_nthreads = args.data_nthreads

	# get image shape
	image_shape = args.image_shape

	exclude_first_conv = args.exclude_first_conv
	excluded_sym_names = []
	if args.model == 'imagenet1k-resnet-152':
	rgb_mean = '0,0,0'
	if args.ctx == 'gpu':
	calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
	or name.find('sc') != -1
	or name.find('fc') != -1)
	else:
	calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
	or name.find('sc') != -1)
	excluded_sym_names += ['flatten0', 'fc1']
	if exclude_first_conv:
	excluded_sym_names += ['conv0']
	elif args.model == 'imagenet1k-inception-bn':
	rgb_mean = '123.68,116.779,103.939'
	if args.ctx == 'gpu':
	calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1
	or name.find('fc') != -1)
	excluded_sym_names += ['ch_concat_3a_chconcat',
	'ch_concat_3b_chconcat',
	'ch_concat_3c_chconcat',
	'ch_concat_4a_chconcat',
	'ch_concat_4b_chconcat',
	'ch_concat_4c_chconcat',
	'ch_concat_4d_chconcat',
	'ch_concat_4e_chconcat',
	'ch_concat_5a_chconcat',
	'ch_concat_5b_chconcat']
	else:
	calib_layer = lambda name: name.endswith('_output') and (name.find('conv') != -1)
	excluded_sym_names += ['flatten', 'fc1']
	if exclude_first_conv:
	excluded_sym_names += ['conv_1']
	else:
	raise ValueError('model %s is not supported in this script' % args.model)

	label_name = args.label_name
	logger.info('label_name = %s' % label_name)

	data_shape = tuple([int(i) for i in image_shape.split(',')])
	logger.info('Input data shape = %s' % str(data_shape))

	logger.info('rgb_mean = %s' % rgb_mean)
	rgb_mean = [float(i) for i in rgb_mean.split(',')]
	mean_args = {'mean_r': rgb_mean[0], 'mean_g': rgb_mean[1], 'mean_b': rgb_mean[2]}

	if calib_mode == 'none':
	logger.info('Quantizing FP32 model %s' % args.model)
	qsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params,
	ctx=ctx, excluded_sym_names=excluded_sym_names,
	calib_mode=calib_mode, quantized_dtype=args.quantized_dtype,
	logger=logger)
	sym_name = '%s-symbol.json' % (prefix + '-quantized')
	save_symbol(sym_name, qsym, logger)
	else:
	logger.info('Creating ImageRecordIter for reading calibration dataset')
	data = mx.io.ImageRecordIter(path_imgrec=args.calib_dataset,
	label_width=1,
	preprocess_threads=data_nthreads,
	batch_size=batch_size,
	data_shape=data_shape,
	label_name=label_name,
	rand_crop=False,
	rand_mirror=False,
	shuffle=args.shuffle_dataset,
	shuffle_chunk_seed=args.shuffle_chunk_seed,
	seed=args.shuffle_seed,
	**mean_args)

	cqsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params,
	ctx=ctx, excluded_sym_names=excluded_sym_names,
	calib_mode=calib_mode, calib_data=data,
	num_calib_examples=num_calib_batches * batch_size,
	calib_layer=calib_layer, quantized_dtype=args.quantized_dtype,
	logger=logger)
	if calib_mode == 'entropy':
	suffix = '-quantized-%dbatches-entropy' % num_calib_batches
	elif calib_mode == 'naive':
	suffix = '-quantized-%dbatches-naive' % num_calib_batches
	else:
	raise ValueError('unknow calibration mode %s received, only supports `none`, `naive`, and `entropy`'
	% calib_mode)
	sym_name = '%s-symbol.json' % (prefix + suffix)
	save_symbol(sym_name, cqsym, logger)

	param_name = '%s-%04d.params' % (prefix + '-quantized', epoch)
	save_params(param_name, qarg_params, aux_params, logger)