| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
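# This script quantizes a pre-trained FP32 SSD (VGG16-reduced, 300x300) model
# to INT8, optionally calibrating activation thresholds on a validation
# RecordIO dataset (data/val.rec).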
from __future__ import print_function
import os
import argparse
import logging
import mxnet as mx
from dataset.iterator import DetRecordIter
from config.config import cfg
from mxnet.contrib.quantization import quantize_model
| |
| def save_symbol(fname, sym, logger=None): |
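    """Serialize a Symbol to a JSON file."""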
| if logger is not None: |
| logger.info('Saving symbol into file at %s' % fname) |
| sym.save(fname) |
| |
| |
| def save_params(fname, arg_params, aux_params, logger=None): |
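    """Save arg and aux parameters to an NDArray file, copying them to CPU first."""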
| if logger is not None: |
| logger.info('Saving params into file at %s' % fname) |
    save_dict = {('arg:%s' % k): v.as_in_context(mx.cpu()) for k, v in arg_params.items()}
    save_dict.update({('aux:%s' % k): v.as_in_context(mx.cpu()) for k, v in aux_params.items()})
| mx.nd.save(fname, save_dict) |
| |
| |
| if __name__ == '__main__': |
| parser = argparse.ArgumentParser(description='Generate a calibrated quantized SSD model from a FP32 model') |
| parser.add_argument('--batch-size', type=int, default=32) |
| parser.add_argument('--num-calib-batches', type=int, default=5, |
| help='number of batches for calibration') |
    parser.add_argument('--exclude-first-conv', action='store_true', default=False,
                        help='exclude the first conv layer from quantization, since the'
                             ' number of channels in that layer is usually not a multiple'
                             ' of 4, which does not satisfy the requirement of cuDNN')
    parser.add_argument('--no-shuffle-dataset', dest='shuffle_dataset',
                        action='store_false', default=True,
                        help='do not shuffle the calibration dataset'
                             ' (shuffling is enabled by default)')
| parser.add_argument('--shuffle-chunk-seed', type=int, default=3982304, |
| help='shuffling chunk seed, see' |
| ' https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=imager#mxnet.io.ImageRecordIter' |
| ' for more details') |
| parser.add_argument('--shuffle-seed', type=int, default=48564309, |
| help='shuffling seed, see' |
| ' https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=imager#mxnet.io.ImageRecordIter' |
| ' for more details') |
| parser.add_argument('--calib-mode', type=str, default='naive', |
| help='calibration mode used for generating calibration table for the quantized symbol; supports' |
| ' 1. none: no calibration will be used. The thresholds for quantization will be calculated' |
| ' on the fly. This will result in inference speed slowdown and loss of accuracy' |
| ' in general.' |
| ' 2. naive: simply take min and max values of layer outputs as thresholds for' |
| ' quantization. In general, the inference accuracy worsens with more examples used in' |
| ' calibration. It is recommended to use `entropy` mode as it produces more accurate' |
| ' inference results.' |
| ' 3. entropy: calculate KL divergence of the fp32 output and quantized output for optimal' |
| ' thresholds. This mode is expected to produce the best inference accuracy of all three' |
| ' kinds of quantized models if the calibration dataset is representative enough of the' |
| ' inference dataset.') |
| parser.add_argument('--quantized-dtype', type=str, default='auto', |
| choices=['auto', 'int8', 'uint8'], |
| help='quantization destination data type for input data') |
| |
| args = parser.parse_args() |
| ctx = mx.cpu(0) |
| logging.basicConfig() |
| logger = logging.getLogger('logger') |
| logger.setLevel(logging.INFO) |
| |
| logger.info('shuffle_dataset=%s' % args.shuffle_dataset) |
| |
| calib_mode = args.calib_mode |
| logger.info('calibration mode set to %s' % calib_mode) |
| |
    # load the FP32 model
    prefix, epoch = "./model/ssd_vgg16_reduced_300", 0
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
| |
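    # calibration binds the symbol inside a Module with label_names=('label',),
    # so make sure the symbol exposes a 'label' input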
    if 'label' not in sym.list_arguments():
| label = mx.sym.Variable(name='label') |
| sym = mx.sym.Group([sym, label]) |
| |
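    # partition the graph for the MKL-DNN subgraph backend (operator fusion)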
| sym = sym.get_backend_symbol('MKLDNN') |
| |
| # get batch size |
| batch_size = args.batch_size |
| logger.info('batch size = %d for calibration' % batch_size) |
| |
| # get number of batches for calibration |
| num_calib_batches = args.num_calib_batches |
| if calib_mode != 'none': |
| logger.info('number of batches = %d for calibration' % num_calib_batches) |
| |
| # get image shape |
| image_shape = '3,300,300' |
| |
    def calib_layer(name):
        """Calibrate all layer outputs except raw inputs and parameters."""
        return not (name.endswith('_data') or name.endswith('_weight') or
                    name.endswith('_bias') or name.endswith('_workspace'))
| # Quantization layer configs |
| exclude_first_conv = args.exclude_first_conv |
| excluded_sym_names = [] |
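    # layers listed here are kept in FP32 (passed to quantize_model as excluded_sym_names)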
    excluded_sym_names += ['flatten' + str(i) for i in range(1, 19)]
| excluded_sym_names += ['multibox_loc_pred', |
| 'concat0', |
| 'concat1'] |
| if exclude_first_conv: |
| excluded_sym_names += ['conv1_1'] |
| |
| label_name = 'label' |
| logger.info('label_name = %s' % label_name) |
| |
| data_shape = tuple([int(i) for i in image_shape.split(',')]) |
| logger.info('Input data shape = %s' % str(data_shape)) |
| |
    rgb_mean = '123,117,104'
    logger.info('rgb_mean = %s' % rgb_mean)
    rgb_mean = [float(i) for i in rgb_mean.split(',')]
| |
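    # with calib_mode 'none' the model is quantized directly; otherwise
    # calibration batches are drawn from the validation set first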
| if calib_mode == 'none': |
| qsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params, |
| ctx=ctx, excluded_sym_names=excluded_sym_names, |
| calib_mode=calib_mode, quantized_dtype=args.quantized_dtype, |
| logger=logger) |
| sym_name = '%s-symbol.json' % ('./model/qssd_vgg16_reduced_300') |
| param_name = '%s-%04d.params' % ('./model/qssd_vgg16_reduced_300', epoch) |
| save_symbol(sym_name, qsym, logger) |
| else: |
| logger.info('Creating ImageRecordIter for reading calibration dataset') |
        eval_iter = DetRecordIter(os.path.join(os.getcwd(), 'data', 'val.rec'),
                                  batch_size, data_shape, mean_pixels=tuple(rgb_mean),
                                  path_imglist="", **cfg.valid)
| |
| qsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params, |
| ctx=ctx, excluded_sym_names=excluded_sym_names, |
| calib_mode=calib_mode, calib_data=eval_iter, |
| num_calib_examples=num_calib_batches * batch_size, |
| calib_layer=calib_layer, quantized_dtype=args.quantized_dtype, |
| label_names=(label_name,), logger=logger) |
| sym_name = '%s-symbol.json' % ('./model/cqssd_vgg16_reduced_300') |
| param_name = '%s-%04d.params' % ('./model/cqssd_vgg16_reduced_300', epoch) |
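        # fuse the requantize ops introduced by calibration into the quantized operators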
| qsym = qsym.get_backend_symbol('MKLDNN_POST_QUANTIZE') |
| save_symbol(sym_name, qsym, logger) |
| save_params(param_name, qarg_params, aux_params, logger) |