blob: d045718a38ff30dc33a92f63c904ac4210bffc26 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import argparse
import logging
import os
import re
import sys
from inspect import currentframe, getframeinfo
import mxnet as mx
from mxnet import gluon
from mxnet.contrib.quantization import quantize_net
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision import transforms
from mxnet.gluon.model_zoo.vision import get_model
SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__))))
sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, '..', '..')))
from tools.rec2idx import IndexCreator
def download_calib_dataset(dataset_url, calib_dataset, logger=None):
if logger is not None:
logger.info(f'Downloading calibration dataset from {dataset_url} to {calib_dataset}')
mx.test_utils.download(dataset_url, calib_dataset)
def get_from_gluon(model_name, classes=1000, logger=None):
dir_path = os.path.dirname(os.path.realpath(__file__))
model_path = os.path.join(dir_path, 'model')
if logger is not None:
logger.info(f'Converting model from Gluon-CV ModelZoo {model_name}... into path {model_path}')
net = get_model(name=model_name, classes=classes, pretrained=True)
prefix = os.path.join(model_path, model_name)
return net, prefix
def regex_find_excluded_symbols(patterns_dict, model_name):
for key, value in patterns_dict.items():
if re.search(key, model_name) is not None:
return value
return None
def get_exclude_symbols(model_name, exclude_first_conv):
"""Grouped supported models at the time of commit:
- alexnet
- densenet121, densenet161
- densenet169, densenet201
- inceptionv3
- mobilenet0.25, mobilenet0.5, mobilenet0.75, mobilenet1.0,
- mobilenetv2_0.25, mobilenetv2_0.5, mobilenetv2_0.75, mobilenetv2_1.0
- resnet101_v1, resnet152_v1, resnet18_v1, resnet34_v1, resnet50_v1
- resnet101_v2, resnet152_v2, resnet18_v2, resnet34_v2, resnet50_v2
- squeezenet1.0, squeezenet1.1
- vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19, vgg19_bn
"""
exclude_symbol_regex = {
'mobilenet[^v]': ['mobilenet_hybridsequential0_flatten0_flatten0', 'mobilenet_hybridsequential0_globalavgpool2d0_fwd'],
'mobilenetv2': ['mobilenetv2_hybridsequential1_flatten0_flatten0'],
# resnetv2_hybridsequential0_hybridsequential0_bottleneckv20_batchnorm0_fwd is excluded for the sake of accuracy
'resnet.*v2': ['resnetv2_hybridsequential0_flatten0_flatten0', 'resnetv2_hybridsequential0_hybridsequential0_bottleneckv20_batchnorm0_fwd'],
'squeezenet1': ['squeezenet_hybridsequential1_flatten0_flatten0'],
}
excluded_sym_names = regex_find_excluded_symbols(exclude_symbol_regex, model_name)
if excluded_sym_names is None:
excluded_sym_names = []
if exclude_first_conv:
first_conv_regex = {
'alexnet': ['alexnet_hybridsequential0_conv2d0_fwd'],
'densenet': ['densenet_hybridsequential0_conv2d0_fwd'],
'inceptionv3': ['inception3_hybridsequential0_hybridsequential0_conv2d0_fwd'],
'mobilenet[^v]': ['mobilenet_hybridsequential0_conv2d0_fwd'],
'mobilenetv2': ['mobilenetv2_hybridsequential0_conv2d0_fwd'],
'resnet.*v1': ['resnetv1_hybridsequential0_conv2d0_fwd'],
'resnet.*v2': ['resnetv2_hybridsequential0_conv2d0_fwd'],
'squeezenet1': ['squeezenet_hybridsequential0_conv2d0_fwd'],
'vgg': ['vgg_hybridsequential0_conv2d0_fwd'],
}
excluded_first_conv_sym_names = regex_find_excluded_symbols(first_conv_regex, model_name)
if excluded_first_conv_sym_names is None:
raise ValueError(f'Currently, model {model_name} is not supported in this script')
excluded_sym_names += excluded_first_conv_sym_names
return excluded_sym_names
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Generate a calibrated quantized model from a FP32 model with oneDNN support')
parser.add_argument('--model', type=str, default='resnet50_v1',
help='model to be quantized. If no-pretrained is set then'
'model must be provided to `model` directory in the same path'
'as this python script')
parser.add_argument('--epoch', type=int, default=0,
help='number of epochs, default is 0')
parser.add_argument('--no-pretrained', action='store_true', default=False,
help='If enabled, will not download pretrained model from MXNet or Gluon-CV modelzoo.')
parser.add_argument('--batch-size', type=int, default=32)
parser.add_argument('--calib-dataset', type=str, default='data/val_256_q90.rec',
help='path of the calibration dataset')
parser.add_argument('--image-shape', type=str, default='3,224,224',
help='number of channels, height and width of input image separated by comma')
parser.add_argument('--data-nthreads', type=int, default=0,
help='number of threads for data loading')
parser.add_argument('--num-calib-batches', type=int, default=10,
help='number of batches for calibration')
parser.add_argument('--exclude-first-conv', action='store_true', default=False,
help='excluding quantizing the first conv layer since the'
' input data may have negative value which doesn\'t support at moment')
parser.add_argument('--shuffle-dataset', action='store_true',
help='shuffle the calibration dataset')
parser.add_argument('--calib-mode', type=str, default='entropy',
help='calibration mode used for generating calibration table for the quantized symbol; supports'
' 1. none: no calibration will be used. The thresholds for quantization will be calculated'
' on the fly. This will result in inference speed slowdown and loss of accuracy'
' in general.'
' 2. naive: simply take min and max values of layer outputs as thresholds for'
' quantization. In general, the inference accuracy worsens with more examples used in'
' calibration. It is recommended to use `entropy` mode as it produces more accurate'
' inference results.'
' 3. entropy: calculate KL divergence of the fp32 output and quantized output for optimal'
' thresholds. This mode is expected to produce the best inference accuracy of all three'
' kinds of calibration modes if the calibration dataset is representative enough of the'
' inference dataset.')
parser.add_argument('--quantized-dtype', type=str, default='auto',
choices=['auto', 'int8', 'uint8'],
help='quantization destination data type for input data')
parser.add_argument('--quiet', action='store_true', default=False,
help='suppress most of log')
args = parser.parse_args()
ctx = mx.cpu(0)
logger = None
if not args.quiet:
logging.basicConfig()
logger = logging.getLogger('logger')
logger.setLevel(logging.INFO)
if logger:
logger.info(args)
logger.info(f'shuffle_dataset={args.shuffle_dataset}')
logger.info(f'calibration mode set to {args.calib_mode}')
calib_mode = args.calib_mode
# download calibration dataset
if calib_mode != 'none':
idx_file_name = os.path.splitext(args.calib_dataset)[0] + '.idx'
if not os.path.isfile(idx_file_name):
download_calib_dataset('http://data.mxnet.io/data/val_256_q90.rec', args.calib_dataset)
creator = IndexCreator(args.calib_dataset, idx_file_name)
creator.create_index()
creator.close()
# get image shape
image_shape = args.image_shape
data_shape = [(1,) + tuple(int(i) for i in image_shape.split(','))]
# check if directory for output model exists
dir_path = os.path.dirname(os.path.realpath(__file__))
dir_path = os.path.join(dir_path, 'model')
if not os.path.exists(dir_path):
os.mkdir(dir_path) # without try catch block as we expect to finish script if it fail
# download model
if not args.no_pretrained:
if logger:
logger.info('Get pre-trained model from Gluon-CV modelzoo.')
logger.info('If you want to use custom model, please set --no-pretrained.')
net, prefix = get_from_gluon(model_name=args.model, classes=1000, logger=logger)
rgb_mean = '0.485,0.456,0.406'
rgb_std = '0.229,0.224,0.225'
epoch = 0
net.hybridize()
net(mx.np.zeros(data_shape[0])) # dummy forward pass to build graph
net.export(prefix) # save model
net.hybridize(active=False) # disable hybridization - it will be handled in quantization API
else:
prefix = os.path.join(dir_path, args.model)
epoch = args.epoch
net = gluon.SymbolBlock.imports("{}-symbol.json".format(prefix), ['data'], "{}-0000.params".format(prefix))
# get batch size
batch_size = args.batch_size
if logger:
logger.info(f'batch size = {batch_size} for calibration')
# get number of batches for calibration
num_calib_batches = args.num_calib_batches
if logger:
if calib_mode == 'none':
logger.info('skip calibration step as calib_mode is none')
else:
logger.info(f'number of batches = {num_calib_batches} for calibration')
# get number of threads for decoding the dataset
data_nthreads = args.data_nthreads
exclude_first_conv = args.exclude_first_conv
if args.quantized_dtype == "uint8":
if logger:
logger.info('quantized dtype is set to uint8, will exclude first conv.')
exclude_first_conv = True
excluded_sym_names = []
if not args.no_pretrained:
excluded_sym_names += get_exclude_symbols(args.model, args.exclude_first_conv)
else:
if logger:
frameinfo = getframeinfo(currentframe())
logger.info(F'Please set proper RGB configs inside this script below {frameinfo.filename}:{frameinfo.lineno} for model {args.model}!')
# add rgb mean/std of your model.
rgb_mean = '0,0,0'
rgb_std = '0,0,0'
# add layer names you donnot want to quantize.
if logger:
frameinfo = getframeinfo(currentframe())
logger.info(F'Please set proper excluded_sym_names inside this script below {frameinfo.filename}:{frameinfo.lineno} for model {args.model} if required!')
excluded_sym_names += []
if exclude_first_conv:
excluded_sym_names += []
if logger:
logger.info(f'These layers have been excluded {excluded_sym_names}')
logger.info(f'Input data shape = {str(data_shape)}')
logger.info(f'rgb_mean = {rgb_mean}')
logger.info(f'rgb_std = {rgb_std}')
rgb_mean = [float(i) for i in rgb_mean.split(',')]
mean_args = {'mean_r': rgb_mean[0], 'mean_g': rgb_mean[1], 'mean_b': rgb_mean[2]}
rgb_std = [float(i) for i in rgb_std.split(',')]
std_args = {'std_r': rgb_std[0], 'std_g': rgb_std[1], 'std_b': rgb_std[2]}
if calib_mode == 'none':
if logger:
logger.info(f'Quantizing FP32 model {args.model}')
qsym = quantize_net(net, ctx=ctx, exclude_layers_match=excluded_sym_names, data_shapes=data_shape,
calib_mode=calib_mode, quantized_dtype=args.quantized_dtype,
logger=logger)
suffix = '-quantized'
else:
if logger:
logger.info('Creating DataLoader for reading calibration dataset')
dataset = mx.gluon.data.vision.ImageRecordDataset(args.calib_dataset)
transformer = transforms.Compose([transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=rgb_mean, std=rgb_std)])
data_loader = DataLoader(dataset.transform_first(transformer), batch_size, shuffle=args.shuffle_dataset, num_workers=data_nthreads)
qsym = quantize_net(net, ctx=ctx, exclude_layers_match=excluded_sym_names,
calib_mode=calib_mode, calib_data=data_loader, num_calib_batches=num_calib_batches,
quantized_dtype=args.quantized_dtype, logger=logger)
if calib_mode == 'entropy':
suffix = f'-quantized-{num_calib_batches}batches-entropy'
elif calib_mode == 'naive':
suffix = f'-quantized-{num_calib_batches}batches-naive'
else:
raise ValueError(f'unknown calibration mode {calib_mode} received, only supports `none`, `naive`, and `entropy`')
save_path = prefix + suffix
model_path, params_path = qsym.export(save_path, epoch)
if logger is not None:
logger.info(F'Saved quantized model into:\n{model_path}\n{params_path}')