| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # Disable noisy logging from INC: |
| import logging |
| logging.disable(logging.INFO) |
| |
| import time |
| import mxnet as mx |
| from mxnet.gluon.model_zoo.vision import resnet50_v2 |
| from mxnet.gluon.data.vision import transforms |
| from mxnet.contrib.quantization import quantize_net |
| import custom_strategy |
| |
| |
| def save_model(net, data_loader, description, time_spend): |
| save_model.count += 1 |
| print( "{:21s} tuned in {:8.2f}s".format(description, time_spend)) |
| net.export("__resnet50_v2_{:02}_".format(save_model.count) + description.replace(' ', '_')) |
| |
| save_model.count = 0 |
| |
| # Preparing input data |
| start = time.time() |
| rgb_mean = (0.485, 0.456, 0.406) |
| rgb_std = (0.229, 0.224, 0.225) |
| batch_size = 64 |
| num_calib_batches = 9 |
| # Set proper path to ImageNet data set below |
| dataset = mx.gluon.data.vision.ImageRecordDataset('../imagenet/rec/val.rec') |
| # Tuning with INC on the whole data set takes too much time. Therefore, we take only a part of the whole data set |
| # as representative part of it: |
| dataset = dataset.take(num_calib_batches * batch_size) |
| transformer = transforms.Compose([transforms.Resize(256), |
| transforms.CenterCrop(224), |
| transforms.ToTensor(), |
| transforms.Normalize(mean=rgb_mean, std=rgb_std)]) |
| # Note: as the input data is used many times during tuning it is better to have it prepared earlier. |
| # Therefore, lazy parameter in transform_first is set to False. |
| val_data = mx.gluon.data.DataLoader( |
| dataset.transform_first(transformer, lazy=False), batch_size, shuffle=False) |
| val_data.batch_size = batch_size |
| time_consumed = time.time() - start |
| print("Input data prepared in {:.2f}s".format(time_consumed)) |
| |
| net = resnet50_v2(pretrained=True) |
| |
| start = time.time() |
| net.hybridize(static_alloc=True, static_shape=True) |
| time_consumed = time.time() - start |
| # Run forward path once to cache the graph - required to save the model |
| net(next(iter(val_data))[0]) |
| save_model(net, val_data, "fp32", time_consumed) |
| |
| start = time.time() |
| net.optimize_for(next(iter(val_data))[0], backend='ONEDNN', static_alloc=True, static_shape=True) |
| time_consumed = time.time() - start |
| save_model(net, val_data, "fp32 fused", time_consumed) |
| |
| start = time.time() |
| qnet = quantize_net(net, calib_mode='naive', calib_data=val_data) |
| qnet.hybridize(static_alloc=True, static_shape=True) |
| time_consumed = time.time() - start |
| save_model(qnet, val_data, 'int8 full naive', time_consumed) |
| |
| start = time.time() |
| qnet = quantize_net(net, calib_mode='entropy', calib_data=val_data) |
| qnet.hybridize(static_alloc=True, static_shape=True) |
| time_consumed = time.time() - start |
| save_model(qnet, val_data, 'int8 full entropy', time_consumed) |
| |
| start = time.time() |
| qnet = quantize_net(net, calib_mode='naive', quantize_mode='smart', calib_data=val_data) |
| qnet.hybridize(static_alloc=True, static_shape=True) |
| time_consumed = time.time() - start |
| save_model(qnet, val_data, 'int8 smart naive', time_consumed) |
| |
| start = time.time() |
| qnet = quantize_net(net, calib_mode='entropy', quantize_mode='smart', calib_data=val_data) |
| qnet.hybridize(static_alloc=True, static_shape=True) |
| time_consumed = time.time() - start |
| save_model(qnet, val_data, 'int8 smart entropy', time_consumed) |
| |
| def eval_func(model): |
| metric = mx.gluon.metric.Accuracy() |
| for x, label in val_data: |
| output = model(x) |
| metric.update(label, output) |
| accuracy = metric.get()[1] |
| return accuracy |
| |
| from neural_compressor.experimental import Quantization |
| quantizer = Quantization("resnet50v2_mse.yaml") |
| quantizer.model = net |
| quantizer.calib_dataloader = val_data |
| quantizer.eval_func = eval_func |
| for strategy in ['basic', 'mse', 'mycustom', 'bayesian']: |
| quantizer.cfg.tuning.strategy.name = strategy |
| start = time.time() |
| qnet_inc = quantizer.fit().model |
| time_consumed = time.time() - start |
| save_model(qnet_inc, val_data, "INC " + strategy, time_consumed) |