# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import mxnet as mx
import numpy as np
from mxnet import autograd
from mxnet.gluon.model_zoo.vision import get_model
from mxnet.test_utils import assert_almost_equal
import sys
import os
import pytest
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.insert(0, os.path.join(curr_path, '../unittest'))

def eprint(*args, **kwargs):
    print(*args, file=sys.stderr, **kwargs)


VAL_DATA = 'data/val-5k-256.rec'


def download_data():
    return mx.test_utils.download(
        'https://repo.mxnet.io/gluon/dataset/test/val-5k-256-9e70d85e0.rec', VAL_DATA)
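

# The two tests below repeat the same CPU-to-GPU parameter-copy loop. A
# minimal sketch of that pattern factored into a helper (illustrative only;
# the tests keep their inline loops and the name _sync_params is ours):
def _sync_params(cpu_model, gpu_model):
    cpu_params = cpu_model.collect_params()
    gpu_params = gpu_model.collect_params()
    for k in cpu_params.keys():
        # Copy each CPU parameter onto the GPU so both models start identical.
        gpu_params.get(k).set_data(cpu_params.get(k).data().as_in_context(mx.gpu()))
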
@mx.util.use_np
@pytest.mark.serial
# TODO(vcherepanov): mobilenet0.25 fails this test
@pytest.mark.parametrize('model_name', ['resnet50_v1', 'vgg19_bn', 'alexnet', 'densenet201', 'squeezenet1.0'])
def test_inference(model_name):
    batch_size = 10
    download_data()
    eprint(f'testing inference on {model_name}')
    data_shape = (3, 224, 224) if 'inception' not in model_name else (3, 299, 299)
    dataIter = mx.io.ImageRecordIter(
        path_imgrec=VAL_DATA,
        label_width=1,
        preprocess_threads=1,
        batch_size=batch_size,
        data_shape=data_shape,
        label_name='softmax_label',
        rand_crop=False,
        rand_mirror=False)
    data_batch = dataIter.next()
    data = data_batch.data[0]
    gpu_data = data.as_in_context(mx.gpu())
    # Create each model and run it once so that deferred initialization
    # materializes all parameters.
    cpu_model = get_model(model_name)
    cpu_model.initialize(ctx=mx.cpu())
    cpu_model(mx.np.array(data, ctx=mx.cpu()))
    gpu_model = get_model(model_name)
    gpu_model.initialize(ctx=mx.gpu())
    gpu_model(mx.np.array(data, ctx=mx.gpu()))
    # Force the two models to have the same parameters (the _sync_params
    # sketch above shows this pattern factored out).
    cpu_params = cpu_model.collect_params()
    gpu_params = gpu_model.collect_params()
    for k in cpu_params.keys():
        cpu_param = cpu_params.get(k)
        gpu_param = gpu_params.get(k)
        gpu_param.set_data(cpu_param.data().as_in_context(mx.gpu()))
    cpu_data = mx.np.array(data, ctx=mx.cpu())
    for _ in range(5):
        # Run inference.
        with autograd.record(train_mode=False):
            cpu_out = cpu_model(cpu_data)
            gpu_out = gpu_model(gpu_data)
        max_val = np.max(np.abs(cpu_out.asnumpy()))
        gpu_max_val = np.max(np.abs(gpu_out.asnumpy()))
        eprint(f'{model_name}: CPU {max_val}, GPU {gpu_max_val}')
        assert_almost_equal(cpu_out / max_val, gpu_out / gpu_max_val)
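

# A sketch of the normalized comparison used above, factored out for
# reference (illustrative only; the tests keep their inline asserts, and the
# helper name and default tolerances here are ours):
def _assert_normalized_close(cpu_out, gpu_out, rtol=1e-3, atol=1e-3):
    # Normalize each output by its own maximum magnitude so the comparison
    # tolerance is relative to the scale of the activations.
    cpu_max = np.max(np.abs(cpu_out.asnumpy()))
    gpu_max = np.max(np.abs(gpu_out.asnumpy()))
    assert_almost_equal(cpu_out / cpu_max, gpu_out / gpu_max, rtol=rtol, atol=atol)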


def get_nn_model(name):
    if "densenet" in name:
        # Disable dropout so repeated runs are deterministic.
        return get_model(name, dropout=0)
    else:
        return get_model(name)
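
# To reproduce the flaky failure described below, the relevant RNGs can be
# pinned before running the tests; a minimal sketch (illustrative only, not
# enabled here because it would fix every run's inputs):
#   mx.random.seed(1521019752)
#   np.random.seed(1521019752)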
# Seed 1521019752 produced a failure on the Py2 oneDNN-GPU CI runner
# on 2/16/2018 that was not reproducible. The problem could be timing-related
# or caused by non-deterministic algorithm selection.
@mx.util.use_np
@pytest.mark.serial
def test_training():
    # We use network models without dropout for testing.
    # TODO(zhengda) mobilenet can't pass this test even without oneDNN.
    all_models = ['resnet18_v1', 'densenet121']
    batch_size = 10
    download_data()
    dataIter = mx.io.ImageRecordIter(
        path_imgrec=VAL_DATA,
        label_width=1,
        preprocess_threads=1,
        batch_size=batch_size,
        data_shape=(3, 224, 224),
        label_name='softmax_label',
        rand_crop=False,
        rand_mirror=False)
    data_batch = dataIter.next()
    data = data_batch.data[0]
    label = data_batch.label[0]
    gpu_data = data.as_in_context(mx.gpu())
    gpu_label = label.as_in_context(mx.gpu())
    softmax_cross_entropy = mx.gluon.loss.SoftmaxCrossEntropyLoss()
    for model_name in all_models:
        eprint(f'testing {model_name}')
        # Create each model and run it once so that deferred initialization
        # materializes all parameters.
        cpu_model = get_nn_model(model_name)
        cpu_model.initialize(ctx=mx.cpu())
        cpu_model(mx.np.array(data, ctx=mx.cpu()))
        gpu_model = get_nn_model(model_name)
        gpu_model.initialize(ctx=mx.gpu())
        gpu_model(mx.np.array(data, ctx=mx.gpu()))
        # Force the two models to have the same parameters (the _sync_params
        # sketch above shows this pattern factored out).
        cpu_params = cpu_model.collect_params()
        gpu_params = gpu_model.collect_params()
        for k in cpu_params.keys():
            cpu_param = cpu_params.get(k)
            gpu_param = gpu_params.get(k)
            gpu_param.set_data(cpu_param.data().as_in_context(mx.gpu()))
        cpu_trainer = mx.gluon.Trainer(cpu_params, 'sgd', {'learning_rate': 0.1})
        gpu_trainer = mx.gluon.Trainer(gpu_params, 'sgd', {'learning_rate': 0.1})
        # Run forward and backward once.
        with autograd.record():
            cpu_out = cpu_model(mx.np.array(data, ctx=mx.cpu()))
            gpu_out = gpu_model(gpu_data)
            cpu_loss = softmax_cross_entropy(cpu_out, label)
            gpu_loss = softmax_cross_entropy(gpu_out, gpu_label)
        max_val = np.max(np.abs(cpu_out.asnumpy()))
        gpu_max_val = np.max(np.abs(gpu_out.asnumpy()))
        eprint(f'{model_name}: CPU {max_val}, GPU {gpu_max_val}')
        assert_almost_equal(cpu_out / max_val, gpu_out / max_val, rtol=1e-3, atol=1e-3)
        cpu_loss.backward()
        gpu_loss.backward()
        cpu_trainer.step(batch_size)
        gpu_trainer.step(batch_size)
        # Compare the parameters of the two models; only parameters from
        # "stage3" onwards are checked.
        start_test = False
        for k in cpu_params.keys():
            if "stage3" in k:
                start_test = True
            if start_test:
                cpu_param = cpu_params.get(k)
                gpu_param = gpu_params.get(k)
                assert_almost_equal(cpu_param.data(), gpu_param.data(), rtol=1e-3, atol=1e-3)
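

# These tests are normally collected by pytest on a GPU-enabled host; a
# minimal sketch for running this file directly (pytest is imported above):
if __name__ == '__main__':
    sys.exit(pytest.main([__file__, '-v']))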