#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
import os
import re

import boto3
import mxnet as mx
import numpy as np
from mxnet import gluon
from mxnet.gluon import nn
from mxnet.test_utils import assert_almost_equal


# cmp() was removed in Python 3; define an equivalent for compare_versions() below.
def cmp(x, y):
    return (x > y) - (x < y)

# Set fixed random seeds.
mx.random.seed(7)
np.random.seed(7)
logging.basicConfig(level=logging.INFO)

# Get the current MXNet version we are running on.
mxnet_version = mx.__version__
model_bucket_name = 'mxnet-ci-prod-backwards-compatibility-models'
data_folder = 'mxnet-model-backwards-compatibility-data'
backslash = '/'
s3 = boto3.resource('s3')
ctx = mx.cpu(0)
atol_default = 1e-5
rtol_default = 1e-5
def get_model_path(model_name):
return os.path.join(os.getcwd(), 'models', str(mxnet_version), model_name)
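

# Usage sketch (illustrative, not executed; the model name is hypothetical).
# All artifacts for a model are staged under <cwd>/models/<mxnet_version>/<model_name>:
#
#   path = get_model_path('my_model')
#   # -> os.path.join(os.getcwd(), 'models', mxnet_version, 'my_model')
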
def save_inference_results(inference_results, model_name):
    assert isinstance(inference_results, mx.nd.NDArray)
    save_path = os.path.join(get_model_path(model_name), ''.join([model_name, '-inference']))
    mx.npx.savez(save_path, inference=inference_results)


def load_inference_results(model_name):
    inf_dict = mx.npx.load(model_name + '-inference')
    return inf_dict['inference']


def save_data_and_labels(test_data, test_labels, model_name):
    assert isinstance(test_data, mx.nd.NDArray)
    assert isinstance(test_labels, mx.nd.NDArray)
    save_path = os.path.join(get_model_path(model_name), ''.join([model_name, '-data']))
    mx.npx.savez(save_path, data=test_data, labels=test_labels)
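

# Usage sketch for the helpers above (kept as comments so importing this module
# stays free of side effects; names are hypothetical). save_inference_results()
# and save_data_and_labels() write NumPy-style archives under
# get_model_path(model_name), while load_inference_results() expects the
# '<model_name>-inference' file to sit in the current working directory, which
# is where download_model_files_from_s3() below places it:
#
#   create_model_folder('my_model')
#   save_data_and_labels(test_data, test_labels, 'my_model')
#   save_inference_results(net(test_data), 'my_model')
#   ...
#   old_out = load_inference_results('my_model')
#   assert_almost_equal(old_out.asnumpy(), new_out.asnumpy(),
#                       rtol=rtol_default, atol=atol_default)
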
def clean_model_files(files, model_name):
files.append(model_name + '-inference')
files.append(model_name + '-data')
for file in files:
if os.path.isfile(file):
os.remove(file)
def download_model_files_from_s3(model_name, folder_name):
model_files = list()
bucket = s3.Bucket(model_bucket_name)
prefix = folder_name + backslash + model_name
    model_files_meta = list(bucket.objects.filter(Prefix=prefix))
if len(model_files_meta) == 0:
logging.error('No trained models found under path : %s', prefix)
return model_files
for obj in model_files_meta:
file_name = obj.key.split('/')[2]
model_files.append(file_name)
# Download this file
bucket.download_file(obj.key, file_name)
return model_files
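

# Usage sketch (commented out because it performs real S3 downloads; the names
# are hypothetical). Keys are expected to look like '<version>/<model_name>/<file>';
# the files are downloaded into the current working directory and their bare
# file names are returned:
#
#   model_files = download_model_files_from_s3('my_model', '1.8.0')
#   if not model_files:
#       logging.warning('No files found for my_model, skipping')
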
def get_top_level_folders_in_bucket(s3client, bucket_name):
# This function returns the top level folders in the S3Bucket.
# These folders help us to navigate to the trained model files stored for different MXNet versions.
bucket = s3client.Bucket(bucket_name)
result = bucket.meta.client.list_objects(Bucket=bucket.name, Delimiter=backslash)
folder_list = list()
if 'CommonPrefixes' not in result:
logging.error('No trained models found in S3 bucket : {} for this file. '
'Please train the models and run inference again'.format(bucket_name))
raise Exception("No trained models found in S3 bucket : {} for this file. "
"Please train the models and run inference again".format(bucket_name))
for obj in result['CommonPrefixes']:
folder_name = obj['Prefix'].strip(backslash)
# We only compare models from the same major versions. i.e. 1.x.x compared with latest 1.y.y etc
if str(folder_name).split('.')[0] != str(mxnet_version).split('.')[0]:
continue
        # The top-level folders are named after the MXNet version used to train the models;
        # skip the data folder here.
if folder_name == data_folder:
continue
folder_list.append(obj['Prefix'].strip(backslash))
if len(folder_list) == 0:
logging.error('No trained models found in S3 bucket : {} for this file. '
'Please train the models and run inference again'.format(bucket_name))
raise Exception("No trained models found in S3 bucket : {} for this file. "
"Please train the models and run inference again".format(bucket_name))
return folder_list
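

# Usage sketch (commented out because it issues real S3 requests). The returned
# folders are the MXNet versions, within the same major release as this build,
# whose trained models should be checked:
#
#   for folder in get_top_level_folders_in_bucket(s3, model_bucket_name):
#       files = download_model_files_from_s3('my_model', folder)  # hypothetical model
#       ...
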
def create_model_folder(model_name):
path = get_model_path(model_name)
if not os.path.exists(path):
os.makedirs(path)
@mx.util.use_np
class Net(gluon.Block):
def __init__(self, **kwargs):
super(Net, self).__init__(**kwargs)
self.conv1 = nn.Conv2D(20, kernel_size=(5, 5))
self.pool1 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
self.conv2 = nn.Conv2D(50, kernel_size=(5, 5))
self.pool2 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
self.fc1 = nn.Dense(500)
self.fc2 = nn.Dense(2)
def forward(self, x):
x = self.pool1(mx.np.tanh(self.conv1(x)))
x = self.pool2(mx.np.tanh(self.conv2(x)))
        # nn.Dense flattens every axis except the batch axis by default, so the
        # pooled feature map can be passed to the dense layers directly.
x = mx.np.tanh(self.fc1(x))
x = mx.np.tanh(self.fc2(x))
return x
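

# Shape sketch for Net (illustrative, kept as a comment so nothing runs at import
# time). With a hypothetical batch of 28x28 single-channel images, the conv/pool
# stages produce a (batch, 50, 4, 4) feature map, which the dense layers flatten
# per sample, ending in a 2-unit output:
#
#   net = Net()
#   net.initialize()
#   out = net(mx.np.random.uniform(size=(4, 1, 28, 28)))  # out.shape == (4, 2)
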
@mx.util.use_np
class HybridNet(gluon.HybridBlock):
def __init__(self, **kwargs):
super(HybridNet, self).__init__(**kwargs)
self.conv1 = nn.Conv2D(20, kernel_size=(5, 5))
self.pool1 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
self.conv2 = nn.Conv2D(50, kernel_size=(5, 5))
self.pool2 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
self.fc1 = nn.Dense(500)
self.fc2 = nn.Dense(2)
def forward(self, x):
x = self.pool1(mx.np.tanh(self.conv1(x)))
x = self.pool2(mx.np.tanh(self.conv2(x)))
        # nn.Dense flattens every axis except the batch axis by default, so the
        # pooled feature map can be passed to the dense layers directly.
x = mx.np.tanh(self.fc1(x))
x = mx.np.tanh(self.fc2(x))
return x
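

# Usage sketch (illustrative, not executed). HybridNet mirrors Net but, being a
# HybridBlock, it can be hybridized and exported to a symbol/params pair for the
# backwards-compatibility checks:
#
#   net = HybridNet()
#   net.initialize()
#   net.hybridize()
#   net(mx.np.random.uniform(size=(4, 1, 28, 28)))  # run once to build the graph
#   net.export('my_hybrid_model')                   # hypothetical file prefix
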
class SimpleLSTMModel(gluon.Block):
def __init__(self, **kwargs):
super(SimpleLSTMModel, self).__init__(**kwargs)
self.model = mx.gluon.nn.Sequential()
self.model.add(mx.gluon.nn.Embedding(30, 10))
self.model.add(mx.gluon.rnn.LSTM(20))
self.model.add(mx.gluon.nn.Dense(100))
self.model.add(mx.gluon.nn.Dropout(0.5))
self.model.add(mx.gluon.nn.Dense(2, flatten=True, activation='tanh'))
def forward(self, x):
return self.model(x)
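

# Usage sketch (illustrative, not executed). The embedding layer expects token
# ids smaller than 30 (its vocabulary size); everything downstream is a plain
# Sequential, so the model is driven like any other gluon Block:
#
#   model = SimpleLSTMModel()
#   model.initialize()
#   out = model(mx.np.random.uniform(0, 30, size=(8, 4)))  # token-id-like input
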
def compare_versions(version1, version2):
    '''
    Compare two dotted version strings, returning a cmp()-style result.
    Adapted from https://stackoverflow.com/questions/1714027/version-number-comparison-in-python
    '''
    def normalize(v):
        return [int(x) for x in re.sub(r'(\.0+)*$', '', v).split('.')]
return cmp(normalize(version1), normalize(version2))
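

# Usage sketch (illustrative): compare_versions() follows cmp() semantics and
# returns a negative, zero, or positive number. Trailing '.0' components are
# stripped, so '1.6' and '1.6.0' compare equal:
#
#   compare_versions('1.5.1', '1.6.0')  # -> -1
#   compare_versions('1.6', '1.6.0')    # -> 0
#   compare_versions('2.0.0', '1.9.1')  # -> 1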