# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
""" CIFAR10 dataset is at https://www.cs.toronto.edu/~kriz/cifar.html.
It includes 5 binary dataset, each contains 10000 images. 1 row (1 image)
includes 1 label & 3072 pixels. 3072 pixels are 3 channels of a 32x32 image
"""
from __future__ import division
from __future__ import print_function
from builtins import zip
from builtins import str
from builtins import range
try:
    import cPickle as pickle  # Python 2: prefer the faster C implementation
except ImportError:
    import pickle  # Python 3
import numpy as np
import os
import argparse
from tqdm import trange
from singa import utils
from singa import optimizer
from singa import device
from singa import tensor
from caffe import caffe_net
import cnn
import vgg
import resnet
def load_dataset(filepath):
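    """Load one CIFAR10 batch file and return (image, label) numpy arrays.

    Images have shape (N, 3, 32, 32) and labels have shape (N, 1).
    """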
print('Loading data file %s' % filepath)
with open(filepath, 'rb') as fd:
        try:
            # Python 3: the batch files were pickled by Python 2, so decode
            # byte strings as latin1.
            cifar10 = pickle.load(fd, encoding='latin1')
        except TypeError:
            # Python 2's pickle.load does not accept an encoding argument.
            cifar10 = pickle.load(fd)
image = cifar10['data'].astype(dtype=np.uint8)
image = image.reshape((-1, 3, 32, 32))
label = np.asarray(cifar10['labels'], dtype=np.uint8)
label = label.reshape(label.size, 1)
return image, label
def load_train_data(dir_path, num_batches=5):
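    """Load num_batches training batch files (10000 images each) and return
    the stacked float32 images and int32 labels."""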
labels = []
batchsize = 10000
images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
for did in range(1, num_batches + 1):
        fname_train_data = os.path.join(dir_path, "data_batch_{}".format(did))
image, label = load_dataset(fname_train_data)
images[(did - 1) * batchsize:did * batchsize] = image
labels.extend(label)
images = np.array(images, dtype=np.float32)
labels = np.array(labels, dtype=np.int32)
return images, labels
def load_test_data(dir_path):
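    """Load the test batch; images as float32, labels as int32."""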
    images, labels = load_dataset(os.path.join(dir_path, "test_batch"))
return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
def normalize_for_vgg(train_x, test_x):
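    """Standardize both splits with the global mean/std of the training set."""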
mean = train_x.mean()
std = train_x.std()
train_x -= mean
test_x -= mean
train_x /= std
test_x /= std
return train_x, test_x
def normalize_for_alexnet(train_x, test_x):
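    """Subtract the mean image of the training set (AlexNet-style)."""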
mean = np.average(train_x, axis=0)
train_x -= mean
test_x -= mean
return train_x, test_x
def vgg_lr(epoch):
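    """Start from 0.1 and halve the learning rate every 25 epochs."""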
return 0.1 / float(1 << (epoch // 25))
def alexnet_lr(epoch):
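    """Step schedule: 1e-3 before epoch 120, 1e-4 before 130, 1e-5 after."""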
if epoch < 120:
return 0.001
elif epoch < 130:
return 0.0001
else:
return 0.00001
def resnet_lr(epoch):
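    """Step schedule: 0.1 before epoch 81, 0.01 before 122, 0.001 after."""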
if epoch < 81:
return 0.1
elif epoch < 122:
return 0.01
else:
return 0.001
def caffe_lr(epoch):
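    """Step schedule for the cifar10_quick config: 1e-3 for the first 8
    epochs, then 1e-4."""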
if epoch < 8:
return 0.001
else:
return 0.0001
def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
use_cpu=False):
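    """Train net with SGD on CIFAR10.

    data is a (train_x, train_y, test_x, test_y) tuple and get_lr maps an
    epoch index to a learning rate. The net is evaluated on the test set
    after every epoch, and its parameters are checkpointed to 'model' at
    the end of training.
    """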
    print('Start initialization............')
if use_cpu:
print('Using CPU')
dev = device.get_default_device()
else:
print('Using GPU')
dev = device.create_cuda_gpu()
net.to_device(dev)
opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
for (p, specs) in zip(net.param_names(), net.param_specs()):
opt.register(p, specs)
tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
ty = tensor.Tensor((batch_size,), dev, tensor.int32)
train_x, train_y, test_x, test_y = data
num_train_batch = train_x.shape[0] // batch_size
num_test_batch = test_x.shape[0] // batch_size
idx = np.arange(train_x.shape[0], dtype=np.int32)
for epoch in range(max_epoch):
np.random.shuffle(idx)
loss, acc = 0.0, 0.0
with trange(num_train_batch) as t:
t.set_description('Epoch={}'.format(epoch))
for b in t:
x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
tx.copy_from_numpy(x)
ty.copy_from_numpy(y)
grads, (l, a) = net.train(tx, ty)
loss += l
acc += a
for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
t.set_postfix(loss=l, accuracy=a)
info = 'Training loss = %f, training accuracy = %f, lr = %f' \
% ((loss / num_train_batch), (acc / num_train_batch), get_lr(epoch))
print(info)
loss, acc = 0.0, 0.0
for b in range(num_test_batch):
x = test_x[b * batch_size: (b + 1) * batch_size]
y = test_y[b * batch_size: (b + 1) * batch_size]
tx.copy_from_numpy(x)
ty.copy_from_numpy(y)
l, a = net.evaluate(tx, ty)
loss += l
acc += a
print('Test loss = %f, test accuracy = %f' %
((loss / num_test_batch), (acc / num_test_batch)))
net.save('model', 20) # save model params into checkpoint file
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Train dcnn for cifar10')
    parser.add_argument('model', choices=['vgg', 'cnn', 'resnet', 'caffe'],
                        nargs='?', default='vgg')
    parser.add_argument('data', nargs='?', default='cifar-10-batches-py')
parser.add_argument('--use_cpu', action='store_true')
args = parser.parse_args()
    assert os.path.exists(args.data), \
        'Please download the cifar10 dataset via "python download_data.py py"'
print('Loading data ..................')
train_x, train_y = load_train_data(args.data)
test_x, test_y = load_test_data(args.data)
if args.model == 'caffe':
train_x, test_x = normalize_for_alexnet(train_x, test_x)
net = caffe_net.create_net(args.use_cpu)
# for cifar10_full_train_test.prototxt
train((train_x, train_y, test_x, test_y), net, 160, alexnet_lr, 0.004,
use_cpu=args.use_cpu)
# for cifar10_quick_train_test.prototxt
# train((train_x, train_y, test_x, test_y), net, 18, caffe_lr, 0.004,
# use_cpu=args.use_cpu)
elif args.model == 'cnn':
train_x, test_x = normalize_for_alexnet(train_x, test_x)
net = cnn.create_net(args.use_cpu)
train((train_x, train_y, test_x, test_y), net, 2, alexnet_lr, 0.004,
use_cpu=args.use_cpu)
elif args.model == 'vgg':
train_x, test_x = normalize_for_vgg(train_x, test_x)
net = vgg.create_net(args.use_cpu)
train((train_x, train_y, test_x, test_y), net, 250, vgg_lr, 0.0005,
use_cpu=args.use_cpu)
else:
train_x, test_x = normalize_for_alexnet(train_x, test_x)
net = resnet.create_net(args.use_cpu)
train((train_x, train_y, test_x, test_y), net, 200, resnet_lr, 1e-4,
use_cpu=args.use_cpu)