Merge pull request #836 from lzjpaul/cnn-autograd
Update CNN autograd examples
diff --git a/examples/cnn/autograd/mnist_cnn.py b/examples/cnn/autograd/mnist_cnn.py
index ff2e1dc..16752ce 100644
--- a/examples/cnn/autograd/mnist_cnn.py
+++ b/examples/cnn/autograd/mnist_cnn.py
@@ -126,7 +126,7 @@
return np.array(a, "int").sum()
-# Function to all reduce NUMPY Accuracy and Loss from Multiple Devices
+# Function to all-reduce the NumPy accuracy and loss from multiple devices
def reduce_variable(variable, dist_opt, reducer):
reducer.copy_from_numpy(variable)
dist_opt.all_reduce(reducer.data)
@@ -171,7 +171,7 @@
topK=False,
corr=True):
- # Define the hypermeters good for the mnist_cnn
+    # Define the hyperparameters for mnist_cnn
max_epoch = 10
batch_size = 64
sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
@@ -188,12 +188,13 @@
test_x = test_x / 255
if DIST:
- # For Distributed GPU Training
+ # For distributed GPU training
sgd = opt.DistOpt(sgd,
nccl_id=nccl_id,
local_rank=local_rank,
world_size=world_size)
dev = device.create_cuda_gpu_on(sgd.local_rank)
+
# Dataset partition for distributed training
train_x, train_y = data_partition(train_x, train_y, sgd.global_rank,
sgd.world_size)
@@ -201,11 +202,11 @@
sgd.world_size)
world_size = sgd.world_size
else:
- # For Single GPU
+ # For single GPU
dev = device.create_cuda_gpu()
world_size = 1
- # create model
+ # Create model
model = CNN()
tx = tensor.Tensor((batch_size, 1, IMG_SIZE, IMG_SIZE), dev, tensor.float32)
@@ -227,7 +228,7 @@
for p, g in autograd.backward(loss):
synchronize(p, sgd)
- # Training and Evaulation Loop
+    # Training and evaluation loop
for epoch in range(max_epoch):
start_time = time.time()
np.random.shuffle(idx)
@@ -235,7 +236,7 @@
if ((DIST == False) or (sgd.global_rank == 0)):
print('Starting Epoch %d:' % (epoch))
- # Training Phase
+ # Training phase
autograd.training = True
train_correct = np.zeros(shape=[1], dtype=np.float32)
test_correct = np.zeros(shape=[1], dtype=np.float32)
@@ -263,19 +264,19 @@
sgd(loss)
if DIST:
- # Reduce the Evaluation Accuracy and Loss from Multiple Devices
+ # Reduce the evaluation accuracy and loss from multiple devices
reducer = tensor.Tensor((1,), dev, tensor.float32)
train_correct = reduce_variable(train_correct, sgd, reducer)
train_loss = reduce_variable(train_loss, sgd, reducer)
- # Output the Training Loss and Accuracy
+ # Output the training loss and accuracy
if ((DIST == False) or (sgd.global_rank == 0)):
print('Training loss = %f, training accuracy = %f' %
(train_loss, train_correct /
(num_train_batch * batch_size * world_size)),
flush=True)
- # Evaluation Phase
+ # Evaluation phase
autograd.training = False
for b in range(num_test_batch):
x = test_x[b * batch_size:(b + 1) * batch_size]
@@ -286,10 +287,10 @@
test_correct += accuracy(tensor.to_numpy(out_test), y)
if DIST:
- # Reduce the Evaulation Accuracy from Multiple Devices
+            # Reduce the evaluation accuracy from multiple devices
test_correct = reduce_variable(test_correct, sgd, reducer)
- # Output the Evaluation Accuracy
+ # Output the evaluation accuracy
if ((DIST == False) or (sgd.global_rank == 0)):
print('Evaluation accuracy = %f, Elapsed Time = %fs' %
(test_correct / (num_test_batch * batch_size * world_size),
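
For context, reduce_variable (touched in the first hunk above) is the piece that lets host-side NumPy metrics ride the NCCL all-reduce: it stages the value in a one-element GPU tensor, reduces it across ranks, and copies it back. A minimal sketch of the full helper, where the wait() and to_numpy() steps are assumed from the surrounding SINGA API rather than shown in this hunk:

    # Sketch: all-reduce a host-side NumPy metric across all ranks.
    # 'reducer' is a one-element GPU tensor reused as a staging buffer.
    def reduce_variable(variable, dist_opt, reducer):
        reducer.copy_from_numpy(variable)  # stage the host value on the device
        dist_opt.all_reduce(reducer.data)  # sum across ranks via NCCL
        dist_opt.wait()                    # assumed: block until the collective completes
        return tensor.to_numpy(reducer)    # copy the reduced value back to the host

Each epoch then calls, e.g., train_correct = reduce_variable(train_correct, sgd, reducer), so rank 0 reports statistics for the whole job rather than for its own data shard.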
diff --git a/examples/cnn/autograd/mnist_multiprocess.py b/examples/cnn/autograd/mnist_multiprocess.py
index f5c2763..f51344f 100644
--- a/examples/cnn/autograd/mnist_multiprocess.py
+++ b/examples/cnn/autograd/mnist_multiprocess.py
@@ -26,7 +26,7 @@
# Generate a NCCL ID to be used for collective communication
nccl_id = singa.NcclIdHolder()
- # number of GPUs to be used
+ # Number of GPUs to be used
world_size = int(sys.argv[1])
process = []
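
Immediately after this hunk (not shown in the diff), the script starts one Python process per GPU, all sharing the single NCCL ID generated above. A hedged sketch of that launcher, assuming train_mnist_cnn(DIST, local_rank, world_size, nccl_id) as the entry point imported from mnist_cnn.py:

    # Sketch: spawn one training process per GPU; the shared nccl_id lets the
    # ranks form a single NCCL communicator. Entry-point signature is assumed.
    import multiprocessing

    for local_rank in range(world_size):
        process.append(
            multiprocessing.Process(target=train_mnist_cnn,
                                    args=(True, local_rank, world_size, nccl_id)))

    for p in process:
        p.start()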
diff --git a/examples/cnn/autograd/resnet_cifar10.py b/examples/cnn/autograd/resnet_cifar10.py
index 14005bc..3c6876f 100644
--- a/examples/cnn/autograd/resnet_cifar10.py
+++ b/examples/cnn/autograd/resnet_cifar10.py
@@ -129,7 +129,7 @@
return categorical
-# Function to all reduce NUMPY Accuracy and Loss from Multiple Devices
+# Function to all-reduce the NumPy accuracy and loss from multiple devices
def reduce_variable(variable, dist_opt, reducer):
reducer.copy_from_numpy(variable)
dist_opt.all_reduce(reducer.data)
@@ -159,7 +159,7 @@
nccl_id=None,
partial_update=False):
- # Define the hypermeters good for the train_cifar10
+    # Define the hyperparameters for train_cifar10
sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
max_epoch = 5
batch_size = 32
@@ -171,12 +171,13 @@
num_classes = 10
if DIST:
- # For Distributed GPU Training
+ # For distributed GPU training
sgd = opt.DistOpt(sgd,
nccl_id=nccl_id,
local_rank=local_rank,
world_size=world_size)
dev = device.create_cuda_gpu_on(sgd.local_rank)
+
# Dataset partition for distributed training
train_x, train_y = data_partition(train_x, train_y, sgd.global_rank,
sgd.world_size)
@@ -184,7 +185,7 @@
sgd.world_size)
world_size = sgd.world_size
else:
- # For Single GPU
+ # For single GPU
dev = device.create_cuda_gpu()
world_size = 1
@@ -219,7 +220,7 @@
if ((DIST == False) or (sgd.global_rank == 0)):
print('Starting Epoch %d:' % (epoch))
- #Training Phase
+        # Training phase
autograd.training = True
train_correct = np.zeros(shape=[1], dtype=np.float32)
test_correct = np.zeros(shape=[1], dtype=np.float32)
@@ -244,12 +245,12 @@
sgd.backward_and_partial_update(loss)
if DIST:
- # Reduce the Evaluation Accuracy and Loss from Multiple Devices
+ # Reduce the evaluation accuracy and loss from multiple devices
reducer = tensor.Tensor((1,), dev, tensor.float32)
train_correct = reduce_variable(train_correct, sgd, reducer)
train_loss = reduce_variable(train_loss, sgd, reducer)
- # Output the Training Loss and Accuracy
+ # Output the training loss and accuracy
if ((DIST == False) or (sgd.global_rank == 0)):
print('Training loss = %f, training accuracy = %f' %
(train_loss, train_correct /
@@ -257,11 +258,11 @@
flush=True)
if partial_update:
- # sychronize parameters before evaluation phase
+            # Synchronize parameters before evaluation phase
for p in param:
synchronize(p, sgd)
- #Evaulation Phase
+        # Evaluation phase
autograd.training = False
for b in range(num_test_batch):
x = test_x[b * batch_size:(b + 1) * batch_size]
@@ -274,10 +275,10 @@
to_categorical(y, num_classes))
if DIST:
- # Reduce the Evaulation Accuracy from Multiple Devices
+            # Reduce the evaluation accuracy from multiple devices
test_correct = reduce_variable(test_correct, sgd, reducer)
- # Output the Evaluation Accuracy
+ # Output the evaluation accuracy
if ((DIST == False) or (sgd.global_rank == 0)):
print('Evaluation accuracy = %f, Elapsed Time = %fs' %
(test_correct / (num_test_batch * batch_size * world_size),
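
When partial_update is set, each rank updates only its own slice of the parameters per step, which is why the loop above re-synchronizes every parameter before evaluation. A sketch of what the synchronize helper plausibly does, assuming DistOpt sums in place and exposes world_size (the wait and averaging steps are assumptions, not shown in this diff):

    # Sketch: bring a partially updated parameter back in sync across ranks.
    def synchronize(tensor, dist_opt):
        dist_opt.all_reduce(tensor.data)  # sum the parameter over all ranks
        dist_opt.wait()                   # assumed synchronization barrier
        tensor /= dist_opt.world_size     # average so every rank holds identical values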
diff --git a/examples/cnn/autograd/sparsification_mnist.py b/examples/cnn/autograd/sparsification_mnist.py
index cc9b585..315605a 100644
--- a/examples/cnn/autograd/sparsification_mnist.py
+++ b/examples/cnn/autograd/sparsification_mnist.py
@@ -26,7 +26,7 @@
# Generate a NCCL ID to be used for collective communication
nccl_id = singa.NcclIdHolder()
- # number of GPUs to be used
+ # Number of GPUs to be used
world_size = int(sys.argv[1])
# Use sparsification with parameters
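
The sparsification example reuses the same multiprocess launcher but threads the sparsification settings (spars, topK, corr, visible in the mnist_cnn.py signature above) through to the distributed optimizer. A hedged sketch of the update call those parameters typically drive; backward_and_sparse_update is an assumption about SINGA's DistOpt API:

    # Sketch: sparsified gradient exchange. Only gradient entries above the
    # threshold (or the top-K fraction when topK=True) are communicated;
    # corr=True keeps a local residual so skipped values accumulate locally
    # instead of being dropped. Method names are assumed, not confirmed here.
    if spars is not None:
        sgd.backward_and_sparse_update(loss, spars=spars, topK=topK, corr=corr)
    else:
        sgd.backward_and_update(loss)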