tests/nightly/test_optimizer.py - mxnet - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import mxnet as mx

 import sys
 import os
 curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
 sys.path.insert(0, os.path.join(curr_path, '../unittest'))
 from common import setup_module, with_seed

 # This script tests the efficiency of LARS.
 # We train LeNet-5 at batch size 8000 for 10 epochs and expect above 98% accuracy,
 # which is not doable with simple SGD + momentum (from what has been tested so far).

 def lenet5():
     """Build the LeNet-5 network as an MXNet symbol.

     Two convolution -> tanh -> 2x2 max-pooling stages (5x5 kernels with 20
     and then 50 filters) are followed by a flatten, a 500-unit fully
     connected layer with tanh, a 10-unit fully connected layer and a
     softmax output named 'softmax'.

     Returns the symbol produced by ``mx.sym.SoftmaxOutput``.
     """
     #pylint: disable=no-member
     net = mx.sym.Variable('data')
     # Convolutional stages: first with 20 filters, second with 50
     for filters in (20, 50):
         net = mx.sym.Convolution(data=net, kernel=(5, 5), num_filter=filters)
         net = mx.sym.Activation(data=net, act_type="tanh")
         net = mx.sym.Pooling(data=net, pool_type="max",
                              kernel=(2, 2), stride=(2, 2))
     # First fully connected layer
     net = mx.sym.Flatten(data=net)
     net = mx.sym.FullyConnected(data=net, num_hidden=500)
     net = mx.sym.Activation(data=net, act_type="tanh")
     # Second fully connected layer
     net = mx.sym.FullyConnected(data=net, num_hidden=10)
     # Loss
     net = mx.sym.SoftmaxOutput(data=net, name='softmax')
     #pylint: enable=no-member
     return net

 @with_seed()
 def test_lars():
     """Train LeNet-5 on MNIST with the 'lars' optimizer and check accuracy.

     Fits the model for 10 epochs at batch size 8000 on GPU 0, using a
     polynomial learning-rate scheduler whose warmup covers one epoch of
     iterations, then asserts that the accuracy scored on the MNIST test
     data is above 0.98.
     """
     num_epochs, warmup_epochs = 10, 1
     batch_size = 8000
     base_lr = 0.01

     mnist = mx.test_utils.get_mnist()
     train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'],
                                    batch_size, shuffle=True)
     test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'],
                                   batch_size)
     lenet_model = mx.mod.Module(lenet5(), context=mx.gpu(0))

     # Whole iterations per epoch; int() drops any fractional remainder
     epoch_it = int(train_iter.num_data / batch_size)
     # LARS works best with Polynomial scheduler and warmup
     scheduler = mx.lr_scheduler.PolyScheduler(
         base_lr=base_lr,
         max_update=epoch_it * num_epochs,
         warmup_steps=epoch_it * warmup_epochs)
     lenet_model.fit(train_iter,
                     eval_data=test_iter,
                     optimizer='lars',
                     optimizer_params={'learning_rate': base_lr,
                                       'lr_scheduler': scheduler,
                                       'momentum': 0.9,
                                       'eta': 14.},
                     eval_metric='acc',
                     num_epoch=num_epochs)

     # Score the fitted model on the test iterator
     metric = mx.metric.Accuracy()
     lenet_model.score(test_iter, metric)
     accuracy = metric.get()[1]
     assert accuracy > 0.98, "LeNet-5 training accuracy on MNIST was too low"

 if __name__ == '__main__':
     # Run directly as a script: hand this module over to the nose runner.
     from nose import runmodule
     runmodule()
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import mxnet as mx

	import sys
	import os
	curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
	sys.path.insert(0, os.path.join(curr_path, '../unittest'))
	from common import setup_module, with_seed

	# This script tests the efficiency of LARS.
	# We train LeNet-5 at batch size 8000 for 10 epochs and expect above 98% accuracy,
	# which is not doable with simple SGD + momentum (from what has been tested so far).

	def lenet5():
	    """Build the LeNet-5 network as an MXNet symbol.

	    Two convolution -> tanh -> 2x2 max-pooling stages (5x5 kernels with 20
	    and then 50 filters) are followed by a flatten, a 500-unit fully
	    connected layer with tanh, a 10-unit fully connected layer and a
	    softmax output named 'softmax'.

	    Returns the symbol produced by ``mx.sym.SoftmaxOutput``.
	    """
	    #pylint: disable=no-member
	    data = mx.sym.Variable('data')
	    # first conv
	    conv1 = mx.sym.Convolution(data=data, kernel=(5, 5), num_filter=20)
	    tanh1 = mx.sym.Activation(data=conv1, act_type="tanh")
	    pool1 = mx.sym.Pooling(data=tanh1, pool_type="max",
	                           kernel=(2, 2), stride=(2, 2))
	    # second conv
	    conv2 = mx.sym.Convolution(data=pool1, kernel=(5, 5), num_filter=50)
	    tanh2 = mx.sym.Activation(data=conv2, act_type="tanh")
	    pool2 = mx.sym.Pooling(data=tanh2, pool_type="max",
	                           kernel=(2, 2), stride=(2, 2))
	    # first fullc
	    flatten = mx.sym.Flatten(data=pool2)
	    fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=500)
	    tanh3 = mx.sym.Activation(data=fc1, act_type="tanh")
	    # second fullc
	    fc2 = mx.sym.FullyConnected(data=tanh3, num_hidden=10)
	    # loss
	    lenet = mx.sym.SoftmaxOutput(data=fc2, name='softmax')
	    #pylint: enable=no-member
	    return lenet

	@with_seed()
	def test_lars():
	    """Train LeNet-5 on MNIST with the 'lars' optimizer and check accuracy.

	    Fits the model for 10 epochs at batch size 8000 on GPU 0, using a
	    polynomial learning-rate scheduler whose warmup covers one epoch of
	    iterations, then asserts that the accuracy scored on the MNIST test
	    data is above 0.98.
	    """
	    num_epochs = 10
	    batch_size = 8000
	    mnist = mx.test_utils.get_mnist()
	    train_iter = mx.io.NDArrayIter(mnist['train_data'],
	                                   mnist['train_label'],
	                                   batch_size,
	                                   shuffle=True)
	    test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
	    ctx = mx.gpu(0)
	    lenet_model = mx.mod.Module(lenet5(), context=ctx)
	    warmup_epochs = 1
	    # Whole iterations per epoch; int() drops any fractional remainder
	    epoch_it = int(train_iter.num_data / batch_size)
	    # LARS works best with Polynomial scheduler and warmup
	    base_lr = 0.01
	    optimizer_params = {
	        'learning_rate': base_lr,
	        'lr_scheduler': mx.lr_scheduler.PolyScheduler(base_lr=base_lr,
	                                                      max_update=epoch_it * num_epochs,
	                                                      warmup_steps=epoch_it * warmup_epochs),
	        'momentum': 0.9,
	        'eta': 14.,
	    }
	    lenet_model.fit(train_iter,
	                    eval_data=test_iter,
	                    optimizer='lars',
	                    optimizer_params=optimizer_params,
	                    eval_metric='acc',
	                    num_epoch=num_epochs)

	    # predict accuracy for lenet
	    acc = mx.metric.Accuracy()
	    lenet_model.score(test_iter, acc)
	    accuracy = acc.get()[1]
	    assert accuracy > 0.98, "LeNet-5 training accuracy on MNIST was too low"

	if __name__ == '__main__':
	    # Run directly as a script: hand this module over to the nose runner.
	    import nose
	    nose.runmodule()