| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # pylint: skip-file |
| import numpy as np |
| import mxnet as mx |
| import numba |
| import logging |
| |
# The gradient of the multiclass (Crammer-Singer) hinge loss. numba compiles
# plain NumPy code, so the NDArray -> NumPy conversion lives in a thin wrapper
# and the per-sample loop is JIT-compiled in nopython mode.
@numba.jit(nopython=True)
def _mc_hinge_grad_impl(scores, labels):
    n, _ = scores.shape
    grad = np.zeros_like(scores)

    for i in range(n):
        # margin violations 1 + s_j - s_{y_i}, with the true class zeroed out
        score = 1 + scores[i] - scores[i, labels[i]]
        score[labels[i]] = 0
        # push down the most violating class and push up the true class
        ind_pred = score.argmax()
        grad[i, labels[i]] -= 1
        grad[i, ind_pred] += 1

    return grad


def mc_hinge_grad(scores, labels):
    """Gradient of the multiclass hinge loss w.r.t. the scores."""
    return _mc_hinge_grad_impl(scores.asnumpy(), labels.asnumpy().astype(np.int64))
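
# A tiny worked example (hypothetical numbers, not from the original script):
# for one row of scores [0.2, 0.9, 0.1] with true label 0, the margin violations
# (with the true class zeroed) are [0.0, 1.7, 0.9], the most violating class is 1,
# and the gradient row is [-1, 1, 0].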
| |
| if __name__ == '__main__': |
| n_epoch = 10 |
| batch_size = 100 |
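    # train on two GPUs by default; set num_gpu = 0 to fall back to the CPU context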
| num_gpu = 2 |
| contexts = mx.context.cpu() if num_gpu < 1 else [mx.context.gpu(i) for i in range(num_gpu)] |
| |
    # build an MLP: 784 -> 128 -> 64 -> 10; note there is no output loss layer,
    # since the loss is attached as a separate module below
| data = mx.symbol.Variable('data') |
| fc1 = mx.symbol.FullyConnected(data, name='fc1', num_hidden=128) |
| act1 = mx.symbol.Activation(fc1, name='relu1', act_type="relu") |
    fc2 = mx.symbol.FullyConnected(act1, name='fc2', num_hidden=64)
| act2 = mx.symbol.Activation(fc2, name='relu2', act_type="relu") |
| fc3 = mx.symbol.FullyConnected(act2, name='fc3', num_hidden=10) |
| |
| mlp = mx.mod.Module(fc3, context=contexts) |
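    # the loss is a Python module: forward passes the fc3 scores through unchanged,
    # backward calls grad_func on (scores, labels) to produce the input gradient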
| loss = mx.mod.PythonLossModule(grad_func=mc_hinge_grad) |
| |
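    # chain the two modules: take_labels feeds the data iterator's labels to the
    # loss module, and auto_wiring connects the MLP outputs to the loss inputs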
| mod = mx.mod.SequentialModule() \ |
| .add(mlp) \ |
| .add(loss, take_labels=True, auto_wiring=True) |
| |
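    # MNIST iterators; the raw idx files are assumed to already be under data/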
| train_dataiter = mx.io.MNISTIter( |
| image="data/train-images-idx3-ubyte", |
| label="data/train-labels-idx1-ubyte", |
| data_shape=(784,), |
| batch_size=batch_size, shuffle=True, flat=True, silent=False, seed=10) |
| val_dataiter = mx.io.MNISTIter( |
| image="data/t10k-images-idx3-ubyte", |
| label="data/t10k-labels-idx1-ubyte", |
| data_shape=(784,), |
| batch_size=batch_size, shuffle=True, flat=True, silent=False) |
| |
| logging.basicConfig(level=logging.DEBUG) |
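    # train with the default SGD optimizer; only learning rate and momentum are overridden here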
| mod.fit(train_dataiter, eval_data=val_dataiter, |
| optimizer_params={'learning_rate':0.01, 'momentum': 0.9}, |
| num_epoch=n_epoch) |
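
    # A minimal follow-up sketch (not part of the original example): the loss
    # module passes the raw scores through on the forward pass, so argmax-based
    # accuracy on the held-out set should be meaningful.
    print('Validation accuracy:', mod.score(val_dataiter, mx.metric.Accuracy()))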