| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
import time

import mxnet as mx
import numpy as np
from mxnet.test_utils import rand_ndarray
| |
| def test_factorization_machine_module(verbose=False): |
| """ Test factorization machine model with sparse operators """ |
| def check_factorization_machine_module(optimizer=None, num_epochs=None): |
| print("check_factorization_machine_module( {} )".format(optimizer)) |
| |
| def fm(factor_size, feature_dim, init): |
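            """Build the factorization machine symbol.

            The score for an input row x is
                y = w1_bias + x . w1_weight
                    + 0.5 * sum_k [ (x . V)_k^2 - sum_i (x_i^2 * V_ik^2) ]
            where the second line is the usual O(d*k) rewrite of the
            pairwise-interaction term sum_{i<j} <v_i, v_j> x_i x_j
            (Rendle, "Factorization Machines", 2010).
            """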
| x = mx.symbol.Variable("data", stype='csr') |
| v = mx.symbol.Variable("v", shape=(feature_dim, factor_size), |
| init=init, stype='row_sparse') |
| |
| w1_weight = mx.symbol.var('w1_weight', shape=(feature_dim, 1), |
| init=init, stype='row_sparse') |
            w1_bias = mx.symbol.var('w1_bias', shape=(1,))
| w1 = mx.symbol.broadcast_add(mx.symbol.dot(x, w1_weight), w1_bias) |
| |
            # pairwise-term pieces: _square_sum fuses square + sum over the
            # factor axis and keeps the row_sparse storage of v
            v_s = mx.symbol._internal._square_sum(data=v, axis=1, keepdims=True)
            x_s = mx.symbol.square(data=x)
            bd_sum = mx.sym.dot(x_s, v_s)

            w2 = mx.symbol.dot(x, v)
            w2_squared = 0.5 * mx.symbol.square(data=w2)
| |
| w_all = mx.symbol.Concat(w1, w2_squared, dim=1) |
| sum1 = mx.symbol.sum(data=w_all, axis=1, keepdims=True) |
| sum2 = 0.5 * mx.symbol.negative(bd_sum) |
| model = mx.sym.elemwise_add(sum1, sum2) |
| |
| y = mx.symbol.Variable("label") |
| model = mx.symbol.LinearRegressionOutput(data=model, label=y) |
| return model |
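
        # Illustrative dense NumPy reference for the symbolic model above
        # (a minimal sketch, not called by the test): assumes dense arrays
        # X (n, d), w (d, 1), V (d, k) and a scalar bias b.
        def fm_score_dense(X, b, w, V):
            linear = X.dot(w) + b
            # same pairwise-interaction identity as in fm()
            pairwise = 0.5 * ((X.dot(V)) ** 2 - (X ** 2).dot(V ** 2)).sum(axis=1, keepdims=True)
            return linear + pairwise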
| |
| # model |
| init = mx.initializer.Normal(sigma=0.01) |
| factor_size = 4 |
| feature_dim = 10000 |
| model = fm(factor_size, feature_dim, init) |
| |
| # data iter |
| num_batches = 5 |
| batch_size = 64 |
| num_samples = batch_size * num_batches |
        # generate random CSR data with ~10% nonzero density
        csr_nd = rand_ndarray((num_samples, feature_dim), 'csr', 0.1)
| label = mx.nd.ones((num_samples,1)) |
        # an alternative is LibSVMIter; see the commented sketch below
| train_iter = mx.io.NDArrayIter(data=csr_nd, |
| label={'label':label}, |
| batch_size=batch_size, |
| last_batch_handle='discard') |
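        # A LibSVMIter-based sketch (assumes a LibSVM-format file at
        # 'data_path'; argument names per mx.io.LibSVMIter, not used here):
        #   train_iter = mx.io.LibSVMIter(data_libsvm=data_path,
        #                                 data_shape=(feature_dim,),
        #                                 batch_size=batch_size)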
| # create module |
| mod = mx.mod.Module(symbol=model, data_names=['data'], label_names=['label']) |
        # allocate memory given the input data and label shapes
| mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label) |
        # initialize parameters with the Normal(sigma=0.01) initializer
| mod.init_params(initializer=init) |
        if optimizer == 'sgd':
            # use sparse SGD with momentum and learning rate 0.01 to train
            sgd = mx.optimizer.SGD(momentum=0.1, clip_gradient=5.0, learning_rate=0.01,
                                   rescale_grad=1.0/batch_size)
            mod.init_optimizer(optimizer=sgd)
            if num_epochs is None:
                num_epochs = 10
            expected_mse = 0.02
        elif optimizer == 'adam':
            # use sparse Adam with learning rate 0.0005 to train
            adam = mx.optimizer.Adam(clip_gradient=5.0, learning_rate=0.0005,
                                     rescale_grad=1.0/batch_size)
            mod.init_optimizer(optimizer=adam)
            if num_epochs is None:
                num_epochs = 10
            expected_mse = 0.05
        elif optimizer == 'adagrad':
            # use sparse AdaGrad with learning rate 0.01 to train
            adagrad = mx.optimizer.AdaGrad(clip_gradient=5.0, learning_rate=0.01,
                                           rescale_grad=1.0/batch_size)
            mod.init_optimizer(optimizer=adagrad)
            if num_epochs is None:
                num_epochs = 20
            expected_mse = 0.09
        else:
            raise AssertionError("Unsupported optimizer type '{}' specified".format(optimizer))
        # use mean squared error (MSE) as the metric
| metric = mx.metric.create('MSE') |
| # train 'num_epochs' epoch |
| for epoch in range(num_epochs): |
| train_iter.reset() |
| metric.reset() |
| for batch in train_iter: |
| mod.forward(batch, is_train=True) # compute predictions |
                mod.update_metric(metric, batch.label)  # accumulate prediction error (MSE)
| mod.backward() # compute gradients |
| mod.update() # update parameters |
| print('Epoch %d, Training %s' % (epoch, metric.get())) |
            if num_epochs > 1:
                assert metric.get()[1] < expected_mse
| |
    if verbose:
        print("============ SGD ==========================")
    start = time.perf_counter()
    check_factorization_machine_module('sgd')
    if verbose:
        print("Duration: {}".format(time.perf_counter() - start))
        print("============ ADAM ==========================")
    start = time.perf_counter()
    check_factorization_machine_module('adam')
    if verbose:
        print("Duration: {}".format(time.perf_counter() - start))
        print("============ ADAGRAD ==========================")
    start = time.perf_counter()
    check_factorization_machine_module('adagrad')
    if verbose:
        print("Duration: {}".format(time.perf_counter() - start))
| |
| # run as a script |
| if __name__ == "__main__": |
| test_factorization_machine_module() |