# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
import argparse
import os
import pickle
import sys

import numpy as np

from singa import device
from singa import opt
from singa import tensor

from imdb_data import pad_batch_2vec, preprocessed_imdb_data_fp
from imdb_model import IMDBModel
if not os.path.isfile(preprocessed_imdb_data_fp):
    sys.exit(
        "IMDB dataset not found; run `python3 examples/rnn/imdb_data.py` to prepare the data"
    )
# load preprocessed data
imdb_processed = None
with open(preprocessed_imdb_data_fp, 'rb') as handle:
    imdb_processed = pickle.load(handle)
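# `imdb_processed` is expected to be a dict holding the preprocessed splits
# and the embedding matrix; the keys used below are 'train', 'val' and
# 'embed_weights'.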
# parse the command-line config (max epochs, batch size, learning rate,
# device, model variant) for single-GPU training
parser = argparse.ArgumentParser()
parser.add_argument('-m',
                    '--max-epoch',
                    default=5,
                    type=int,
                    help='maximum number of training epochs',
                    dest='max_epoch')
parser.add_argument('-b',
                    '--batch-size',
                    default=128,
                    type=int,
                    help='batch size',
                    dest='bs')
parser.add_argument('-l',
                    '--learning-rate',
                    default=0.01,
                    type=float,
                    help='initial learning rate',
                    dest='lr')
# determine which GPU to use
parser.add_argument('-i',
                    '--device-id',
                    default=0,
                    type=int,
                    help='which GPU to use',
                    dest='device_id')
# training params
parser.add_argument('--mode',
                    default='lstm',
                    help='RNN cell type: relu, tanh, lstm or gru',
                    dest='mode')
parser.add_argument('-s',
                    '--return-sequences',
                    default=False,
                    action='store_true',
                    help='return the full output sequence',
                    dest='return_sequences')
parser.add_argument('-d',
                    '--bidirectional',
                    default=False,
                    action='store_true',
                    help='use a bidirectional RNN',
                    dest='bidirectional')
parser.add_argument('-n',
                    '--num-layers',
                    default=2,
                    type=int,
                    help='number of stacked RNN layers',
                    dest='num_layers')
args = parser.parse_args()
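# Example invocation (the script path is illustrative; the flags are the
# ones defined above):
#   python3 examples/rnn/imdb_train.py -m 10 -b 64 --mode gru --bidirectional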
# parameters
seq_limit = 50
embed_size = 300
hid = 32
# gpu device
dev = device.create_cuda_gpu_on(args.device_id)
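# NOTE: a CUDA-enabled SINGA build is assumed above; on a CPU-only build the
# default host device could be used instead, e.g.:
#   dev = device.get_default_device()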
# create placeholder tensors (reused for every batch)
tx = tensor.Tensor((args.bs, seq_limit, embed_size), dev, tensor.float32)
ty = tensor.Tensor((args.bs, 2), dev, tensor.float32)
tx.gaussian(0, 1)
ty.gaussian(0, 1)
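# The placeholders are allocated on the device once and filled with random
# values only so the model can be compiled below; each training batch then
# overwrites them in place via copy_from_numpy, avoiding per-iteration
# device allocations.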
# create model
m = IMDBModel(hid,
              mode=args.mode,
              return_sequences=args.return_sequences,
              bidirectional=args.bidirectional,
              num_layers=args.num_layers)
m.set_opt(opt.SGD(args.lr, 0.9))
m.compile([tx], is_train=True, use_graph=True, sequential=False)
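# With use_graph=True, SINGA buffers the operations of the first
# forward/backward pass into a computational graph and replays it on later
# iterations; sequential=False leaves the scheduler free to reorder
# independent operations (a brief note, based on SINGA's module API).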
# training
m.train()
x_train, y_onehot_train, y_train = pad_batch_2vec(
    imdb_processed['train'], seq_limit, imdb_processed['embed_weights'])
x_test, y_onehot_test, y_test = pad_batch_2vec(
    imdb_processed['val'], seq_limit, imdb_processed['embed_weights'])
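# pad_batch_2vec (from imdb_data.py) is assumed to clip/pad each review to
# seq_limit tokens and embed the tokens with 'embed_weights', returning the
# embedded batch of shape (N, seq_limit, embed_size), one-hot labels of
# shape (N, 2), and integer class labels.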
for epoch in range(args.max_epoch):
    i = 0
    loss_sum = 0
    correct = 0
    trials = 0
    while (i + 1) * args.bs < len(x_train):
        l_idx = i * args.bs
        r_idx = l_idx + args.bs
        x_batch = x_train[l_idx:r_idx]
        y_onehot_batch = y_onehot_train[l_idx:r_idx]
        y_batch = y_train[l_idx:r_idx]
        i += 1
        # reuse the placeholders: copy the batch onto the device
        tx.copy_from_numpy(x_batch)
        ty.copy_from_numpy(y_onehot_batch)
        # train on one batch
        out, loss = m(tx, ty)
        # accumulate loss and accuracy statistics
        loss_sum += tensor.to_numpy(loss)
        scores = tensor.to_numpy(out)
        correct += (y_batch == np.argmax(scores, 1)).sum()
        trials += len(y_batch)
    # every batch holds exactly args.bs samples, so trials / args.bs is the
    # number of batches seen this epoch
    print("epoch %d loss %s; acc %.3f" %
          (epoch, loss_sum / (trials / args.bs), correct / trials))
# testing
m.eval()
i = 0
correct = 0
trials = 0
while (i + 1) * args.bs < len(x_test):
    l_idx = i * args.bs
    r_idx = l_idx + args.bs
    x_batch = x_test[l_idx:r_idx]
    y_onehot_batch = y_onehot_test[l_idx:r_idx]
    y_batch = y_test[l_idx:r_idx]
    i += 1
    # reuse the same device tensors
    tx.copy_from_numpy(x_batch)
    ty.copy_from_numpy(y_onehot_batch)
    # forward pass only (no labels, no parameter update)
    out = m(tx)
    # count correct predictions
    scores = tensor.to_numpy(out)
    correct += (y_batch == np.argmax(scores, 1)).sum()
    trials += len(y_batch)
print("eval acc %.3f" % (correct / trials))