examples/char-rnn/sample.py - singa - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
 '''Sample characters from the pre-trained model'''
 import sys
 import cPickle as pickle
 import numpy as np
 import argparse

 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
 from singa import tensor
 from singa import device
 from singa.proto import model_pb2


 def _forward_char(rnn, dense, dev, vocab_size, char_idx, hx, cx):
     '''Feed one character through the LSTM and the dense layer.

     Args:
         rnn: the LSTM layer.
         dense: the dense output layer.
         dev: device the one-hot input tensor is moved to.
         vocab_size: width of the one-hot input vector.
         char_idx: index of the character to feed.
         hx, cx: LSTM state tensors passed to the layer after the input.

     Returns:
         (y, hx, cx): the softmax of the dense output, followed by the second
         and third outputs of the LSTM, which the caller feeds back in as the
         states of the next step.
     '''
     x = np.zeros((1, vocab_size), dtype=np.float32)
     x[0, char_idx] = 1
     tx = tensor.from_numpy(x)
     tx.to_device(dev)
     outputs = rnn.forward(model_pb2.kEval, [tx, hx, cx])
     y = tensor.softmax(dense.forward(model_pb2.kEval, outputs[0]))
     return y, outputs[1], outputs[2]


 def sample(model_path, nsamples=100, seed_text='', do_sample=True):
     '''Generate characters from a pickled char-rnn checkpoint and print them.

     Args:
         model_path: path of the pickle file; it must hold the keys 'rnn_w',
             'dense_w', 'dense_b', 'idx_to_char', 'char_to_idx',
             'hidden_size', 'num_stacks' and 'dropout'.
         nsamples: number of characters to generate.
         seed_text: optional text fed through the network first to warm up
             the states; it is echoed to stdout before the generated text.
         do_sample: if True, draw each character from the predicted
             distribution; otherwise take the most probable character.

     Raises:
         KeyError: if seed_text contains a character that is missing from the
             checkpoint's 'char_to_idx' table.
     '''
     # NOTE: unpickling can execute arbitrary code; only load checkpoints that
     # come from a trusted source.
     with open(model_path, 'rb') as fd:
         d = pickle.load(fd)

     idx_to_char = d['idx_to_char']
     char_to_idx = d['char_to_idx']
     vocab_size = len(idx_to_char)
     hidden_size = d['hidden_size']
     num_stacks = d['num_stacks']
     dropout = d['dropout']
     rnn_w = tensor.from_numpy(d['rnn_w'])
     dense_w = tensor.from_numpy(d['dense_w'])
     dense_b = tensor.from_numpy(d['dense_b'])

     # Rebuild both layers on the GPU and load the trained parameters.
     cuda = device.create_cuda_gpu()
     rnn = layer.LSTM(name='lstm', hidden_size=hidden_size,
                      num_stacks=num_stacks, dropout=dropout,
                      input_sample_shape=(vocab_size,))
     rnn.to_device(cuda)
     rnn.param_values()[0].copy_data(rnn_w)
     dense = layer.Dense('dense', vocab_size, input_sample_shape=(hidden_size,))
     dense.to_device(cuda)
     dense.param_values()[0].copy_data(dense_w)
     dense.param_values()[1].copy_data(dense_b)

     # Both states start at zero.
     hx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)
     cx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)
     hx.set_value(0.0)
     cx.set_value(0.0)

     if seed_text:
         # Warm up the states; the distribution left after the last seed
         # character is used to pick the first generated character.
         for c in seed_text:
             y, hx, cx = _forward_char(rnn, dense, cuda, vocab_size,
                                       char_to_idx[c], hx, cx)
         sys.stdout.write(seed_text)
     else:
         # Without a seed the first character is drawn uniformly.
         y = tensor.Tensor((1, vocab_size), cuda)
         y.set_value(1.0 / vocab_size)

     for _ in range(nsamples):
         # numpy needs the probabilities in host memory.
         y.to_host()
         prob = tensor.to_numpy(y)[0]
         if do_sample:
             cur = np.random.choice(vocab_size, 1, p=prob)[0]
         else:
             cur = np.argmax(prob)
         sys.stdout.write(idx_to_char[cur])
         # Feed the chosen character back in to predict the next one.
         y, hx, cx = _forward_char(rnn, dense, cuda, vocab_size, cur, hx, cx)
     # Terminate the output line.
     sys.stdout.write('\n')

 def build_parser():
     '''Create the command-line parser for the sampling script.

     Positional arguments are the checkpoint path ('model', kept as a string
     because it is handed to open()) and the number of characters to sample
     ('n', parsed as int). '--seed' optionally supplies warm-up text and
     defaults to the empty string.
     '''
     parser = argparse.ArgumentParser(description='sample chars from char-rnn')
     # The checkpoint is a file path, so it must not be converted with int.
     parser.add_argument('model', help='the model checkpoint file')
     parser.add_argument('n', type=int, help='num of characters to sample')
     parser.add_argument('--seed', help='seed text string which warms up the '
                         ' rnn states for sampling', default='')
     return parser


 if __name__ == '__main__':
     parser = build_parser()
     args = parser.parse_args()
     # Report a bad count as a usage error; an assert would vanish under -O.
     if args.n <= 0:
         parser.error('n must > 0')
     sample(args.model, args.n, seed_text=args.seed)
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# =============================================================================
	'''Sample characters from the pre-trained model'''
	import sys
	import cPickle as pickle
	import numpy as np
	import argparse

	# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
	from singa import layer
	from singa import tensor
	from singa import device
	from singa.proto import model_pb2


	def _forward_char(rnn, dense, dev, vocab_size, char_idx, hx, cx):
	    '''Feed one character through the LSTM and the dense layer.

	    Args:
	        rnn: the LSTM layer.
	        dense: the dense output layer.
	        dev: device the one-hot input tensor is moved to.
	        vocab_size: width of the one-hot input vector.
	        char_idx: index of the character to feed.
	        hx, cx: LSTM state tensors passed to the layer after the input.

	    Returns:
	        (y, hx, cx): the softmax of the dense output, followed by the
	        second and third outputs of the LSTM, which the caller feeds back
	        in as the states of the next step.
	    '''
	    x = np.zeros((1, vocab_size), dtype=np.float32)
	    x[0, char_idx] = 1
	    tx = tensor.from_numpy(x)
	    tx.to_device(dev)
	    outputs = rnn.forward(model_pb2.kEval, [tx, hx, cx])
	    y = tensor.softmax(dense.forward(model_pb2.kEval, outputs[0]))
	    return y, outputs[1], outputs[2]


	def sample(model_path, nsamples=100, seed_text='', do_sample=True):
	    '''Generate characters from a pickled char-rnn checkpoint and print them.

	    Args:
	        model_path: path of the pickle file; it must hold the keys 'rnn_w',
	            'dense_w', 'dense_b', 'idx_to_char', 'char_to_idx',
	            'hidden_size', 'num_stacks' and 'dropout'.
	        nsamples: number of characters to generate.
	        seed_text: optional text fed through the network first to warm up
	            the states; it is echoed to stdout before the generated text.
	        do_sample: if True, draw each character from the predicted
	            distribution; otherwise take the most probable character.

	    Raises:
	        KeyError: if seed_text contains a character that is missing from
	            the checkpoint's 'char_to_idx' table.
	    '''
	    # NOTE: unpickling can execute arbitrary code; only load checkpoints
	    # that come from a trusted source.
	    with open(model_path, 'rb') as fd:
	        d = pickle.load(fd)

	    idx_to_char = d['idx_to_char']
	    char_to_idx = d['char_to_idx']
	    vocab_size = len(idx_to_char)
	    hidden_size = d['hidden_size']
	    num_stacks = d['num_stacks']
	    dropout = d['dropout']
	    rnn_w = tensor.from_numpy(d['rnn_w'])
	    dense_w = tensor.from_numpy(d['dense_w'])
	    dense_b = tensor.from_numpy(d['dense_b'])

	    # Rebuild both layers on the GPU and load the trained parameters.
	    cuda = device.create_cuda_gpu()
	    rnn = layer.LSTM(name='lstm', hidden_size=hidden_size,
	                     num_stacks=num_stacks, dropout=dropout,
	                     input_sample_shape=(vocab_size,))
	    rnn.to_device(cuda)
	    rnn.param_values()[0].copy_data(rnn_w)
	    dense = layer.Dense('dense', vocab_size,
	                        input_sample_shape=(hidden_size,))
	    dense.to_device(cuda)
	    dense.param_values()[0].copy_data(dense_w)
	    dense.param_values()[1].copy_data(dense_b)

	    # Both states start at zero.
	    hx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)
	    cx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)
	    hx.set_value(0.0)
	    cx.set_value(0.0)

	    if seed_text:
	        # Warm up the states; the distribution left after the last seed
	        # character is used to pick the first generated character.
	        for c in seed_text:
	            y, hx, cx = _forward_char(rnn, dense, cuda, vocab_size,
	                                      char_to_idx[c], hx, cx)
	        sys.stdout.write(seed_text)
	    else:
	        # Without a seed the first character is drawn uniformly.
	        y = tensor.Tensor((1, vocab_size), cuda)
	        y.set_value(1.0 / vocab_size)

	    for _ in range(nsamples):
	        # numpy needs the probabilities in host memory.
	        y.to_host()
	        prob = tensor.to_numpy(y)[0]
	        if do_sample:
	            cur = np.random.choice(vocab_size, 1, p=prob)[0]
	        else:
	            cur = np.argmax(prob)
	        sys.stdout.write(idx_to_char[cur])
	        # Feed the chosen character back in to predict the next one.
	        y, hx, cx = _forward_char(rnn, dense, cuda, vocab_size, cur,
	                                  hx, cx)
	    # Terminate the output line.
	    sys.stdout.write('\n')

	def build_parser():
	    '''Create the command-line parser for the sampling script.

	    Positional arguments are the checkpoint path ('model', kept as a
	    string because it is handed to open()) and the number of characters
	    to sample ('n', parsed as int). '--seed' optionally supplies warm-up
	    text and defaults to the empty string.
	    '''
	    parser = argparse.ArgumentParser(
	        description='sample chars from char-rnn')
	    # The checkpoint is a file path, so it must not be converted with int.
	    parser.add_argument('model', help='the model checkpoint file')
	    parser.add_argument('n', type=int, help='num of characters to sample')
	    parser.add_argument('--seed', help='seed text string which warms up the '
	                        ' rnn states for sampling', default='')
	    return parser


	if __name__ == '__main__':
	    parser = build_parser()
	    args = parser.parse_args()
	    # Report a bad count as a usage error; an assert would vanish under -O.
	    if args.n <= 0:
	        parser.error('n must > 0')
	    sample(args.model, args.n, seed_text=args.seed)