| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================= |
| """ |
| Nerual net class for constructing the nets using layers and providing access |
| functions for net info, e.g., parameters. |
| """ |
| |
| |
| from .proto.model_pb2 import kTrain, kEval |
| import tensor |
| import layer |
| import cPickle as pickle |
| |
| |
| class FeedForwardNet(object): |
| |
    def __init__(self, loss=None, metric=None):
        """
        Args:
            loss (Loss): the loss module, used by train() and evaluate()
            metric (Metric): the metric module, used by train() and evaluate()
        """
        self.loss = loss
        self.metric = metric
| self.layers = [] |
| self.src_of_layer = {} |
| self.dst_of_layer = None |
| self.ordered_layers = None |
| |
    def to_device(self, dev):
        """Move all layers (and their parameters) onto the given device."""
        for lyr in self.layers:
            lyr.to_device(dev)
| |
| def add(self, lyr, src=None): |
| """Append a layer into the layer list. |
| |
| This function will get the sample shape from the last layer to setup |
| the newly added layer. For the first layer, it is setup outside. |
| The calling function should ensure the correctness of the layer order. |
| |
| Args: |
| lyr (Layer): the layer to be added |
| """ |
| if src is not None: |
| if isinstance(src, layer.Layer): |
| assert src.has_setup is True, 'the source layer must be set up' |
| self.src_of_layer[lyr.name] = [src] |
| else: |
| assert type(src) == list, 'the src must be a list of layers' |
| self.src_of_layer[lyr.name] = src |
| else: |
| assert len(self.layers) > 0 or lyr.has_setup, \ |
| 'Source layers are needed to set up this layer' |
| if len(self.layers) > 0: |
| self.src_of_layer[lyr.name] = [self.layers[-1]] |
| else: |
| self.src_of_layer[lyr.name] = [] |
        if lyr.has_setup is False:
            # set up the layer using the sample shape from (the first of) its
            # source layer(s)
            in_shape = self.src_of_layer[lyr.name][0].get_output_sample_shape()
            lyr.setup(in_shape)
            print lyr.name, lyr.get_output_sample_shape()
| self.layers.append(lyr) |
| return lyr |
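
    # Example (illustrative only): a layer that combines multiple source
    # layers takes a list as src; layer.Dense and layer.Merge are assumed to
    # come from the SINGA Python modules:
    #
    #     left = net.add(layer.Dense('left', 32), src=prev)
    #     right = net.add(layer.Dense('right', 32), src=prev)
    #     net.add(layer.Merge('merge'), src=[left, right])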
| |
| def param_values(self): |
| values = [] |
| layers = self.layers |
| if self.ordered_layers is not None: |
| layers = self.ordered_layers |
| for lyr in layers: |
| values.extend(lyr.param_values()) |
| return values |
| |
| def param_specs(self): |
| specs = [] |
| layers = self.layers |
| if self.ordered_layers is not None: |
| layers = self.ordered_layers |
| for lyr in layers: |
| specs.extend(lyr.param_specs) |
| return specs |
| |
| def param_names(self): |
| return [spec.name for spec in self.param_specs()] |
| |
    def train(self, x, y):
        """Run one forward-backward pass; return the parameter gradients and
        a (loss, metric) tuple; metric is None if no metric is set."""
        out = self.forward(kTrain, x)
        l = self.loss.forward(kTrain, out, y)
        m = None
        if self.metric is not None:
            m = self.metric.evaluate(out, y)
        return self.backward(), (l.l1(), m)
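
    # Example (illustrative sketch): applying the returned gradients with an
    # optimizer; assumes optimizer.SGD and its apply_with_lr() method from
    # the SINGA Python modules:
    #
    #     sgd = optimizer.SGD(momentum=0.9)
    #     pgrads, (lvalue, acc) = net.train(tx, ty)
    #     for (s, p, g) in zip(net.param_specs(), net.param_values(), pgrads):
    #         sgd.apply_with_lr(epoch, 0.01, g, p, str(s.name))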
| |
| def evaluate(self, x, y): |
| """Evaluate the loss and metric of the given data""" |
| out = self.forward(kEval, x) |
| l = None |
| m = None |
| assert self.loss is not None or self.metric is not None,\ |
            'Cannot do evaluation, as neither loss nor metric is set'
| if self.loss is not None: |
| l = self.loss.evaluate(kEval, out, y) |
| if self.metric is not None: |
| m = self.metric.evaluate(out, y) |
| return l, m |
| |
    def predict(self, x):
        """Forward x through the net (in kEval mode) and return the softmax
        of the last layer's output."""
        xx = self.forward(kEval, x)
        return tensor.softmax(xx)
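
    # Example (illustrative only): fetching predictions as a numpy array,
    # using tensor.to_numpy as elsewhere in this file:
    #
    #     prob = net.predict(tx)
    #     prob.to_host()
    #     print tensor.to_numpy(prob)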
| |
    def topo_sort(self, cur, src_of_layer, visited=None, order=None):
        """Order the layers topologically via depth-first search from cur,
        so that every layer appears after all of its source layers.

        Returns:
            a list of layers in topological order.
        """
        if visited is None:
            # first call: mark all layers as unvisited
            visited = {}
            for name in src_of_layer.keys():
                visited[name] = False
            order = []
        srcs = src_of_layer[cur.name]
        for src in srcs:
            if visited[src.name] is False:
                visited[src.name] = True
                self.topo_sort(src, src_of_layer, visited, order)
        # append cur only after all of its sources have been appended
        order.append(cur)
        visited[cur.name] = True
        return order
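
    # Example (illustrative only): for a diamond-shaped net with
    # src_of_layer = {a: [], b: [a], c: [a], d: [b, c]},
    # topo_sort(d, src_of_layer) returns [a, b, c, d].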
| |
    def forward(self, flag, x):
        """Forward x through all layers in topological order.

        Returns:
            the output of the last layer.
        """
        if self.ordered_layers is None:
            self.ordered_layers = self.topo_sort(self.layers[-1],
                                                 self.src_of_layer)
        inputs = [x]
        output_of_layer = {}
        for cur in self.ordered_layers:
            srcs = self.src_of_layer[cur.name]
            for src in srcs:
                outs = output_of_layer[src.name]
                # a source layer with multiple outputs contributes its first
                # output
                if type(outs) == list:
                    inputs.append(outs[0])
                else:
                    inputs.append(outs)
            # a layer with a single source takes a tensor (not a list)
            if len(inputs) == 1:
                inputs = inputs[0]
            output_of_layer[cur.name] = cur.forward(flag, inputs)
            inputs = []
        return output_of_layer[self.ordered_layers[-1].name]
| |
    def backward(self):
        """Run back-propagation in reverse topological order.

        Returns:
            a list of gradient tensors, one per parameter, in the same order
            as param_values().
        """
        if self.dst_of_layer is None:
            # build the reverse (destination) adjacency from src_of_layer
            self.dst_of_layer = {}
            for cur in self.layers:
                self.dst_of_layer[cur.name] = []
            for cur in self.ordered_layers[1:]:
                srcs = self.src_of_layer[cur.name]
                for src in srcs:
                    self.dst_of_layer[src.name].append(cur)
        grad = self.loss.backward()
        if len(grad.shape) > 1:
            grad /= grad.shape[0]  # average across the batch
        grads = [grad]
        output_of_layer = {}
        pgrads = []
        for cur in reversed(self.ordered_layers):
            for dst in self.dst_of_layer[cur.name]:
                outputs = output_of_layer[dst.name]
                if type(outputs) == list:
                    grads.append(outputs[0])
                else:
                    grads.append(outputs)
            # a layer with a single destination takes a tensor (not a list)
            if len(grads) == 1:
                grads = grads[0]
            outs, _pgrads = cur.backward(kTrain, grads)
            pgrads.append(_pgrads)
            output_of_layer[cur.name] = outs
            grads = []

        # pgrads was collected from the last layer to the first; reverse it
        # to match the order of param_values()
        ret = []
        for pgrad in reversed(pgrads):
            ret.extend(pgrad)
        return ret
| |
| def save(self, f): |
| """Save model parameters using cpickle""" |
| params = {} |
| for (specs, val) in zip(self.param_specs(), self.param_values()): |
| val.to_host() |
| params[specs.name] = tensor.to_numpy(val) |
| with open(f, 'wb') as fd: |
| pickle.dump(params, fd) |
| |
| def load(self, f): |
| """Load model parameters using cpickle""" |
| with open(f, 'rb') as fd: |
| params = pickle.load(fd) |
| for (specs, val) in zip(self.param_specs(), self.param_values()): |
| val.copy_from_numpy(params[specs.name]) |
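
    # Example (illustrative only): checkpointing parameters between runs; the
    # loading net must have been constructed with the same layers:
    #
    #     net.save('model.bin')
    #     ...
    #     net2.load('model.bin')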