# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""
Neural net class for constructing nets from layers, and providing access
functions for net info, e.g., parameters.
Example usages::
from singa import net as ffnet
from singa import metric
from singa import loss
from singa import layer
from singa import device
from singa import tensor
# create net and add layers
net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
net.add(layer.Conv2D('conv1', 32, 5, 1, input_sample_shape=(3,32,32,)))
net.add(layer.Activation('relu1'))
net.add(layer.MaxPooling2D('pool1', 3, 2))
net.add(layer.Flatten('flat'))
net.add(layer.Dense('dense', 10))
# init parameters
for p in net.param_values():
if len(p.shape) == 0:
p.set_value(0)
else:
p.gaussian(0, 0.01)
# move net onto gpu
dev = device.create_cuda_gpu()
net.to_device(dev)
# training (skipped)
# do prediction after training
x = tensor.Tensor((2, 3, 32, 32), dev)
x.uniform(-1, 1)
y = net.predict(x)
print tensor.to_numpy(y)
"""
from .proto.model_pb2 import kTrain, kEval
import tensor
import layer
import snapshot
import cPickle as pickle
'''For displaying training information, e.g., the L1 norm of layer data'''
verbose = False
class FeedForwardNet(object):
def __init__(self, loss=None, metric=None):
'''Representing a feed-forward neural net.
Args:
loss: a Loss instance, necessary for training
metric: a Metric instance, necessary for evaluation
'''
self.loss = loss
self.metric = metric
self.layers = []
self.src_of_layer = {}
self.dst_of_layer = None
self.ordered_layers = None
self.out_sample_shape_of_layer = {}
def to_device(self, dev):
'''Move the net onto the given device, including
all parameters and intermediate data.
'''
for lyr in self.layers:
lyr.to_device(dev)
def add(self, lyr, src=None):
"""Append a layer into the layer list.
This function gets the sample shape from the src layers to set up the
newly added layer; the first layer must be set up outside of add() (i.e.,
with its input_sample_shape given). The caller should ensure the
correctness of the layer order. If src is None, the last added layer is
used as the src layer. If there are multiple src layers, pass src as a
list of layers (see the example below).
Args:
lyr (Layer): the layer to be added
src (Layer or list of Layer): the source layer(s) of lyr
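Example (a sketch reusing the layer types from the module docstring; names
and arguments are illustrative)::

    conv1 = net.add(layer.Conv2D('conv1', 32, 5, 1,
                                 input_sample_shape=(3, 32, 32)))
    relu1 = net.add(layer.Activation('relu1'))   # src defaults to conv1
    pool1 = net.add(layer.MaxPooling2D('pool1', 3, 2), relu1)  # explicit src
    net.add(layer.Flatten('flat'), pool1)
    net.add(layer.Dense('dense', 10))
    # For a layer with multiple inputs, pass src as a list, e.g.
    # net.add(merge_lyr, [branch1, branch2]), assuming a merge-style layer
    # is available in your SINGA version.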
"""
if src is not None:
if isinstance(src, layer.Layer):
assert src.has_setup is True, 'the source layer must be set up'
self.src_of_layer[lyr.name] = [src]
else:
assert type(src) == list, 'the src must be a list of layers'
self.src_of_layer[lyr.name] = src
# print 'merge------', len(src)
else:
assert len(self.layers) > 0 or lyr.has_setup, \
'Source layers are needed to set up this layer'
if len(self.layers) > 0:
self.src_of_layer[lyr.name] = [self.layers[-1]]
else:
self.src_of_layer[lyr.name] = []
if lyr.has_setup is False:
in_shape = []
for src in self.src_of_layer[lyr.name]:
shapes = self.out_sample_shape_of_layer[src.name]
assert len(shapes) > 0, \
'Cannot get the output shape of layer %s' % src.name
in_shape.append(shapes[0])
shapes.pop(0)
if len(in_shape) == 1:
lyr.setup(in_shape[0])
else:
lyr.setup(in_shape)
out_shape = lyr.get_output_sample_shape()
if type(out_shape[0]) is tuple:
self.out_sample_shape_of_layer[lyr.name] = out_shape
else:
self.out_sample_shape_of_layer[lyr.name] = [out_shape]
self.layers.append(lyr)
print lyr.name, out_shape
return lyr
def param_values(self):
'''Return a list of tensors for all parameters'''
values = []
layers = self.layers
if self.ordered_layers is not None:
layers = self.ordered_layers
for lyr in layers:
values.extend(lyr.param_values())
return values
def param_specs(self):
'''Return a list of ParamSpec for all parameters'''
specs = []
layers = self.layers
if self.ordered_layers is not None:
layers = self.ordered_layers
for lyr in layers:
specs.extend(lyr.param_specs)
return specs
def param_names(self):
'''Return a list of the names of all params'''
return [spec.name for spec in self.param_specs()]
def train(self, x, y):
'''Run BP for one iteration.
Currently only supports nets with a single output layer, and a single
loss objective and metric.
TODO(wangwei) consider multiple loss objectives and metrics.
Args:
x: input data, a single input Tensor or a dict: layer name -> Tensor
y: label data, a single input Tensor.
Returns:
gradients of parameters and the loss and metric values.
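Example (a sketch of one training step; ``optimizer.SGD`` usage, ``tx``/``ty``
(a mini-batch of inputs/labels on the net's device), ``epoch`` and the
learning rate 0.01 are illustrative assumptions)::

    from singa import optimizer
    opt = optimizer.SGD(momentum=0.9)
    grads, (l, a) = net.train(tx, ty)
    for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
        opt.apply_with_lr(epoch, 0.01, g, p, str(s.name))
    print 'loss = %f, metric = %f' % (l, a)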
'''
out = self.forward(kTrain, x)
l = self.loss.forward(kTrain, out, y)
m = None
if self.metric is not None:
    m = self.metric.evaluate(out, y)
return self.backward(), (l.l1(), m)
def evaluate(self, x, y):
'''Evaluate the loss and metric of the given data.
Currently only supports nets with a single output layer, and a single
loss objective and metric.
TODO(wangwei) consider multiple loss objectives and metrics.
Args:
x: input data, a single input Tensor or a dict: layer name -> Tensor
y: label data, a single input Tensor.
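Example (a sketch; ``vx``/``vy`` are assumed validation input/label tensors
on the net's device)::

    l, m = net.evaluate(vx, vy)
    print 'val loss = %s, val metric = %s' % (l, m)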
'''
out = self.forward(kEval, x)
l = None
m = None
assert self.loss is not None or self.metric is not None,\
'Cannot do evaluation, as neither loss nor metric is set'
if self.loss is not None:
l = self.loss.evaluate(kEval, out, y)
if self.metric is not None:
m = self.metric.evaluate(out, y)
return l, m
def predict(self, x):
'''Forward the input data through each layer to get the values of the
output layers.
Currently only supports nets with a single output layer
Args:
x: input data, a single input Tensor or a dict: layer name -> Tensor
Returns:
a single output tensor (the softmax of the net output) as the
prediction result.
'''
xx = self.forward(kEval, x)
return tensor.softmax(xx)
def topo_sort(self, layers, src_of_layer):
'''Topological sort of the layers.
It tries to preserve the order of the input layers.
Args:
layers: a list of layers; layers that take inputs from the same
(multi-output) layer, e.g., a slice layer, should be added by the
user in the correct order, and this function does not change their
relative order.
src_of_layer: a dictionary: layer name -> a list of src layers
Returns:
A list of layers in topological order
'''
order = []
while len(order) < len(layers):
for lyr in layers:
if lyr not in order:
for src in src_of_layer[lyr.name]:
if src not in order:
break
order.append(lyr)
return order
def forward(self, flag, x, output=[]):
'''Forward the input(s) through every layer.
If a layer has inputs from other layers and from x, the data from x is
ordered before the data from other layers, e.g., if layer 1 -> layer 2,
and x['layer 2'] has data, then the input of layer 2 is
flatten([x['layer 2'], output of layer 1])
Args:
flag: True for training; False for evaluation; could also be
model_pb2.kTrain or model_pb2.kEval, or other values for future
use.
x: a single SINGA tensor or a dictionary: layer name -> SINGA tensor
output(list): a list of layer names whose output would be returned
in addition to the default output
Returns:
if there is only one output layer, return its output tensor(s);
else return a dictionary: layer name -> output tensor(s)
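Example (a sketch, using the net built in the module docstring; kEval can
be imported by users via ``from singa.proto.model_pb2 import kEval``)::

    # single input tensor, fed to the first layer in topological order
    y = net.forward(kEval, x)
    # also fetch the output of the intermediate layer 'pool1'
    outs = net.forward(kEval, x, output=['pool1'])
    y, pool1_out = outs['dense'], outs['pool1']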
'''
if self.ordered_layers is None:
self.ordered_layers = self.topo_sort(self.layers, self.src_of_layer)
if type(x) is dict:
input_of_layer = x
else:
assert isinstance(x, tensor.Tensor), \
'The inputs of a net should be dict or a single tensor'
input_of_layer = {self.ordered_layers[0].name: x}
output_of_layer = {} # outputs generated by each layer
ret = {} # outputs to return
for cur in self.ordered_layers:
inputs = []
if cur.name in input_of_layer:
if type(input_of_layer[cur.name]) is list:
inputs.extend(input_of_layer[cur.name])
else:
inputs.append(input_of_layer[cur.name])
srcs = self.src_of_layer[cur.name]
disp_src = ''
for src in srcs:
outs = output_of_layer[src.name]
if type(outs) == list:
assert len(outs) > 0, \
'the output from layer %s is empty' % src.name
inputs.append(outs[0])
outs.pop(0)
if len(outs) == 0:
output_of_layer.pop(src.name)
else:
inputs.append(outs)
output_of_layer[cur.name] = []
output_of_layer.pop(src.name)
if len(inputs) == 1:
inputs = inputs[0]
out = cur.forward(flag, inputs)
if verbose:
disp_src = '+'.join([src.name for src in srcs])
disp_src += '-->' + cur.name
if type(out) is list:
print '%s: %s' % (disp_src,
' '.join([str(o.l1()) for o in out]))
else:
print '%s: %f' % (disp_src, out.l1())
output_of_layer[cur.name] = out
if cur.name in output:
ret[cur.name] = out
# print lyr.name, x.l1()
# print output_of_layer
ret.update(output_of_layer)
if len(ret) == 1:
return ret.values()[0]
else:
return ret
def backward(self):
'''Run back-propagation after forward-propagation.
Returns:
a list of gradient tensors for all parameters, in the same order as
param_values()
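Example (a sketch of manual forward/backward, mirroring what train() does;
``x``/``y`` are assumed input/label tensors on the net's device)::

    out = net.forward(kTrain, x)
    net.loss.forward(kTrain, out, y)   # must be called before backward()
    grads = net.backward()
    for (name, g) in zip(net.param_names(), grads):
        print name, g.l1()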
'''
if self.dst_of_layer is None:
self.dst_of_layer = {}
for cur in self.layers:
self.dst_of_layer[cur.name] = []
for cur in self.ordered_layers[1:]:
srcs = self.src_of_layer[cur.name]
for src in srcs:
self.dst_of_layer[src.name].append(cur)
grad = self.loss.backward()
if len(grad.shape) > 1:
grad /= grad.shape[0] # average across the batch
# print 'grad', grad.l1()
grads = [grad]
output_of_layer = {}
pgrads = []
for cur in reversed(self.ordered_layers):
for dst in self.dst_of_layer[cur.name]:
outputs = output_of_layer[dst.name]
if type(outputs) == list:
assert len(outputs) > 0, \
'the gradient from layer %s is empty' % dst.name
grads.append(outputs[0])
outputs.pop(0)
else:
grads.append(outputs)
output_of_layer[dst.name] = []
# del output_of_layer[dst.name]
if len(grads) == 1:
grads = grads[0]
outs, _pgrads = cur.backward(kTrain, grads)
pgrads.append(_pgrads)
if verbose:
disp_src = '+'.join(
[dst.name for dst in self.dst_of_layer[cur.name]])
disp_src += '-->' + cur.name
if type(outs) is list:
print '%s: %s' % (disp_src,
' '.join([str(o.l1()) for o in outs]))
else:
print '%s: %f' % (disp_src, outs.l1())
if type(outs) is list:
output_of_layer[cur.name] = outs[::-1]
else:
output_of_layer[cur.name] = outs
grads = []
ret = []
for pgrad in reversed(pgrads):
ret.extend(pgrad)
return ret
def save(self, f, buffer_size=10, use_pickle=False):
'''Save model parameters using io/snapshot.
Args:
f: file name
buffer_size: size (MB) of the IO buffer; the default is 10MB. Please
make sure it is larger than any single parameter object.
use_pickle (bool): if True, use pickle for dumping; otherwise, use
protobuf for serialization, which uses less space.
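Example (a sketch; file names are arbitrary)::

    net.save('model', use_pickle=False)        # serialize via protobuf Snapshot
    net.save('model.pickle', use_pickle=True)  # serialize via pickle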
'''
if use_pickle:
params = {}
for (specs, val) in zip(self.param_specs(), self.param_values()):
val.to_host()
params[specs.name] = tensor.to_numpy(val)
with open(f, 'wb') as fd:
pickle.dump(params, fd)
else:
sp = snapshot.Snapshot(f, True, buffer_size)
for (specs, val) in zip(self.param_specs(), self.param_values()):
val.to_host()
sp.write(specs.name, val)
def load(self, f, buffer_size=10, use_pickle=False):
'''Load model parameters using io/snapshot.
Please refer to the argument description in save().
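Example (a sketch; use the same use_pickle flag as in save())::

    net.load('model', use_pickle=False)
    net.load('model.pickle', use_pickle=True)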
'''
if use_pickle:
print 'NOTE: If your model was saved using Snapshot, '\
'then set use_pickle=False for loading it'
with open(f, 'rb') as fd:
params = pickle.load(fd)
for (specs, val) in zip(self.param_specs(),
self.param_values()):
try:
val.copy_from_numpy(params[specs.name])
except AssertionError as err:
print 'Error from copying values for param: %s' % specs.name
print 'shape of param vs checkpoint', val.shape, params[specs.name].shape
raise err
else:
print 'NOTE: If your model was saved using pickle, '\
'then set use_pickle=True for loading it'
sp = snapshot.Snapshot(f, False, buffer_size)
params = sp.read()
for (specs, val) in zip(self.param_specs(), self.param_values()):
val.copy_data(params[specs.name])