| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================= |
| """ |
| Nerual net class for constructing the nets using layers and providing access |
| functions for net info, e.g., parameters. |
| """ |
| |
| |
| from .proto.model_pb2 import kTrain, kEval |
| import tensor |
| import layer |
| import cPickle as pickle |
| |
| |
| class FeedForwardNet(object): |
| |
    def __init__(self, loss=None, metric=None):
        """
        Args:
            loss (Loss): the loss module, used by train() and evaluate()
            metric (Metric): the metric module, used by train() and evaluate()
        """
        self.loss = loss
        self.metric = metric
| self.layers = [] |
| self.src_of_layer = {} |
| self.dst_of_layer = None |
| self.ordered_layers = None |
| |
    def to_device(self, dev):
        """Move all layers (and their parameters) onto the given device."""
        for lyr in self.layers:
            lyr.to_device(dev)
| |
| def add(self, lyr, src=None): |
| """Append a layer into the layer list. |
| |
| This function will get the sample shape from the last layer to setup |
| the newly added layer. For the first layer, it is setup outside. |
| The calling function should ensure the correctness of the layer order. |
| |
| Args: |
| lyr (Layer): the layer to be added |
| """ |
| if src is not None: |
| if isinstance(src, layer.Layer): |
| assert src.has_setup is True, 'the source layer must be set up' |
| self.src_of_layer[lyr.name] = [src] |
| else: |
| assert type(src) == list, 'the src must be a list of layers' |
| self.src_of_layer[lyr.name] = src |
| else: |
| assert len(self.layers) > 0 or lyr.has_setup, \ |
| 'Source layers are needed to set up this layer' |
| if len(self.layers) > 0: |
| self.src_of_layer[lyr.name] = [self.layers[-1]] |
| else: |
| self.src_of_layer[lyr.name] = [] |
        if lyr.has_setup is False:
            # set up the layer using the sample shape from (the first of) its
            # source layer(s)
            in_shape = self.src_of_layer[lyr.name][0].get_output_sample_shape()
            lyr.setup(in_shape)
            print lyr.name, lyr.get_output_sample_shape()
| self.layers.append(lyr) |
| return lyr |
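
    # Example (illustrative only): a layer that combines multiple source
    # layers takes a list as src; layer.Dense and layer.Merge are assumed to
    # come from the SINGA Python modules:
    #
    #     left = net.add(layer.Dense('left', 32), src=prev)
    #     right = net.add(layer.Dense('right', 32), src=prev)
    #     net.add(layer.Merge('merge'), src=[left, right])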
| |
| def param_values(self): |
| values = [] |
| layers = self.layers |
| if self.ordered_layers is not None: |
| layers = self.ordered_layers |
| for lyr in layers: |
| values.extend(lyr.param_values()) |
| return values |
| |
| def param_specs(self): |
| specs = [] |
| layers = self.layers |
| if self.ordered_layers is not None: |
| layers = self.ordered_layers |
| for lyr in layers: |
| specs.extend(lyr.param_specs) |
| return specs |
| |
| def param_names(self): |
| return [spec.name for spec in self.param_specs()] |
| |
    def train(self, x, y):
        """Run one forward-backward pass; return the parameter gradients and
        a (loss, metric) tuple; metric is None if no metric is set."""
        out = self.forward(kTrain, x)
        l = self.loss.forward(kTrain, out, y)
        m = None
        if self.metric is not None:
            m = self.metric.evaluate(out, y)
        return self.backward(), (l.l1(), m)
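
    # Example (illustrative sketch): applying the returned gradients with an
    # optimizer; assumes optimizer.SGD and its apply_with_lr() method from
    # the SINGA Python modules:
    #
    #     sgd = optimizer.SGD(momentum=0.9)
    #     pgrads, (lvalue, acc) = net.train(tx, ty)
    #     for (s, p, g) in zip(net.param_specs(), net.param_values(), pgrads):
    #         sgd.apply_with_lr(epoch, 0.01, g, p, str(s.name))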
| |
| def evaluate(self, x, y): |
| """Evaluate the loss and metric of the given data""" |
| out = self.forward(kEval, x) |
| l = None |
| m = None |
| assert self.loss is not None or self.metric is not None,\ |
            'Cannot do evaluation, as neither loss nor metric is set'
| if self.loss is not None: |
| l = self.loss.evaluate(kEval, out, y) |
| if self.metric is not None: |
| m = self.metric.evaluate(out, y) |
| return l, m |
| |
    def predict(self, x):
        """Forward x through the net (in kEval mode) and return the softmax
        of the last layer's output."""
        xx = self.forward(kEval, x)
        return tensor.softmax(xx)
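
    # Example (illustrative only): fetching predictions as a numpy array,
    # using tensor.to_numpy as elsewhere in this file:
    #
    #     prob = net.predict(tx)
    #     prob.to_host()
    #     print tensor.to_numpy(prob)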
| |
    def topo_sort(self, cur, src_of_layer, visited=None, order=None):
        """Order the layers topologically via depth-first search from cur,
        so that every layer appears after all of its source layers.

        Returns:
            a list of layers in topological order.
        """
        if visited is None:
            # first call: mark all layers as unvisited
            visited = {}
            for name in src_of_layer.keys():
                visited[name] = False
            order = []
        srcs = src_of_layer[cur.name]
        for src in srcs:
            if visited[src.name] is False:
                visited[src.name] = True
                self.topo_sort(src, src_of_layer, visited, order)
        # append cur only after all of its sources have been appended
        order.append(cur)
        visited[cur.name] = True
        return order
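
    # Example (illustrative only): for a diamond-shaped net with
    # src_of_layer = {a: [], b: [a], c: [a], d: [b, c]},
    # topo_sort(d, src_of_layer) returns [a, b, c, d].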
| |
    def forward(self, flag, x):
        """Forward x through all layers in topological order.

        Returns:
            the output of the last layer.
        """
        if self.ordered_layers is None:
            self.ordered_layers = self.topo_sort(self.layers[-1],
                                                 self.src_of_layer)
        inputs = [x]
        output_of_layer = {}
        for cur in self.ordered_layers:
            srcs = self.src_of_layer[cur.name]
            for src in srcs:
                outs = output_of_layer[src.name]
                # a source layer with multiple outputs contributes its first
                # output
                if type(outs) == list:
                    inputs.append(outs[0])
                else:
                    inputs.append(outs)
            # a layer with a single source takes a tensor (not a list)
            if len(inputs) == 1:
                inputs = inputs[0]
            output_of_layer[cur.name] = cur.forward(flag, inputs)
            inputs = []
        return output_of_layer[self.ordered_layers[-1].name]
| |
    def backward(self):
        """Run back-propagation in reverse topological order.

        Returns:
            a list of gradient tensors, one per parameter, in the same order
            as param_values().
        """
        if self.dst_of_layer is None:
            # build the reverse (destination) adjacency from src_of_layer
            self.dst_of_layer = {}
            for cur in self.layers:
                self.dst_of_layer[cur.name] = []
            for cur in self.ordered_layers[1:]:
                srcs = self.src_of_layer[cur.name]
                for src in srcs:
                    self.dst_of_layer[src.name].append(cur)
        grad = self.loss.backward()
        if len(grad.shape) > 1:
            grad /= grad.shape[0]  # average across the batch
        grads = [grad]
        output_of_layer = {}
        pgrads = []
        for cur in reversed(self.ordered_layers):
            for dst in self.dst_of_layer[cur.name]:
                outputs = output_of_layer[dst.name]
                if type(outputs) == list:
                    grads.append(outputs[0])
                else:
                    grads.append(outputs)
            # a layer with a single destination takes a tensor (not a list)
            if len(grads) == 1:
                grads = grads[0]
            outs, _pgrads = cur.backward(kTrain, grads)
            pgrads.append(_pgrads)
            output_of_layer[cur.name] = outs
            grads = []

        # pgrads was collected from the last layer to the first; reverse it
        # to match the order of param_values()
        ret = []
        for pgrad in reversed(pgrads):
            ret.extend(pgrad)
        return ret
| |
| def save(self, f): |
| """Save model parameters using cpickle""" |
| params = {} |
| for (specs, val) in zip(self.param_specs(), self.param_values()): |
| val.to_host() |
| params[specs.name] = tensor.to_numpy(val) |
| with open(f, 'wb') as fd: |
| pickle.dump(params, fd) |
| |
| def load(self, f): |
| """Load model parameters using cpickle""" |
| with open(f, 'rb') as fd: |
| params = pickle.load(fd) |
| for (specs, val) in zip(self.param_specs(), self.param_values()): |
| val.copy_from_numpy(params[specs.name]) |
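
    # Example (illustrative only): checkpointing parameters between runs; the
    # loading net must have been constructed with the same layers:
    #
    #     net.save('model.bin')
    #     ...
    #     net2.load('model.bin')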