/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SINGA_MODEL_FEED_FORWARD_NET_H_
#define SINGA_MODEL_FEED_FORWARD_NET_H_
#include "singa/model/layer.h"
#include "singa/model/loss.h"
#include "singa/model/metric.h"
#include "singa/model/updater.h"
#include <thread>
#include <memory>
namespace singa {
/// The feed-forward neural net.
/// It provides functions for constructing the layers, accessing layer
/// parameters, and conducting training, evaluation and prediction.
class FeedForwardNet {
public:
FeedForwardNet() = default;
/// Delete all layers.
~FeedForwardNet();
/// Add a layer with the assumption that
/// 1. this function is called in the correct order, i.e., the layers are
///    added following the topological order.
/// 2. this layer has already been set up (its Setup function is called
///    outside).
/// The layer will be freed in the destructor of FeedForwardNet.
std::shared_ptr<Layer> Add(std::shared_ptr<Layer> layer);
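// A minimal usage sketch (illustrative, not from the original header): set up
// a layer externally, then add it. 'Dense', the Setup(Shape, LayerConf)
// signature and the DenseConf fields are assumptions based on the Layer API
// declared in singa/model/layer.h.
//   auto dense = std::make_shared<Dense>();
//   LayerConf conf;
//   conf.set_name("dense1");
//   conf.mutable_dense_conf()->set_num_output(10);
//   dense->Setup(Shape{784}, conf);  // Setup is called outside, as required
//   net.Add(dense);                  // layers must be added topologically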
// TODO(wangwei) add ConcatenateLayer and SliceLayer
// AddConcatenateLayer(vector<Layer*> src, Layer *dst);
// AddSliceLayer(Layer* layer, vector<Layer*> dst);
/// Add a layer by providing its configuration, and set it up.
/// Assume the layer is added in the correct order.
/// For the first layer, 'sample_shape' (the input sample shape) is necessary
/// for calling Setup().
std::shared_ptr<Layer> Add(const LayerConf& conf,
const Shape* sample_shape = nullptr);
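// Usage sketch for the configuration-based Add (illustrative; the layer type
// string and DenseConf fields are assumptions, check the registered layer
// identifiers and model.proto for the exact names):
//   FeedForwardNet net;
//   LayerConf conf;
//   conf.set_name("dense1");
//   conf.set_type("singa_dense");
//   conf.mutable_dense_conf()->set_num_output(10);
//   Shape in{784};       // the input sample shape, needed for the first layer
//   net.Add(conf, &in);  // Setup() is invoked internally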
/// Set some fields used for training and evaluating the neural net.
/// This method instantiates an Updater, wraps the Optimizer inside it, and
/// always registers the parameters of the net instance.
/// If the neural net is constructed for evaluation only, then 'opt' is not
/// necessary; but for training, both 'opt' and 'loss' are necessary.
/// 'shuffle' indicates whether to shuffle training samples within one epoch;
/// it takes effect only when Train() is used.
void Compile(bool shuffle, Optimizer* opt, Loss* loss, Metric* metric);
/// Set some fields used for training and evaluating the neural net.
/// This method is mainly used in parallel training, where multiple neural
/// net instances are needed.
/// If the neural net is constructed for evaluation only, then 'updater' is
/// not necessary; but for training, both 'updater' and 'loss' are necessary.
/// 'shuffle' indicates whether to shuffle training samples within one epoch;
/// it takes effect only when Train() is used. If 'to_register' is true, the
/// parameters will be registered in the Updater.
void Compile(bool shuffle, bool to_register, std::shared_ptr<Updater> updater,
Loss* loss, Metric* metric);
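// Compilation sketch (illustrative): for training, supply an optimizer, a
// loss and a metric; for evaluation only, the optimizer may be omitted. SGD,
// SoftmaxCrossEntropy and Accuracy are classes under singa/model/, but the
// exact setup calls below are assumptions.
//   SGD sgd;
//   OptimizerConf opt_conf;
//   sgd.Setup(opt_conf);
//   sgd.SetLearningRateGenerator([](int step) { return 0.01f; });
//   SoftmaxCrossEntropy loss;
//   Accuracy acc;
//   net.Compile(true /*shuffle*/, &sgd, &loss, &acc);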
/// Conduct the training given the training data 'x' and label 'y'.
/// 'val_split' of the training data is held out for validation.
/// Validation is performed before every epoch.
/// Due to the memory limit, 'x' and 'y' cannot be very large. Hence, this
/// method is typically used for small training datasets, e.g., CIFAR-10 and
/// MNIST, which can be stored in main memory.
void Train(size_t batchsize, int nb_epoch, const Tensor& x, const Tensor& y,
float val_split = 0.0f);
/// Conduct the training given the training and validation data.
/// Validation is performed before every epoch.
/// Due to the memory limit, 'x' and 'y' cannot be very large. Hence, this
/// method is typically used for small training datasets, e.g., CIFAR-10 and
/// MNIST, which can be stored in main memory.
void Train(size_t batchsize, int nb_epoch, const Tensor& x, const Tensor& y,
const Tensor& val_x, const Tensor& val_y);
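// Training sketch (illustrative): either hold out a fraction of the training
// data or pass an explicit validation set; 'train_x', 'train_y', 'val_x' and
// 'val_y' are assumed tensors preloaded in main memory.
//   net.Train(100 /*batchsize*/, 50 /*nb_epoch*/, train_x, train_y,
//             0.1f /*val_split*/);
//   // or, with an explicit validation set:
//   net.Train(100, 50, train_x, train_y, val_x, val_y);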
/// Train the neural net over one batch of training data.
const std::pair<float, float> TrainOnBatch(int epoch, const Tensor& x,
const Tensor& y);
/// Evaluate the neural net with given data.
/// Returns one tensor for loss values and one tensor for metric values;
/// each sample has a loss value and a metric value (if 'metric' is set in
/// Compile()). 'batchsize' is used for controlling the memory footprint.
/// It should be smaller than the total number of samples.
/// Due to the memory limit, 'x' and 'y' cannot be very large. Hence, this
/// method is typically used for small datasets, e.g., CIFAR-10 and MNIST,
/// which can be stored in main memory.
std::pair<Tensor, Tensor> Evaluate(const Tensor& x, const Tensor& y,
size_t batchsize = 128);
/// Evaluate the neural net for one batch of data.
std::pair<Tensor, Tensor> EvaluateOnBatch(const Tensor& x, const Tensor& y);
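// Evaluation sketch (illustrative): both returned tensors hold one value per
// sample; Sum() and Tensor::shape(idx) are assumed from singa/core/tensor.h.
//   auto ret = net.Evaluate(test_x, test_y, 64 /*batchsize*/);
//   float mean_loss = Sum<float>(ret.first) / test_x.shape(0);
//   float mean_metric = Sum<float>(ret.second) / test_x.shape(0);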
/// Predict the probability distribution over candidate classes for each
/// data sample. 'batchsize' is used for controlling the memory footprint.
/// It should be smaller than the total number of samples.
/// Due to the memory limit, 'x' cannot be very large. Hence, this method is
/// typically used for small datasets, e.g., CIFAR-10 and MNIST, which can be
/// stored in main memory.
const Tensor Predict(const Tensor& x, size_t batchsize = 128);
/// Predict for one batch of data.
const Tensor PredictOnBatch(const Tensor& x);
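// Prediction sketch (illustrative): the returned tensor is assumed to have
// shape (nsamples, nclasses), i.e., one probability row per input sample.
//   Tensor prob = net.Predict(test_x, 128 /*batchsize*/);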
/// Forward layers one by one using the data batch 'x'.
/// Returns the prediction results (from the last layer).
const Tensor Forward(int flag, const Tensor& x);
/// Backward layers one by one using the gradient batch 'grad'.
/// Returns the parameter gradients.
const vector<Tensor> Backward(int flag, const Tensor& grad);
/// Clone the neural net by cloning every layer to the given device.
/// If 'device' is nullptr, then clone it on the current device.
FeedForwardNet Clone(std::shared_ptr<Device> device);
/// Move the layer data to the given device.
void ToDevice(std::shared_ptr<Device> device);
void ToHost() { ToDevice(defaultDevice); }
/// Set the data type of each layer.
void AsType(DataType dtype);
/// A wrapper method to spawn a thread to execute Train() method.
std::thread TrainThread(size_t batchsize, int nb_epoch, const Tensor& x,
const Tensor& y, const Tensor& val_x,
const Tensor& val_y) {
return std::thread(
[=]() { Train(batchsize, nb_epoch, x, y, val_x, val_y); });
}
/// A wrapper method to spawn a thread to execute Train() method.
std::thread TrainThread(size_t batchsize, int nb_epoch, const Tensor& x,
const Tensor& y) {
return std::thread([=]() { Train(batchsize, nb_epoch, x, y); });
}
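// Parallel-training sketch (illustrative): clone the net onto a second
// device and train both replicas concurrently; 'dev' is an assumed
// std::shared_ptr<Device>, and data partitioning is left out.
//   FeedForwardNet replica = net.Clone(dev);
//   std::thread t0 = net.TrainThread(64, 10, x0, y0);
//   std::thread t1 = replica.TrainThread(64, 10, x1, y1);
//   t0.join();
//   t1.join();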
const vector<std::shared_ptr<Layer>> layers() const { return layers_; }
const vector<string> GetParamNames() const;
const vector<ParamSpec> GetParamSpecs() const;
const vector<Tensor> GetParamValues() const;
protected:
vector<std::shared_ptr<Layer>> layers_;
std::shared_ptr<Updater> updater_;
Loss* loss_;
Metric* metric_;
bool shuffle_ = true;
Device* device_ = nullptr;
DataType dtype_ = kFloat32;
};
}  // namespace singa
#endif // SINGA_MODEL_FEED_FORWARD_NET_H_