/************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
#ifndef SINGA_NEURALNET_LAYER_H_
#define SINGA_NEURALNET_LAYER_H_
#include <string>
#include <vector>
#include "singa/proto/common.pb.h"
#include "singa/proto/job.pb.h"
#include "singa/utils/common.h"
#include "singa/utils/blob.h"
#include "singa/utils/param.h"
namespace singa {
using std::vector;
using std::string;
// TODO(wangwei) make AuxType a template argument for Layer.
using AuxType = int;
inline const string AddUnrollingPrefix(int unroll_idx, const string& name) {
return std::to_string(unroll_idx) + "#" + name;
}
inline const string AddPartitionSuffix(int partition_idx, const string& name) {
return name + "@" + std::to_string(partition_idx);
}
inline const string AddPrefixSuffix(int unroll_idx, int partition_idx,
const string& name) {
return std::to_string(unroll_idx) + "#" + name + "@" +
std::to_string(partition_idx);
}
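// For illustration, given an original layer named "conv1", the helpers above
// compose names as follows:
//   AddUnrollingPrefix(0, "conv1")    -> "0#conv1"
//   AddPartitionSuffix(1, "conv1")    -> "conv1@1"
//   AddPrefixSuffix(0, 1, "conv1")    -> "0#conv1@1"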
/**
* Base layer class.
*
* Subclasses should implement at least
* Layer::ComputeFeature() and Layer::ComputeGradient()
* functions in accordance with the NeuralNet::TrainOneBatch function.
*/
class Layer {
public:
/**
* Create a sub-layer instance based on proto.type();
*
* @param proto configuration of the layer instance.
* @return pointer to the newly created layer instance.
*/
static Layer* Create(const LayerProto& proto);
Layer() {}
virtual ~Layer() {}
/**
* Create a layer instance from a string configuration; used by the Python
* binding and in production/test mode.
*/
static Layer* CreateLayer(const string str);
static void SetupLayer(Layer* layer, const string str, const vector<Layer*>& srclayers);
/**
* Setup layer properties.
*
* Setup members, e.g., shapes of Param objects, based on the layer
* configuration and connected layers.
* It should check the partition settings when setting up the properties.
*
* @param conf layer configuration.
* @param srclayers source layers that connect to this layer.
*/
virtual void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) {
layer_conf_ = conf;
datavec_.push_back(&data_);
gradvec_.push_back(&grad_);
}
/**
* Compute features of this layer based on connected layers.
*
* @param[in] flag set by the TrainOneBatch function, e.g., to indicate the
* running phase (kForward | kTrain, kForward | kTest, etc.).
* @param[in] srclayers source layers that connect to this layer.
*/
virtual void ComputeFeature(int flag, const vector<Layer*>& srclayers) = 0;
/**
* Compute gradients for parameters associated with this layer.
* It may also compute the gradients of the loss w.r.t. the source layers.
*
* \copydetails ComputeFeature().
*/
virtual void ComputeGradient(int flag, const vector<Layer*>& srclayers) = 0;
/**
* Layers that have parameters must override this function to return all Param
* objects associated with this layer.
*
* @return parameters associated with this layer.
*/
virtual const std::vector<Param*> GetParams() const {
return std::vector<Param*> {};
}
virtual void SetParams(std::vector<Param*>) {}
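// For illustration, a sketch of a typical GetParams() override for a layer
// with weight and bias parameters (weight_ and bias_ are hypothetical Param*
// members of the subclass):
//
//   const std::vector<Param*> GetParams() const override {
//     return std::vector<Param*>{weight_, bias_};
//   }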
/**
* Return the connection type between one neuron of this layer and its source
* layer.
*
* Currently two connection types are supported: kOneToOne and kOneToAll.
* - kOneToOne indicates the neuron depends on only one neuron from the src
* layer.
* - kOneToAll indicates the neuron depends on all neurons from the src layer.
* TODO(wangwei) support kOneToMany.
*
* @param[in] k index of the source layer; currently only k = 0 is supported.
* @return connection type.
*/
virtual ConnectionType src_neuron_connection(int k) const {
// CHECK_LT(k, srclayers_.size());
return kOneToOne;
}
/**
* Return the connection type of this layer and all dst layers.
*
* Currently two connection types are supported: kOneToOne and kOneToMany.
* - kOneToOne indicates the user implements the ComputeFeature and
* ComputeGradient functions considering only one dst layer. In this case,
* a SplitLayer will be added automatically to connect this layer with all
* dst layers.
* - kOneToMany indicates this layer has already considered multiple dst
* layers in the implementation.
*
* @return connection type; the default is kOneToOne.
*/
virtual ConnectionType dst_layer_connection() const {
return kOneToOne;
}
/**
* To display layer info, e.g., aggregated loss/accuracy, or norms of the
* feature vector and parameters.
*
* @param[in] debug whether to print the debug info
* @param[in] flag used to get the calling phase, e.g., forward of training
* (kForward | kTrain).
* @return info string about this layer, which is printed into the log.
*/
virtual const std::string ToString(bool debug, int flag);
/**
* @return partition dimension of this layer,
* - -1 for no partition.
* - 0 for partition on the data dimension, i.e., partitioning the mini-batch
* into sub-mini-batches.
* - 1 for partitioning this layer on the feature dimension, i.e., the feature
* vector of each instance is partitioned into sub-vectors.
*/
inline int partition_dim() const {
CHECK_LE(layer_conf_.partition_dim(), 1);
return layer_conf_.partition_dim();
}
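// For example, for a layer whose feature Blob has shape (batch = 64,
// feature = 100) and num_partitions() == 2:
//   - partition_dim() == 0 yields two sub-layers of shape (32, 100);
//   - partition_dim() == 1 yields two sub-layers of shape (64, 50).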
/**
* @return the partition ID (i.e., the ID of the worker to whom this layer is
* dispatched) of this layer, which is a sub-layer partitioned from the
* original layer.
*/
inline int partition_id() const { return layer_conf_.partition_id(); }
/**
* @return total number of partitions (i.e., sub-layers) into which the
* original layer of this layer was split.
*/
inline int num_partitions() const { return layer_conf_.num_partitions(); }
/**
* @return the type of this layer; only valid for built-in layer types.
*/
inline LayerType type() const { return layer_conf_.type(); }
/**
* @return user-defined layer type.
*/
inline const std::string& user_type() const {
return layer_conf_.user_type();
}
/**
* @return name of this layer.
*/
inline const std::string& name() const { return layer_conf_.name(); }
/**
* @return the index of the unrolled layer within the unrolling group, which
* is in [0, max_unrolling_length).
*/
inline int unroll_index() const { return layer_conf_.unroll_index(); }
/**
* @return a const ref for Blob vector storing feature values of this layer.
*/
virtual const vector<Blob<float>*>& data() {
return datavec_;
}
/**
* @param[in] from pointer to one of the dst layers. Some layers have more
* than one data Blob; in that case, this argument identifies the dst layer
* that is requesting the data Blob.
* @return a const ref for Blob storing feature values of this layer.
* @deprecated {This function will be deleted, use
* virtual const vector<Blob<float>>& data() const or
* virtual const Blob<float>& data(int k) const instead}.
*/
virtual const Blob<float>& data(const Layer* from) {
return data_;
}
/**
* @return a const ref for the kth Blob.
* TODO(wangwei) if make this function const, there will be a warning
* indicating that data(const Layer*) and this function are ambiguous for
* data(0).
*/
virtual const Blob<float>& data(int k) {
return *datavec_.at(k);
}
/**
* @see data().
* @return the pointer to the Blob storing feature values of this layer.
* @deprecated {This function will be deleted, use
* virtual Blob<float>* mutable_data(int k) instead}.
*/
virtual Blob<float>* mutable_data(const Layer* from) {
return &data_;
}
/**
* @return the pointer to the kth Blob.
*/
virtual Blob<float>* mutable_data(int k) {
return datavec_.at(k);
}
/**
* @return auxiliary data, e.g., image label.
*/
virtual const vector<AuxType>& aux_data(const Layer* from = nullptr) {
return aux_data_;
}
/**
* @see data().
* @return the const ref of the Blob for the gradient of this layer, mainly
* used in BP algorithm.
* @deprecated {This function will be deleted, use
* virtual const vector<Blob<float>>& grad() const or
* virtual const Blob<float>& grad(int k) const instead}.
*/
virtual const Blob<float>& grad(const Layer* from) {
return grad_;
}
/**
* @see data().
* @return the const ref of the Blob vector for the gradient of this layer.
*/
virtual const vector<Blob<float>*>& grad() const {
return gradvec_;
}
/**
* @return the const ref of the kth Blob for the gradient of this layer.
*/
virtual const Blob<float>& grad(int k) const {
return *gradvec_.at(k);
}
/**
* @see data().
* @return a pointer to the Blob storing gradients of this layer, mainly
* used in BP algorithm.
*/
virtual Blob<float>* mutable_grad(const Layer* from) {
return &grad_;
}
/**
* @see data().
* @return a pointer to the kth Blob storing gradients of this layer, mainly
* used in BP algorithm.
*/
virtual Blob<float>* mutable_grad(int k) {
return gradvec_.at(k);
}
protected:
LayerProto layer_conf_;
Blob<float> data_, grad_;
vector<AuxType> aux_data_;
vector<Blob<float>*> datavec_, gradvec_;
};
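// A minimal sketch of a user-defined layer (hypothetical IdentityLayer; it
// assumes Blob provides ReshapeLike() and CopyFrom(), as in the Caffe-derived
// Blob API):
//
//   class IdentityLayer : public Layer {
//    public:
//     void Setup(const LayerProto& conf,
//                const vector<Layer*>& srclayers) override {
//       Layer::Setup(conf, srclayers);
//       // Match the shape of the (single) source layer's feature Blob.
//       data_.ReshapeLike(srclayers[0]->data(this));
//       grad_.ReshapeLike(data_);
//     }
//     void ComputeFeature(int flag, const vector<Layer*>& srclayers) override {
//       // Forward pass: copy the source feature unchanged.
//       data_.CopyFrom(srclayers[0]->data(this));
//     }
//     void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {
//       // Backward pass: pass the gradient through to the source layer.
//       srclayers[0]->mutable_grad(this)->CopyFrom(grad_);
//     }
//   };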
/**************** Layer categories *****************/
/**
* Base layer for connecting layers when neural net is partitioned.
*/
class ConnectionLayer : virtual public Layer {
// defined as a layer category
};
/**
* Base layer for getting input data, e.g., layers that load and parse
* records.
*/
class InputLayer : virtual public Layer {
public:
void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {}
ConnectionType dst_layer_connection() const override { return kOneToMany; }
Blob<float>* mutable_grad(const Layer* layer) override {
return nullptr;
// LOG(FATAL) << "Input layer has no gradient blob";
}
const Blob<float>& grad(const Layer* from) override {
return grad_;
// LOG(FATAL) << "Input layer has no gradient blob";
}
};
using SingleLabelImageRecord = RecordProto;
/**
* Base layer for feature transformation, e.g., ConvolutionLayer, PoolingLayer,
* etc.
*/
class NeuronLayer : virtual public Layer {
// defined as a layer category
};
/**
* Base layer for calculating loss and doing BackPropagation.
*/
class LossLayer : virtual public Layer {
public:
Blob<float>* mutable_grad(const Layer* layer) override {
return nullptr;
// LOG(FATAL) << "Loss layer has no gradient blob";
}
const Blob<float>& grad(const Layer* from) override {
return grad_;
// LOG(FATAL) << "Loss layer has no gradient blob";
}
};
/**
* Base layer for collecting features into disk files, HTTP streams, etc.
*/
class OutputLayer : virtual public Layer {
public:
void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {}
Blob<float>* mutable_grad(const Layer* layer) override {
return nullptr;
// LOG(FATAL) << "Output layer has no gradient blob";
}
const Blob<float>& grad(const Layer* from) override {
return grad_;
// LOG(FATAL) << "Output layer has no gradient blob";
}
};
} // namespace singa
#endif // SINGA_NEURALNET_LAYER_H_