/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SINGA_MODEL_LAYER_H_
#define SINGA_MODEL_LAYER_H_
#include <vector>
#include <string>
#include <stack>
#include <utility>
#include <memory>
#include "singa/core/tensor.h"
#include "singa/proto/model.pb.h"
#include "singa/utils/factory.h"
namespace singa {
typedef vector<size_t> Shape;
/// The base layer class.
/// Generally, a layer conducts feature transformation against a set of Tensor
/// to generate a set of Tensor. Each layer may have some parameters.
class Layer {
public:
Layer() = default;
/// Set meta data fields from a string representing a proto message.
/// 'in_shape' is the shape of the input feature for one sample
void Setup(const Shape& in_shape, const string& proto_str) {
LayerConf conf;
conf.ParseFromString(proto_str);
this->Setup(in_shape, conf);
}
/// 'in_shapes' contains the shape of each input feature for one sample
void Setup(const vector<Shape>& in_shapes, const string& proto_str) {
LayerConf conf;
conf.ParseFromString(proto_str);
this->Setup(in_shapes, conf);
}
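// Example of setting up a layer from a serialized LayerConf (a minimal usage
// sketch; 'layer' is assumed to point to a concrete subclass instance, and the
// name and the 784-dimensional sample shape are made-up values):
//   LayerConf conf;
//   conf.set_name("dense1");
//   std::string str;
//   conf.SerializeToString(&str);
//   layer->Setup(Shape{784}, str);   // shape of one input sample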
// ============= Following functions can be overridden =====================
/// Destruct objects created by this layer.
virtual ~Layer() {};
/// Each layer sub-class may optionally have a type name.
/// Used for debugging and logging.
virtual const std::string layer_type() const { return "Unknown"; }
/// Set meta data fields configured in 'conf' (a proto message).
/// Some layers use the input tensor shape to set their parameter shapes
/// (e.g., dense and convolution layers). 'in_sample' provides such shape
/// info; it represents the shape of the Tensor (for a single sample) from
/// the preceding layer.
/// After calling Setup, the shape info of parameters can be accessed
/// correctly. Internal buffers/fields are set assuming the batchsize is 1.
virtual void Setup(const Shape& in_sample, const LayerConf& conf) {
name_ = conf.name();
// TODO(wangwei) load param values from checkpoint files.
}
/// Used for layers that have multiple input tensors, e.g., concatenate layer.
virtual void Setup(const vector<Shape>& in_samples, const LayerConf& conf) {
name_ = conf.name();
// TODO(wangwei) load param values from checkpoint files.
}
/// Return the shape of the generated Tensor without the batchsize dimension
virtual const Shape GetOutputSampleShape() const {
LOG(FATAL) << "Pls override this function";
return vector<size_t>{};
}
/// Return the shape of the k-th generated tensor without the batchsize
/// dimension. Used for layers that generate multiple tensors.
virtual const Shape GetOutputSampleShape(int k) {
LOG(FATAL) << "Pls override this function";
return vector<size_t>{};
}
/// Do feature transformation for the given 'input' tensor (denoted as x).
/// 'flag' is either kTrain or kEval for feed-forward nets, and may be used
/// for other phases when training other nets. For example, when
/// training RBM, we may create an alias of this function as ComputeFeature
/// where flag could be kPositive and kNegative.
/// It will return a Tensor (denoted as y).
/// If the 'input' or 'output' is required for computing the gradients in
/// Backward(), then buffer them as internal data.
virtual const Tensor Forward(int flag, const Tensor& input) {
LOG(FATAL) << "Not implemented";
Tensor t;
return t;
}
/// \copydoc Forward(int flag, const Tensor& input)
/// Accept multiple input tensors and generate multiple output tensors.
/// If there is only one input tensor, it will call Forward(int, const
/// Tensor&) by default. Users can override this function for layers that
/// generate more than one output.
virtual const vector<Tensor> Forward(int flag, const vector<Tensor>& inputs) {
vector<Tensor> ret;
if (inputs.size() == 1) {
ret.push_back(Forward(flag, inputs.at(0)));
} else {
LOG(FATAL) << "Not implemented";
}
return ret;
}
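// Example forward call (a minimal usage sketch; assumes 'layer' points to a
// set-up subclass instance and kTrain is the training flag mentioned above):
//   Tensor x(Shape{32, 784});        // a batch of 32 samples
//   Tensor y = layer->Forward(kTrain, x);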
/// Compute gradients of this layer.
/// Specifically, there are two types of gradients:
/// 1. gradient of the preceding layer, i.e., dx.
/// 2. gradients of parameters of this layer, e.g., dw for weight matrix.
/// 1 is an empty tensor if there is no preceding layer or there is no need to
/// compute dx (e.g., x is from a data layer); 2 is an empty vector if this
/// layer has no parameters.
/// 'flag' is either kTrain or kEval for feed-forward nets, and
/// would be used for other phases when training other nets.
/// 'grad' is a Tensor for gradient (dy) from the upper layer.
virtual const std::pair<Tensor, vector<Tensor>> Backward(int flag,
const Tensor& grad) {
LOG(FATAL) << "Not implemented!";
Tensor t;
return std::make_pair(t, vector<Tensor>{});
}
/// \copydoc Backward(int, const Tensor&)
/// The counterpart of Forward(int, const vector<Tensor>&).
virtual const std::pair<vector<Tensor>, vector<Tensor>> Backward(
int flag, const vector<Tensor>& grads) {
vector<Tensor> input_grad, param_grad;
if (grads.size() == 1u) {
auto ret = Backward(flag, grads.at(0));
input_grad.push_back(ret.first);
param_grad = ret.second;
} else {
LOG(FATAL) << "Not implemented";
}
return std::make_pair(input_grad, param_grad);
}
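// Example backward call following the forward sketch above (a sketch; 'dy'
// would normally come from the loss or from the layer after this one):
//   Tensor dy(y.shape());
//   auto ret = layer->Backward(kTrain, dy);
//   Tensor dx = ret.first;                // gradient w.r.t. the input x
//   vector<Tensor> dparams = ret.second;  // gradients of this layer's params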
/// Clone the layer to the given device. Layer data (e.g., parameters) are
/// deep copied. If 'device' is nullptr, then clone it on the current device.
// virtual Layer* Clone(std::shared_ptr<Device> device);
/// Move the layer (including its parameters and other internal Tensor) onto
/// the given device
virtual void ToDevice(std::shared_ptr<Device> device) {
}
/// Set the data type of Tensor in this layer.
virtual void AsType(DataType dtype) {
}
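// Example (a sketch; 'cuda_dev' is a hypothetical std::shared_ptr<Device>
// created elsewhere, and kFloat32 is assumed to be a DataType enum value):
//   layer->ToDevice(cuda_dev);
//   layer->AsType(kFloat32);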
/// Serialize the layer info (including params) into a LayerConf proto message
virtual void ToProto(LayerConf* conf) const {
//conf->set_name(name_);
//for (const auto& spec : param_specs_) {
// ParamSpec* p = conf->add_param();
// p->CopyFrom(spec);
//}
// TODO(wangwei) add param values into conf;
}
// ========================================================================
/// Serialize the layer info, including the parameters, into a string
/// representing a LayerConf message.
std::string ToProtoStr() const {
LayerConf conf;
ToProto(&conf);
string str;
conf.SerializeToString(&str);
return str;
}
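// Example round trip (a sketch; note that ToProto must be overridden by the
// subclass for the serialized string to carry more than the default fields):
//   std::string snapshot = layer->ToProtoStr();
//   LayerConf conf;
//   conf.ParseFromString(snapshot);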
/// Return specs/configuration of all parameter instances of this layer.
/// \ref ParamSpec.
const vector<ParamSpec> param_specs() { return param_specs_; }
/// Return the i-th ParamSpec.
const ParamSpec& param_specs(size_t i) {
CHECK_LT(i, param_specs_.size());
return param_specs_.at(i);
}
/// Return the parameter Tensors of this layer.
virtual const vector<Tensor> param_values() {
return vector<Tensor>{};
}
/// Return names of all parameters.
const vector<string> param_names() {
vector<string> pname;
for (const auto& spec : param_specs_) pname.push_back(spec.name());
return pname;
}
/// Return the 'i'-th parameter name.
const string& param_name(size_t i) {
CHECK_LT(i, param_specs_.size());
return param_specs_.at(i).name();
}
/// Each layer instance may optionally have a name.
/// Used for debugging and logging.
const std::string name() const { return name_; }
protected:
std::string name_;
vector<ParamSpec> param_specs_;
};
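// A minimal subclass sketch (for illustration only; "MyReLU" is hypothetical,
// and the element-wise helpers ReLU, operator> and operator* are assumed to be
// those declared in singa/core/tensor.h):
//   class MyReLU : public Layer {
//    public:
//     const std::string layer_type() const override { return "MyReLU"; }
//     void Setup(const Shape& in_sample, const LayerConf& conf) override {
//       Layer::Setup(in_sample, conf);
//       out_sample_ = in_sample;           // ReLU keeps the sample shape
//     }
//     const Shape GetOutputSampleShape() const override { return out_sample_; }
//     const Tensor Forward(int flag, const Tensor& input) override {
//       if (flag == kTrain) buf_ = input;  // buffer x for Backward
//       return ReLU(input);
//     }
//     const std::pair<Tensor, vector<Tensor>> Backward(
//         int flag, const Tensor& grad) override {
//       Tensor dx = grad * (buf_ > 0.0f);  // dx = dy * 1{x > 0}
//       return std::make_pair(dx, vector<Tensor>{});  // no parameter gradients
//     }
//    private:
//     Shape out_sample_;
//     Tensor buf_;
//   };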
/// Name should be formatted as cudnn_xxx, singacpp_xxx, singacuda_xxx,
/// singacl_xxx, where xxx is the real layer type, e.g., convolution, relu, etc.
/// xxx should only have lower case letters.
/// If the implementation is transparent to cpp/cuda/opencl, then register all
/// possible identifiers. For instance, Dropout is registered three times,
/// RegisterLayerClass("singacpp_dropout", Dropout)
/// RegisterLayerClass("singacl_dropout", Dropout)
/// RegisterLayerClass("singacuda_dropout", Dropout)
/// To be compatible with previous commits, the following identifier is also
/// registered. Avoid using it, as it will be deprecated:
/// RegisterLayerClass(singa_dropout, Dropout)
#define RegisterLayerClass(Name, SubLayer) \
static Registra<Layer, SubLayer> Name##SubLayer(#Name);
inline std::shared_ptr<Layer> CreateLayer(const std::string type) {
std::shared_ptr<Layer> layer(Factory<Layer>::Create(type));
return layer;
}
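// Example of registering and creating the hypothetical MyReLU sketched above
// (a sketch; the identifier follows the naming rule documented before the macro):
//   RegisterLayerClass(singacpp_myrelu, MyReLU);
//   auto relu = CreateLayer("singacpp_myrelu");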
inline const std::vector<std::string> GetRegisteredLayers() {
vector<std::string> ret;
for (const string type : Factory<Layer>::GetIDs()) {
auto layer = CreateLayer(type);
ret.push_back("Register type: " + type);
}
return ret;
}
} // namespace singa
#endif // SINGA_MODEL_LAYER_H_