/************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
#ifndef SINGA_NEURALNET_LAYER_H_
#define SINGA_NEURALNET_LAYER_H_
#include <string>
#include <vector>
#include "singa/proto/common.pb.h"
#include "singa/proto/job.pb.h"
#include "singa/utils/common.h"
#include "singa/utils/blob.h"
#include "singa/utils/param.h"
namespace singa {
using std::vector;
using std::string;
// TODO(wangwei) make AuxType a template argument for Layer.
using AuxType = int;
inline const string AddUnrollingPrefix(int unroll_idx, const string& name) {
return std::to_string(unroll_idx) + "#" + name;
}
inline const string AddPartitionSuffix(int partition_idx, const string& name) {
return name + "@" + std::to_string(partition_idx);
}
inline const string AddPrefixSuffix(int unroll_idx, int partition_idx,
const string& name) {
return std::to_string(unroll_idx) + "#" + name + "@" +
std::to_string(partition_idx);
}
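// For illustration, given an original layer named "conv1", the helpers above
// compose names as follows:
//   AddUnrollingPrefix(0, "conv1")    -> "0#conv1"
//   AddPartitionSuffix(1, "conv1")    -> "conv1@1"
//   AddPrefixSuffix(0, 1, "conv1")    -> "0#conv1@1"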
/**
* Base layer class.
*
* Subclasses should implement at least
* Layer::ComputeFeature() and Layer::ComputeGradient()
* functions in accordance with the NeuralNet::TrainOneBatch function.
*/
class Layer {
public:
/**
* Create a sub-layer instance based on proto.type();
*
* @param proto configuration of the layer instance.
* @return pointer to the newly created layer instance.
*/
static Layer* Create(const LayerProto& proto);
Layer() {}
virtual ~Layer() {}
/**
* Create a layer instance from a string configuration; used by the Python
* binding and in production/test mode.
*/
static Layer* CreateLayer(const string str);
static void SetupLayer(Layer* layer, const string str, const vector<Layer*>& srclayers);
/**
* Setup layer properties.
*
* Setup members, e.g., shapes of Param objects, based on the layer
* configuration and connected layers.
* It should check the partition settings when setting up the properties.
*
* @param conf layer configuration.
* @param srclayers source layers that connect to this layer.
*/
virtual void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) {
layer_conf_ = conf;
datavec_.push_back(&data_);
gradvec_.push_back(&grad_);
}
/**
* Compute features of this layer based on connected layers.
*
* @param[in] flag set by the TrainOneBatch function, e.g., to indicate the
* running phase (kForward | kTrain, kForward | kTest, etc.).
* @param[in] srclayers source layers that connect to this layer.
*/
virtual void ComputeFeature(int flag, const vector<Layer*>& srclayers) = 0;
/**
* Compute gradients for parameters associated with this layer.
* It may also compute the gradients of the loss w.r.t. the source layers.
*
* \copydetails ComputeFeature().
*/
virtual void ComputeGradient(int flag, const vector<Layer*>& srclayers) = 0;
/**
* Layers that have parameters must override this function to return all Param
* objects associated with this layer.
*
* @return parameters associated with this layer.
*/
virtual const std::vector<Param*> GetParams() const {
return std::vector<Param*> {};
}
virtual void SetParams(std::vector<Param*>) {}
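// For illustration, a sketch of a typical GetParams() override for a layer
// with weight and bias parameters (weight_ and bias_ are hypothetical Param*
// members of the subclass):
//
//   const std::vector<Param*> GetParams() const override {
//     return std::vector<Param*>{weight_, bias_};
//   }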
/**
* Return the connection type between one neuron of this layer and its source
* layer.
*
* Currently two connection types are supported: kOneToOne and kOneToAll.
* - kOneToOne indicates the neuron depends on only one neuron from the src
* layer.
* - kOneToAll indicates the neuron depends on all neurons from the src layer.
* TODO(wangwei) support kOneToMany.
*
* @param[in] k index of the source layer; currently only k = 0 is supported.
* @return connection type.
*/
virtual ConnectionType src_neuron_connection(int k) const {
// CHECK_LT(k, srclayers_.size());
return kOneToOne;
}
/**
* Return the connection type of this layer and all dst layers.
*
* Currently two connection types are supported: kOneToOne and kOneToMany.
* - kOneToOne indicates the user implements the ComputeFeature and
* ComputeGradient functions considering only one dst layer. In this case,
* a SplitLayer will be added automatically to connect this layer with all
* dst layers.
* - kOneToMany indicates this layer has already considered multiple dst
* layers in the implementation.
*
* @return connection type; the default is kOneToOne.
*/
virtual ConnectionType dst_layer_connection() const {
return kOneToOne;
}
/**
* To display layer info, e.g., aggregated loss/accuracy, or norms of the
* feature vector and parameters.
*
* @param[in] debug whether to print the debug info
* @param[in] flag used to get the calling phase, e.g., forward of training
* (kForward | kTrain).
* @return info string about this layer, which is printed into the log.
*/
virtual const std::string ToString(bool debug, int flag);
/**
* @return partition dimension of this layer,
* - -1 for no partition.
* - 0 for partition on the data dimension, i.e., partitioning the mini-batch
* into sub-mini-batches.
* - 1 for partitioning this layer on the feature dimension, i.e., the feature
* vector of each instance is partitioned into sub-vectors.
*/
inline int partition_dim() const {
CHECK_LE(layer_conf_.partition_dim(), 1);
return layer_conf_.partition_dim();
}
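// For example, for a layer whose feature Blob has shape (batch = 64,
// feature = 100) and num_partitions() == 2:
//   - partition_dim() == 0 yields two sub-layers of shape (32, 100);
//   - partition_dim() == 1 yields two sub-layers of shape (64, 50).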
/**
* @return the partition ID (i.e., the ID of the worker to whom this layer is
* dispatched) of this layer, which is a sub-layer partitioned from the
* original layer.
*/
inline int partition_id() const { return layer_conf_.partition_id(); }
/**
* @return total number of partitions (i.e., sub-layers) into which the
* original layer of this layer was split.
*/
inline int num_partitions() const { return layer_conf_.num_partitions(); }
/**
* @return the type of this layer; only valid for built-in layer types.
*/
inline LayerType type() const { return layer_conf_.type(); }
/**
* @return user-defined layer type.
*/
inline const std::string& user_type() const {
return layer_conf_.user_type();
}
/**
* @return name of this layer.
*/
inline const std::string& name() const { return layer_conf_.name(); }
/**
* @return the index of the unrolled layer within the unrolling group, which
* is in [0, max_unrolling_length).
*/
inline int unroll_index() const { return layer_conf_.unroll_index(); }
/**
* @return a const ref for Blob vector storing feature values of this layer.
*/
virtual const vector<Blob<float>*>& data() {
return datavec_;
}
/**
* @param[in] from pointer to one of the dst layers. Some layers have more
* than one data Blob; in that case, this argument identifies the dst layer
* that is requesting the data Blob.
* @return a const ref for Blob storing feature values of this layer.
* @deprecated {This function will be deleted, use
* virtual const vector<Blob<float>>& data() const or
* virtual const Blob<float>& data(int k) const instead}.
*/
virtual const Blob<float>& data(const Layer* from) {
return data_;
}
/**
* @return a const ref for the kth Blob.
* TODO(wangwei) if make this function const, there will be a warning
* indicating that data(const Layer*) and this function are ambiguous for
* data(0).
*/
virtual const Blob<float>& data(int k) {
return *datavec_.at(k);
}
/**
* @see data().
* @return the pointer to the Blob storing feature values of this layer.
* @deprecated {This function will be deleted, use
* virtual Blob<float>* mutable_data(int k) instead}.
*/
virtual Blob<float>* mutable_data(const Layer* from) {
return &data_;
}
/**
* @return the pointer to the kth Blob.
*/
virtual Blob<float>* mutable_data(int k) {
return datavec_.at(k);
}
/**
* @return auxiliary data, e.g., image label.
*/
virtual const vector<AuxType>& aux_data(const Layer* from = nullptr) {
return aux_data_;
}
/**
* @see data().
* @return the const ref of the Blob for the gradient of this layer, mainly
* used in BP algorithm.
* @deprecated {This function will be deleted, use
* virtual const vector<Blob<float>>& grad() const or
* virtual const Blob<float>& grad(int k) const instead}.
*/
virtual const Blob<float>& grad(const Layer* from) {
return grad_;
}
/**
* @see data().
* @return the const ref of the Blob vector for the gradient of this layer.
*/
virtual const vector<Blob<float>*>& grad() const {
return gradvec_;
}
/**
* @return the const ref of the kth Blob for the gradient of this layer.
*/
virtual const Blob<float>& grad(int k) const {
return *gradvec_.at(k);
}
/**
* @see data().
* @return a pointer to the Blob storing gradients of this layer, mainly
* used in BP algorithm.
*/
virtual Blob<float>* mutable_grad(const Layer* from) {
return &grad_;
}
/**
* @see data().
* @return a pointer to the kth Blob storing gradients of this layer, mainly
* used in BP algorithm.
*/
virtual Blob<float>* mutable_grad(int k) {
return gradvec_.at(k);
}
protected:
LayerProto layer_conf_;
Blob<float> data_, grad_;
vector<AuxType> aux_data_;
vector<Blob<float>*> datavec_, gradvec_;
};
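// A minimal sketch of a user-defined layer (hypothetical IdentityLayer; it
// assumes Blob provides ReshapeLike() and CopyFrom(), as in the Caffe-derived
// Blob API):
//
//   class IdentityLayer : public Layer {
//    public:
//     void Setup(const LayerProto& conf,
//                const vector<Layer*>& srclayers) override {
//       Layer::Setup(conf, srclayers);
//       // Match the shape of the (single) source layer's feature Blob.
//       data_.ReshapeLike(srclayers[0]->data(this));
//       grad_.ReshapeLike(data_);
//     }
//     void ComputeFeature(int flag, const vector<Layer*>& srclayers) override {
//       // Forward pass: copy the source feature unchanged.
//       data_.CopyFrom(srclayers[0]->data(this));
//     }
//     void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {
//       // Backward pass: pass the gradient through to the source layer.
//       srclayers[0]->mutable_grad(this)->CopyFrom(grad_);
//     }
//   };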
/**************** Layer categories *****************/
/**
* Base layer for connecting layers when neural net is partitioned.
*/
class ConnectionLayer : virtual public Layer {
// defined as a layer category
};
/**
* Base layer for getting input data, e.g., layers that load and parse
* records.
*/
class InputLayer : virtual public Layer {
public:
void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {}
ConnectionType dst_layer_connection() const override { return kOneToMany; }
Blob<float>* mutable_grad(const Layer* layer) override {
return nullptr;
// LOG(FATAL) << "Input layer has no gradient blob";
}
const Blob<float>& grad(const Layer* from) override {
return grad_;
// LOG(FATAL) << "Input layer has no gradient blob";
}
};
using SingleLabelImageRecord = RecordProto;
/**
* Base layer for feature transformation, e.g., ConvolutionLayer, PoolingLayer,
* etc.
*/
class NeuronLayer : virtual public Layer {
// defined as a layer category
};
/**
* Base layer for calculating loss and doing BackPropagation.
*/
class LossLayer : virtual public Layer {
public:
Blob<float>* mutable_grad(const Layer* layer) override {
return nullptr;
// LOG(FATAL) << "Loss layer has no gradient blob";
}
const Blob<float>& grad(const Layer* from) override {
return grad_;
// LOG(FATAL) << "Loss layer has no gradient blob";
}
};
/**
* Base layer for collecting features into disk files, HTTP streams, etc.
*/
class OutputLayer : virtual public Layer {
public:
void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {}
Blob<float>* mutable_grad(const Layer* layer) override {
return nullptr;
// LOG(FATAL) << "Output layer has no gradient blob";
}
const Blob<float>& grad(const Layer* from) override {
return grad_;
// LOG(FATAL) << "Output layer has no gradient blob";
}
};
} // namespace singa
#endif // SINGA_NEURALNET_LAYER_H_