/************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
#ifndef EXAMPLES_RNNLM_RNNLM_H_
#define EXAMPLES_RNNLM_RNNLM_H_
#include <string>
#include <vector>
#include "singa/singa.h"
#include "./rnnlm.pb.h"
namespace rnnlm {
using std::vector;
using singa::LayerProto;
using singa::Layer;
using singa::Param;
using singa::Blob;
using singa::Metric;
/**
 * Base RNN layer. It may be moved into SINGA as a common base layer.
*/
class RNNLayer : virtual public singa::Layer {
 public:
  /**
   * Recurrent layers may be unrolled a different number of times in
   * different iterations, depending on the application. For example, the
   * ending word of a sentence may stop the unrolling early; unrolling also
   * stops once the max window size is reached. Every subclass must reset
   * window_ in its ComputeFeature function.
   *
   * @return the effective BPTT length, which is <= max_window.
   */
  inline int window() { return window_; }

 protected:
  //!< effective window size for BPTT
  int window_;
};
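/*
 * A minimal sketch (not part of the original example) of how a subclass
 * could reset window_ at the start of ComputeFeature. The names
 * MyRNNLayer, max_window_, words_ and kEOS are hypothetical:
 *
 *   void MyRNNLayer::ComputeFeature(int flag,
 *                                   const vector<Layer*>& srclayers) {
 *     window_ = max_window_;       // default: unroll to the max window
 *     for (int t = 0; t < max_window_; ++t) {
 *       if (words_[t] == kEOS) {   // sentence ended before the window did
 *         window_ = t + 1;         // shrink the effective BPTT length
 *         break;
 *       }
 *     }
 *     // compute features only for steps [0, window_) ...
 *   }
 */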
/**
 * Input layer that reads word records from the data store.
*/
class DataLayer : public RNNLayer, public singa::InputLayer {
 public:
  ~DataLayer();
  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
  int max_window() const {
    return max_window_;
  }

 private:
  int max_window_;
  singa::io::Store* store_ = nullptr;
};
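/*
 * Sketch (not part of the original example) of how DataLayer::ComputeFeature
 * could fill one window of records, assuming singa::io::Store offers a
 * Read(&key, &val) style interface and that the record message from
 * rnnlm.pb.h carries the word index; records_ and kEOS are illustrative:
 *
 *   std::string key, val;
 *   int t = 0;
 *   while (t < max_window_ && store_->Read(&key, &val)) {
 *     records_[t].ParseFromString(val);
 *     ++t;
 *     if (records_[t - 1].word_index() == kEOS) break;  // end of sentence
 *   }
 *   window_ = t;  // effective BPTT length for this iteration
 */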
/**
 * LabelLayer that reads records_[1] to records_[window_] from DataLayer to
 * provide label information.
class LabelLayer : public RNNLayer {
 public:
  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {}
};
*/
/**
 * Word embedding layer that fetches one row of the embedding matrix for
 * each word, based on the word's index.
*/
class EmbeddingLayer : public RNNLayer {
 public:
  ~EmbeddingLayer();
  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
  const std::vector<Param*> GetParams() const override {
    std::vector<Param*> params{embed_};
    return params;
  }

 private:
  int word_dim_;
  int vocab_size_;
  //!< word embedding matrix of size vocab_size_ x word_dim_
  Param* embed_;
};
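/*
 * Sketch (not part of the original example) of the lookup this layer
 * performs: copy one row of the vocab_size_ x word_dim_ table per input
 * word. The blob accessors and word_index below are illustrative:
 *
 *   const float* table = embed_->data().cpu_data();
 *   float* out = feature_blob->mutable_cpu_data();
 *   for (int t = 0; t < window_; ++t) {
 *     int w = word_index[t];  // index of the t-th input word
 *     memcpy(out + t * word_dim_, table + w * word_dim_,
 *            word_dim_ * sizeof(float));
 *   }
 */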
/**
 * Recurrent hidden layer: hid[t] = sigmoid(hid[t-1] * W + src[t])
*/
class HiddenLayer : public RNNLayer {
 public:
  ~HiddenLayer();
  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
  const std::vector<Param*> GetParams() const override {
    std::vector<Param*> params{weight_};
    return params;
  }

 private:
  Param* weight_;
};
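/*
 * Sketch (not part of the original example) of the forward recurrence
 * documented above, hid[t] = sigmoid(hid[t-1] * W + src[t]), written with
 * plain loops and illustrative names (dim = hidden dimension); the real
 * code would use SINGA's blob and math routines:
 *
 *   for (int t = 0; t < window_; ++t) {
 *     for (int j = 0; j < dim; ++j) {
 *       float s = src[t][j];
 *       if (t > 0)
 *         for (int i = 0; i < dim; ++i)
 *           s += hid[t - 1][i] * W[i][j];
 *       hid[t][j] = 1.0f / (1.0f + expf(-s));  // sigmoid
 *     }
 *   }
 */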
/**
 * Loss layer using a class-based (factorized) softmax:
 *   p(word at t+1 is from class c) = softmax(src[t] * Wc)[c]
 *   p(w | c) = softmax(src[t] * Ww[Start(c):End(c)])[w - Start(c)]
 *   p(word at t+1 is w) = p(word at t+1 is from class c) * p(w | c)
 */
class LossLayer : public RNNLayer {
 public:
  ~LossLayer();
  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
  const std::string ToString(bool debug, int flag) override;
  const std::vector<Param*> GetParams() const override {
    std::vector<Param*> params{word_weight_, class_weight_};
    return params;
  }

 private:
  std::vector<Blob<float>> pword_;
  Blob<float> pclass_;
  Param* word_weight_, *class_weight_;
  float loss_, ppl_;
  int num_;
};
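/*
 * Sketch (not part of the original example) of evaluating the factorized
 * probability above for a target word w in class c at one step; pclass and
 * pword hold the two softmax distributions, and the names are illustrative:
 *
 *   // pclass = softmax(src[t] * class_weight_), one entry per class
 *   // pword  = softmax(src[t] * word_weight_[Start(c):End(c)])
 *   float p = pclass[c] * pword[w - start_of_class_c];
 *   loss_ -= logf(p);  // accumulate negative log-likelihood
 *   ++num_;            // perplexity is derived from loss_ / num_ in ToString
 */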
} // namespace rnnlm
#endif // EXAMPLES_RNNLM_RNNLM_H_