// src/joshua/decoder/ff/lm/kenlm/lm/virtual_interface.hh - joshua - Git at Google

 #ifndef LM_VIRTUAL_INTERFACE_H
 #define LM_VIRTUAL_INTERFACE_H

 #include "lm/return.hh"
 #include "lm/word_index.hh"
 #include "util/string_piece.hh"

 #include <string>
 #include <string.h>

 namespace lm {
 namespace base {

 template <class T, class U, class V> class ModelFacade;

 /* Vocabulary interface.  Call Index(string) and get a word index for use in
  * calling Model.  It provides faster convenience functions for <s>, </s>, and
  * <unk> although you can also find these using Index.
  *
  * Some models do not load the mapping from index to string.  If you need this,
  * check if the model Vocabulary class implements such a function and access it
  * directly.
  *
  * The Vocabulary object is always owned by the Model and can be retrieved from
  * the Model using BaseVocabulary() for this abstract interface or
  * GetVocabulary() for the actual implementation (in which case you'll need the
  * actual implementation of the Model too).
  */
 class Vocabulary {
   public:
     virtual ~Vocabulary();

     // Accessors for the stored special-word indices.
     WordIndex BeginSentence() const {
       return begin_sentence_;
     }
     WordIndex EndSentence() const {
       return end_sentence_;
     }
     WordIndex NotFound() const {
       return not_found_;
     }

     /* Word lookup.  Only the StringPiece overload is pure virtual; the
      * std::string and C-string overloads wrap their argument in a StringPiece
      * and forward to it, so most implementations override just that one.
      * SRI requires null termination and overrides all three methods.
      */
     virtual WordIndex Index(const StringPiece &str) const = 0;
     virtual WordIndex Index(const std::string &str) const {
       const StringPiece piece(str);
       return Index(piece);
     }
     virtual WordIndex Index(const char *str) const {
       const StringPiece piece(str);
       return Index(piece);
     }

   protected:
     // Leaves the special-word indices unset; call SetSpecial afterward.
     Vocabulary() {}

     // Convenience constructor that hands the three indices straight to
     // SetSpecial.
     Vocabulary(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found) {
       SetSpecial(begin_sentence, end_sentence, not_found);
     }

     // Defined out of line.
     void SetSpecial(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found);

     // Values returned by BeginSentence(), EndSentence() and NotFound().
     WordIndex begin_sentence_;
     WordIndex end_sentence_;
     WordIndex not_found_;

   private:
     // Copying is disabled: both operations are declared private and never
     // defined (an ersatz boost::noncopyable).
     Vocabulary(const Vocabulary &);
     Vocabulary &operator=(const Vocabulary &);
 };

 /* There are two ways to access a Model.
  *
  *
  * OPTION 1: Access the Model directly (e.g. lm::ngram::Model in model.hh).
  *
  * Every Model implements the scoring function:
  * float Score(
  *   const Model::State &in_state,
  *   const WordIndex new_word,
  *   Model::State &out_state) const;
  *
  * It can also return the length of n-gram matched by the model:
  * FullScoreReturn FullScore(
  *   const Model::State &in_state,
  *   const WordIndex new_word,
  *   Model::State &out_state) const;
  *
  *
  * There are also accessor functions:
  * const State &BeginSentenceState() const;
  * const State &NullContextState() const;
  * const Vocabulary &GetVocabulary() const;
  * unsigned int Order() const;
  *
  * NB: In case you're wondering why the model implementation looks like it's
  * missing these methods, see facade.hh.
  *
  * This is the fastest way to use a model and presents a normal State class to
  * be included in a hypothesis state structure.
  *
  *
  * OPTION 2: Use the virtual interface below.
  *
  * The virtual interface allows you to decide which Model to use at runtime
  * without templatizing everything on the Model type.  However, each Model has
  * its own State class, so a single State cannot be efficiently provided (it
  * would require using the maximum memory of any Model's State or memory
  * allocation with each lookup).  This means you become responsible for
  * allocating memory with size StateSize() and passing it to the Score or
  * FullScore functions provided here.
  *
  * For example, cdec has a std::string containing the entire state of a
  * hypothesis.  It can reserve StateSize bytes in this string for the model
  * state.
  *
  * All the State objects are POD, so it's ok to use raw memory for storing
  * State.
  * in_state and out_state must not have the same address.
  */
 class Model {
   public:
     virtual ~Model();

     size_t StateSize() const { return state_size_; }
     const void *BeginSentenceMemory() const { return begin_sentence_memory_; }
     void BeginSentenceWrite(void *to) const { memcpy(to, begin_sentence_memory_, StateSize()); }
     const void *NullContextMemory() const { return null_context_memory_; }
     void NullContextWrite(void *to) const { memcpy(to, null_context_memory_, StateSize()); }

     // Requires in_state != out_state
     virtual float BaseScore(const void *in_state, const WordIndex new_word, void *out_state) const = 0;

     // Requires in_state != out_state
     virtual FullScoreReturn BaseFullScore(const void *in_state, const WordIndex new_word, void *out_state) const = 0;

     // Prefer to use FullScore.  The context words should be provided in reverse order.
     virtual FullScoreReturn BaseFullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, void *out_state) const = 0;

     unsigned char Order() const { return order_; }

     const Vocabulary &BaseVocabulary() const { return *base_vocab_; }

   private:
     template <class T, class U, class V> friend class ModelFacade;
     explicit Model(size_t state_size) : state_size_(state_size) {}

     const size_t state_size_;
     const void *begin_sentence_memory_, *null_context_memory_;

     const Vocabulary *base_vocab_;

     unsigned char order_;

     // Disable copy constructors.  They're private and undefined.
     // Ersatz boost::noncopyable.
     Model(const Model &);
     Model &operator=(const Model &);
 };

 } // namespace base
 } // namespace lm

 #endif // LM_VIRTUAL_INTERFACE_H
	#ifndef LM_VIRTUAL_INTERFACE_H
	#define LM_VIRTUAL_INTERFACE_H

	#include "lm/return.hh"
	#include "lm/word_index.hh"
	#include "util/string_piece.hh"

	#include <string>
	#include <string.h>

	namespace lm {
	namespace base {

	template <class T, class U, class V> class ModelFacade;

	/* Vocabulary interface. Call Index(string) and get a word index for use in
	* calling Model. It provides faster convenience functions for <s>, </s>, and
	* <unk> although you can also find these using Index.
	*
	* Some models do not load the mapping from index to string. If you need this,
	* check if the model Vocabulary class implements such a function and access it
	* directly.
	*
	* The Vocabulary object is always owned by the Model and can be retrieved from
	* the Model using BaseVocabulary() for this abstract interface or
	* GetVocabulary() for the actual implementation (in which case you'll need the
	* actual implementation of the Model too).
	*/
	class Vocabulary {
	  public:
	    virtual ~Vocabulary();

	    // Accessors for the stored special-word indices.
	    WordIndex BeginSentence() const {
	      return begin_sentence_;
	    }
	    WordIndex EndSentence() const {
	      return end_sentence_;
	    }
	    WordIndex NotFound() const {
	      return not_found_;
	    }

	    /* Word lookup. Only the StringPiece overload is pure virtual; the
	     * std::string and C-string overloads wrap their argument in a
	     * StringPiece and forward to it, so most implementations override just
	     * that one. SRI requires null termination and overrides all three
	     * methods.
	     */
	    virtual WordIndex Index(const StringPiece &str) const = 0;
	    virtual WordIndex Index(const std::string &str) const {
	      const StringPiece piece(str);
	      return Index(piece);
	    }
	    virtual WordIndex Index(const char *str) const {
	      const StringPiece piece(str);
	      return Index(piece);
	    }

	  protected:
	    // Leaves the special-word indices unset; call SetSpecial afterward.
	    Vocabulary() {}

	    // Convenience constructor that hands the three indices straight to
	    // SetSpecial.
	    Vocabulary(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found) {
	      SetSpecial(begin_sentence, end_sentence, not_found);
	    }

	    // Defined out of line.
	    void SetSpecial(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found);

	    // Values returned by BeginSentence(), EndSentence() and NotFound().
	    WordIndex begin_sentence_;
	    WordIndex end_sentence_;
	    WordIndex not_found_;

	  private:
	    // Copying is disabled: both operations are declared private and never
	    // defined (an ersatz boost::noncopyable).
	    Vocabulary(const Vocabulary &);
	    Vocabulary &operator=(const Vocabulary &);
	};

	/* There are two ways to access a Model.
	*
	*
	* OPTION 1: Access the Model directly (e.g. lm::ngram::Model in model.hh).
	*
	* Every Model implements the scoring function:
	* float Score(
	* const Model::State &in_state,
	* const WordIndex new_word,
	* Model::State &out_state) const;
	*
	* It can also return the length of n-gram matched by the model:
	* FullScoreReturn FullScore(
	* const Model::State &in_state,
	* const WordIndex new_word,
	* Model::State &out_state) const;
	*
	*
	* There are also accessor functions:
	* const State &BeginSentenceState() const;
	* const State &NullContextState() const;
	* const Vocabulary &GetVocabulary() const;
	* unsigned int Order() const;
	*
	* NB: In case you're wondering why the model implementation looks like it's
	* missing these methods, see facade.hh.
	*
	* This is the fastest way to use a model and presents a normal State class to
	* be included in a hypothesis state structure.
	*
	*
	* OPTION 2: Use the virtual interface below.
	*
	* The virtual interface allows you to decide which Model to use at runtime
	* without templatizing everything on the Model type. However, each Model has
	* its own State class, so a single State cannot be efficiently provided (it
	* would require using the maximum memory of any Model's State or memory
	* allocation with each lookup). This means you become responsible for
	* allocating memory with size StateSize() and passing it to the Score or
	* FullScore functions provided here.
	*
	* For example, cdec has a std::string containing the entire state of a
	* hypothesis. It can reserve StateSize bytes in this string for the model
	* state.
	*
	* All the State objects are POD, so it's ok to use raw memory for storing
	* State.
	* in_state and out_state must not have the same address.
	*/
	class Model {
	public:
	virtual ~Model();

	size_t StateSize() const { return state_size_; }
	const void *BeginSentenceMemory() const { return begin_sentence_memory_; }
	void BeginSentenceWrite(void *to) const { memcpy(to, begin_sentence_memory_, StateSize()); }
	const void *NullContextMemory() const { return null_context_memory_; }
	void NullContextWrite(void *to) const { memcpy(to, null_context_memory_, StateSize()); }

	// Requires in_state != out_state
	virtual float BaseScore(const void in_state, const WordIndex new_word, void out_state) const = 0;

	// Requires in_state != out_state
	virtual FullScoreReturn BaseFullScore(const void in_state, const WordIndex new_word, void out_state) const = 0;

	// Prefer to use FullScore. The context words should be provided in reverse order.
	virtual FullScoreReturn BaseFullScoreForgotState(const WordIndex context_rbegin, const WordIndex context_rend, const WordIndex new_word, void *out_state) const = 0;

	unsigned char Order() const { return order_; }

	const Vocabulary &BaseVocabulary() const { return *base_vocab_; }

	private:
	template <class T, class U, class V> friend class ModelFacade;
	explicit Model(size_t state_size) : state_size_(state_size) {}

	const size_t state_size_;
	const void begin_sentence_memory_, null_context_memory_;

	const Vocabulary *base_vocab_;

	unsigned char order_;

	// Disable copy constructors. They're private and undefined.
	// Ersatz boost::noncopyable.
	Model(const Model &);
	Model &operator=(const Model &);
	};

	} // namespace base
	} // namespace lm

	#endif // LM_VIRTUAL_INTERFACE_H