#include "lm/value_build.hh"
#include "lm/model.hh"
#include "lm/read_arpa.hh"
namespace lm {
namespace ngram {
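
// Builds lower-order rest costs from separate ARPA files, one per order below
// the top.  The unigram file is read directly into unigrams_; files for
// orders 2 through order - 1 are loaded as full models.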
template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) {
  UTIL_THROW_IF(config.rest_lower_files.size() != order - 1, ConfigException, "This model has order " << order << " so there should be " << (order - 1) << " lower-order models for rest cost purposes.");
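  // The lower-order models get a copy of the config that neither writes a
  // binary file nor requests rest-cost models of its own.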
  Config for_lower = config;
  for_lower.write_mmap = NULL;
  for_lower.rest_lower_files.clear();

  // Unigram models aren't supported, so this is a custom loader.
  // TODO: optimize the unigram loading?
  {
    util::FilePiece uni(config.rest_lower_files[0].c_str());
    std::vector<uint64_t> number;
    ReadARPACounts(uni, number);
    UTIL_THROW_IF(number.size() != 1, FormatLoadException, "Expected the unigram model to have order 1, not " << number.size());
    ReadNGramHeader(uni, 1);
    unigrams_.resize(number[0]);
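    // Word index 0 is <unk>.  Start it at the configured default in case the
    // unigram file does not list <unk>.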
    unigrams_[0] = config.unknown_missing_logprob;
    PositiveProbWarn warn;
    for (uint64_t i = 0; i < number[0]; ++i) {
      WordIndex w;
      Prob entry;
      ReadNGram(uni, 1, vocab, &w, entry, warn);
      unigrams_[w] = entry.prob;
    }
  }
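
  // Load the full models for orders 2 through order - 1, deleting any that
  // were already constructed if a later one fails to load.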
  try {
    for (unsigned int i = 2; i < order; ++i) {
      models_.push_back(new Model(config.rest_lower_files[i - 1].c_str(), for_lower));
      UTIL_THROW_IF(models_.back()->Order() != i, FormatLoadException, "Lower order file " << config.rest_lower_files[i - 1] << " should have order " << i);
    }
  } catch (...) {
    for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
      delete *i;
    }
    models_.clear();
    throw;
  }
  // TODO: force/check same vocab.
}
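
// The lower-order models are owned by this object, so delete them here.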
template <class Model> LowerRestBuild<Model>::~LowerRestBuild() {
  for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
    delete *i;
  }
}
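
// A minimal usage sketch, not taken from this file: the file names and the
// order 3 below are hypothetical, and in practice construction happens inside
// the rest-cost model's own initialization.
//   Config config;
//   config.rest_lower_files.push_back("uni.arpa");  // order 1, read into unigrams_
//   config.rest_lower_files.push_back("bi.arpa");   // order 2, loaded as a full model
//   // LowerRestBuild<ProbingModel> build(config, 3, model.GetVocabulary());

// Explicit instantiation for the probing model, the only model type this
// builder is instantiated for here.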
template class LowerRestBuild<ProbingModel>;
} // namespace ngram
} // namespace lm