blob: e777bf0983ff42e6df926de15ec8c6b12b942e91 [file] [log] [blame]
#include "lm/interpolate/split_worker.hh"
#include "lm/common/ngram.hh"
namespace lm {
namespace interpolate {
SplitWorker::SplitWorker(std::size_t order, util::stream::Chain &backoff_chain,
util::stream::Chain &sort_chain)
: order_(order) {
backoff_chain >> backoff_input_;
sort_chain >> sort_input_;
}
void SplitWorker::Run(const util::stream::ChainPosition &position) {
// input: ngram record (id, prob, and backoff)
// output: a float to the backoff_input stream
// an ngram id and a float to the sort_input stream
for (util::stream::Stream stream(position); stream; ++stream) {
NGram<ProbBackoff> ngram(stream.Get(), order_);
// write id and prob to the sort stream
float prob = ngram.Value().prob;
lm::WordIndex *out = reinterpret_cast<lm::WordIndex *>(sort_input_.Get());
for (const lm::WordIndex *it = ngram.begin(); it != ngram.end(); ++it) {
*out++ = *it;
}
*reinterpret_cast<float *>(out) = prob;
++sort_input_;
// write backoff to the backoff output stream
float boff = ngram.Value().backoff;
*reinterpret_cast<float *>(backoff_input_.Get()) = boff;
++backoff_input_;
}
sort_input_.Poison();
backoff_input_.Poison();
}
}
}