// blob: 604fa22e6a1662797c235573f8aab0485b474b2b [file] [log] [blame]
#include "lm/builder/output.hh"
#include "lm/common/model_buffer.hh"
#include "lm/common/print.hh"
#include "util/file_stream.hh"
#include "util/stream/multi_stream.hh"
#include <iostream>
namespace lm { namespace builder {
// Out-of-line definition of the OutputHook destructor; it performs no work of
// its own.
// NOTE(review): presumably declared virtual in output.hh, since hooks are held
// in a boost::ptr_vector<OutputHook> and invoked through the base type --
// confirm against the header.
OutputHook::~OutputHook() {}
// Constructs an Output whose only explicit initialization is forwarding all
// three arguments, unchanged, to the buffer_ member.
//
// file_base   - passed to buffer_; presumably the base path for the
//               intermediate model files (confirm in model_buffer.hh).
// keep_buffer - passed to buffer_; later queried through buffer_.Keep().
// output_q    - passed to buffer_; meaning is defined by the buffer type.
Output::Output(StringPiece file_base, bool keep_buffer, bool output_q)
: buffer_(file_base, keep_buffer, output_q) {}
// Terminates the probability chains and runs the registered output hooks.
//
// Steps, in order:
//   1. Every PROB_PARALLEL_HOOK is attached to `chains`.
//   2. If the buffer is kept or at least one PROB_SEQUENTIAL_HOOK exists, the
//      chains are routed through buffer_.Sink() before being recycled and
//      waited on with Wait(false). Otherwise the chains are just recycled,
//      waited on with Wait(true), and the function returns.
//   3. When sequential hooks exist, a progress banner is written to stderr,
//      the chains are re-fed from buffer_.Source(), the sequential hooks are
//      attached, and the chains are recycled and waited on with Wait(true).
//
// NOTE(review): the boolean given to Wait() presumably selects whether the
// chains' memory is released afterwards -- confirm in util/stream.
void Output::SinkProbs(util::stream::Chains &chains) {
  Apply(PROB_PARALLEL_HOOK, chains);

  if (buffer_.Keep() || Have(PROB_SEQUENTIAL_HOOK)) {
    // Somebody needs the data again later: send it through the buffer first.
    buffer_.Sink(chains, header_.counts_pruned);
    chains >> util::stream::kRecycle;
    chains.Wait(false);
  } else {
    // Nothing downstream wants the data; finish the chains right here.
    chains >> util::stream::kRecycle;
    chains.Wait(true);
    return;
  }

  if (!Have(PROB_SEQUENTIAL_HOOK)) {
    return;
  }

  // Sequential phase: replay the buffered data through the sequential hooks.
  std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl;
  buffer_.Source(chains);
  Apply(PROB_SEQUENTIAL_HOOK, chains);
  chains >> util::stream::kRecycle;
  chains.Wait(true);
}
// Calls Sink() on every hook registered under `hook_type`, in container order,
// handing each one the header, the vocabulary file handle returned by
// VocabFile(), and the chains to attach to.
void Output::Apply(HookType hook_type, util::stream::Chains &chains) {
  typedef boost::ptr_vector<OutputHook> HookList;
  HookList &hooks = outputs_[hook_type];
  for (HookList::iterator hook = hooks.begin(); hook != hooks.end(); ++hook) {
    hook->Sink(header_, VocabFile(), chains);
  }
}
void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) {
if (verbose_header_) {
util::FileStream out(file_.get(), 50);
out << "# Input file: " << info.input_file << '\n';
out << "# Token count: " << info.token_count << '\n';
out << "# Smoothing: Modified Kneser-Ney" << '\n';
}
chains >> PrintARPA(vocab_file, file_.get(), info.counts_pruned);
}
}} // namespaces