blob: 280cadd07193c6dd6d82be88f22f9d8ebed401c2 [file] [log] [blame]
/* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*- */
/*
* newgiza
* Copyright (C) Qin Gao 2007 <qing@cs.cmu.edu>
*
* newgiza is free software.
*
* You may redistribute it and/or modify it under the terms of the
* GNU General Public License, as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* newgiza is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with newgiza. If not, write to:
* The Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301, USA.
*/
#ifndef _TTABLEDIFF_HPP_
#define _TTABLEDIFF_HPP_
#include "TTables.h"
#include <sstream>
#include <string>
#include "types.h"
using namespace std;
#ifdef WIN32
typedef hash_map<wordPairIds, COUNT, hashpair> wordpair_hash;
#else
typedef hash_map<wordPairIds, COUNT, hashpair, equal_to<wordPairIds> > wordpair_hash;
#endif
/*!
This class is meant to create a difference file in order to make
GIZA paralell.
*/
template <class COUNT,class PROB>
class CTTableDiff{
private:
INT32 noEnglishWords; // total number of unique source words
INT32 noFrenchWords; // total number of unique target words
/*!
Store only the counting*/
wordpair_hash ef;
public:
INT32 SaveToFile(const char* filename){
ofstream ofs(filename);
if(!ofs.is_open()){
return -1;
}else{
wordpair_hash::iterator it;
for( it = ef.begin() ; it != ef.end(); it++){
ofs << it->first.first << " " << it->first.second << " "
<< it->second << std::endl;
}
}
return SUCCESS;
}
INT32 LoadFromFile(const char* filename){
ef.clear();
ifstream ifs(filename);
if(!ifs.is_open()){
return -1;
}
string sline;
while(!ifs.eof()){
sline = "";
std::getline(ifs,sline);
if(sline.length()){
//cout << sline << endl;
stringstream ss(sline.c_str());
WordIndex we=-1,wf=-1;
COUNT ct=-1 ;
ss >> we >> wf >> ct;
if(we==-1||wf==-1||ct==-1)
continue;
ef[wordPairIds(we,wf)] = ct;
}
}
return SUCCESS;
}
COUNT * GetPtr(WordIndex e, WordIndex f){
// look up this pair and return its position
wordpair_hash::iterator i = ef.find(wordPairIds(e, f));
if(i != ef.end()) // if it exists, return a pointer to it.
return(&((*i).second));
else return(0) ; // else return NULL pointer
}
void incCount(WordIndex e, WordIndex f, COUNT inc)
// increments the count of the given word pair. if the pair does not exist,
// it creates it with the given value.
{
if( inc )
ef[wordPairIds(e, f)] += inc ;
}
INT32 AugmentTTable(tmodel<COUNT,PROB>& ttable){
wordpair_hash::iterator it;
for( it = ef.begin() ; it != ef.end(); it++){
ttable.incCount(it->first.first,it->first.second,it->second);
}
return SUCCESS;
}
protected:
};
#endif // _TTABLEDIFF_HPP_