| /* |
| |
| Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI) |
| |
| This file is part of GIZA++ ( extension of GIZA ). |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public License |
| as published by the Free Software Foundation; either version 2 |
| of the License, or (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
| USA. |
| |
| */ |
| #ifndef _d5tables_h_define |
| #define _d5tables_h_define |
| #include <math.h> |
| #include "D4Tables.h" |
| |
| extern float d5modelsmooth_countoffset; |
| extern float d5modelsmooth_factor; |
| |
| #define UNSEENPROB (1.0/vacancies_total) |
| |
| class d5model |
| { |
| private: |
| typedef Vector < pair < COUNT,PROB > >Vpff; |
| map< m4_key,Vpff,compare1 > D1; |
| map< m4_key,Vpff,compareb1 > Db1; |
| public: |
| d4model&d4m; |
| WordClasses ewordclasses,fwordclasses; |
| template<class MAPPER> |
| void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile) |
| { |
| ifstream estrm(efile.c_str()),fstrm(ffile.c_str()); |
| if( !estrm ) |
| cerr << "ERROR: can not read classes from " << efile << endl; |
| else |
| ewordclasses.read(estrm,m1); |
| if( !fstrm ) |
| cerr << "ERROR: can not read classes from " << ffile << endl; |
| else |
| fwordclasses.read(fstrm,m2); |
| } |
| d5model (d4model&_d4m) |
| :D1 (compare1(M5_Dependencies)), Db1 (compareb1(M5_Dependencies)),d4m(_d4m) |
| {} |
| COUNT &getCountRef_first (PositionIndex vacancies_j, |
| PositionIndex vacancies_jp, int F, |
| PositionIndex l, PositionIndex m, |
| PositionIndex vacancies_total) |
| { |
| massert(vacancies_j>0); |
| massert(vacancies_total>0); |
| //massert(vacancies_jp<=vacancies_total); |
| massert(vacancies_j <=vacancies_total); |
| massert(vacancies_total<=m); |
| m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total); |
| map<m4_key,Vpff,compare1 >::iterator p=D1.find(key); |
| if(p==D1.end()) |
| p=D1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length |
| massert(p!=D1.end()); |
| return (p->second)[vacancies_j].first; |
| } |
| COUNT &getCountRef_bigger (PositionIndex vacancies_j, |
| PositionIndex vacancies_jp, int F, |
| PositionIndex l, PositionIndex m, |
| PositionIndex vacancies_total) |
| { |
| massert(vacancies_j>0); |
| massert(vacancies_total>0); |
| massert (vacancies_jp <= vacancies_j); |
| massert (vacancies_j-vacancies_jp <= vacancies_total); |
| m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total); |
| map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key); |
| if(p==Db1.end()) |
| p=Db1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length |
| massert(p!=Db1.end()); |
| return (p->second)[vacancies_j - vacancies_jp].first; |
| } |
| PROB getProb_first (PositionIndex vacancies_j, PositionIndex vacancies_jp, |
| int F, PositionIndex l, PositionIndex m, |
| PositionIndex vacancies_total) const |
| { |
| massert(vacancies_j>0); |
| massert(vacancies_total>0); |
| //massert(vacancies_jp<=vacancies_total); |
| massert(vacancies_j <=vacancies_total); |
| massert(vacancies_total<=m); |
| m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total); |
| map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key); |
| if( p==D1.end() ) |
| return UNSEENPROB; |
| else |
| return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j].second); |
| } |
| PROB getProb_bigger (PositionIndex vacancies_j, PositionIndex vacancies_jp, |
| int F, PositionIndex l, PositionIndex m, |
| PositionIndex vacancies_total) const |
| { |
| massert(vacancies_j>0); |
| massert(vacancies_total>0); |
| massert (vacancies_jp <= vacancies_j); |
| massert (vacancies_j-vacancies_jp <= vacancies_total); |
| m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total); |
| map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key); |
| if(p==Db1.end()) |
| return UNSEENPROB; |
| else |
| return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j - vacancies_jp].second); |
| } |
| void normalizeTable () |
| { |
| int nParams=0; |
| for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i) |
| { |
| Vpff&d1=i->second; |
| COUNT sum=0.0; |
| for(PositionIndex i=0;i<d1.size();i++) |
| sum+=d1[i].first+d5modelsmooth_countoffset; |
| for(PositionIndex i=0;i<d1.size();i++) |
| { |
| d1[i].second=sum?((d1[i].first+d5modelsmooth_countoffset)/sum):(1.0/d1.size()); |
| nParams++; |
| } |
| } |
| for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i) |
| { |
| Vpff&db1=i->second; |
| double sum=0.0; |
| for(PositionIndex i=0;i<db1.size();i++) |
| sum+=db1[i].first+d5modelsmooth_countoffset; |
| for(PositionIndex i=0;i<db1.size();i++) |
| { |
| db1[i].second=sum?((db1[i].first+d5modelsmooth_countoffset)/sum):(1.0/db1.size()); |
| nParams++; |
| } |
| } |
| cout << "D5 table contains " << nParams << " parameters.\n"; |
| } |
| |
| friend ostream&operator<<(ostream&out,d5model&d5m) |
| { |
| out << "# Translation tables for Model 5 .\n"; |
| out << "# Table for head of cept.\n"; |
| for(map<m4_key,Vpff,compare1 >::const_iterator i=d5m.D1.begin();i!=d5m.D1.end();++i) |
| { |
| const Vpff&d1=i->second; |
| COUNT sum=0.0; |
| for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first; |
| if ( sum ) |
| { |
| for(unsigned ii=0;ii<d1.size();ii++) |
| { |
| print1_m5(out,i->first,d5m.ewordclasses,d5m.fwordclasses); |
| out << (int)(ii) << ' ' << d1[ii].second << ' ' << d1[ii].first << '\n'; |
| } |
| out << endl; |
| } |
| } |
| out << "# Table for non-head of cept.\n"; |
| for(map<m4_key,Vpff,compareb1 >::const_iterator i=d5m.Db1.begin();i!=d5m.Db1.end();++i) |
| { |
| const Vpff&db1=i->second; |
| double sum=0.0; |
| for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first; |
| if( sum ) |
| { |
| for(unsigned ii=0;ii<db1.size();ii++) |
| { |
| printb1_m5(out,i->first,d5m.fwordclasses); |
| out << (int)(ii) << ' ' << db1[ii].second << ' ' << db1[ii].first << '\n'; |
| } |
| out << endl; |
| } |
| } |
| return out; |
| } |
| void readProbTable(const char*x) |
| { |
| ifstream f(x); |
| string l; |
| while(getline(f,l)) |
| { |
| if(l.length()&&l[0]=='#') |
| continue; |
| istringstream is(l.c_str()); |
| string E,F; |
| int v1,v2,ii; |
| double prob,count; |
| if(is>>E>>F>>v1>>v2>>ii>>prob>>count) |
| { |
| //cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob<< endl; |
| if( count>0 ) |
| if( E=="-1") |
| getCountRef_bigger(ii,0,fwordclasses(F),1000,1000,v2)+=count; |
| else |
| getCountRef_first(ii,v1,fwordclasses(F),1000,1000,v2)+=count; |
| } |
| } |
| normalizeTable(); |
| ofstream of("M5FILE"); |
| of << (*this); |
| } |
| void clear() |
| { |
| for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i) |
| { |
| Vpff&d1=i->second; |
| for(PositionIndex i=0;i<d1.size();i++) |
| d1[i].first=0.0; |
| } |
| for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i) |
| { |
| Vpff&db1=i->second; |
| for(PositionIndex i=0;i<db1.size();i++) |
| db1[i].first=0.0; |
| } |
| } |
| }; |
| |
| #endif |
| |
| |
| |