| /* |
| |
| Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI) |
| |
| This file is part of GIZA++ ( extension of GIZA ). |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public License |
| as published by the Free Software Foundation; either version 2 |
| of the License, or (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
| USA. |
| |
| */ |
| #ifndef _d4tables_h_define |
| #define _d4tables_h_define |
| #include <math.h> |
| #include "WordClasses.h" |
| #include "Globals.h" |
| #include "myassert.h" |
| #include "syncObj.h" |
| extern float d4modelsmooth_factor; |
| |
| class m4_key { |
| public: |
| int deps; |
| int l; |
| int m; |
| int F; |
| int E; |
| int prevj; |
| int vacancies1, vacancies2; |
| m4_key(int _deps, int _l, int _m, int _F, int _E, int _prevj, int _v1, |
| int _v2) : |
| deps(_deps), l(_l), m(_m), F(_F), E(_E), prevj(_prevj), |
| vacancies1(_v1), vacancies2(_v2) { |
| } |
| friend ostream&print1(ostream&out, const m4_key&x, const WordClasses&wce, |
| const WordClasses&wcf) { |
| if (x.deps&DEP_MODEL_l) |
| out << "l: " << x.l<<' '; |
| if (x.deps&DEP_MODEL_m) |
| out << "m: " << x.m<<' '; |
| if (x.deps&DEP_MODEL_F) |
| out << "F: " << wcf.classString(x.F)<< ' '; |
| if (x.deps&DEP_MODEL_E) |
| out << "E: " << wce.classString(x.E)<< ' '; |
| // if(x.deps&DEP_MODEL_pj)out << "j-1: " << x.prevj<<' '; |
| if (x.vacancies1!=-1) |
| out << "v1: " << x.vacancies1 << ' '; |
| if (x.vacancies2!=-1) |
| out << "v2: " << x.vacancies2 << ' '; |
| return out << '\n'; |
| } |
| |
| friend ostream&print1_m5(ostream&out, const m4_key&x, |
| const WordClasses&wce, const WordClasses&wcf) { |
| out << ((x.deps&DEP_MODEL_E) ? wce.classString(x.E) : string("0")) |
| << ' '; |
| out << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F) : string("0")) |
| << ' '; |
| out << x.vacancies1 << ' '; |
| out << x.vacancies2 << ' '; |
| return out; |
| } |
| |
| friend ostream&printb1(ostream&out, const m4_key&x, const WordClasses&wce, |
| const WordClasses&wcf) { |
| if (x.deps&DEP_MODELb_l) |
| out << "l: " << x.l<<' '; |
| if (x.deps&DEP_MODELb_m) |
| out << "m: " << x.m<<' '; |
| if (x.deps&DEP_MODELb_F) |
| out << "F: " << wcf.classString(x.F) << ' '; |
| if (x.deps&DEP_MODELb_E) |
| out << "E: " << wce.classString(x.E) << ' '; |
| if (x.vacancies1!=-1) |
| out << "v1: " << x.vacancies1 << ' '; |
| if (x.vacancies2!=-1) |
| out << "v2: " << x.vacancies2 << ' '; |
| return out << '\n'; |
| } |
| friend ostream&printb1_m5(ostream&out, const m4_key&x, |
| const WordClasses&wcf) { |
| out << "-1 " << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F) |
| : string("0"))<< ' '; |
| out << x.vacancies1 << ' '; |
| out << x.vacancies2 << ' '; |
| return out; |
| } |
| }; |
| |
| class compare1 { |
| private: |
| int deps; |
| public: |
| compare1(int _deps) : |
| deps(_deps) { |
| } |
| bool operator()(const m4_key&a, const m4_key&b) const { |
| if (deps&DEP_MODEL_l) { |
| if (a.l<b.l) |
| return 1; |
| if (b.l<a.l) |
| return 0; |
| } |
| if (deps&DEP_MODEL_m) { |
| if (a.m<b.m) |
| return 1; |
| if (b.m<a.m) |
| return 0; |
| } |
| if (deps&DEP_MODEL_F) { |
| if (a.F<b.F) |
| return 1; |
| if (b.F<a.F) |
| return 0; |
| } |
| if (deps&DEP_MODEL_E) { |
| if (a.E<b.E) |
| return 1; |
| if (b.E<a.E) |
| return 0; |
| } |
| //if(deps&DEP_MODEL_pj){if( a.prevj<b.prevj )return 1;if( b.prevj<a.prevj )return 0;} |
| if (a.vacancies1<b.vacancies1) |
| return 1; |
| if (b.vacancies1<a.vacancies1) |
| return 0; |
| if (a.vacancies2<b.vacancies2) |
| return 1; |
| if (b.vacancies2<a.vacancies2) |
| return 0; |
| return 0; |
| } |
| }; |
| |
| class compareb1 { |
| private: |
| int deps; |
| public: |
| compareb1(int _deps) : |
| deps(_deps) { |
| } |
| bool operator()(const m4_key&a, const m4_key&b) const { |
| if (deps&DEP_MODELb_l) { |
| if (a.l<b.l) |
| return 1; |
| if (b.l<a.l) |
| return 0; |
| } |
| if (deps&DEP_MODELb_m) { |
| if (a.m<b.m) |
| return 1; |
| if (b.m<a.m) |
| return 0; |
| } |
| if (deps&DEP_MODELb_F) { |
| if (a.F<b.F) |
| return 1; |
| if (b.F<a.F) |
| return 0; |
| } |
| if (deps&DEP_MODELb_E) { |
| if (a.E<b.E) |
| return 1; |
| if (b.E<a.E) |
| return 0; |
| } |
| //if(deps&DEP_MODELb_pj){if( a.prevJ<b.prevJ )return 1;if( b.prevJ<a.prevJ )return 0;} |
| if (a.vacancies1<b.vacancies1) |
| return 1; |
| if (b.vacancies1<a.vacancies1) |
| return 0; |
| if (a.vacancies2<b.vacancies2) |
| return 1; |
| if (b.vacancies2<a.vacancies2) |
| return 0; |
| return 0; |
| } |
| }; |
| |
| inline void tokenize(const string&in, Vector<string>&out) { |
| string s; |
| istrstream l(in.c_str()); |
| while (l>>s) |
| out.push_back(s); |
| } |
| |
| class d4model { |
| public: |
| typedef Vector<pair<COUNT,PROB> > Vpff; |
| map<m4_key,Vpff,compare1 > D1; |
| map<m4_key,Vpff,compareb1> Db1; |
| PositionIndex msl; |
| WordClasses* ewordclasses; |
| WordClasses* fwordclasses; |
| template<class MAPPER> void makeWordClasses(const MAPPER&m1, |
| const MAPPER&m2, string efile, string ffile, const vcbList& elist, |
| const vcbList& flist) { |
| ifstream estrm(efile.c_str()), fstrm(ffile.c_str()); |
| if ( !estrm) { |
| cerr << "ERROR: can not read " << efile << endl; |
| } else |
| ewordclasses->read(estrm, m1,elist); |
| if ( !fstrm) |
| cerr << "ERROR: can not read " << ffile << endl; |
| else |
| fwordclasses->read(fstrm, m2,flist); |
| } |
| d4model(PositionIndex _msl, WordClasses& e, WordClasses& f) : |
| D1(compare1(M4_Dependencies)), Db1(compareb1(M4_Dependencies)), |
| msl(_msl),ewordclasses(&e),fwordclasses(&f) { |
| } |
| |
| protected: |
| inline COUNT&getCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l, |
| int m) { |
| assert(j>=1); |
| m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1); |
| map<m4_key,Vpff,compare1 >::iterator p=D1.find(key); |
| if (p==D1.end()) |
| p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first; |
| assert(p!=D1.end()); |
| return (p->second)[j-j_cp+msl].first; |
| }; |
| |
| inline COUNT&getCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F, |
| int l, int m) { |
| assert(j>=1); |
| assert(j_prev>=1); |
| m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1); |
| map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key); |
| if (p==Db1.end()) |
| p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first; |
| assert(p!=Db1.end()); |
| return (p->second)[j-j_prev+msl].first; |
| }; |
| Mutex lock_f,lock_b; |
| public: |
| inline void augCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l, |
| int m, const COUNT& v){ |
| lock_f.lock(); |
| getCountRef_first(j,j_cp,E,F,l,m)+=v; |
| lock_f.unlock(); |
| } |
| |
| inline void augCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F, |
| int l, int m, const COUNT& v){ |
| lock_b.lock(); |
| getCountRef_bigger(j,j_prev,E,F,l,m)+=v; |
| lock_b.unlock(); |
| } |
| |
| |
| |
| void merge(d4model &d) { |
| map<m4_key,Vpff,compare1 >::iterator it; |
| for (it = d.D1.begin(); it!=d.D1.end(); it++) { |
| map<m4_key,Vpff,compare1 >::iterator p=D1.find(it->first); |
| if (p==D1.end()) |
| p=D1.insert(make_pair(it->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first; |
| int i; |
| for (i=0; i<it->second.size(); i++) { |
| p->second[i].second+=it->second[i].second; |
| } |
| } |
| #ifdef WIN32 |
| map<m4_key,Vpff,compareb1 >::iterator it1; |
| for (it1 = d.Db1.begin(); it1!=d.Db1.end(); it1++) { |
| map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(it->first); |
| if (p==Db1.end()) |
| p=Db1.insert(make_pair(it1->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first; |
| int i; |
| for (i=0; i<it->second.size(); i++) { |
| p->second[i].second+=it1->second[i].second; |
| } |
| } |
| #else |
| for (it = d.Db1.begin(); it!=d.Db1.end(); it++) { |
| map<m4_key,Vpff,compare1 >::iterator p=Db1.find(it->first); |
| if (p==Db1.end()) |
| p=Db1.insert(make_pair(it->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first; |
| int i; |
| for (i=0; i<it->second.size(); i++) { |
| p->second[i].second+=it->second[i].second; |
| } |
| } |
| #endif |
| } |
| |
| bool augCount(const char* fD1, const char* fDb) { |
| ifstream ifsd(fD1); |
| int deps; |
| int l; |
| int m; |
| int F; |
| int E; |
| int prevj; |
| int vacancies1, vacancies2; |
| int len; |
| double count; |
| if (!ifsd) { |
| cerr << "Failed in " << fD1 << endl; |
| return false; |
| } |
| { |
| while (ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1 |
| >>vacancies2>>len) { |
| m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1, |
| vacancies2); |
| map<m4_key,Vpff,compare1 >::iterator p=D1.find(key); |
| if (p==D1.end()) |
| p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first; |
| assert(p!=D1.end()); |
| int i; |
| for (i=0; i<len; i++) { |
| ifsd >> count; |
| p->second[i].first+=count; |
| } |
| |
| } |
| } |
| ifstream ifsd1(fDb); |
| if (!ifsd1) { |
| cerr << "Failed in " << fDb << endl; |
| return false; |
| } |
| { |
| while (ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1 |
| >>vacancies2>>len) { |
| m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1, |
| vacancies2); |
| map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key); |
| if (p==Db1.end()) |
| p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first; |
| assert(p!=D1.end()); |
| int i; |
| for (i=0; i<len; i++) { |
| ifsd1 >> count; |
| p->second[i].first+=count; |
| } |
| |
| } |
| } |
| return true; |
| } |
| |
| bool readProbTable(const char* fD1, const char* fDb){ |
| ifstream ifsd(fD1); |
| int deps; |
| int l; |
| int m; |
| int F; |
| int E; |
| int prevj; |
| int vacancies1,vacancies2; |
| int len; |
| double count; |
| if(!ifsd){ |
| cerr << "Failed in " << fD1 << endl; |
| return false; |
| } |
| { |
| while(ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){ |
| m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2); |
| map<m4_key,Vpff,compare1 >::iterator p=D1.find(key); |
| if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first; |
| assert(p!=D1.end()); |
| int i; |
| for(i=0;i<len;i++){ |
| ifsd >> count; |
| p->second[i].second=count; |
| } |
| |
| } |
| } |
| ifstream ifsd1(fDb); |
| if(!ifsd1){ |
| cerr << "Failed in " << fDb << endl; |
| return false; |
| } |
| { |
| while(ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){ |
| m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2); |
| map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key); |
| if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first; |
| assert(p!=D1.end()); |
| int i; |
| for(i=0;i<len;i++){ |
| ifsd1 >> count; |
| p->second[i].second=count; |
| } |
| |
| } |
| } |
| return true; |
| } |
| |
| |
| bool printProbTable(const char* fD1, const char* fDb) { |
| ofstream ofsd(fD1); |
| if (!ofsd.is_open()) { |
| return false; |
| } |
| { |
| map<m4_key,Vpff,compare1 >::iterator it; |
| for (it = D1.begin(); it!=D1.end(); it++) { |
| ofsd << it->first.deps << " " << it->first.l << " " |
| << it->first.m << " " << it->first.F << " " |
| << it->first.E << " " << it->first.prevj << " " |
| << it->first.vacancies1 << " " << it->first.vacancies2 |
| << " " << it->second.size() << " "; |
| int i; |
| for (i=0; i<it->second.size(); i++) { |
| ofsd << it->second[i].second << " "; |
| } |
| ofsd << endl; |
| } |
| |
| } |
| |
| ofstream ofsdb(fDb); |
| if (!ofsdb.is_open()) { |
| return false; |
| } |
| |
| map<m4_key,Vpff,compareb1 >::iterator it; |
| for (it = Db1.begin(); it!=Db1.end(); it++) { |
| ofsdb << it->first.deps << " " << it->first.l << " " << it->first.m |
| << " " << it->first.F << " " << it->first.E << " " |
| << it->first.prevj << " " << it->first.vacancies1 << " " |
| << it->first.vacancies2 << " " << it->second.size()<< endl; |
| int i; |
| for (i=0; i<it->second.size(); i++) { |
| ofsdb << it->second[i].second << " "; |
| } |
| ofsdb << endl; |
| } |
| return true; |
| } |
| |
| bool dumpCount(const char* fD1, const char* fDb){ |
| ofstream ofsd(fD1); |
| if(!ofsd.is_open()){ |
| return false; |
| } |
| { |
| map<m4_key,Vpff,compare1 >::iterator it; |
| for(it = D1.begin(); it!=D1.end();it++){ |
| ofsd << it->first.deps << " " |
| << it->first.l << " " |
| << it->first.m << " " |
| << it->first.F << " " |
| << it->first.E << " " |
| << it->first.prevj << " " |
| << it->first.vacancies1 << " " |
| << it->first.vacancies2 << " " |
| << it->second.size() << " "; |
| int i; |
| for(i=0;i<it->second.size();i++){ |
| ofsd << it->second[i].first << " "; |
| } |
| ofsd << endl; |
| } |
| |
| } |
| |
| ofstream ofsdb(fDb); |
| if(!ofsdb.is_open()){ |
| return false; |
| } |
| |
| map<m4_key,Vpff,compareb1 >::iterator it; |
| for(it = Db1.begin(); it!=Db1.end();it++){ |
| ofsdb << it->first.deps << " " |
| << it->first.l << " " |
| << it->first.m << " " |
| << it->first.F << " " |
| << it->first.E << " " |
| << it->first.prevj << " " |
| << it->first.vacancies1 << " " |
| << it->first.vacancies2 << " " |
| << it->second.size()<< endl; |
| int i; |
| for(i=0;i<it->second.size();i++){ |
| ofsdb << it->second[i].first << " "; |
| } |
| ofsdb << endl; |
| } |
| return true; |
| } |
| map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E, |
| int F, int l, int m) const { |
| return D1.find(m4_key(M4_Dependencies, l, m, F, E, 0, -1, -1)); |
| } |
| PROB getProb_first_withiterator(WordIndex j, WordIndex j_cp, int m, |
| const map<m4_key,Vpff,compare1 >::const_iterator& p) const { |
| assert(j>=1); |
| //assert(j_cp>=0); |
| assert(j<=msl); |
| assert(j_cp<=msl); |
| if (p==D1.end()) { |
| return PROB_SMOOTH; |
| } else { |
| massert((p->second)[j-j_cp+msl].second<=1.0); |
| return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1 |
| -d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second); |
| } |
| } |
| |
| PROB getProb_first(WordIndex j, WordIndex j_cp, int E, int F, int l, int m) const { |
| assert(j>=1); |
| //assert(j_cp>=0); |
| assert(j<=msl); |
| assert(j_cp<=msl); |
| m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1); |
| map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key); |
| if (p==D1.end()) { |
| return PROB_SMOOTH; |
| } else { |
| massert((p->second)[j-j_cp+msl].second<=1.0); |
| return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1 |
| -d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second); |
| } |
| } |
| map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E, |
| int F, int l, int m) const { |
| return Db1.find(m4_key(M4_Dependencies, l, m, F, E, 0, -1, -1)); |
| } |
| PROB getProb_bigger_withiterator(WordIndex j, WordIndex j_prev, int m, |
| const map<m4_key,Vpff,compareb1 >::const_iterator&p) const { |
| massert(j>=1); |
| massert(j_prev>=1); |
| massert(j>j_prev); |
| massert(j<=msl); |
| massert(j_prev<=msl); |
| if (p==Db1.end()) { |
| return PROB_SMOOTH; |
| } else { |
| massert((p->second)[j-j_prev+msl].second<=1.0 ); |
| return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1 |
| -d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second); |
| } |
| } |
| |
| PROB getProb_bigger(WordIndex j, WordIndex j_prev, int E, int F, int l, |
| int m) const { |
| massert(j>=1); |
| massert(j_prev>=1); |
| massert(j>j_prev); |
| massert(j<=msl); |
| massert(j_prev<=msl); |
| m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1); |
| map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key); |
| if (p==Db1.end()) { |
| return PROB_SMOOTH; |
| } else { |
| massert((p->second)[j-j_prev+msl].second<=1.0 ); |
| return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1 |
| -d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second); |
| } |
| } |
| |
| void normalizeTable() { |
| int nParams=0; |
| for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) { |
| Vpff&d1=i->second; |
| double sum=0.0; |
| for (PositionIndex i=0; i<d1.size(); i++) |
| sum+=d1[i].first; |
| for (PositionIndex i=0; i<d1.size(); i++) { |
| d1[i].second=sum ? (d1[i].first/sum) : (1.0/d1.size()); |
| nParams++; |
| } |
| } |
| for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) { |
| Vpff&db1=i->second; |
| double sum=0.0; |
| for (PositionIndex i=0; i<db1.size(); i++) |
| sum+=db1[i].first; |
| for (PositionIndex i=0; i<db1.size(); i++) { |
| db1[i].second=sum ? (db1[i].first/sum) : (1.0/db1.size()); |
| nParams++; |
| } |
| } |
| cout << "D4 table contains " << nParams << " parameters.\n"; |
| } |
| |
| void clear() { |
| for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) { |
| Vpff&d1=i->second; |
| for (PositionIndex i=0; i<d1.size(); i++) |
| d1[i].first=0.0; |
| } |
| for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) { |
| Vpff&db1=i->second; |
| for (PositionIndex i=0; i<db1.size(); i++) |
| db1[i].first=0.0; |
| } |
| } |
| |
| /*void printProbTable(const char*fname1,const char*fname2) |
| { |
| ofstream out(fname1); |
| double ssum=0.0; |
| out << "# Translation tables for Model 4 .\n"; |
| out << "# Table for head of cept.\n"; |
| for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i){ |
| const Vpff&d1=i->second; |
| double sum=0.0; |
| for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first; |
| if ( sum ){ |
| print1(out,i->first,ewordclasses,fwordclasses); |
| out << "SUM: " << sum << ' '<< '\n'; |
| for(unsigned ii=0;ii<d1.size();ii++) |
| if( d1[ii].first ) |
| out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n'; |
| out << endl; |
| } |
| ssum+=sum; |
| } |
| out << "# Table for non-head of cept.\n"; |
| for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i) |
| { |
| const Vpff&db1=i->second; |
| double sum=0.0; |
| for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first; |
| if( sum ){ |
| printb1(out,i->first,ewordclasses,fwordclasses); |
| out << "SUM: " << sum << ' '<<'\n'; |
| for(unsigned ii=0;ii<db1.size();ii++) |
| if( db1[ii].first ) |
| { |
| out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n'; |
| } |
| out << endl; |
| } |
| ssum+=sum; |
| } |
| out << endl << "FULL-SUM: " << ssum << endl; |
| if( M4_Dependencies==76 ){ |
| ofstream out2(fname2); |
| for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i) |
| { |
| const Vpff&d1=i->second; |
| for(unsigned ii=0;ii<d1.size();ii++) |
| if( d1[ii].first ) |
| out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n'; |
| } |
| for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i) { |
| const Vpff&db1=i->second; |
| for(unsigned ii=0;ii<db1.size();ii++) |
| if( db1[ii].first ) |
| out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n'; |
| } |
| } |
| }*/ |
| |
| bool readProbTable(const char *fname) { |
| cerr << "Reading D4Tables from " << fname << endl; |
| ifstream file(fname); |
| string line; |
| do { |
| getline(file, line); |
| } while (line.length()&&line[0]=='#'); |
| |
| do { |
| while (line.length()==0) |
| getline(file, line); |
| if (line[0]=='#') |
| break; |
| Vector<string> linestr; |
| tokenize(line, linestr); |
| m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1); |
| for (unsigned int i=0; i<linestr.size(); i+=2) { |
| if (linestr[i]=="l:") { |
| k.l=atoi(linestr[i+1].c_str()); |
| iassert(M4_Dependencies&DEP_MODEL_l); |
| } |
| if (linestr[i]=="m:") { |
| k.m=atoi(linestr[i+1].c_str()); |
| iassert(M4_Dependencies&DEP_MODEL_m); |
| } |
| if (linestr[i]=="F:") { |
| k.F=(*fwordclasses)(linestr[i+1]); |
| iassert(M4_Dependencies&DEP_MODEL_F); |
| } |
| if (linestr[i]=="E:") { |
| k.E=(*ewordclasses)(linestr[i+1]); |
| iassert(M4_Dependencies&DEP_MODEL_E); |
| } |
| //if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);} |
| } |
| string str; |
| double sum; |
| file >> str >> sum; |
| iassert(str=="SUM:"); |
| if (str!="SUM:") |
| cerr << "ERROR: string is " << str << " and not sum " << endl; |
| |
| do { |
| int value; |
| double count; |
| getline(file, line); |
| istrstream twonumbers(line.c_str()); |
| if (twonumbers >> value >> count) { |
| if (D1.count(k)==0) |
| D1.insert(make_pair(k, Vpff(msl*2+1, pair<COUNT, PROB>( |
| 0.0, 0.0)))); |
| D1[k][value+msl]=make_pair(count, count/sum); |
| } |
| } while (line.length()); |
| } while (file); |
| do { |
| getline(file, line); |
| } while (line.length()&&line[0]=='#'); |
| do { |
| while (line.length()==0) |
| getline(file, line); |
| if (line[0]=='#') |
| break; |
| Vector<string> linestr; |
| tokenize(line, linestr); |
| m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1); |
| bool sumRead=0; |
| for (unsigned int i=0; i<linestr.size(); i+=2) { |
| if (linestr[i]=="l:") { |
| k.l=atoi(linestr[i+1].c_str()); |
| iassert(M4_Dependencies&DEP_MODELb_l); |
| } else if (linestr[i]=="m:") { |
| k.m=atoi(linestr[i+1].c_str()); |
| iassert(M4_Dependencies&DEP_MODELb_m); |
| } else if (linestr[i]=="F:") { |
| k.F=(*fwordclasses)(linestr[i+1]); |
| iassert(M4_Dependencies&DEP_MODELb_F); |
| } else if (linestr[i]=="E:") { |
| k.E=(*ewordclasses)(linestr[i+1]); |
| iassert(M4_Dependencies&DEP_MODELb_E); |
| } else if (linestr[i]=="SUM:") { |
| cerr << "Warning: obviously no dependency.\n"; |
| sumRead=1; |
| } else if (linestr[i]=="FULL-SUM:") { |
| break; |
| } else { |
| cerr << "ERROR: error in reading d4 tables: " << linestr[i] |
| << ' ' << linestr[i+1] << endl; |
| } |
| } |
| string str; |
| double sum; |
| if (sumRead==0) |
| file >> str >> sum; |
| else { |
| str=linestr[0]; |
| sum=atof(linestr[1].c_str()); |
| } |
| if (str!="SUM:") |
| cerr << "ERROR: should read SUM but read " << str << endl; |
| do { |
| int value; |
| double count; |
| getline(file, line); |
| istrstream twonumbers(line.c_str()); |
| if (twonumbers >> value >> count) { |
| if (Db1.count(k)==0) |
| Db1.insert(make_pair(k, Vpff(msl*2+1, |
| pair<COUNT, PROB>(0.0, 0.0)))); |
| Db1[k][value+msl]=make_pair(count, count/sum); |
| } |
| } while (file&&line.length()); |
| } while (file); |
| return 1; |
| } |
| }; |
| |
| #endif |