| /* |
| |
| Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI) |
| |
| This file is part of GIZA++ ( extension of GIZA ). |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public License |
| as published by the Free Software Foundation; either version 2 |
| of the License, or (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
| USA. |
| |
| */ |
| #ifndef WordClasses_h_DEFINED |
| #define WordClasses_h_DEFINED |
#include <cassert>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include "vocab.h"
| |
| class WordClasses |
| { |
| private: |
| map<string,string> Sw2c; |
| map<string,int> Sc2int; |
| Vector<string> Sint2c; |
| Vector<int> w2c; |
| unsigned int classes; |
| public: |
| WordClasses() |
| : classes(1) |
| { |
| Sint2c.push_back("0"); |
| Sc2int["0"]=0; |
| } |
| template<class MAPPER> bool read(istream&in,const MAPPER&m,const vcbList& vcb) |
| { |
| string sline; |
| int maxword=0; |
| int readWord=0, putWord=0; |
| while(getline(in,sline)) |
| { |
| readWord ++; |
| string word,wclass; |
| istrstream iline(sline.c_str()); |
| iline>>word>>wclass; |
| |
| if( !Sc2int.count(wclass) ) |
| { |
| Sc2int[wclass]=classes++; |
| Sint2c.push_back(wclass); |
| assert(classes==Sint2c.size()); |
| } |
| if(vcb.has_word(word)){ |
| maxword=max(m(word),maxword); |
| assert(Sw2c.count(word)==0); |
| Sw2c[word]=wclass; |
| putWord++; |
| } |
| } |
| w2c=Vector<int>(maxword+1,0); |
| for(map<string,string>::const_iterator i=Sw2c.begin();i!=Sw2c.end();++i) |
| w2c[m(i->first)]=Sc2int[i->second]; |
| cout << "Read classes: #words: " << maxword << " " << " #classes: "<< classes <<endl; |
| cout << "Actual number of read words: " << readWord << " stored words: " << putWord << endl; |
| return 1; |
| } |
| int getClass(int w)const |
| { |
| if(w>=0&&int(w)<int(w2c.size()) ) |
| return w2c[w]; |
| else |
| return 0; |
| } |
| int operator()(const string&x)const |
| { |
| if( Sc2int.count(x) ) |
| return Sc2int.find(x)->second; |
| else |
| { |
| cerr << "WARNING: class " << x << " not found.\n"; |
| return 0; |
| } |
| } |
| string classString(unsigned int cnr)const |
| { |
| if( cnr<Sint2c.size()) |
| return Sint2c[cnr]; |
| else |
| return string("0"); |
| } |
| }; |
| |
| #endif |