| /* |
| |
| EGYPT Toolkit for Statistical Machine Translation |
| Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky. |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public License |
| as published by the Free Software Foundation; either version 2 |
| of the License, or (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
| USA. |
| |
| */ |
| #include "ATables.h" |
| #include "Globals.h" |
| #include "myassert.h" |
| #include "Parameter.h" |
| |
| GLOBAL_PARAMETER(bool,CompactADTable,"compactadtable","1: only 3-dimensional alignment table for IBM-2 and IBM-3",PARLEV_MODELS,1); |
| GLOBAL_PARAMETER(float,amodel_smooth_factor,"model23SmoothFactor","smoothing parameter for IBM-2/3 (interpolation with constant)",PARLEV_SMOOTH,0.0); |
| |
| template <class VALTYPE> |
| void amodel<VALTYPE>::printTable(const char *filename) const{ |
| // print amodel to file with the name filename (it'll be created or overwritten |
| // format : for a table : |
| // aj j l m val |
| // where aj is source word pos, j target word pos, l source sentence length, |
| // m is target sentence length. |
| // |
| //return; |
| if (is_distortion) |
| cout << "Dumping pruned distortion table (d) to file:" << filename <<'\n'; |
| else |
| cout << "Dumping pruned alignment table (a) to file:" << filename <<'\n'; |
| |
| ofstream of(filename); |
| double ssum=0.0; |
| for(WordIndex l=0; l < MaxSentLength; l++){ |
| for(WordIndex m=0;m<MaxSentLength;m++){ |
| if( CompactADTable && l!=m ) |
| continue; |
| unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1; |
| unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1; |
| if( is_distortion==0 ){ |
| for(WordIndex j=1;j<=M; j++){ |
| double sum=0.0; |
| for(WordIndex i=0;i<=L; i++){ |
| VALTYPE x=getValue(i, j, L, M); |
| if( x>PROB_SMOOTH ){ |
| of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n'; |
| sum+=x; |
| } |
| } |
| ssum+=sum; |
| } |
| }else{ |
| for(WordIndex i=0;i<=L;i++){ |
| double sum=0.0; |
| for(WordIndex j=1;j<=M;j++){ |
| VALTYPE x=getValue(j, i, L, M); |
| if( x>PROB_SMOOTH ){ |
| of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n'; |
| sum+=x; |
| } |
| } |
| ssum+=sum; |
| } |
| } |
| } |
| } |
| } |
| |
| template <class VALTYPE> |
| void amodel<VALTYPE>::printRealTable(const char *filename) const{ |
| // print amodel to file with the name filename (it'll be created or overwritten |
| // format : for a table : |
| // aj j l m val |
| // where aj is source word pos, j target word pos, l source sentence length, |
| // m is target sentence length. |
| // |
| //return; |
| if (is_distortion) |
| cout << "Dumping not pruned distortion table (d) to file:" << filename <<'\n'; |
| else |
| cout << "Dumping not pruned alignment table (a) to file:" << filename <<'\n'; |
| |
| ofstream of(filename); |
| for(WordIndex l=0; l < MaxSentLength; l++){ |
| for(WordIndex m=0;m<MaxSentLength;m++){ |
| if( CompactADTable && l!=m ) |
| continue; |
| unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1; |
| unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1; |
| if( is_distortion==0 ){ |
| for(WordIndex j=1;j<=M; j++){ |
| for(WordIndex i=0;i<=L; i++){ |
| VALTYPE x=getValue(i, j, L, M); |
| if( x>MINCOUNTINCREASE ) |
| of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n'; |
| } |
| } |
| }else{ |
| for(WordIndex i=0;i<=L;i++){ |
| for(WordIndex j=1;j<=M;j++){ |
| VALTYPE x=getValue(j, i, L, M); |
| if( x>MINCOUNTINCREASE ) |
| of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n'; |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| extern short NoEmptyWord; |
| |
| template <class VALTYPE> |
| bool amodel<VALTYPE>::readTable(const char *filename){ |
| /* This function reads the a table from a file. |
| Each line is of the format: aj j l m val |
| where aj is the source word position, j the target word position, |
| l the source sentence length, and m the target sentence length |
| |
| This function also works for a d table, where the positions |
| of aj and i are swapped. Both the a and d tables are 4 dimensional |
| hashes; this function will simply read in the four values and keep |
| them in that order when hashing the fifth value. |
| NAS, 7/11/99 |
| */ |
| ifstream inf(filename); |
| cout << "Reading a/d table from " << filename << "\n"; |
| if(!inf){ |
| cerr << "\nERROR: Cannot open " << filename<<"\n"; |
| return false; |
| } |
| WordIndex w, x, l, m; |
| VALTYPE prob; |
| while(inf >> w >> x >> l >> m >> prob ) |
| // the NULL word is added to the length |
| // of the sentence in the tables, but discount it when you write the tables. |
| setValue(w, x, l, m, prob); |
| return true; |
| } |
| |
| template <class VALTYPE> |
| bool amodel<VALTYPE>::readAugTable(const char *filename){ |
| /* This function reads the a table from a file. |
| Each line is of the format: aj j l m val |
| where aj is the source word position, j the target word position, |
| l the source sentence length, and m the target sentence length |
| |
| This function also works for a d table, where the positions |
| of aj and i are swapped. Both the a and d tables are 4 dimensional |
| hashes; this function will simply read in the four values and keep |
| them in that order when hashing the fifth value. |
| NAS, 7/11/99 |
| */ |
| ifstream inf(filename); |
| cout << "Reading a/d table from " << filename << "\n"; |
| if(!inf){ |
| cerr << "\nERROR: Cannot open " << filename<<"\n"; |
| return false; |
| } |
| WordIndex w, x, l, m; |
| VALTYPE prob; |
| while(inf >> w >> x >> l >> m >> prob ) |
| // the NULL word is added to the length |
| // of the sentence in the tables, but discount it when you write the tables. |
| addValue(w, x, l, m, prob); |
| return true; |
| } |
| |
| template <class VALTYPE> |
| bool amodel<VALTYPE>::merge(amodel<VALTYPE>& am){ |
| cout << "start merging " <<"\n"; |
| for(WordIndex l=0; l < MaxSentLength; l++){ |
| for(WordIndex m=0;m<MaxSentLength;m++){ |
| if( CompactADTable && l!=m ) |
| continue; |
| unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1; |
| unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1; |
| if( is_distortion==0 ){ |
| for(WordIndex j=1;j<=M; j++){ |
| for(WordIndex i=0;i<=L; i++){ |
| VALTYPE x=am.getValue(i, j, L, M); |
| addValue(i,j,L,M,x); |
| } |
| } |
| }else{ |
| for(WordIndex i=0;i<=L;i++){ |
| for(WordIndex j=1;j<=M;j++){ |
| VALTYPE x=am.getValue(j, i, L, M); |
| addValue(j,i,L,M,x); |
| } |
| } |
| } |
| } |
| } |
| return true; |
| } |
| |
| |
| template class amodel<COUNT> ; |
| //template class amodel<PROB> ; |