/*

Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)

This file is part of GIZA++ ( extension of GIZA ).

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.

*/
#ifndef NO_TRAINING
#include "ForwardBackward.h"
#include "Globals.h"
#include "myassert.h"
#include "HMMTables.h"
#include "mymath.h"

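/*
  ForwardBackwardTraining: one forward-backward (Baum-Welch E-step) pass over
  the HMM trellis given by net (I=net.size1() states, J=net.size2() positions).
  On return, g holds the posterior probability of every node (i,j), normalized
  per position j, and E holds the expected transition counts between states
  (a single accumulated matrix if E.size()==1, otherwise one matrix per
  position transition).  The return value is the normalizer of position 0,
  i.e. the total probability mass of the network.
*/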
double ForwardBackwardTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E){
  const int I=net.size1(),J=net.size2(),N=I*J;
  Array<double> alpha(N,0),beta(N,0),sum(J);
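  // Backward pass: initialize beta at the last position and propagate the
  // backward probabilities column by column towards position 0.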
  for(int i=0;i<I;i++)
    beta[N-I+i]=net.getBetainit(i);
  double * cur_beta=conv<double>(beta.begin())+N-I-1;
  for(int j=J-2;j>=0;--j)
    for(int ti=I-1;ti>=0;--ti,--cur_beta) {
      const double *next_beta=conv<double>(beta.begin())+(j+1)*I;
      const double *alprob=&net.outProb(j,ti,0),*next_node=&net.nodeProb(0,j+1);
      for(int ni=0;ni<I;++ni,(next_node+=J)){
        massert(cur_beta<next_beta&& &net.outProb(j,ti,ni)==alprob);
        massert(next_node == &net.nodeProb(ni,j+1));
        /* if( VERB&&(*next_beta)*(*alprob)*(*next_node) )
           cout << "B= " << (int)(cur_beta-beta.begin()) << " += " << (*next_beta) << "("
                << next_beta-beta.begin() << ") alprob:" << (*alprob) << " lexprob:" << (*next_node) << endl;*/
        (*cur_beta)+=(*next_beta++)*(*alprob++)*(*next_node);
      }
    }
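  // Forward pass: initialize alpha at position 0, then advance position by
  // position; the same loop accumulates the expected transition counts in E
  // as alpha(j-1,pi) * transition prob * emission prob * beta(j,ti).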
  for(int i=0;i<I;i++)
    alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
  double* cur_alpha=conv<double>(alpha.begin())+I;
  cur_beta=conv<double>(beta.begin())+I;
  for(int j=1;j<J;j++){
    Array2<double>&e=E[ (E.size()==1)?0:(j-1) ];
    if( (E.size()!=1) || j==1 )
      {
        e.resize(I,I);
        fill(e.begin(),e.end(),0.0);
      }

    for(int ti=0;ti<I;++ti,++cur_alpha,++cur_beta) {
      const double * prev_alpha=conv<double>(alpha.begin())+I*(j-1);
      double *cur_e= &e(ti,0);
      double this_node=net.nodeProb(ti,j);
      const double* alprob= &net.outProb(j-1,0,ti);
      for(int pi=0;pi<I;++pi,++prev_alpha,(alprob+=I)){
        massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
        massert(&e(ti,pi)==cur_e);
        const double alpha_increment= *prev_alpha*(*alprob)*this_node;
        (*cur_alpha)+=alpha_increment;
        (*cur_e++)+=alpha_increment*(*cur_beta);
      }
    }
  }
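  // Node posteriors: g(i,j) = alpha(i,j)*beta(i,j).  bsum is the total
  // probability computed at position 0; esum sums the (still unnormalized)
  // transition counts, and esum/(J-1) must agree with bsum up to rounding.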
  g.resize(N);
  transform(alpha.begin(),alpha.end(),beta.begin(),g.begin(),multiplies<double>());
  double bsum=0,esum=0,esum2;
  for(int i=0;i<I;i++)
    bsum+=beta[i]*net.nodeProb(i,0)*net.getAlphainit(i);
  for(unsigned int j=0;j<(unsigned int)E.size();j++)
    {
      Array2<double>&e=E[j];
      const double *epe=e.end();
      for(const double*ep=e.begin();ep!=epe;++ep)
        esum+=*ep;
    }
  if( J>1 )
    esum2=esum/(J-1);
  else
    esum2=0.0;
  if(!(esum2==0.0||mfabs(esum2-bsum)/bsum<1e-3*I))
    cout << "ERROR2: " << esum2 << " " << bsum << " " << esum << net << endl;
  double * sumptr=conv<double>(sum.begin());
  double* ge=conv<double>(g.end());
  for(double* gp=conv<double>(g.begin());gp!=ge;gp+=I)
    {
      *sumptr++=normalize_if_possible(gp,gp+I);
      if(bsum && !(mfabs((*(sumptr-1)-bsum)/bsum)<1e-3*I))
        cout << "ERROR: " << *(sumptr-1) << " " << bsum << " " << mfabs((*(sumptr-1)-bsum)/bsum) << ' ' << I << ' ' << J << endl;
    }
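  // Normalize the expected transition counts so that they sum to 1 over all
  // accumulated transitions.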
  for(unsigned int j=0;j<(unsigned int)E.size();j++)
    {
      Array2<double>&e=E[j];
      double* epe=e.end();
      if( esum )
        for(double*ep=e.begin();ep!=epe;++ep)
          *ep/=esum;
      else
        // no transition mass was accumulated: fall back to a uniform value
        for(double*ep=e.begin();ep!=epe;++ep)
          *ep=1.0/(max(I*I,I*I*(J-1)));
    }
  if( sum.size() )
    return sum[0];
  else
    return 1.0;
}
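/*
  HMMViterbi (without precomputed posteriors): runs ForwardBackwardTraining
  and then, for every position j, picks the state with the largest posterior.
  This is per-position maximum a posteriori decoding rather than a true
  best-path search; see HMMRealViterbi below for the exact Viterbi path.
*/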
void HMMViterbi(const HMMNetwork&net,Array<int>&vit) {
  const int I=net.size1(),J=net.size2();
  vit.resize(J);
  Array<double> g;
  Array<Array2<double> > e(1);
  ForwardBackwardTraining(net,g,e);
  for(int j=0;j<J;j++) {
    double * begin=conv<double>(g.begin())+I*j;
    vit[j]=max_element(begin,begin+I)-begin;
  }
}
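/*
  HMMViterbi (overload): same per-position posterior decoding, but reuses the
  posteriors g that a previous call to ForwardBackwardTraining has already
  filled, instead of recomputing them.
*/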
void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit) {
  const int I=net.size1(),J=net.size2();
  vit.resize(J);
  for(int j=0;j<J;j++) {
    double* begin=conv<double>(g.begin())+I*j;
    vit[j]=max_element(begin,begin+I)-begin;
  }
}

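/*
  HMMRealViterbi: exact Viterbi decoding by dynamic programming with
  backpointers.  vitar receives the best state for every position, and the
  probability of the best path is returned.  If pegj is a valid position, the
  path is "pegged": at position pegj only state pegi survives (or, if
  pegi==-1, only the empty-word states i>=I/2).
*/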
double HMMRealViterbi(const HMMNetwork&net,Array<int>&vitar,int pegi,int pegj,bool verbose){
  const int I=net.size1(),J=net.size2(),N=I*J;
  Array<double> alpha(N,-1);
  Array<double*> bp(N,(double*)0);
  vitar.resize(J);
  if( J==0 )
    return 1.0;
  for(int i=0;i<I;i++)
    {
      alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
      if( i>I/2 )
        alpha[i]=0; // only first empty word can be chosen
      bp[i]=0;
    }
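  // Viterbi recursion: for every node keep only the best-scoring predecessor
  // (maximum instead of the sum used in the forward pass) and remember it as
  // a backpointer into the previous column of alpha.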
  double *cur_alpha=conv<double>(alpha.begin())+I;
  double **cur_bp=conv<double*>(bp.begin())+I;
  for(int j=1;j<J;j++)
    {
      if( pegj+1==j )
        for(int ti=0;ti<I;ti++)
          if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
            (cur_alpha-I)[ti]=0.0;
      for(int ti=0;ti<I;++ti,++cur_alpha,++cur_bp) {
        double* prev_alpha=conv<double>(alpha.begin())+I*(j-1);
        double this_node=net.nodeProb(ti,j);
        const double *alprob= &net.outProb(j-1,0,ti);
        for(int pi=0;pi<I;++pi,++prev_alpha,(alprob+=I)){
          massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
          const double alpha_increment= *prev_alpha*(*alprob)*this_node;
          if( alpha_increment> *cur_alpha )
            {
              (*cur_alpha)=alpha_increment;
              (*cur_bp)=prev_alpha;
            }
        }
      }
    }
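  // Termination: weight the last column with the final-state weights, apply a
  // possible peg on the last position, take the best final state and trace
  // the backpointers to recover the whole path.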
  for(int i=0;i<I;i++)
    alpha[N-I+i]*=net.getBetainit(i);
  if( pegj==J-1 )
    for(int ti=0;ti<I;ti++)
      if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
        alpha[N-I+ti]=0.0;

  int j=J-1;
  cur_alpha=conv<double>(alpha.begin())+j*I;
  vitar[J-1]=max_element(cur_alpha,cur_alpha+I)-cur_alpha;
  double ret= *max_element(cur_alpha,cur_alpha+I);
  while(bp[vitar[j]+j*I])
    {
      cur_alpha-=I;
      vitar[j-1]=bp[vitar[j]+j*I]-cur_alpha;
      massert(vitar[j-1]<I&&vitar[j-1]>=0);
      j--;
    }
  massert(j==0);
| massert(j==0); |
| if( verbose ) |
| { |
| cout << "VERB:PEG: " << pegi << ' ' << pegj << endl; |
| for(int j=0;j<J;j++) |
| cout << "NP " << net.nodeProb(vitar[j],j) << ' ' << "AP " << ((j==0)?net.getAlphainit(vitar[j]):net.outProb(j-1,vitar[j-1],vitar[j])) << " j:" << j << " i:" << vitar[j] << "; "; |
| cout << endl; |
| } |
| return ret; |
| } |
| |
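/*
  MaximumTraining: Viterbi ("hard EM") variant of the E-step.  Instead of
  posterior probabilities, the single best path found by HMMRealViterbi gets
  all the mass: g is 1 for the nodes on the path and 0 elsewhere, and E counts
  the transitions actually taken along that path.
*/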
double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E){
  Array<int> vitar;
  double ret=HMMRealViterbi(net,vitar);
  const int I=net.size1(),J=net.size2();
  if( E.size()==1 )
    {
      Array2<double>&e=E[0];
      e.resize(I,I);
      g.resize(I*J);
      fill(g.begin(),g.end(),0.0);
      fill(e.begin(),e.end(),0.0);
      for(int i=0;i<J;++i)
        {
          g[i*I+vitar[i]]=1.0;
          if( i>0 )
            e(vitar[i],vitar[i-1])++;
        }
    }
  else
    {
      g.resize(I*J);
      fill(g.begin(),g.end(),0.0);
      for(int i=0;i<J;++i)
        {
          g[i*I+vitar[i]]=1.0;
          if( i>0 )
            {
              Array2<double>&e=E[i-1];
              e.resize(I,I);
              fill(e.begin(),e.end(),0.0);
              e(vitar[i],vitar[i-1])++;
            }
        }
    }
  return ret;
}
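/*
  A minimal usage sketch (assuming an HMMNetwork `net` has already been built
  elsewhere, e.g. by the HMM alignment model; the variable names below are
  illustrative only):

    Array<double> g;
    Array<Array2<double> > E(1);          // one accumulated transition matrix
    double prob  = ForwardBackwardTraining(net, g, E);  // soft E-step counts
    Array<int> best;
    double vprob = HMMRealViterbi(net, best);           // best state sequence
*/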

#endif