// src/giza-pp/GIZA++-v2/D4Tables.h - joshua - Git at Google

 /*

 Copyright (C) 1999,2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)

 This file is part of GIZA++ ( extension of GIZA ).

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
 USA.

 */
 #ifndef _d4tables_h_define
 #define _d4tables_h_define
 #include <math.h>
 #include "WordClasses.h"
 #include "Globals.h"
 #include "myassert.h"

 extern float d4modelsmooth_factor;

 /**
  * Lookup key of the Model 4 distortion tables.
  *
  * Bundles a dependency bit mask (deps) with the values l, m, F, E, prevj and
  * two vacancy counts.  The friend functions render a key as text; the bits of
  * deps decide which of the fields they emit.
  */
 class m4_key
 {
  public:
   int deps;                    // tested against the DEP_MODEL_* / DEP_MODELb_* flags
   int l;
   int m;
   int F;                       // rendered through wcf.classString
   int E;                       // rendered through wce.classString
   int prevj;                   // stored, but not printed by any function below
   int vacancies1,vacancies2;   // print1/printb1 omit a vacancy field that is -1

   m4_key(int _deps,int _l,int _m,int _F,int _E,int _prevj,int _v1,int _v2)
     : deps(_deps),l(_l),m(_m),F(_F),E(_E),prevj(_prevj),vacancies1(_v1),vacancies2(_v2) {}

   /** Writes the labelled fields selected by the DEP_MODEL_* bits, then '\n'. */
   friend ostream&print1(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf)
     {
       if( x.deps&DEP_MODEL_l )
         out << "l: " << x.l << ' ';
       if( x.deps&DEP_MODEL_m )
         out << "m: " << x.m << ' ';
       if( x.deps&DEP_MODEL_F )
         out << "F: " << wcf.classString(x.F) << ' ';
       if( x.deps&DEP_MODEL_E )
         out << "E: " << wce.classString(x.E) << ' ';
       if( x.vacancies1!=-1 )
         out << "v1: " << x.vacancies1 << ' ';
       if( x.vacancies2!=-1 )
         out << "v2: " << x.vacancies2 << ' ';
       out << '\n';
       return out;
     }

   /** Writes "E F v1 v2 " without labels; a class not selected in deps is written as "0". */
   friend ostream&print1_m5(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf)
     {
       if( x.deps&DEP_MODEL_E )
         out << wce.classString(x.E);
       else
         out << string("0");
       out << ' ';
       if( x.deps&DEP_MODEL_F )
         out << wcf.classString(x.F);
       else
         out << string("0");
       out << ' ';
       out << x.vacancies1 << ' ' << x.vacancies2 << ' ';
       return out;
     }

   /** Same layout as print1, but the fields are selected by the DEP_MODELb_* bits. */
   friend ostream&printb1(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf)
     {
       if( x.deps&DEP_MODELb_l )
         out << "l: " << x.l << ' ';
       if( x.deps&DEP_MODELb_m )
         out << "m: " << x.m << ' ';
       if( x.deps&DEP_MODELb_F )
         out << "F: " << wcf.classString(x.F) << ' ';
       if( x.deps&DEP_MODELb_E )
         out << "E: " << wce.classString(x.E) << ' ';
       if( x.vacancies1!=-1 )
         out << "v1: " << x.vacancies1 << ' ';
       if( x.vacancies2!=-1 )
         out << "v2: " << x.vacancies2 << ' ';
       out << '\n';
       return out;
     }

   /** Writes "-1 F v1 v2 "; note that the F class is selected by DEP_MODEL_F here. */
   friend ostream&printb1_m5(ostream&out,const m4_key&x,const WordClasses&wcf)
     {
       out << "-1 ";
       if( x.deps&DEP_MODEL_F )
         out << wcf.classString(x.F);
       else
         out << string("0");
       out << ' ';
       out << x.vacancies1 << ' ' << x.vacancies2 << ' ';
       return out;
     }
 };

 /**
  * Ordering of m4_key objects used by the D1 map.
  *
  * l, m, F and E take part only when the matching DEP_MODEL_* bit is set in
  * deps; the two vacancy counts always take part.  prevj is not compared.
  * The first field that differs decides the result.
  */
 class compare1
 {
  private:
   int deps;
  public:
   compare1(int _deps) : deps(_deps) {}
   /** Returns true when a orders strictly before b. */
   bool operator()(const m4_key&a,const m4_key&b)const
     {
       if( (deps&DEP_MODEL_l) && a.l!=b.l )
         return a.l<b.l;
       if( (deps&DEP_MODEL_m) && a.m!=b.m )
         return a.m<b.m;
       if( (deps&DEP_MODEL_F) && a.F!=b.F )
         return a.F<b.F;
       if( (deps&DEP_MODEL_E) && a.E!=b.E )
         return a.E<b.E;
       if( a.vacancies1!=b.vacancies1 )
         return a.vacancies1<b.vacancies1;
       return a.vacancies2<b.vacancies2;
     }
 };

 /**
  * Ordering of m4_key objects used by the Db1 map.
  *
  * l, m, F and E take part only when the matching DEP_MODELb_* bit is set in
  * deps; the two vacancy counts always take part.  prevj is not compared.
  * The first field that differs decides the result.
  */
 class compareb1
 {
  private:
   int deps;
  public:
   compareb1(int _deps) : deps(_deps) {}
   /** Returns true when a orders strictly before b. */
   bool operator()(const m4_key&a,const m4_key&b)const
     {
       if( (deps&DEP_MODELb_l) && a.l!=b.l )
         return a.l<b.l;
       if( (deps&DEP_MODELb_m) && a.m!=b.m )
         return a.m<b.m;
       if( (deps&DEP_MODELb_F) && a.F!=b.F )
         return a.F<b.F;
       if( (deps&DEP_MODELb_E) && a.E!=b.E )
         return a.E<b.E;
       if( a.vacancies1!=b.vacancies1 )
         return a.vacancies1<b.vacancies1;
       return a.vacancies2<b.vacancies2;
     }
 };

 inline void tokenize(const string&in,Vector<string>&out)
 {
   string s;
   istringstream l(in);
   while(l>>s)
     out.push_back(s);
 }

 class d4model
 {
  public:
   typedef Vector<pair<COUNT,PROB> > Vpff;
   map<m4_key,Vpff,compare1 > D1;
   map<m4_key,Vpff,compareb1> Db1;
   PositionIndex msl;
   WordClasses ewordclasses;
   WordClasses fwordclasses;
   template<class MAPPER>
   void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile)
     {
       ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
       if( !estrm )
 	{
 	  cerr << "ERROR: can not read " << efile << endl;
 	}
       else
 	ewordclasses.read(estrm,m1);
       if( !fstrm )
 	cerr << "ERROR: can not read " << ffile << endl;
       else
 	fwordclasses.read(fstrm,m2);
     }
   d4model(PositionIndex _msl)
     : D1(compare1(M4_Dependencies)),Db1(compareb1(M4_Dependencies)),msl(_msl)
     {}
   COUNT&getCountRef_first(WordIndex j,WordIndex j_cp,int E,int F,int l,int m)
     {
       assert(j>=1);
       m4_key key(M4_Dependencies,l,m,F,E,j_cp,-1,-1);
       map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
       if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
       assert(p!=D1.end());
       return (p->second)[j-j_cp+msl].first;
     }
   COUNT&getCountRef_bigger(WordIndex j,WordIndex j_prev,int E,int F,int l,int m)
     {
       assert(j>=1);
       assert(j_prev>=1);
       m4_key key(M4_Dependencies,l,m,F,E,j_prev,-1,-1);
       map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
       if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
       assert(p!=Db1.end());
       return (p->second)[j-j_prev+msl].first;
     }
   map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E,int F,int l,int m)const
     {return D1.find(m4_key(M4_Dependencies,l,m,F,E,0,-1,-1));}
   PROB getProb_first_withiterator(WordIndex j,WordIndex j_cp,int m,const map<m4_key,Vpff,compare1 >::const_iterator& p)const
     {
       assert(j>=1);//assert(j_cp>=0);
       assert(j<=msl);assert(j_cp<=msl);
       if(p==D1.end())
 	{
 	return PROB_SMOOTH;
 	}
       else
 	{
 	  massert((p->second)[j-j_cp+msl].second<=1.0);
 	  return max(PROB_SMOOTH,d4modelsmooth_factor/(2*m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second);
 	}
     }
   PROB getProb_first(WordIndex j,WordIndex j_cp,int E,int F,int l,int m)const
     {
       assert(j>=1);//assert(j_cp>=0);
       assert(j<=msl);assert(j_cp<=msl);
       m4_key key(M4_Dependencies,l,m,F,E,j_cp,-1,-1);
       map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
       if(p==D1.end())
 	{
 	return PROB_SMOOTH;
 	}
       else
 	{
 	  massert((p->second)[j-j_cp+msl].second<=1.0);
 	  return max(PROB_SMOOTH,d4modelsmooth_factor/(2*m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second);
 	}
     }
   map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E,int F,int l,int m)const
     {
       return Db1.find(m4_key(M4_Dependencies,l,m,F,E,0,-1,-1));
     }
   PROB getProb_bigger_withiterator(WordIndex j,WordIndex j_prev,int m,const map<m4_key,Vpff,compareb1 >::const_iterator&p)const
     {
       massert(j>=1);massert(j_prev>=1);
       massert(j>j_prev);
       massert(j<=msl);massert(j_prev<=msl);
       if(p==Db1.end())
 	{
 	  return PROB_SMOOTH;
 	}
       else
 	{
 	  massert((p->second)[j-j_prev+msl].second<=1.0 );
 	  return max(PROB_SMOOTH,d4modelsmooth_factor/(m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
 	}
     }

   PROB getProb_bigger(WordIndex j,WordIndex j_prev,int E,int F,int l,int m)const
     {
       massert(j>=1);massert(j_prev>=1);
       massert(j>j_prev);
       massert(j<=msl);massert(j_prev<=msl);
       m4_key key(M4_Dependencies,l,m,F,E,j_prev,-1,-1);
       map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
       if(p==Db1.end())
 	{
 	  return PROB_SMOOTH;
 	}
       else
 	{
 	  massert((p->second)[j-j_prev+msl].second<=1.0 );
 	  return max(PROB_SMOOTH,d4modelsmooth_factor/(m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
 	}
     }
   void normalizeTable()
     {
       int nParams=0;
       for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
 	{
 	  Vpff&d1=i->second;
 	  double sum=0.0;
 	  for(PositionIndex i=0;i<d1.size();i++)
 	    sum+=d1[i].first;
 	  for(PositionIndex i=0;i<d1.size();i++)
 	    {
 	      d1[i].second=sum?(d1[i].first/sum):(1.0/d1.size());
 	      nParams++;
 	    }
 	}
       for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
 	{
 	  Vpff&db1=i->second;
 	  double sum=0.0;
 	  for(PositionIndex i=0;i<db1.size();i++)
 	    sum+=db1[i].first;
 	  for(PositionIndex i=0;i<db1.size();i++)
 	    {
 	      db1[i].second=sum?(db1[i].first/sum):(1.0/db1.size());
 	      nParams++;
 	    }
 	}
       cout << "D4 table contains " << nParams << " parameters.\n";
     }
    void clear()
     {
       for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
 	{
 	  Vpff&d1=i->second;
 	  for(PositionIndex i=0;i<d1.size();i++)
 	    d1[i].first=0.0;
 	}
       for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
 	{
 	  Vpff&db1=i->second;
 	  for(PositionIndex i=0;i<db1.size();i++)
 	    db1[i].first=0.0;
 	}
     }

   void printProbTable(const char*fname1,const char*fname2)
     {
       ofstream out(fname1);
       double ssum=0.0;
       out << "# Translation tables for Model 4 .\n";
       out << "# Table for head of cept.\n";
       for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
 	{
 	  const Vpff&d1=i->second;
 	  double sum=0.0;
 	  for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
 	  if ( sum )
 	    {
 	      print1(out,i->first,ewordclasses,fwordclasses);
 	      out << "SUM: " << sum << ' '<< '\n';
 	      for(unsigned ii=0;ii<d1.size();ii++)
 		if( d1[ii].first )
 		  out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n';
 	      out << endl;
 	    }
 	  ssum+=sum;
 	}
       out << "# Table for non-head of cept.\n";
       for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
 	{
 	  const Vpff&db1=i->second;
 	  double sum=0.0;
 	  for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
 	  if( sum )
 	    {
 	      printb1(out,i->first,ewordclasses,fwordclasses);
 	      out << "SUM: " << sum << ' '<<'\n';
 	      for(unsigned ii=0;ii<db1.size();ii++)
 		if( db1[ii].first )
 		  {
 		    out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n';
 		  }
 	      out << endl;
 	    }
 	  ssum+=sum;
 	}
       out << endl << "FULL-SUM: " << ssum << endl;
       if( M4_Dependencies==76 )
 	{
 	  ofstream out2(fname2);
 	  for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
 	    {
 	      const Vpff&d1=i->second;
 	      for(unsigned ii=0;ii<d1.size();ii++)
 		if( d1[ii].first )
 		  out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n';
 	    }
 	  for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
 	    {
 	      const Vpff&db1=i->second;
 	      for(unsigned ii=0;ii<db1.size();ii++)
 		if( db1[ii].first )
 		  out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n';
 	    }
 	}
     }
   bool readProbTable(const char *fname)
     {
       cerr << "Reading D4Tables from " << fname << endl;
       ifstream file(fname);
       string line;
       do
 	{
 	  getline(file,line);
 	} while(line.length()&&line[0]=='#');

       do
 	{
 	  while(line.length()==0)
 	    getline(file,line);
 	  if( line[0]=='#')
 	    break;
 	  Vector<string> linestr;
 	  tokenize(line,linestr);
 	  m4_key k(M4_Dependencies,0,0,0,0,0,-1,-1);
 	  for(unsigned int i=0;i<linestr.size();i+=2)
 	    {
 	      if( linestr[i]=="l:" ){k.l=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_l);}
 	      if( linestr[i]=="m:" ){k.m=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_m);}
 	      if( linestr[i]=="F:" ){k.F=fwordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODEL_F);}
 	      if( linestr[i]=="E:" ){k.E=ewordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODEL_E);}
 	      //if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);}
 	    }
 	  string str;
 	  double sum;
 	  file >> str >> sum;
 	  iassert(str=="SUM:");
 	  if( str!="SUM:")
 	    cerr << "ERROR: string is " << str << " and not sum " << endl;

 	  do
 	    {
 	      int value;
 	      double count;
 	      getline(file,line);
 	      istringstream twonumbers(line);
 	      if(twonumbers >> value >> count)
 		{
 		  if( D1.count(k)==0 )
 		    D1.insert(make_pair(k,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0))));
 		  D1[k][value+msl]=make_pair(count,count/sum);
 		}
 	    }while(line.length());
 	}while(file);
       do
 	{
 	  getline(file,line);
 	} while(line.length()&&line[0]=='#');
       do
 	{
 	  while(line.length()==0)
 	    getline(file,line);
 	  if( line[0]=='#')
 	    break;
 	  Vector<string> linestr;
 	  tokenize(line,linestr);
 	  m4_key k(M4_Dependencies,0,0,0,0,0,-1,-1);
 	  bool sumRead=0;
 	  for(unsigned int i=0;i<linestr.size();i+=2)
 	    {
 	      if( linestr[i]=="l:" ){k.l=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODELb_l);}
 	      else if( linestr[i]=="m:" ){k.m=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODELb_m);}
 	      else if( linestr[i]=="F:" ){k.F=fwordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODELb_F);}
 	      else if( linestr[i]=="E:" ){k.E=ewordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODELb_E);}
 	      else if( linestr[i]=="SUM:" )
 		{
 		  cerr << "Warning: obviously no dependency.\n";
 		  sumRead=1;
 		}
 	      else if( linestr[i]=="FULL-SUM:" )
 		{
 		  break;
 		}
 	      else
 		{
 		  cerr << "ERROR: error in reading d4 tables: " << linestr[i] << ' ' << linestr[i+1] << endl;
 		}
 	    }
 	  string str;
 	  double sum;
 	  if( sumRead==0 )
 	    file >> str >> sum;
 	  else
 	    {
 	      str=linestr[0];
 	      sum=atof(linestr[1].c_str());
 	    }
 	  if( str!="SUM:" )
 	    cerr << "ERROR: should read SUM but read " << str << endl;
 	  do
 	    {
 	      int value;
 	      double count;
 	      getline(file,line);
 	      istringstream twonumbers(line);
 	      if(twonumbers >> value >> count)
 		{
 		  if( Db1.count(k)==0 )
 		    Db1.insert(make_pair(k,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0))));
 		  Db1[k][value+msl]=make_pair(count,count/sum);
 		}
 	    }while(file&&line.length());
 	}while(file);
       return 1;
     }
 };

 #endif
	/*

	Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)

	This file is part of GIZA++ ( extension of GIZA ).

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
	USA.

	*/
	#ifndef _d4tables_h_define
	#define _d4tables_h_define
	#include <math.h>
	#include "WordClasses.h"
	#include "Globals.h"
	#include "myassert.h"

	extern float d4modelsmooth_factor;

	/**
	 * Lookup key of the Model 4 distortion tables.
	 *
	 * Bundles a dependency bit mask (deps) with the values l, m, F, E, prevj and
	 * two vacancy counts.  The friend functions render a key as text; the bits of
	 * deps decide which of the fields they emit.
	 */
	class m4_key
	{
	 public:
	  int deps;                    // tested against the DEP_MODEL_* / DEP_MODELb_* flags
	  int l;
	  int m;
	  int F;                       // rendered through wcf.classString
	  int E;                       // rendered through wce.classString
	  int prevj;                   // stored, but not printed by any function below
	  int vacancies1,vacancies2;   // print1/printb1 omit a vacancy field that is -1

	  m4_key(int _deps,int _l,int _m,int _F,int _E,int _prevj,int _v1,int _v2)
	    : deps(_deps),l(_l),m(_m),F(_F),E(_E),prevj(_prevj),vacancies1(_v1),vacancies2(_v2) {}

	  /** Writes the labelled fields selected by the DEP_MODEL_* bits, then '\n'. */
	  friend ostream&print1(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf)
	    {
	      if( x.deps&DEP_MODEL_l )
	        out << "l: " << x.l << ' ';
	      if( x.deps&DEP_MODEL_m )
	        out << "m: " << x.m << ' ';
	      if( x.deps&DEP_MODEL_F )
	        out << "F: " << wcf.classString(x.F) << ' ';
	      if( x.deps&DEP_MODEL_E )
	        out << "E: " << wce.classString(x.E) << ' ';
	      if( x.vacancies1!=-1 )
	        out << "v1: " << x.vacancies1 << ' ';
	      if( x.vacancies2!=-1 )
	        out << "v2: " << x.vacancies2 << ' ';
	      out << '\n';
	      return out;
	    }

	  /** Writes "E F v1 v2 " without labels; a class not selected in deps is written as "0". */
	  friend ostream&print1_m5(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf)
	    {
	      if( x.deps&DEP_MODEL_E )
	        out << wce.classString(x.E);
	      else
	        out << string("0");
	      out << ' ';
	      if( x.deps&DEP_MODEL_F )
	        out << wcf.classString(x.F);
	      else
	        out << string("0");
	      out << ' ';
	      out << x.vacancies1 << ' ' << x.vacancies2 << ' ';
	      return out;
	    }

	  /** Same layout as print1, but the fields are selected by the DEP_MODELb_* bits. */
	  friend ostream&printb1(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf)
	    {
	      if( x.deps&DEP_MODELb_l )
	        out << "l: " << x.l << ' ';
	      if( x.deps&DEP_MODELb_m )
	        out << "m: " << x.m << ' ';
	      if( x.deps&DEP_MODELb_F )
	        out << "F: " << wcf.classString(x.F) << ' ';
	      if( x.deps&DEP_MODELb_E )
	        out << "E: " << wce.classString(x.E) << ' ';
	      if( x.vacancies1!=-1 )
	        out << "v1: " << x.vacancies1 << ' ';
	      if( x.vacancies2!=-1 )
	        out << "v2: " << x.vacancies2 << ' ';
	      out << '\n';
	      return out;
	    }

	  /** Writes "-1 F v1 v2 "; note that the F class is selected by DEP_MODEL_F here. */
	  friend ostream&printb1_m5(ostream&out,const m4_key&x,const WordClasses&wcf)
	    {
	      out << "-1 ";
	      if( x.deps&DEP_MODEL_F )
	        out << wcf.classString(x.F);
	      else
	        out << string("0");
	      out << ' ';
	      out << x.vacancies1 << ' ' << x.vacancies2 << ' ';
	      return out;
	    }
	};

	/**
	 * Ordering of m4_key objects used by the D1 map.
	 *
	 * l, m, F and E take part only when the matching DEP_MODEL_* bit is set in
	 * deps; the two vacancy counts always take part.  prevj is not compared.
	 * The first field that differs decides the result.
	 */
	class compare1
	{
	 private:
	  int deps;
	 public:
	  compare1(int _deps) : deps(_deps) {}
	  /** Returns true when a orders strictly before b. */
	  bool operator()(const m4_key&a,const m4_key&b)const
	    {
	      if( (deps&DEP_MODEL_l) && a.l!=b.l )
	        return a.l<b.l;
	      if( (deps&DEP_MODEL_m) && a.m!=b.m )
	        return a.m<b.m;
	      if( (deps&DEP_MODEL_F) && a.F!=b.F )
	        return a.F<b.F;
	      if( (deps&DEP_MODEL_E) && a.E!=b.E )
	        return a.E<b.E;
	      if( a.vacancies1!=b.vacancies1 )
	        return a.vacancies1<b.vacancies1;
	      return a.vacancies2<b.vacancies2;
	    }
	};

	/**
	 * Ordering of m4_key objects used by the Db1 map.
	 *
	 * l, m, F and E take part only when the matching DEP_MODELb_* bit is set in
	 * deps; the two vacancy counts always take part.  prevj is not compared.
	 * The first field that differs decides the result.
	 */
	class compareb1
	{
	 private:
	  int deps;
	 public:
	  compareb1(int _deps) : deps(_deps) {}
	  /** Returns true when a orders strictly before b. */
	  bool operator()(const m4_key&a,const m4_key&b)const
	    {
	      if( (deps&DEP_MODELb_l) && a.l!=b.l )
	        return a.l<b.l;
	      if( (deps&DEP_MODELb_m) && a.m!=b.m )
	        return a.m<b.m;
	      if( (deps&DEP_MODELb_F) && a.F!=b.F )
	        return a.F<b.F;
	      if( (deps&DEP_MODELb_E) && a.E!=b.E )
	        return a.E<b.E;
	      if( a.vacancies1!=b.vacancies1 )
	        return a.vacancies1<b.vacancies1;
	      return a.vacancies2<b.vacancies2;
	    }
	};

	inline void tokenize(const string&in,Vector<string>&out)
	{
	string s;
	istringstream l(in);
	while(l>>s)
	out.push_back(s);
	}

	class d4model
	{
	public:
	typedef Vector<pair<COUNT,PROB> > Vpff;
	map<m4_key,Vpff,compare1 > D1;
	map<m4_key,Vpff,compareb1> Db1;
	PositionIndex msl;
	WordClasses ewordclasses;
	WordClasses fwordclasses;
	template<class MAPPER>
	void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile)
	{
	ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
	if( !estrm )
	{
	cerr << "ERROR: can not read " << efile << endl;
	}
	else
	ewordclasses.read(estrm,m1);
	if( !fstrm )
	cerr << "ERROR: can not read " << ffile << endl;
	else
	fwordclasses.read(fstrm,m2);
	}
	d4model(PositionIndex _msl)
	: D1(compare1(M4_Dependencies)),Db1(compareb1(M4_Dependencies)),msl(_msl)
	{}
	COUNT&getCountRef_first(WordIndex j,WordIndex j_cp,int E,int F,int l,int m)
	{
	assert(j>=1);
	m4_key key(M4_Dependencies,l,m,F,E,j_cp,-1,-1);
	map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
	if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
	assert(p!=D1.end());
	return (p->second)[j-j_cp+msl].first;
	}
	COUNT&getCountRef_bigger(WordIndex j,WordIndex j_prev,int E,int F,int l,int m)
	{
	assert(j>=1);
	assert(j_prev>=1);
	m4_key key(M4_Dependencies,l,m,F,E,j_prev,-1,-1);
	map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
	if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
	assert(p!=Db1.end());
	return (p->second)[j-j_prev+msl].first;
	}
	map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E,int F,int l,int m)const
	{return D1.find(m4_key(M4_Dependencies,l,m,F,E,0,-1,-1));}
	PROB getProb_first_withiterator(WordIndex j,WordIndex j_cp,int m,const map<m4_key,Vpff,compare1 >::const_iterator& p)const
	{
	assert(j>=1);//assert(j_cp>=0);
	assert(j<=msl);assert(j_cp<=msl);
	if(p==D1.end())
	{
	return PROB_SMOOTH;
	}
	else
	{
	massert((p->second)[j-j_cp+msl].second<=1.0);
	return max(PROB_SMOOTH,d4modelsmooth_factor/(2m-1)+(1-d4modelsmooth_factor)(p->second)[j-j_cp+msl].second);
	}
	}
	PROB getProb_first(WordIndex j,WordIndex j_cp,int E,int F,int l,int m)const
	{
	assert(j>=1);//assert(j_cp>=0);
	assert(j<=msl);assert(j_cp<=msl);
	m4_key key(M4_Dependencies,l,m,F,E,j_cp,-1,-1);
	map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
	if(p==D1.end())
	{
	return PROB_SMOOTH;
	}
	else
	{
	massert((p->second)[j-j_cp+msl].second<=1.0);
	return max(PROB_SMOOTH,d4modelsmooth_factor/(2m-1)+(1-d4modelsmooth_factor)(p->second)[j-j_cp+msl].second);
	}
	}
	map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E,int F,int l,int m)const
	{
	return Db1.find(m4_key(M4_Dependencies,l,m,F,E,0,-1,-1));
	}
	PROB getProb_bigger_withiterator(WordIndex j,WordIndex j_prev,int m,const map<m4_key,Vpff,compareb1 >::const_iterator&p)const
	{
	massert(j>=1);massert(j_prev>=1);
	massert(j>j_prev);
	massert(j<=msl);massert(j_prev<=msl);
	if(p==Db1.end())
	{
	return PROB_SMOOTH;
	}
	else
	{
	massert((p->second)[j-j_prev+msl].second<=1.0 );
	return max(PROB_SMOOTH,d4modelsmooth_factor/(m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
	}
	}

	PROB getProb_bigger(WordIndex j,WordIndex j_prev,int E,int F,int l,int m)const
	{
	massert(j>=1);massert(j_prev>=1);
	massert(j>j_prev);
	massert(j<=msl);massert(j_prev<=msl);
	m4_key key(M4_Dependencies,l,m,F,E,j_prev,-1,-1);
	map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
	if(p==Db1.end())
	{
	return PROB_SMOOTH;
	}
	else
	{
	massert((p->second)[j-j_prev+msl].second<=1.0 );
	return max(PROB_SMOOTH,d4modelsmooth_factor/(m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
	}
	}
	void normalizeTable()
	{
	int nParams=0;
	for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
	{
	Vpff&d1=i->second;
	double sum=0.0;
	for(PositionIndex i=0;i<d1.size();i++)
	sum+=d1[i].first;
	for(PositionIndex i=0;i<d1.size();i++)
	{
	d1[i].second=sum?(d1[i].first/sum):(1.0/d1.size());
	nParams++;
	}
	}
	for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
	{
	Vpff&db1=i->second;
	double sum=0.0;
	for(PositionIndex i=0;i<db1.size();i++)
	sum+=db1[i].first;
	for(PositionIndex i=0;i<db1.size();i++)
	{
	db1[i].second=sum?(db1[i].first/sum):(1.0/db1.size());
	nParams++;
	}
	}
	cout << "D4 table contains " << nParams << " parameters.\n";
	}
	void clear()
	{
	for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
	{
	Vpff&d1=i->second;
	for(PositionIndex i=0;i<d1.size();i++)
	d1[i].first=0.0;
	}
	for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
	{
	Vpff&db1=i->second;
	for(PositionIndex i=0;i<db1.size();i++)
	db1[i].first=0.0;
	}
	}

	void printProbTable(const charfname1,const charfname2)
	{
	ofstream out(fname1);
	double ssum=0.0;
	out << "# Translation tables for Model 4 .\n";
	out << "# Table for head of cept.\n";
	for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
	{
	const Vpff&d1=i->second;
	double sum=0.0;
	for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
	if ( sum )
	{
	print1(out,i->first,ewordclasses,fwordclasses);
	out << "SUM: " << sum << ' '<< '\n';
	for(unsigned ii=0;ii<d1.size();ii++)
	if( d1[ii].first )
	out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n';
	out << endl;
	}
	ssum+=sum;
	}
	out << "# Table for non-head of cept.\n";
	for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
	{
	const Vpff&db1=i->second;
	double sum=0.0;
	for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
	if( sum )
	{
	printb1(out,i->first,ewordclasses,fwordclasses);
	out << "SUM: " << sum << ' '<<'\n';
	for(unsigned ii=0;ii<db1.size();ii++)
	if( db1[ii].first )
	{
	out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n';
	}
	out << endl;
	}
	ssum+=sum;
	}
	out << endl << "FULL-SUM: " << ssum << endl;
	if( M4_Dependencies==76 )
	{
	ofstream out2(fname2);
	for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
	{
	const Vpff&d1=i->second;
	for(unsigned ii=0;ii<d1.size();ii++)
	if( d1[ii].first )
	out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n';
	}
	for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
	{
	const Vpff&db1=i->second;
	for(unsigned ii=0;ii<db1.size();ii++)
	if( db1[ii].first )
	out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n';
	}
	}
	}
	bool readProbTable(const char *fname)
	{
	cerr << "Reading D4Tables from " << fname << endl;
	ifstream file(fname);
	string line;
	do
	{
	getline(file,line);
	} while(line.length()&&line[0]=='#');

	do
	{
	while(line.length()==0)
	getline(file,line);
	if( line[0]=='#')
	break;
	Vector<string> linestr;
	tokenize(line,linestr);
	m4_key k(M4_Dependencies,0,0,0,0,0,-1,-1);
	for(unsigned int i=0;i<linestr.size();i+=2)
	{
	if( linestr[i]=="l:" ){k.l=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_l);}
	if( linestr[i]=="m:" ){k.m=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_m);}
	if( linestr[i]=="F:" ){k.F=fwordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODEL_F);}
	if( linestr[i]=="E:" ){k.E=ewordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODEL_E);}
	//if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);}
	}
	string str;
	double sum;
	file >> str >> sum;
	iassert(str=="SUM:");
	if( str!="SUM:")
	cerr << "ERROR: string is " << str << " and not sum " << endl;

	do
	{
	int value;
	double count;
	getline(file,line);
	istringstream twonumbers(line);
	if(twonumbers >> value >> count)
	{
	if( D1.count(k)==0 )
	D1.insert(make_pair(k,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0))));
	D1[k][value+msl]=make_pair(count,count/sum);
	}
	}while(line.length());
	}while(file);
	do
	{
	getline(file,line);
	} while(line.length()&&line[0]=='#');
	do
	{
	while(line.length()==0)
	getline(file,line);
	if( line[0]=='#')
	break;
	Vector<string> linestr;
	tokenize(line,linestr);
	m4_key k(M4_Dependencies,0,0,0,0,0,-1,-1);
	bool sumRead=0;
	for(unsigned int i=0;i<linestr.size();i+=2)
	{
	if( linestr[i]=="l:" ){k.l=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODELb_l);}
	else if( linestr[i]=="m:" ){k.m=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODELb_m);}
	else if( linestr[i]=="F:" ){k.F=fwordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODELb_F);}
	else if( linestr[i]=="E:" ){k.E=ewordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODELb_E);}
	else if( linestr[i]=="SUM:" )
	{
	cerr << "Warning: obviously no dependency.\n";
	sumRead=1;
	}
	else if( linestr[i]=="FULL-SUM:" )
	{
	break;
	}
	else
	{
	cerr << "ERROR: error in reading d4 tables: " << linestr[i] << ' ' << linestr[i+1] << endl;
	}
	}
	string str;
	double sum;
	if( sumRead==0 )
	file >> str >> sum;
	else
	{
	str=linestr[0];
	sum=atof(linestr[1].c_str());
	}
	if( str!="SUM:" )
	cerr << "ERROR: should read SUM but read " << str << endl;
	do
	{
	int value;
	double count;
	getline(file,line);
	istringstream twonumbers(line);
	if(twonumbers >> value >> count)
	{
	if( Db1.count(k)==0 )
	Db1.insert(make_pair(k,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0))));
	Db1[k][value+msl]=make_pair(count,count/sum);
	}
	}while(file&&line.length());
	}while(file);
	return 1;
	}
	};

	#endif