scripts/training/giza-pp/GIZA++-v2/TTables.h - joshua - Git at Google

 /*

 EGYPT Toolkit for Statistical Machine Translation
 Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
 USA.

 */
 /* --------------------------------------------------------------------------*
  *                                                                           *
  * Module : TTables                                                          *
  *                                                                           *
  * Prototypes File: TTables.h                                               *
  *                                                                           *
  * Objective: Defines clases and methods for handling I/O for Probability &  *
  *            Count tables and also alignment tables                         *
  *****************************************************************************/

 #ifndef _ttables_h
 #define _ttables_h 1


 #include "defs.h"
 #include "vocab.h"

 #include <cassert>

 #include <iostream>
 #include <algorithm>
 #include <functional>
 #include <map>
 #include <set>
 #include "Vector.h"
 #include <utility>

 #include <fstream>

 #include "Globals.h"


 /* The tables defined in the following classes are defined as hash tables. For
    example. the t-table is a hash function of a word pair; an alignment is
    a hash function of a vector of integer numbers (sentence positions) and so
    on   */


 /*----------- Defnition of Hash Function for class tmodel ------- -----------*/

 typedef pair<WordIndex, WordIndex> wordPairIds;


 class hashpair : public unary_function< pair<WordIndex, WordIndex>, size_t >
 {
 public:
   size_t operator() (const pair<WordIndex, WordIndex>& key) const
     {
       return (size_t) MAX_W*key.first + key.second; /* hash function and it
 						       is guarnteed to have
 						       unique id for each
 						       unique pair */
     }
 };


 /* ------------------ Class Prototype Definitions ---------------------------*
   Class Name: tmodel
   Objective: This defines the underlying data structur for t Tables and t
   Count Tables. They are defined as a hash table. Each entry in the hash table
   is the probability (P(fj/ei) ) or count collected for ( C(fj/ei)). The
   probability and the count are represented as log integer probability as
   defined by the class LogProb .

   This class is used to represents t Tables (probabiliity) and n (fertility
   Tables and also their corresponding count tables .

  *---------------------------------------------------------------------------*/

 //typedef float COUNT ;
 //typedef LogProb PROB ;
 template <class COUNT, class PROB>
 class LpPair {
  public:
   COUNT count ;
   PROB  prob ;
  public: // constructor
   LpPair():count(0), prob(0){} ;
   LpPair(COUNT c, PROB p):count(c), prob(p){};
 } ;

 #ifdef BINARY_SEARCH_FOR_TTABLE


 template<class T>
 T*mbinary_search(T*x,T*y,unsigned int val)
 {
   if( y-x==0 )
     return 0;
   if( x->first==val)
     return x;
   if( y-x<2 )
     return 0;
   T*mid=x+(y-x)/2;
   if( val < mid->first )
     return mbinary_search(x,mid,val);
   else
     return mbinary_search(mid,y,val);

 }

 template<class T>
 const T*mbinary_search(const T*x,const T*y,unsigned int val)
 {
   if( y-x==0 )
     return 0;
   if( x->first==val)
     return x;
   if( y-x<2 )
     return 0;
   const T*mid=x+(y-x)/2;
   if( val < mid->first )
     return mbinary_search(x,mid,val);
   else
     return mbinary_search(mid,y,val);

 }

 template <class COUNT, class PROB>
 class tmodel{
   typedef LpPair<COUNT, PROB> CPPair;
  public:
   int noEnglishWords;  // total number of unique source words
   int noFrenchWords;   // total number of unique target words
   //vector<pair<unsigned int,CPPair> > fs;
   //vector<unsigned int> es;
   vector< vector<pair<unsigned int,CPPair> >* > lexmat;

   void erase(WordIndex e, WordIndex f)
     {
       CPPair *p=find(e,f);
       if(p)
 	*p=CPPair(0,0);
     };
   CPPair*find(int e,int f)
     {
       //pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
       //pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
       pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
       pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
       pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
       if( x==0 )
 	{
 	  //cerr << "A:DID NOT FIND ENTRY: " << e << " " << f << '\n';
 	  //abort();
 	  return 0;
 	}
       return &(x->second);
     }
   const CPPair*find(int e,int f)const
     {
       const pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
       const pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
       //const pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
       //const pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
       const pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
       if( x==0 )
 	{
 	  //cerr << "B:DID NOT FIND ENTRY: " << e << " " << f << '\n';
 	  //abort();
 	  return 0;
 	}

       return &(x->second);
     }
 public:
   void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
     *find(e,f)=CPPair(cval,pval);
   }
   CPPair*getPtr(int e,int f){return find(e,f);}
   tmodel(const string&fn)
     {
       int count=0,count2=0;
       ifstream infile2(fn.c_str());
       int e,f,olde=-1,oldf=-1;
       pair<unsigned int,CPPair> cp;
       vector< pair<unsigned int,CPPair> > cps;
       while(infile2>>e>>f)
 	{
 	  cp.first=f;
 	  assert(e>=olde);
 	  assert(e>olde ||f>oldf);
 	  if( e!=olde&&olde>=0 )
 	    {
 	      int oldsize=lexmat.size();
 	      lexmat.resize(olde+1);
 	      for(unsigned int i=oldsize;i<lexmat.size();++i)
 		lexmat[i]=0;
 	      lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
 	      cps.clear();
 	      if( !((*lexmat[olde]).size()==(*lexmat[olde]).capacity()) )
 		cerr << "eRROR: waste of memory: " << (*lexmat[olde]).size() << " " << (*lexmat[olde]).capacity() << endl;
 	      count2+=lexmat[olde]->capacity();
 	    }
 	  cps.push_back(cp);
 	  olde=e;
 	  oldf=f;
 	  count++;
 	}
       lexmat.resize(olde+1);
       lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
       count2+=lexmat[olde]->capacity();
       cout << "There are " << count << " " << count2 << " entries in table" << '\n';
     }


   /*  tmodel(const string&fn)
     {
       size_t count=0;
       {
 	ifstream infile1(fn.c_str());
 	if( !infile1 )
 	  {
 	    cerr << "ERROR: can't read coocurrence file " << fn << '\n';
 	    abort();
 	  }
 	int e,f;
 	while(infile1>>e>>f)
 	  count++;
       }
       cout << "There are " << count << " entries in table" << '\n';
       ifstream infile2(fn.c_str());
       fs.resize(count);
       int e,f,olde=-1,oldf=-1;
       pair<unsigned int,CPPair> cp;
       count=0;
       while(infile2>>e>>f)
 	{
 	  assert(e>=olde);
 	  assert(e>olde ||f>oldf);
 	  if( e!=olde )
 	    {
 	      es.resize(e+1);
 	      for(unsigned int i=olde+1;int(i)<=e;++i)
 		es[i]=count;
 	    }
 	  cp.first=f;
 	  assert(count<fs.size());
 	  fs[count]=cp;
 	  //fs.push_back(cp);
 	  olde=e;
 	  oldf=f;
 	  count++;
 	}
       assert(count==fs.size());
       es.push_back(fs.size());
       cout << fs.size() << " " << count << " coocurrences read" << '\n';
       }*/
   void incCount(WordIndex e, WordIndex f, COUNT inc)
     {
       if( inc )
 	{
 	  CPPair *p=find(e,f);
 	  if( p )
 	    p->count += inc ;
 	}
     }

   PROB getProb(WordIndex e, WordIndex f) const
     {
       const CPPair *p=find(e,f);
       if( p )
 	return max(p->prob, PROB_SMOOTH);
       else
 	return PROB_SMOOTH;
     }

   COUNT getCount(WordIndex e, WordIndex f) const
     {
       const CPPair *p=find(e,f);
       if( p )
 	return p->count;
       else
 	return 0.0;
     }

   void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
   void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
   void printProbTableInverse(const char *filename,
 			     const Vector<WordEntry>& evlist,
 			     const Vector<WordEntry>& fvlist,
 			     const double eTotal,
 			     const double fTotal,
 			     const bool actual = false ) const;
   void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
   void readProbTable(const char *filename);
 };


 #else


 template <class COUNT, class PROB>
 class tmodel{
   typedef LpPair<COUNT, PROB> CPPair;
  public:
   int noEnglishWords;  // total number of unique source words
   int noFrenchWords;   // total number of unique target words
   hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> > ef;
   void erase(WordIndex e, WordIndex f)
   // In: a source and a target token ids.
   // removes the entry with that pair from table
     {
       ef.erase(wordPairIds(e, f));
     };

 public:
   Vector<PROB> total2;
   Vector<int> nFrench;
   Vector<int> nEng;


   // methods;

   // insert: add entry P(fj/ei) to the hash function, Default value is 0.0
   void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
     ef[wordPairIds(e, f)].count = cval ;
     ef[wordPairIds(e, f)].prob = pval ;
   }

   // returns a reference to the word pair, if does not exists, it creates it.
   CPPair&getRe(WordIndex e, WordIndex f)
     {return ef[wordPairIds(e, f)];}

   // returns a pointer to an existing word pair. if pair does not exists,
   // the method returns the zero pointer (NULL)

   CPPair*getPtr(WordIndex e, WordIndex f)
     {
       // look up this pair and return its position
       typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::iterator i = ef.find(wordPairIds(e, f));
       if(i != ef.end())  // if it exists, return a pointer to it.
 	return(&((*i).second));
       else return(0) ; // else return NULL pointer
     }

   void incCount(WordIndex e, WordIndex f, COUNT inc)
     // increments the count of the given word pair. if the pair does not exist,
     // it creates it with the given value.
     {
       if( inc )
 	ef[wordPairIds(e, f)].count += inc ;
     }

   PROB getProb(WordIndex e, WordIndex f) const
     // read probability value for P(fj/ei) from the hash table
     // if pair does not exist, return floor value PROB_SMOOTH
     {
       typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i= ef.find(wordPairIds(e, f));
       if(i == ef.end())
 	return PROB_SMOOTH;
       else
 	return max(((*i).second).prob, PROB_SMOOTH);
     }

   COUNT getCount(WordIndex e, WordIndex f) const
     /* read count value for entry pair (fj/ei) from the hash table */
     {
       typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i= ef.find(wordPairIds(e, f));
       if(i == ef.end())
 	return 0;
       else
 	return ((*i).second).count;
     }

   inline const hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >& getHash(void) const {return ef;};
   /* get a refernece to the hash table */
   //inline void resize(WordIndex n) {ef.resize(n);};
   // to resize he hash table

   void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
   void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
   // print the t table to the given file but this time print actual source and
   //     target words instead of thier token ids

   void printProbTableInverse(const char *filename,
 			     const Vector<WordEntry>& evlist,
 			     const Vector<WordEntry>& fvlist,
 			     const double eTotal,
 			     const double fTotal,
 			     const bool actual = false ) const;
   // dump  inverse of t table (i.e P(ei/fj)) to the given file name,
   //  if the given flag is true then actual words are printed not token ids

   void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
   // to norlmalize the table i.e. make sure P(fj/ei) for all j is equal to 1

   void readProbTable(const char *filename);
   //  void readAsFertilityTable(const char *filename);
 };
 /*--------------- End of Class Definition for tmodel -----------------------*/

 #endif

 #endif
	/*

	EGYPT Toolkit for Statistical Machine Translation
	Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
	USA.

	*/
	/* --------------------------------------------------------------------------*
	* *
	* Module : TTables *
	* *
	* Prototypes File: TTables.h *
	* *
	* Objective: Defines clases and methods for handling I/O for Probability & *
	* Count tables and also alignment tables *
	*****************************************************************************/

	#ifndef _ttables_h
	#define _ttables_h 1


	#include "defs.h"
	#include "vocab.h"

	#include <cassert>

	#include <iostream>
	#include <algorithm>
	#include <functional>
	#include <map>
	#include <set>
	#include "Vector.h"
	#include <utility>

	#include <fstream>

	#include "Globals.h"


	/* The tables defined in the following classes are defined as hash tables. For
	example. the t-table is a hash function of a word pair; an alignment is
	a hash function of a vector of integer numbers (sentence positions) and so
	on */


	/----------- Defnition of Hash Function for class tmodel ------- -----------/

	typedef pair<WordIndex, WordIndex> wordPairIds;


	class hashpair : public unary_function< pair<WordIndex, WordIndex>, size_t >
	{
	public:
	size_t operator() (const pair<WordIndex, WordIndex>& key) const
	{
	return (size_t) MAX_Wkey.first + key.second; / hash function and it
	is guarnteed to have
	unique id for each
	unique pair */
	}
	};



	/* ------------------ Class Prototype Definitions ---------------------------*
	Class Name: tmodel
	Objective: This defines the underlying data structur for t Tables and t
	Count Tables. They are defined as a hash table. Each entry in the hash table
	is the probability (P(fj/ei) ) or count collected for ( C(fj/ei)). The
	probability and the count are represented as log integer probability as
	defined by the class LogProb .

	This class is used to represents t Tables (probabiliity) and n (fertility
	Tables and also their corresponding count tables .

	---------------------------------------------------------------------------/

	//typedef float COUNT ;
	//typedef LogProb PROB ;
	template <class COUNT, class PROB>
	class LpPair {
	public:
	COUNT count ;
	PROB prob ;
	public: // constructor
	LpPair():count(0), prob(0){} ;
	LpPair(COUNT c, PROB p):count(c), prob(p){};
	} ;

	#ifdef BINARY_SEARCH_FOR_TTABLE


	template<class T>
	Tmbinary_search(Tx,T*y,unsigned int val)
	{
	if( y-x==0 )
	return 0;
	if( x->first==val)
	return x;
	if( y-x<2 )
	return 0;
	T*mid=x+(y-x)/2;
	if( val < mid->first )
	return mbinary_search(x,mid,val);
	else
	return mbinary_search(mid,y,val);

	}

	template<class T>
	const Tmbinary_search(const Tx,const T*y,unsigned int val)
	{
	if( y-x==0 )
	return 0;
	if( x->first==val)
	return x;
	if( y-x<2 )
	return 0;
	const T*mid=x+(y-x)/2;
	if( val < mid->first )
	return mbinary_search(x,mid,val);
	else
	return mbinary_search(mid,y,val);

	}

	template <class COUNT, class PROB>
	class tmodel{
	typedef LpPair<COUNT, PROB> CPPair;
	public:
	int noEnglishWords; // total number of unique source words
	int noFrenchWords; // total number of unique target words
	//vector<pair<unsigned int,CPPair> > fs;
	//vector<unsigned int> es;
	vector< vector<pair<unsigned int,CPPair> >* > lexmat;

	void erase(WordIndex e, WordIndex f)
	{
	CPPair *p=find(e,f);
	if(p)
	*p=CPPair(0,0);
	};
	CPPair*find(int e,int f)
	{
	//pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
	//pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
	pair<unsigned int,CPPair> be=&(lexmat[e])[0];
	pair<unsigned int,CPPair> en=&(lexmat[e])[0]+(*lexmat[e]).size();
	pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
	if( x==0 )
	{
	//cerr << "A:DID NOT FIND ENTRY: " << e << " " << f << '\n';
	//abort();
	return 0;
	}
	return &(x->second);
	}
	const CPPair*find(int e,int f)const
	{
	const pair<unsigned int,CPPair> be=&(lexmat[e])[0];
	const pair<unsigned int,CPPair> en=&(lexmat[e])[0]+(*lexmat[e]).size();
	//const pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
	//const pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
	const pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
	if( x==0 )
	{
	//cerr << "B:DID NOT FIND ENTRY: " << e << " " << f << '\n';
	//abort();
	return 0;
	}

	return &(x->second);
	}
	public:
	void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
	*find(e,f)=CPPair(cval,pval);
	}
	CPPair*getPtr(int e,int f){return find(e,f);}
	tmodel(const string&fn)
	{
	int count=0,count2=0;
	ifstream infile2(fn.c_str());
	int e,f,olde=-1,oldf=-1;
	pair<unsigned int,CPPair> cp;
	vector< pair<unsigned int,CPPair> > cps;
	while(infile2>>e>>f)
	{
	cp.first=f;
	assert(e>=olde);
	assert(e>olde \|\|f>oldf);
	if( e!=olde&&olde>=0 )
	{
	int oldsize=lexmat.size();
	lexmat.resize(olde+1);
	for(unsigned int i=oldsize;i<lexmat.size();++i)
	lexmat[i]=0;
	lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
	cps.clear();
	if( !((lexmat[olde]).size()==(lexmat[olde]).capacity()) )
	cerr << "eRROR: waste of memory: " << (lexmat[olde]).size() << " " << (lexmat[olde]).capacity() << endl;
	count2+=lexmat[olde]->capacity();
	}
	cps.push_back(cp);
	olde=e;
	oldf=f;
	count++;
	}
	lexmat.resize(olde+1);
	lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
	count2+=lexmat[olde]->capacity();
	cout << "There are " << count << " " << count2 << " entries in table" << '\n';
	}


	/* tmodel(const string&fn)
	{
	size_t count=0;
	{
	ifstream infile1(fn.c_str());
	if( !infile1 )
	{
	cerr << "ERROR: can't read coocurrence file " << fn << '\n';
	abort();
	}
	int e,f;
	while(infile1>>e>>f)
	count++;
	}
	cout << "There are " << count << " entries in table" << '\n';
	ifstream infile2(fn.c_str());
	fs.resize(count);
	int e,f,olde=-1,oldf=-1;
	pair<unsigned int,CPPair> cp;
	count=0;
	while(infile2>>e>>f)
	{
	assert(e>=olde);
	assert(e>olde \|\|f>oldf);
	if( e!=olde )
	{
	es.resize(e+1);
	for(unsigned int i=olde+1;int(i)<=e;++i)
	es[i]=count;
	}
	cp.first=f;
	assert(count<fs.size());
	fs[count]=cp;
	//fs.push_back(cp);
	olde=e;
	oldf=f;
	count++;
	}
	assert(count==fs.size());
	es.push_back(fs.size());
	cout << fs.size() << " " << count << " coocurrences read" << '\n';
	}*/
	void incCount(WordIndex e, WordIndex f, COUNT inc)
	{
	if( inc )
	{
	CPPair *p=find(e,f);
	if( p )
	p->count += inc ;
	}
	}

	PROB getProb(WordIndex e, WordIndex f) const
	{
	const CPPair *p=find(e,f);
	if( p )
	return max(p->prob, PROB_SMOOTH);
	else
	return PROB_SMOOTH;
	}

	COUNT getCount(WordIndex e, WordIndex f) const
	{
	const CPPair *p=find(e,f);
	if( p )
	return p->count;
	else
	return 0.0;
	}

	void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
	void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
	void printProbTableInverse(const char *filename,
	const Vector<WordEntry>& evlist,
	const Vector<WordEntry>& fvlist,
	const double eTotal,
	const double fTotal,
	const bool actual = false ) const;
	void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
	void readProbTable(const char *filename);
	};


	#else


	template <class COUNT, class PROB>
	class tmodel{
	typedef LpPair<COUNT, PROB> CPPair;
	public:
	int noEnglishWords; // total number of unique source words
	int noFrenchWords; // total number of unique target words
	hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> > ef;
	void erase(WordIndex e, WordIndex f)
	// In: a source and a target token ids.
	// removes the entry with that pair from table
	{
	ef.erase(wordPairIds(e, f));
	};

	public:
	Vector<PROB> total2;
	Vector<int> nFrench;
	Vector<int> nEng;


	// methods;

	// insert: add entry P(fj/ei) to the hash function, Default value is 0.0
	void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
	ef[wordPairIds(e, f)].count = cval ;
	ef[wordPairIds(e, f)].prob = pval ;
	}

	// returns a reference to the word pair, if does not exists, it creates it.
	CPPair&getRe(WordIndex e, WordIndex f)
	{return ef[wordPairIds(e, f)];}

	// returns a pointer to an existing word pair. if pair does not exists,
	// the method returns the zero pointer (NULL)

	CPPair*getPtr(WordIndex e, WordIndex f)
	{
	// look up this pair and return its position
	typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::iterator i = ef.find(wordPairIds(e, f));
	if(i != ef.end()) // if it exists, return a pointer to it.
	return(&((*i).second));
	else return(0) ; // else return NULL pointer
	}

	void incCount(WordIndex e, WordIndex f, COUNT inc)
	// increments the count of the given word pair. if the pair does not exist,
	// it creates it with the given value.
	{
	if( inc )
	ef[wordPairIds(e, f)].count += inc ;
	}

	PROB getProb(WordIndex e, WordIndex f) const
	// read probability value for P(fj/ei) from the hash table
	// if pair does not exist, return floor value PROB_SMOOTH
	{
	typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i= ef.find(wordPairIds(e, f));
	if(i == ef.end())
	return PROB_SMOOTH;
	else
	return max(((*i).second).prob, PROB_SMOOTH);
	}

	COUNT getCount(WordIndex e, WordIndex f) const
	/* read count value for entry pair (fj/ei) from the hash table */
	{
	typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i= ef.find(wordPairIds(e, f));
	if(i == ef.end())
	return 0;
	else
	return ((*i).second).count;
	}

	inline const hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >& getHash(void) const {return ef;};
	/* get a refernece to the hash table */
	//inline void resize(WordIndex n) {ef.resize(n);};
	// to resize he hash table

	void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
	void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
	// print the t table to the given file but this time print actual source and
	// target words instead of thier token ids

	void printProbTableInverse(const char *filename,
	const Vector<WordEntry>& evlist,
	const Vector<WordEntry>& fvlist,
	const double eTotal,
	const double fTotal,
	const bool actual = false ) const;
	// dump inverse of t table (i.e P(ei/fj)) to the given file name,
	// if the given flag is true then actual words are printed not token ids

	void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
	// to norlmalize the table i.e. make sure P(fj/ei) for all j is equal to 1

	void readProbTable(const char *filename);
	// void readAsFertilityTable(const char *filename);
	};
	/--------------- End of Class Definition for tmodel -----------------------/

	#endif

	#endif