package joshua.decoder.ff;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.chart_parser.SourcePath;
import joshua.decoder.ff.state_maintenance.DPState;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;
import joshua.decoder.segment_file.Sentence;
/**
* This class defines Joshua's feature function interface, for both sparse and
* dense features. It is immediately inherited by StatelessFF and StatefulFF,
* which provide functionality common to stateless and stateful features,
* respectively. Any feature implementation should extend those classes, and not
* this one. The distinction between stateless and stateful features is somewhat
* narrow: all features have the opportunity to return an instance of a
* {@link DPState} object, and stateless ones just return null.
*
* Features in Joshua work like templates. Each feature function defines any
* number of actual features, which are associated with weights. The task of the
* feature function is to compute the features that are fired in different
* circumstances and then return the inner product of those features with the
* weight vector. Feature functions can also produce estimates of their future
* cost (via {@link #estimateCost}); these values are not used in computing the
* score, but are only used for sorting rules during cube pruning. The
* individual features produced by each template should have globally unique
* names; a good convention is to prefix each feature with the name of the
* template that produced it.
*
* Joshua does not retain individual feature values while decoding, since this
* requires keeping a sparse feature vector along every hyperedge, which can be
* expensive. Instead, it computes only the weighted cost of each edge. If the
* individual feature values are requested, the feature functions are replayed
* in post-processing, say during k-best list extraction. This is implemented in
* a generic way by passing an {@link Accumulator} object to the compute()
* function. During decoding, the accumulator simply sums weighted features in a
* scalar. During k-best extraction, when individual feature values are needed,
* a {@link FeatureAccumulator} is used to retain the individual values.
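*
* <p>A sketch of that replay (the feature function {@code ff} and the call
* arguments are illustrative locals, not part of this API):
*
* <pre>{@code
* // During decoding: fold every fired feature into one weighted score.
* ScoreAccumulator score = ff.new ScoreAccumulator();
* ff.compute(rule, tailNodes, i, j, sourcePath, sentence, score);
* float cost = score.getScore();
*
* // During k-best extraction: replay to recover the named, unweighted values.
* FeatureAccumulator features = ff.new FeatureAccumulator();
* ff.compute(rule, tailNodes, i, j, sourcePath, sentence, features);
* FeatureVector delta = features.getFeatures();
* }</pre>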
*
* @author Matt Post <post@cs.jhu.edu>
* @author Juri Ganitkevich <juri@cs.jhu.edu>
*/
public abstract class FeatureFunction {
/*
* The name of the feature function; this generally matches the weight name in
* the config file. This can also be used as a prefix for feature / weight
* names, for templates that define multiple features.
*/
protected String name = null;
// The list of arguments passed to the feature, and the hash for the parsed args
protected String[] args;
protected HashMap<String, String> parsedArgs = null;
/*
* The global weight vector used by the decoder, passed in when the feature is
* instantiated
*/
protected FeatureVector weights;
/* The config */
protected JoshuaConfiguration config;
public String getName() {
return name;
}
// Whether the feature has state.
public abstract boolean isStateful();
public FeatureFunction(FeatureVector weights, String name, String[] args, JoshuaConfiguration config) {
this.weights = weights;
this.name = name;
this.args = args;
this.config = config;
this.parsedArgs = FeatureFunction.parseArgs(args);
}
public String logString() {
try {
return String.format("%s (weight %.3f)", name, weights.get(name));
} catch (RuntimeException e) {
return name;
}
}
/**
* This is the main function for defining feature values. The implementor
* should compute all the features along the hyperedge, calling acc.add(name,
* value) for each feature. It then returns the newly computed dynamic
* programming state for this feature (for example, for the
* {@link LanguageModelFF} feature, this returns the new language model
* context). For stateless features, this value is null.
*
* Note that the accumulator accumulates *unweighted* feature values. The
* feature vector is multiplied by the weight vector later on.
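*
* <p>A minimal sketch for a hypothetical stateless counting template
* (illustrative only, not an existing feature):
*
* <pre>{@code
* public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
*     SourcePath sourcePath, Sentence sentence, Accumulator acc) {
*   // Fire one unweighted count feature per rule application.
*   acc.add(name, 1.0f);
*   // Stateless features carry no dynamic programming state.
*   return null;
* }
* }</pre>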
*
* @param rule
* @param tailNodes
* @param i
* @param j
* @param sourcePath
* @param sentence
* @param acc
* @return the new dynamic programming state (null for stateless features)
*/
public abstract DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
SourcePath sourcePath, Sentence sentence, Accumulator acc);
/**
* Feature functions must override this. StatefulFF and StatelessFF provide
* reasonable defaults since most features do not fire on the goal node.
*
* @param tailNode
* @param i
* @param j
* @param sourcePath
* @param sentence
* @param acc
* @return the DPState (null if none)
*/
public abstract DPState computeFinal(HGNode tailNode, int i, int j, SourcePath sourcePath,
Sentence sentence, Accumulator acc);
/**
* This is a convenience function for retrieving the features fired when
* applying a rule, provided for backward compatibility.
*
* Returns the *unweighted* feature delta computed at this position. Note that
* this is a delta, so existing feature values of
* the tail nodes should not be incorporated, and it is very important not to
* incorporate the feature weights. This function is used in the k-best
* extraction code but could also be used in computing the cost.
*
* @param rule
* @param tailNodes
* @param i
* @param j
* @param sourcePath
* @param sentence
* @return an *unweighted* feature delta
*/
public final FeatureVector computeFeatures(Rule rule, List<HGNode> tailNodes, int i, int j,
SourcePath sourcePath, Sentence sentence) {
FeatureAccumulator features = new FeatureAccumulator();
compute(rule, tailNodes, i, j, sourcePath, sentence, features);
return features.getFeatures();
}
/**
* This function is called for the final transition. For example, the
* LanguageModel feature function treats the last rule specially. It needs to
* return the *weighted* cost of applying the feature. Provided for backward
* compatibility.
*
* @param tailNode
* @param i
* @param j
* @param sourcePath
* @param sentence
* @return a *weighted* feature cost
*/
public final float computeFinalCost(HGNode tailNode, int i, int j, SourcePath sourcePath,
Sentence sentence) {
ScoreAccumulator score = new ScoreAccumulator();
computeFinal(tailNode, i, j, sourcePath, sentence, score);
return score.getScore();
}
/**
* Returns the *unweighted* feature delta for the final transition (e.g., for
* the language model feature function). Provided for backward compatibility.
*
* @param tailNode
* @param i
* @param j
* @param sourcePath
* @param sentence
* @return an *unweighted* feature delta for the final transition
*/
public final FeatureVector computeFinalFeatures(HGNode tailNode, int i, int j,
SourcePath sourcePath, Sentence sentence) {
FeatureAccumulator features = new FeatureAccumulator();
computeFinal(tailNode, i, j, sourcePath, sentence, features);
return features.getFeatures();
}
/**
* This function is called when sorting rules for cube pruning. It must return
* the *weighted* estimated cost of applying a feature. This need not be the
* actual cost of applying the rule in context. Basically, it's the inner
* product of the weight vector and all features found in the grammar rule,
* though some features (like LanguageModelFF) can also compute some of their
* values. This is just an estimate of the cost, which helps do better
* sorting. Later, the real cost of applying this feature function is computed
* via compute().
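*
* <p>For the hypothetical counting template sketched in {@link #compute}, the
* estimate is simply the feature's weight (illustrative only):
*
* <pre>{@code
* public float estimateCost(Rule rule, Sentence sentence) {
*   // One firing per rule application, so the weighted estimate is the weight.
*   return weights.get(name);
* }
* }</pre>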
*
* @param rule
* @param sentence
* @return the *weighted* cost of applying the feature.
*/
public abstract float estimateCost(Rule rule, Sentence sentence);
/**
* This function is called to produce a *weighted estimate* of the future cost
* of applying this feature. This value is not incorporated into the model
* score but is used in pruning decisions. Stateless features return 0.0f by
* default, but stateful features might want to override this.
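*
* <p>A sketch of the stateless default described above:
*
* <pre>{@code
* public float estimateFutureCost(Rule rule, DPState state, Sentence sentence) {
*   // A stateless feature anticipates no future cost.
*   return 0.0f;
* }
* }</pre>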
*
* @param rule
* @param state
* @param sentence
* @return the *weighted* future cost estimate of applying this rule in
* context.
*/
public abstract float estimateFutureCost(Rule rule, DPState state, Sentence sentence);
/**
* Parses the arguments passed to a feature function in the Joshua config file.
* Expects key-value pairs of the form {@code -argname value}; a key begins with
* a dash followed by at least two characters. Any key without a value is
* recorded with an empty string as its value. If a key is followed by more than
* one value token, only the first is kept and the rest are ignored.
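*
* <p>For example (the flag names are illustrative):
*
* <pre>{@code
* // args:   { "-lm_file", "en.kenlm", "-order", "5", "-debug" }
* // result: { lm_file=en.kenlm, order=5, debug="" }
* }</pre>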
*
* TODO: Replace this with a proper CLI library at some point.
*
* @param args the raw {@code -key value} tokens from the config file
* @return a hash mapping each key (without its dash) to its value
*/
public static HashMap<String, String> parseArgs(String[] args) {
HashMap<String, String> parsedArgs = new HashMap<String, String>();
boolean lookingForValue = false;
String currentKey = "";
// Compile the key pattern once rather than on every iteration.
Pattern argKeyPattern = Pattern.compile("^-[a-zA-Z]\\S+");
for (int i = 0; i < args.length; i++) {
Matcher argKey = argKeyPattern.matcher(args[i]);
if (argKey.find()) {
// This is a key
// First check to see if there is a key that is waiting to be written
if (lookingForValue) {
// This is a key with no specified value
parsedArgs.put(currentKey, "");
}
// Now store the new key and look for its value
currentKey = args[i].substring(1);
lookingForValue = true;
} else {
// This is a value
if (lookingForValue) {
parsedArgs.put(currentKey, args[i]);
lookingForValue = false;
}
}
}
// A trailing key with no value would otherwise be dropped; record it too.
if (lookingForValue) {
parsedArgs.put(currentKey, "");
}
return parsedArgs;
}
/**
* Accumulator objects allow us to generalize feature computation.
* ScoreAccumulator takes (feature, value) pairs and simply stores the weighted
* sum (for decoding). FeatureAccumulator records the named feature values
* (for k-best extraction). See the replay sketch in the class comment above.
*/
public interface Accumulator {
public void add(String name, float value);
}
public class ScoreAccumulator implements Accumulator {
private float score;
public ScoreAccumulator() {
this.score = 0.0f;
}
public void add(String name, float value) {
if (weights.containsKey(name)) {
score += value * weights.get(name);
}
}
public float getScore() {
return score;
}
}
public class FeatureAccumulator implements Accumulator {
private FeatureVector features;
public FeatureAccumulator() {
this.features = new FeatureVector();
}
public void add(String name, float value) {
if (features.containsKey(name)) {
features.put(name, features.get(name) + value);
} else {
features.put(name, value);
}
}
public FeatureVector getFeatures() {
return features;
}
}
}