src/joshua/decoder/ff/PhraseModel.java - joshua - Git at Google

 package joshua.decoder.ff;

 import java.util.List;

 import joshua.corpus.Vocabulary;
 import joshua.decoder.JoshuaConfiguration;
 import joshua.decoder.chart_parser.SourcePath;
 import joshua.decoder.ff.state_maintenance.DPState;
 import joshua.decoder.ff.tm.Rule;
 import joshua.decoder.hypergraph.HGNode;
 import joshua.decoder.segment_file.Sentence;

 /**
  * This feature handles the list of features that are found with grammar rules in the grammar file.
  * dense features that may be associated with the rules in a grammar file. The feature names of
  * these dense rules are a function of the phrase model owner. When the feature is loaded, it
  * queries the weights for the set of features that are active for this grammar, storing them in an
  * array.
  *
  * @author Matt Post <post@cs.jhu.edu>
  * @author Zhifei Li <zhifei.work@gmail.com>
  */

 public class PhraseModel extends StatelessFF {

   /* The owner of the grammar. */
   private int ownerID;

   private float[] phrase_weights = null;

   public PhraseModel(FeatureVector weights, String[] args, JoshuaConfiguration config) {
     super(weights, "tm_", args, config);

     String owner = parsedArgs.get("owner");
     this.name = String.format("tm_%s", owner);

     /*
      * This is an efficiency hack; we cache the full dot product of the weights with the dense
      * features, storing them as a value under the name "tm_OWNER". There won't be a weight for
      * that, so we add a weight to the weights vector. This weight will never be output because when
      * the k-best list is retrieved and the actual feature values asked for, the accumulator will
      * fetch the fine-grained dense features.
      */
     if (weights.containsKey(name)) {
       System.err.println(String.format(
           "* FATAL: Your weights file contains an entry for '%s', shouldn't", name));
       System.exit(1);
     }
     weights.put(name, 1.0f);

     int num_features = 0;
     while (weights.containsKey(String.format("tm_%s_%d", owner, num_features)))
       num_features++;

     phrase_weights = new float[num_features];
     for (int i = 0; i < num_features; i++)
       phrase_weights[i] = weights.get(String.format("tm_%s_%d", owner, i));

     // Store the owner.
     this.ownerID = Vocabulary.id(owner);
   }

   /**
    * Estimates the cost of applying this rule, which is just the score of the precomputable
    * feature functions.
    */
   @Override
   public float estimateCost(final Rule rule, Sentence sentence) {

     if (rule != null && rule.getOwner() == ownerID) {
       if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY)
         rule.setPrecomputableCost(phrase_weights, weights);

       return rule.getPrecomputableCost();
     }

     return 0.0f;
   }

   /**
    * Just chain to computeFeatures(rule), since this feature doesn't use the sourcePath or sentID. *
    */
   @Override
   public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
       Sentence sentence, Accumulator acc) {

     if (rule != null && rule.getOwner() == ownerID) {
       /*
        * Here, we peak at the Accumulator object. If it's asking for scores, then we don't bother to
        * add each feature, but rather compute the inner product and add *that*. This is totally
        * cheating; the Accumulator is supposed to be a generic object. But without this cheat
        */
       if (acc instanceof ScoreAccumulator) {
         if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY) {
 //          float score = rule.getFeatureVector().innerProduct(weights);
           rule.setPrecomputableCost(phrase_weights, weights);
         }
         acc.add(name, rule.getPrecomputableCost());
       } else {
         FeatureVector features = rule.getFeatureVector();
         for (String key : features.keySet())
           acc.add(key, features.get(key));
       }
     }

     return null;
   }

   public String toString() {
     return name + " " + Vocabulary.word(ownerID);
   }
 }
	package joshua.decoder.ff;

	import java.util.List;

	import joshua.corpus.Vocabulary;
	import joshua.decoder.JoshuaConfiguration;
	import joshua.decoder.chart_parser.SourcePath;
	import joshua.decoder.ff.state_maintenance.DPState;
	import joshua.decoder.ff.tm.Rule;
	import joshua.decoder.hypergraph.HGNode;
	import joshua.decoder.segment_file.Sentence;

	/**
	* This feature handles the list of features that are found with grammar rules in the grammar file.
	* dense features that may be associated with the rules in a grammar file. The feature names of
	* these dense rules are a function of the phrase model owner. When the feature is loaded, it
	* queries the weights for the set of features that are active for this grammar, storing them in an
	* array.
	*
	* @author Matt Post <post@cs.jhu.edu>
	* @author Zhifei Li <zhifei.work@gmail.com>
	*/

	public class PhraseModel extends StatelessFF {

	/* The owner of the grammar. */
	private int ownerID;

	private float[] phrase_weights = null;

	public PhraseModel(FeatureVector weights, String[] args, JoshuaConfiguration config) {
	super(weights, "tm_", args, config);

	String owner = parsedArgs.get("owner");
	this.name = String.format("tm_%s", owner);

	/*
	* This is an efficiency hack; we cache the full dot product of the weights with the dense
	* features, storing them as a value under the name "tm_OWNER". There won't be a weight for
	* that, so we add a weight to the weights vector. This weight will never be output because when
	* the k-best list is retrieved and the actual feature values asked for, the accumulator will
	* fetch the fine-grained dense features.
	*/
	if (weights.containsKey(name)) {
	System.err.println(String.format(
	"* FATAL: Your weights file contains an entry for '%s', shouldn't", name));
	System.exit(1);
	}
	weights.put(name, 1.0f);

	int num_features = 0;
	while (weights.containsKey(String.format("tm_%s_%d", owner, num_features)))
	num_features++;

	phrase_weights = new float[num_features];
	for (int i = 0; i < num_features; i++)
	phrase_weights[i] = weights.get(String.format("tm_%s_%d", owner, i));

	// Store the owner.
	this.ownerID = Vocabulary.id(owner);
	}

	/**
	* Estimates the cost of applying this rule, which is just the score of the precomputable
	* feature functions.
	*/
	@Override
	public float estimateCost(final Rule rule, Sentence sentence) {

	if (rule != null && rule.getOwner() == ownerID) {
	if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY)
	rule.setPrecomputableCost(phrase_weights, weights);

	return rule.getPrecomputableCost();
	}

	return 0.0f;
	}

	/**
	* Just chain to computeFeatures(rule), since this feature doesn't use the sourcePath or sentID. *
	*/
	@Override
	public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
	Sentence sentence, Accumulator acc) {

	if (rule != null && rule.getOwner() == ownerID) {
	/*
	* Here, we peak at the Accumulator object. If it's asking for scores, then we don't bother to
	* add each feature, but rather compute the inner product and add that. This is totally
	* cheating; the Accumulator is supposed to be a generic object. But without this cheat
	*/
	if (acc instanceof ScoreAccumulator) {
	if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY) {
	// float score = rule.getFeatureVector().innerProduct(weights);
	rule.setPrecomputableCost(phrase_weights, weights);
	}
	acc.add(name, rule.getPrecomputableCost());
	} else {
	FeatureVector features = rule.getFeatureVector();
	for (String key : features.keySet())
	acc.add(key, features.get(key));
	}
	}

	return null;
	}

	public String toString() {
	return name + " " + Vocabulary.word(ownerID);
	}
	}