package joshua.decoder.ff;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.chart_parser.SourcePath;
import joshua.decoder.ff.state_maintenance.DPState;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;
import joshua.decoder.segment_file.Sentence;
/**
* This class defines Joshua's feature function interface, for both sparse and
* dense features. It is immediately inherited by StatelessFF and StatefulFF,
* which provide functionality common to stateless and stateful features,
* respectively. Any feature implementation should extend those classes, and not
* this one. The distinction between stateless and stateful features is somewhat
* narrow: all features have the opportunity to return an instance of a
* {@link DPState} object, and stateless ones just return null.
*
* Features in Joshua work like templates. Each feature function defines any
* number of actual features, which are associated with weights. The task of the
* feature function is to compute the features that are fired in different
* circumstances and then return the inner product of those features with the
* weight vector. Feature functions can also produce estimates of their future
* cost (via {@link #estimateCost}); these values are not used in computing the
* score, but are only used for sorting rules during cube pruning. The
* individual features produced by each template should have globally unique
* names; a good convention is to prefix each feature with the name of the
* template that produced it.
*
* Joshua does not retain individual feature values while decoding, since this
* requires keeping a sparse feature vector along every hyperedge, which can be
* expensive. Instead, it computes only the weighted cost of each edge. If the
* individual feature values are requested, the feature functions are replayed
* in post-processing, say during k-best list extraction. This is implemented in
* a generic way by passing an {@link Accumulator} object to the compute()
* function. During decoding, the accumulator simply sums weighted features in a
* scalar. During k-best extraction, when individual feature values are needed,
* a {@link FeatureAccumulator} is used to retain the individual values.
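*
* <p>A sketch of that replay (the feature function {@code ff} and the call
* arguments are illustrative locals, not part of this API):
*
* <pre>{@code
* // During decoding: fold every fired feature into one weighted score.
* ScoreAccumulator score = ff.new ScoreAccumulator();
* ff.compute(rule, tailNodes, i, j, sourcePath, sentence, score);
* float cost = score.getScore();
*
* // During k-best extraction: replay to recover the named, unweighted values.
* FeatureAccumulator features = ff.new FeatureAccumulator();
* ff.compute(rule, tailNodes, i, j, sourcePath, sentence, features);
* FeatureVector delta = features.getFeatures();
* }</pre>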
*
* @author Matt Post <post@cs.jhu.edu>
* @author Juri Ganitkevich <juri@cs.jhu.edu>
*/
public abstract class FeatureFunction {
/*
* The name of the feature function; this generally matches the weight name in
* the config file. This can also be used as a prefix for feature / weight
* names, for templates that define multiple features.
*/
protected String name = null;
// The list of arguments passed to the feature, and the hash for the parsed args
protected String[] args;
protected HashMap<String, String> parsedArgs = null;
/*
* The global weight vector used by the decoder, passed in when the feature is
* instantiated
*/
protected FeatureVector weights;
/* The config */
protected JoshuaConfiguration config;
public String getName() {
return name;
}
// Whether the feature has state.
public abstract boolean isStateful();
public FeatureFunction(FeatureVector weights, String name, String[] args, JoshuaConfiguration config) {
this.weights = weights;
this.name = name;
this.args = args;
this.config = config;
this.parsedArgs = FeatureFunction.parseArgs(args);
}
public String logString() {
try {
return String.format("%s (weight %.3f)", name, weights.get(name));
} catch (RuntimeException e) {
return name;
}
}
/**
* This is the main function for defining feature values. The implementor
* should compute all the features along the hyperedge, calling acc.add(name,
* value) for each feature. It then returns the newly computed dynamic
* programming state for this feature (for example, for the
* {@link LanguageModelFF} feature, this returns the new language model
* context). For stateless features, this value is null.
*
* Note that the accumulator accumulates *unweighted* feature values. The
* feature vector is multiplied by the weight vector later on.
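*
* <p>A minimal sketch for a hypothetical stateless counting template
* (illustrative only, not an existing feature):
*
* <pre>{@code
* public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
*     SourcePath sourcePath, Sentence sentence, Accumulator acc) {
*   // Fire one unweighted count feature per rule application.
*   acc.add(name, 1.0f);
*   // Stateless features carry no dynamic programming state.
*   return null;
* }
* }</pre>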
*
* @param rule
* @param tailNodes
* @param i
* @param j
* @param sourcePath
* @param sentence
* @param acc
* @return the new dynamic programming state (null for stateless features)
*/
public abstract DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
SourcePath sourcePath, Sentence sentence, Accumulator acc);
/**
* Feature functions must override this. StatefulFF and StatelessFF provide
* reasonable defaults since most features do not fire on the goal node.
*
* @param tailNode
* @param i
* @param j
* @param sourcePath
* @param sentence
* @param acc
* @return the DPState (null if none)
*/
public abstract DPState computeFinal(HGNode tailNode, int i, int j, SourcePath sourcePath,
Sentence sentence, Accumulator acc);
/**
* This is a convenience function for retrieving the features fired when
* applying a rule, provided for backward compatibility.
*
* Returns the *unweighted* feature delta computed at this position. Note that
* this is a delta, so existing feature values of
* the tail nodes should not be incorporated, and it is very important not to
* incorporate the feature weights. This function is used in the k-best
* extraction code but could also be used in computing the cost.
*
* @param rule
* @param tailNodes
* @param i
* @param j
* @param sourcePath
* @param sentence
* @return an *unweighted* feature delta
*/
public final FeatureVector computeFeatures(Rule rule, List<HGNode> tailNodes, int i, int j,
SourcePath sourcePath, Sentence sentence) {
FeatureAccumulator features = new FeatureAccumulator();
compute(rule, tailNodes, i, j, sourcePath, sentence, features);
return features.getFeatures();
}
/**
* This function is called for the final transition. For example, the
* LanguageModel feature function treats the last rule specially. It needs to
* return the *weighted* cost of applying the feature. Provided for backward
* compatibility.
*
* @param tailNode
* @param i
* @param j
* @param sourcePath
* @param sentence
* @return a *weighted* feature cost
*/
public final float computeFinalCost(HGNode tailNode, int i, int j, SourcePath sourcePath,
Sentence sentence) {
ScoreAccumulator score = new ScoreAccumulator();
computeFinal(tailNode, i, j, sourcePath, sentence, score);
return score.getScore();
}
/**
* Returns the *unweighted* feature delta for the final transition (e.g., for
* the language model feature function). Provided for backward compatibility.
*
* @param tailNode
* @param i
* @param j
* @param sourcePath
* @param sentence
* @return an *unweighted* feature delta for the final transition
*/
public final FeatureVector computeFinalFeatures(HGNode tailNode, int i, int j,
SourcePath sourcePath, Sentence sentence) {
FeatureAccumulator features = new FeatureAccumulator();
computeFinal(tailNode, i, j, sourcePath, sentence, features);
return features.getFeatures();
}
/**
* This function is called when sorting rules for cube pruning. It must return
* the *weighted* estimated cost of applying a feature. This need not be the
* actual cost of applying the rule in context. Basically, it's the inner
* product of the weight vector and all features found in the grammar rule,
* though some features (like LanguageModelFF) can also compute some of their
* values. This is just an estimate of the cost, which helps do better
* sorting. Later, the real cost of applying this feature function is computed
* via compute().
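*
* <p>For the hypothetical counting template sketched in {@link #compute}, the
* estimate is simply the feature's weight (illustrative only):
*
* <pre>{@code
* public float estimateCost(Rule rule, Sentence sentence) {
*   // One firing per rule application, so the weighted estimate is the weight.
*   return weights.get(name);
* }
* }</pre>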
*
* @param rule
* @param sentence
* @return the *weighted* cost of applying the feature.
*/
public abstract float estimateCost(Rule rule, Sentence sentence);
/**
* This function is called to produce a *weighted estimate* of the future cost
* of applying this feature. This value is not incorporated into the model
* score but is used in pruning decisions. Stateless features return 0.0f by
* default, but stateful features might want to override this.
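*
* <p>A sketch of the stateless default described above:
*
* <pre>{@code
* public float estimateFutureCost(Rule rule, DPState state, Sentence sentence) {
*   // A stateless feature anticipates no future cost.
*   return 0.0f;
* }
* }</pre>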
*
* @param rule
* @param state
* @param sentence
* @return the *weighted* future cost estimate of applying this rule in
* context.
*/
public abstract float estimateFutureCost(Rule rule, DPState state, Sentence sentence);
/**
* Parses the arguments passed to a feature function in the Joshua config file.
* Expects key-value pairs of the form {@code -argname value}; a key begins with
* a dash followed by at least two characters. Any key without a value is
* recorded with an empty string as its value. If a key is followed by more than
* one value token, only the first is kept and the rest are ignored.
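*
* <p>For example (the flag names are illustrative):
*
* <pre>{@code
* // args:   { "-lm_file", "en.kenlm", "-order", "5", "-debug" }
* // result: { lm_file=en.kenlm, order=5, debug="" }
* }</pre>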
*
* TODO: Replace this with a proper CLI library at some point.
*
* @param args the raw {@code -key value} tokens from the config file
* @return a hash mapping each key (without its dash) to its value
*/
public static HashMap<String, String> parseArgs(String[] args) {
HashMap<String, String> parsedArgs = new HashMap<String, String>();
boolean lookingForValue = false;
String currentKey = "";
// Compile the key pattern once rather than on every iteration.
Pattern argKeyPattern = Pattern.compile("^-[a-zA-Z]\\S+");
for (int i = 0; i < args.length; i++) {
Matcher argKey = argKeyPattern.matcher(args[i]);
if (argKey.find()) {
// This is a key
// First check to see if there is a key that is waiting to be written
if (lookingForValue) {
// This is a key with no specified value
parsedArgs.put(currentKey, "");
}
// Now store the new key and look for its value
currentKey = args[i].substring(1);
lookingForValue = true;
} else {
// This is a value
if (lookingForValue) {
parsedArgs.put(currentKey, args[i]);
lookingForValue = false;
}
}
}
// A trailing key with no value would otherwise be dropped; record it too.
if (lookingForValue) {
parsedArgs.put(currentKey, "");
}
return parsedArgs;
}
/**
* Accumulator objects allow us to generalize feature computation.
* ScoreAccumulator takes (feature, value) pairs and simply stores the weighted
* sum (for decoding). FeatureAccumulator records the named feature values
* (for k-best extraction). See the replay sketch in the class comment above.
*/
public interface Accumulator {
public void add(String name, float value);
}
public class ScoreAccumulator implements Accumulator {
private float score;
public ScoreAccumulator() {
this.score = 0.0f;
}
public void add(String name, float value) {
if (weights.containsKey(name)) {
score += value * weights.get(name);
}
}
public float getScore() {
return score;
}
}
public class FeatureAccumulator implements Accumulator {
private FeatureVector features;
public FeatureAccumulator() {
this.features = new FeatureVector();
}
public void add(String name, float value) {
if (features.containsKey(name)) {
features.put(name, features.get(name) + value);
} else {
features.put(name, value);
}
}
public FeatureVector getFeatures() {
return features;
}
}
}