blob: 501e9dedb229866b090b3d826d67a1f34e7c6a4f [file] [log] [blame]
package joshua.decoder.ff;
import java.util.List;
import joshua.corpus.Vocabulary;
import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.chart_parser.SourcePath;
import joshua.decoder.ff.state_maintenance.DPState;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;
import joshua.decoder.segment_file.Sentence;
/**
 * Bins a rule by its count and activates one indicator feature for that bin.
 *
 * <p>The count is not read directly: it is recovered from the rule's RarityPenalty feature, so
 * the index of that feature among the {@code tm_pt_*} features must be supplied through the
 * {@code field} argument. Only rules owned by the {@code "pt"} grammar are scored.
 */
public class RuleCountBin extends StatelessFF {

  /** Inclusive upper bounds of the count bins, in ascending order. */
  private static final int[] BINS = { 1, 2, 4, 8, 16, 32, 64, 128, 1000, 10000 };

  /**
   * Feature names index-aligned with {@link #BINS}. Built once by concatenation so the digits
   * do not depend on the default locale (unlike {@code String.format("%d", ...)}).
   */
  private static final String[] BIN_FEATURES = new String[BINS.length];

  static {
    for (int b = 0; b < BINS.length; b++) {
      BIN_FEATURES[b] = "RuleCountBin_" + BINS[b];
    }
  }

  /** Feature fired when the count exceeds the largest bin bound. */
  private static final String OVERFLOW_FEATURE = "RuleCountBin_inf";

  /** Index of the RarityPenalty feature, taken from the {@code field} argument. */
  private final int field;

  /** Name of the sparse feature holding the rarity penalty, i.e. {@code "tm_pt_" + field}. */
  private final String rarityPenaltyFeature;

  /**
   * Creates the feature function.
   *
   * @param weights the feature weights, passed through to the superclass
   * @param args the raw arguments, passed through to the superclass; must yield a
   *     {@code field} entry in {@code parsedArgs}
   * @param config the decoder configuration, passed through to the superclass
   * @throws NumberFormatException if the {@code field} argument is missing or is not an integer
   */
  public RuleCountBin(FeatureVector weights, String[] args, JoshuaConfiguration config) {
    super(weights, "RuleCountBin", args, config);

    String fieldArg = parsedArgs.get("field");
    if (fieldArg == null) {
      // Same exception type Integer.parseInt(null) would raise, but with a usable message.
      throw new NumberFormatException(
          "RuleCountBin: missing required 'field' argument (index of the RarityPenalty feature)");
    }
    field = Integer.parseInt(fieldArg);
    rarityPenaltyFeature = "tm_pt_" + field;
  }

  /**
   * Adds a value of 1 for the bin feature matching the rule's recovered count.
   *
   * <p>Rules whose owner is not {@code "pt"} contribute nothing.
   *
   * @param acc the accumulator that receives the bin feature
   * @return always {@code null}
   */
  @Override
  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
      Sentence sentence, Accumulator acc) {

    if (rule.getOwner() != Vocabulary.id("pt")) {
      return null;
    }

    // The stored value is negated before use; presumably the feature vector keeps the penalty
    // with the opposite sign (verify against the grammar loader).
    float rarityPenalty = -rule.getFeatureVector().getSparse(rarityPenaltyFeature);

    acc.add(binFeature(countFromRarityPenalty(rarityPenalty)), 1.0f);
    return null;
  }

  /**
   * Recovers the count from a rarity penalty by inverting {@code penalty = exp(1 - count)}.
   *
   * <p>The result is rounded to the nearest integer rather than truncated: the penalty is a
   * float, so {@code 1 - ln(penalty)} usually lands slightly off the integer count, and
   * truncation would turn e.g. 1.9999985 into 1 and select the wrong bin.
   *
   * <p>The double-to-int cast saturates, so a penalty of zero (infinite estimate) yields
   * {@code Integer.MAX_VALUE} and a negative penalty (NaN estimate) yields 0.
   */
  private static int countFromRarityPenalty(float rarityPenalty) {
    return (int) Math.rint(1.0 - Math.log(rarityPenalty));
  }

  /** Returns the feature name of the first bin whose upper bound is at least {@code count}. */
  private static String binFeature(int count) {
    for (int b = 0; b < BINS.length; b++) {
      if (count <= BINS[b]) {
        return BIN_FEATURES[b];
      }
    }
    return OVERFLOW_FEATURE;
  }
}