package joshua.decoder.chart_parser;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import joshua.corpus.Vocabulary;
import joshua.decoder.ff.tm.Grammar;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.segment_file.ConstraintRule;
import joshua.decoder.segment_file.ConstraintSpan;

| /** |
| * @author Zhifei Li, <zhifei.work@gmail.com> |
| */ |
| |
| public class ManualConstraintsHandler { |
| |
| // TODO: each span only has one ConstraintSpan |
| // contain spans that have LHS or RHS constraints (they are always hard) |
| private HashMap<String, ConstraintSpan> constraintSpansForFiltering; |
| |
| // contain spans that have hard "rule" constraint; key: start_span; value: |
| // end_span |
| private ArrayList<Span> spansWithHardRuleConstraint; |
| |
| private Chart chart; |
| private Grammar grammarForConstructManualRule; |
| |
| private static final Logger logger = Logger.getLogger(ManualConstraintsHandler.class.getName()); |
| |
| public ManualConstraintsHandler(Chart chart, Grammar grammarForConstructManualRule, |
| List<ConstraintSpan> constraintSpans) { |
| this.chart = chart; |
| this.grammarForConstructManualRule = grammarForConstructManualRule; |
| initialize(constraintSpans); |
| } |
| |
| private void initialize(List<ConstraintSpan> constraintSpans) { |
| /** |
| * Note that manual constraints or OOV handling is not part of seeding |
| * */ |
| /** |
| * (1) add manual rule (only allow flat rules) into the chart as constraints (2) add RHS or LHS |
| * constraint into constraintSpansForFiltering (3) add span signature into |
| * setOfSpansWithHardRuleConstraint; if the span contains a hard "RULE" constraint |
| */ |
| if (null != constraintSpans) { |
| |
| for (ConstraintSpan cSpan : constraintSpans) { |
| if (null != cSpan.rules()) { |
| boolean shouldAdd = false; // contain LHS or RHS constraints? |
| for (ConstraintRule cRule : cSpan.rules()) { |
| /** |
| * Note that LHS and RHS constraints are always hard, while Rule constraint can be soft |
| * or hard |
| **/ |
| switch (cRule.type()) { |
| case RULE: |
| // == prepare the feature scores |
| // TODO: this require the input always specify the right number of |
| // features |
| float[] featureScores = new float[cRule.features().length]; |
| |
| for (int i = 0; i < featureScores.length; i++) { |
| if (cSpan.isHard()) { |
| featureScores[i] = 0; // force the feature cost as zero |
| } else { |
| featureScores[i] = cRule.features()[i]; |
| } |
| } |
| |
| /** |
| * If the RULE constraint is hard, then we should filter all out all consituents |
| * (within this span), which are contructed from regular grammar |
| */ |
| if (cSpan.isHard()) { |
| if (null == this.spansWithHardRuleConstraint) { |
| this.spansWithHardRuleConstraint = new ArrayList<Span>(); |
| } |
| this.spansWithHardRuleConstraint.add(new Span(cSpan.start(), cSpan.end())); |
| } |
| |
| int arity = 0; // only allow flat rule (i.e. arity=0) |
| Rule rule = |
| this.grammarForConstructManualRule.constructManualRule( |
| Vocabulary.id(cRule.lhs()), Vocabulary.addAll(cRule.foreignRhs()), |
| Vocabulary.addAll(cRule.nativeRhs()), featureScores, arity); |
| |
| // add to the chart |
| chart.addAxiom(cSpan.start(), cSpan.end(), rule, new SourcePath()); |
| if (logger.isLoggable(Level.INFO)) |
| logger.info("Adding RULE constraint for span " + cSpan.start() + ", " |
| + cSpan.end() + "; isHard=" + cSpan.isHard() + rule.getLHS()); |
| break; |
| |
| default: |
| shouldAdd = true; |
| } |
| } |
| if (shouldAdd) { |
| if (logger.isLoggable(Level.INFO)) |
| logger.info("Adding LHS or RHS constraint for span " + cSpan.start() + ", " |
| + cSpan.end()); |
| if (null == this.constraintSpansForFiltering) { |
| this.constraintSpansForFiltering = new HashMap<String, ConstraintSpan>(); |
| } |
| this.constraintSpansForFiltering.put(getSpanSignature(cSpan.start(), cSpan.end()), |
| cSpan); |
| } |
| } |
| } |
| } |
| |
| } |
| |
| // =============================================================== |
| // Manual constraint annotation methods and classes |
| // =============================================================== |
| |
| /** |
| * if there are any LHS or RHS constraints for a span, then all the applicable grammar rules in |
| * that span will have to pass the filter. |
| */ |
| public List<Rule> filterRules(int i, int j, List<Rule> rulesIn) { |
| if (null == this.constraintSpansForFiltering) return rulesIn; |
| ConstraintSpan cSpan = this.constraintSpansForFiltering.get(getSpanSignature(i, j)); |
| if (null == cSpan) { // no filtering |
| return rulesIn; |
| } else { |
| |
| List<Rule> rulesOut = new ArrayList<Rule>(); |
| for (Rule gRule : rulesIn) { |
| // gRule will survive, if any constraint (LHS or RHS) lets it survive |
| for (ConstraintRule cRule : cSpan.rules()) { |
| if (shouldSurvive(cRule, gRule)) { |
| rulesOut.add(gRule); |
| break; |
| } |
| } |
| } |
| return rulesOut; |
| } |
| } |
| |
| /** |
| * should we filter out the gRule based on the manually provided constraint cRule |
| */ |
| public boolean shouldSurvive(ConstraintRule cRule, Rule gRule) { |
| |
| switch (cRule.type()) { |
| case LHS: |
| return (gRule.getLHS() == Vocabulary.id(cRule.lhs())); |
| case RHS: |
| int[] targetWords = Vocabulary.addAll(cRule.nativeRhs()); |
| |
| if (targetWords.length != gRule.getEnglish().length) return false; |
| |
| for (int t = 0; t < targetWords.length; t++) { |
| if (targetWords[t] != gRule.getEnglish()[t]) return false; |
| } |
| |
| return true; |
| default: // not surviving |
| return false; |
| } |
| } |
| |
| /** |
| * if a span is *within* the coverage of a *hard* rule constraint, then this span will be only |
| * allowed to use the mannual rules |
| */ |
| public boolean containHardRuleConstraint(int startSpan, int endSpan) { |
| if (null != this.spansWithHardRuleConstraint) { |
| for (Span span : this.spansWithHardRuleConstraint) { |
| if (startSpan >= span.startPos && endSpan <= span.endPos) return true; |
| } |
| } |
| return false; |
| } |
| |
| private String getSpanSignature(int i, int j) { |
| return i + " " + j; |
| } |
| |
| private static class Span { |
| |
| int startPos; |
| int endPos; |
| |
| public Span(int startPos, int endPos) { |
| this.startPos = startPos; |
| this.endPos = endPos; |
| } |
| } |
| |
| } |