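The OOV penalty feature value is now -100 inside the decoder instead of 1.
This means that the decoder itself always strongly disprefers OOVs. Tuning
can still downweight the feature, but this is more natural than relying on a
hugely negative tuned weight for OOVs. Accordingly, moses2joshua.pl no longer
multiplies the converted UnknownWordPenalty weight by -100.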
diff --git a/scripts/support/moses2joshua.pl b/scripts/support/moses2joshua.pl
index 800f3f8..4f0b91a 100755
--- a/scripts/support/moses2joshua.pl
+++ b/scripts/support/moses2joshua.pl
@@ -128,7 +128,7 @@
push @WEIGHTS, "tm_owner${num}_${i} $weights[$i]";
}
} elsif ($name eq "UnknownWordPenalty") {
- push @WEIGHTS, "OOVPenalty " . (-100 * $value);
+ push @WEIGHTS, "OOVPenalty " . ($value);
} elsif ($name eq "WordPenalty") {
push @WEIGHTS, "WordPenalty " . ($value * 2.29885);
} else {
diff --git a/src/joshua/decoder/ff/OOVFF.java b/src/joshua/decoder/ff/OOVFF.java
index e89beac..3136102 100644
--- a/src/joshua/decoder/ff/OOVFF.java
+++ b/src/joshua/decoder/ff/OOVFF.java
@@ -37,7 +37,7 @@
int sentID, Accumulator acc) {
if (rule != null && this.ownerID == rule.getOwner())
- acc.add(name, 1.0f);
+ acc.add(name, -100.0f);
return null;
}