// blob: 23a6ba543deb547f6fb366b0411e516602c012c4 [file] [log] [blame]
package joshua.decoder.phrase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import joshua.decoder.Decoder;
import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.tm.RuleCollection;
import joshua.decoder.segment_file.Sentence;
/**
 * Holds, for a single input sentence, the target phrases that a set of phrase tables supplies for
 * every source span of at most {@link #MaxSourcePhraseLength()} words.
 *
 * <p>Spans are stored in a flat, banded list: all spans that start at the same source position
 * occupy {@code max_source_phrase_length} consecutive slots, ordered by span width. A slot is
 * {@code null} when no table produced phrases for that span.
 */
public class PhraseChart {

  /** Number of source positions, as reported by {@code source.length()}. */
  private final int sentence_length;

  /** Largest value of {@code getMaxSourcePhraseLength()} over all supplied tables. */
  private final int max_source_phrase_length;

  // Banded array: different source lengths are next to each other.
  private final List<TargetPhrases> entries;

  // Number of translation options. Not read anywhere in this class (the constructor uses its
  // num_options argument instead); kept because it is package-visible.
  int numOptions = 20;

  /**
   * Create a new PhraseChart object, which represents all phrases that are
   * applicable against the current input sentence. These phrases are extracted
   * from all available grammars.
   *
   * @param tables the phrase tables to query for every span
   * @param features feature functions handed to {@code TargetPhrases.finish} for every
   *        non-empty span
   * @param source the input sentence
   * @param num_options handed to {@code TargetPhrases.finish}; presumably the number of
   *        translation options to keep per span (confirm against {@code TargetPhrases})
   */
  public PhraseChart(PhraseTable[] tables, List<FeatureFunction> features, Sentence source,
      int num_options) {
    // Elapsed time must be kept in an integral type: a float cannot represent a present-day
    // millisecond timestamp to better than roughly two minutes.
    final long startNanos = System.nanoTime();

    int longest = 0;
    for (PhraseTable table : tables) {
      longest = Math.max(longest, table.getMaxSourcePhraseLength());
    }
    max_source_phrase_length = longest;
    sentence_length = source.length();

    final int slots = sentence_length * max_source_phrase_length;
    entries = new ArrayList<TargetPhrases>(slots);
    for (int i = 0; i < slots; i++) {
      entries.add(null);
    }

    // The token sequence does not depend on the span, so fetch it once instead of once per
    // (span, table) pair.
    final int[] sourceWords = source.intSentence();

    // There's some unreachable ranges off the edge. Meh.
    for (int begin = 0; begin < sentence_length; ++begin) {
      final int lastEnd = Math.min(sentence_length, begin + max_source_phrase_length);
      for (int end = begin + 1; end <= lastEnd; ++end) {
        if (!source.hasPath(begin, end)) {
          continue;
        }
        final int[] span = Arrays.copyOfRange(sourceWords, begin, end);
        for (PhraseTable table : tables) {
          SetRange(begin, end, table.Phrases(span));
        }
      }
    }

    for (TargetPhrases phrases : entries) {
      if (phrases != null) {
        phrases.finish(features, Decoder.weights, num_options);
      }
    }

    System.err.println(String.format("[%d] Collecting options took %.3f seconds", source.id(),
        (System.nanoTime() - startNanos) / 1.0e9));
  }

  /**
   * @return the length of the sentence this chart was built for
   */
  public int SentenceLength() {
    return sentence_length;
  }

  // c++: TODO: make this reflect the longest source phrase for this sentence.
  /**
   * @return the maximum source phrase length over all phrase tables given to the constructor
   */
  public int MaxSourcePhraseLength() {
    return max_source_phrase_length;
  }

  /**
   * Maps two-dimensional span into a one-dimensional array.
   *
   * <p>The result only identifies a unique slot when
   * {@code 1 <= j - i <= max_source_phrase_length}; callers must check that first.
   *
   * @param i span start (inclusive)
   * @param j span end (exclusive)
   * @return offset into private list of TargetPhrases
   */
  private int offset(int i, int j) {
    return i * max_source_phrase_length + j - i - 1;
  }

  /**
   * Returns phrases from all grammars that match the span.
   *
   * @param begin span start (inclusive)
   * @param end span end (exclusive)
   * @return the phrases stored for the span, or {@code null} if nothing is stored for it or the
   *         span is empty, inverted, wider than {@link #MaxSourcePhraseLength()}, or outside
   *         the chart
   */
  public TargetPhrases getRange(int begin, int end) {
    final int width = end - begin;
    // Without this check an out-of-band span would alias into another span's slot.
    if (begin < 0 || width < 1 || width > max_source_phrase_length) {
      return null;
    }
    final int index = offset(begin, end);
    if (index < 0 || index >= entries.size()) {
      return null;
    }
    return entries.get(index);
  }

  /**
   * Add a set of phrases from a grammar to the current span.
   *
   * <p>A {@code null} collection is ignored. A span that falls outside the chart is reported on
   * standard error and otherwise ignored.
   *
   * @param begin span start (inclusive)
   * @param end span end (exclusive)
   * @param to the rules to record for the span; may be {@code null}
   */
  private void SetRange(int begin, int end, RuleCollection to) {
    if (to == null) {
      return;
    }
    final int slot = offset(begin, end);
    if (slot < 0 || slot >= entries.size()) {
      System.err.println(String.format("Whoops! %s [%d-%d] too long (%d)", to, begin, end,
          entries.size()));
      return;
    }
    final TargetPhrases existing = entries.get(slot);
    if (existing == null) {
      entries.set(slot, new TargetPhrases(to.getRules()));
    } else {
      existing.addAll(to.getRules());
    }
  }
}