blob: a3b706200abe03d808ea7709791af87c762c2d59 [file] [log] [blame]
package joshua.decoder.ff.tm;
import java.util.List;
import joshua.decoder.ff.FeatureFunction;
/**
* Grammar is an interface for wrapping the <code>Trie</code> that backs a grammar, in order to
* store holistic (grammar-wide) metadata alongside it.
*
* @author wren ng thornton <wren@users.sourceforge.net>
* @author Zhifei Li, <zhifei.work@gmail.com>
*/
public interface Grammar {
/**
* Gets the root of the <code>Trie</code> backing this grammar.
* <p>
* <em>Note</em>: This method should run as a small constant-time function.
*
* @return the root of the <code>Trie</code> backing this grammar
*/
Trie getTrieRoot();
/**
* After calling this method, the rules in this grammar are guaranteed to be sorted based on the
* latest feature function values.
* <p>
* Cube-pruning requires that the grammar be sorted based on the latest feature functions.
*
* @param models the feature functions whose latest values the rules are sorted by
*/
void sortGrammar(List<FeatureFunction> models);
/**
* Determines whether the rules in this grammar have been sorted based on the latest feature
* function values.
* <p>
* This method is needed for the cube-pruning algorithm.
*
* @return <code>true</code> if the rules in this grammar have been sorted based on the latest
* feature function values, <code>false</code> otherwise
*/
boolean isSorted();
/**
* Returns whether this grammar has any valid rules for covering a particular span of a sentence.
* Hiero's "glue" grammar will only say <code>true</code> if the span is longer than our span
* limit, and is anchored at startIndex==0. Hiero's "regular" grammar will only say
* <code>true</code> if the span is less than the span limit. Other grammars, e.g. for rule-based
* systems, may have different behaviors.
*
* @param startIndex Indicates the starting index of a phrase in a source input phrase, or a
* starting node identifier in a source input lattice
* @param endIndex Indicates the ending index of a phrase in a source input phrase, or an ending
* node identifier in a source input lattice
* @param pathLength Length of the input path in a source input lattice. If a source input phrase
* is used instead of a lattice, this value will likely be ignored by the underlying
* implementation, but would normally be defined as <code>endIndex-startIndex</code>
* @return <code>true</code> if this grammar has any valid rules for covering the given span,
* <code>false</code> otherwise
*/
boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength);
/**
* Gets the number of rules stored in the grammar.
*
* @return the number of rules stored in the grammar
*/
int getNumRules();
/**
* Returns the number of dense features.
*
* @return the number of dense features
*/
int getNumDenseFeatures();
/**
* This is used to construct a manual rule supported from outside the grammar, but the owner
* should be the same as the grammar's. The rule ID will be the same as OOVRuleId, and the rule
* has no lattice cost.
* <p>
* NOTE(review): the parameter descriptions below are inferred from the parameter names and types
* only; confirm them against the implementing classes.
*
* @param lhs presumably the identifier of the rule's left-hand side
* @param sourceWords presumably the identifiers of the symbols on the rule's source side
* @param targetWords presumably the identifiers of the symbols on the rule's target side
* @param scores presumably the scores to attach to the rule
* @param arity presumably the arity of the rule
* @return the constructed rule
* @deprecated this method is marked as deprecated; no replacement is documented in this
* interface
*/
@Deprecated
Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores, int arity);
/**
* Dump the grammar to disk.
*
* @param file the file the grammar is dumped to (presumably a file name or path; confirm against
* the implementing classes)
* @deprecated this method is marked as deprecated; no replacement is documented in this
* interface
*/
@Deprecated
void writeGrammarOnDisk(String file);
/**
* This returns true if the grammar contains rules that are regular expressions, possibly matching
* many different inputs.
*
* @return true if the grammar's rules may contain regular expressions.
*/
boolean isRegexpGrammar();
/**
* Return the grammar's owner.
*
* @return the grammar's owner
*/
int getOwner();
/**
* Return the maximum source phrase length (terminals + nonterminals).
*
* @return the maximum source phrase length, counting both terminals and nonterminals
*/
int getMaxSourcePhraseLength();
/**
* Add an OOV (out-of-vocabulary) rule for the requested word for the grammar.
*
* @param word the word for which an OOV rule is requested
* @param featureFunctions the feature functions supplied for the new rule (presumably used to
* score it; confirm against the implementing classes)
*/
void addOOVRules(int word, List<FeatureFunction> featureFunctions);
}