src/joshua/decoder/ff/lm/kenlm/jni/KenLM.java - joshua - Git at Google

 package joshua.decoder.ff.lm.kenlm.jni;

 import joshua.decoder.ff.lm.NGramLanguageModel;
 import joshua.decoder.ff.state_maintenance.KenLMState;

 /**
  * JNI wrapper for KenLM. This version of KenLM supports two use cases, implemented by the separate
  * feature functions KenLMFF and LanguageModelFF. KenLMFF uses the RuleScore() interface in
  * lm/left.hh, returning a state pointer representing the KenLM state, while LangaugeModelFF handles
  * state by itself and just passes in the ngrams for scoring.
  *
  * @author Kenneth Heafield
  * @author Matt Post <post@cs.jhu.edu>
  */

 public class KenLM implements NGramLanguageModel, Comparable<KenLM> {

   static {
     try {
       System.loadLibrary("ken");
     } catch (UnsatisfiedLinkError e) {
       System.err.println("* FATAL: Can't find libken.so (libken.dylib on OS X) in $JOSHUA/lib");
       System.err.println("*        This probably means that the KenLM library didn't compile.");
       System.err.println("*        Make sure that BOOST_ROOT is set to the root of your boost");
       System.err.println("*        installation (it's not /opt/local/, the default), change to");
       System.err.println("*        $JOSHUA, and type 'ant kenlm'. If problems persist, see the");
       System.err.println("*        website (joshua-decoder.org).");
       System.exit(1);
     }
   }

   private final long pointer;

   // this is read from the config file, used to set maximum order
   private final int ngramOrder;
   // inferred from model file (may be larger than ngramOrder)
   private final int N;
   // whether left-state minimization was requested
   private boolean minimizing;

   private final static native long construct(String file_name);

   private final static native void destroy(long ptr);

   private final static native int order(long ptr);

   private final static native boolean registerWord(long ptr, String word, int id);

   private final static native float prob(long ptr, int words[]);

   private final static native StateProbPair probRule(long ptr, long pool, long words[]);

   private final static native float estimateRule(long ptr, long words[]);

   private final static native float probString(long ptr, int words[], int start);

   public final static native long createPool();
   public final static native void destroyPool(long pointer);

   public KenLM(int order, String file_name) {
     ngramOrder = order;

     pointer = construct(file_name);
     N = order(pointer);
   }

   public void destroy() {
     destroy(pointer);
   }

   public int getOrder() {
     return ngramOrder;
   }

   public boolean registerWord(String word, int id) {
     return registerWord(pointer, word, id);
   }

   public float prob(int words[]) {
     return prob(pointer, words);
   }

   // Apparently Zhifei starts some array indices at 1. Change to 0-indexing.
   public float probString(int words[], int start) {
     return probString(pointer, words, start - 1);
   }

   /**
    * This function is the bridge to the interface in kenlm/lm/left.hh, which has KenLM score the
    * whole rule. It takes a list of words and states retrieved from tail nodes (nonterminals in the
    * rule). Nonterminals have a negative value so KenLM can distinguish them. The sentence number is
    * needed so KenLM knows which memory pool to use. When finished, it returns the updated KenLM
    * state and the LM probability incurred along this rule.
    *
    * @param words
    * @param sentId
    * @return
    */
   public StateProbPair probRule(long[] words, long poolPointer) {

     StateProbPair pair = null;
     try {
       pair = probRule(pointer, poolPointer, words);
     } catch (NoSuchMethodError e) {
       e.printStackTrace();
       System.exit(1);
     }

     return pair;
   }

   /**
    * Public facing function that estimates the cost of a rule, which value is used for sorting
    * rules during cube pruning.
    *
    * @param words
    * @return the estimated cost of the rule (the (partial) n-gram probabilities of all words in the rule)
    */
   public float estimateRule(long[] words) {
     float estimate = 0.0f;
     try {
       estimate = estimateRule(pointer, words);
     } catch (NoSuchMethodError e) {
       e.printStackTrace();
       System.exit(1);
     }

     return estimate;
   }


   /**
    * Inner class used to hold the results returned from KenLM with left-state minimization. Note
    * that inner classes have to be static to be accessible from the JNI!
    */
   public static class StateProbPair {
     public KenLMState state = null;
     public float prob = 0.0f;

     public StateProbPair(long state, float prob) {
       this.state = new KenLMState(state);
       this.prob = prob;
     }
   }

   @Override
   public int compareTo(KenLM other) {
     if (this == other)
       return 0;
     else
       return -1;
   }

   /**
    * These functions are used if KenLM is invoked under LanguageModelFF instead of KenLMFF.
    */
   @Override
   public float sentenceLogProbability(int[] sentence, int order, int startIndex) {
     return probString(sentence, startIndex);
   }

   @Override
   public float ngramLogProbability(int[] ngram, int order) {
     if (order != N && order != ngram.length)
       throw new RuntimeException("Lower order not supported.");
     return prob(ngram);
   }

   @Override
   public float ngramLogProbability(int[] ngram) {
     return prob(ngram);
   }
 }
	package joshua.decoder.ff.lm.kenlm.jni;

	import joshua.decoder.ff.lm.NGramLanguageModel;
	import joshua.decoder.ff.state_maintenance.KenLMState;

	/**
	* JNI wrapper for KenLM. This version of KenLM supports two use cases, implemented by the separate
	* feature functions KenLMFF and LanguageModelFF. KenLMFF uses the RuleScore() interface in
	* lm/left.hh, returning a state pointer representing the KenLM state, while LangaugeModelFF handles
	* state by itself and just passes in the ngrams for scoring.
	*
	* @author Kenneth Heafield
	* @author Matt Post <post@cs.jhu.edu>
	*/

	public class KenLM implements NGramLanguageModel, Comparable<KenLM> {

	static {
	try {
	System.loadLibrary("ken");
	} catch (UnsatisfiedLinkError e) {
	System.err.println("* FATAL: Can't find libken.so (libken.dylib on OS X) in $JOSHUA/lib");
	System.err.println("* This probably means that the KenLM library didn't compile.");
	System.err.println("* Make sure that BOOST_ROOT is set to the root of your boost");
	System.err.println("* installation (it's not /opt/local/, the default), change to");
	System.err.println("* $JOSHUA, and type 'ant kenlm'. If problems persist, see the");
	System.err.println("* website (joshua-decoder.org).");
	System.exit(1);
	}
	}

	private final long pointer;

	// this is read from the config file, used to set maximum order
	private final int ngramOrder;
	// inferred from model file (may be larger than ngramOrder)
	private final int N;
	// whether left-state minimization was requested
	private boolean minimizing;

	private final static native long construct(String file_name);

	private final static native void destroy(long ptr);

	private final static native int order(long ptr);

	private final static native boolean registerWord(long ptr, String word, int id);

	private final static native float prob(long ptr, int words[]);

	private final static native StateProbPair probRule(long ptr, long pool, long words[]);

	private final static native float estimateRule(long ptr, long words[]);

	private final static native float probString(long ptr, int words[], int start);

	public final static native long createPool();
	public final static native void destroyPool(long pointer);

	public KenLM(int order, String file_name) {
	ngramOrder = order;

	pointer = construct(file_name);
	N = order(pointer);
	}

	public void destroy() {
	destroy(pointer);
	}

	public int getOrder() {
	return ngramOrder;
	}

	public boolean registerWord(String word, int id) {
	return registerWord(pointer, word, id);
	}

	public float prob(int words[]) {
	return prob(pointer, words);
	}

	// Apparently Zhifei starts some array indices at 1. Change to 0-indexing.
	public float probString(int words[], int start) {
	return probString(pointer, words, start - 1);
	}

	/**
	* This function is the bridge to the interface in kenlm/lm/left.hh, which has KenLM score the
	* whole rule. It takes a list of words and states retrieved from tail nodes (nonterminals in the
	* rule). Nonterminals have a negative value so KenLM can distinguish them. The sentence number is
	* needed so KenLM knows which memory pool to use. When finished, it returns the updated KenLM
	* state and the LM probability incurred along this rule.
	*
	* @param words
	* @param sentId
	* @return
	*/
	public StateProbPair probRule(long[] words, long poolPointer) {

	StateProbPair pair = null;
	try {
	pair = probRule(pointer, poolPointer, words);
	} catch (NoSuchMethodError e) {
	e.printStackTrace();
	System.exit(1);
	}

	return pair;
	}

	/**
	* Public facing function that estimates the cost of a rule, which value is used for sorting
	* rules during cube pruning.
	*
	* @param words
	* @return the estimated cost of the rule (the (partial) n-gram probabilities of all words in the rule)
	*/
	public float estimateRule(long[] words) {
	float estimate = 0.0f;
	try {
	estimate = estimateRule(pointer, words);
	} catch (NoSuchMethodError e) {
	e.printStackTrace();
	System.exit(1);
	}

	return estimate;
	}


	/**
	* Inner class used to hold the results returned from KenLM with left-state minimization. Note
	* that inner classes have to be static to be accessible from the JNI!
	*/
	public static class StateProbPair {
	public KenLMState state = null;
	public float prob = 0.0f;

	public StateProbPair(long state, float prob) {
	this.state = new KenLMState(state);
	this.prob = prob;
	}
	}

	@Override
	public int compareTo(KenLM other) {
	if (this == other)
	return 0;
	else
	return -1;
	}

	/**
	* These functions are used if KenLM is invoked under LanguageModelFF instead of KenLMFF.
	*/
	@Override
	public float sentenceLogProbability(int[] sentence, int order, int startIndex) {
	return probString(sentence, startIndex);
	}

	@Override
	public float ngramLogProbability(int[] ngram, int order) {
	if (order != N && order != ngram.length)
	throw new RuntimeException("Lower order not supported.");
	return prob(ngram);
	}

	@Override
	public float ngramLogProbability(int[] ngram) {
	return prob(ngram);
	}
	}