modelbuilder-prototype/src/main/java/opennlp/modelbuilder/v2/Example.java - opennlp-sandbox - Git at Google

 /*
  * To change this template, choose Tools | Templates
  * and open the template in the editor.
  */
 package opennlp.modelbuilder.v2;

 import java.util.HashMap;
 import java.util.Map;
 import opennlp.modelbuilder.v2.impls.FileKnownEntityProvider;
 import opennlp.modelbuilder.v2.impls.FileModelValidatorImpl;
 import opennlp.modelbuilder.v2.impls.FileSentenceProvider;
 import opennlp.modelbuilder.v2.impls.ModelableImpl;

 /**
  * Minimal walkthrough that wires the file-based components together and hands
  * them to a {@link SemiSupervisedModelGenerator}.
  *
  * <p>Every path configured here is a placeholder value; the command-line
  * arguments are not consulted.
  *
  * @author Owner
  */
 public class Example {

   /** Placeholder path used for every file-based parameter in this example. */
   private static final String PLACEHOLDER_PATH = "/the/file";

   /** Fixed number of iterations passed to the model generator. */
   private static final int ITERATIONS = 3;

   /**
    * Creates each component, gives all of them the same parameter map, and
    * starts the build.
    *
    * @param args ignored
    */
   public static void main(String[] args) {

     SemiSupervisedModelGenerator modelGenerator = new GenericModelGenerator();

     // Every component has a map as a place to receive params; the entries
     // below are the ones required by the current file-based impls.
     Map<String, String> config = buildParameters();

     // Sentence providers feed this process with sentences derived from user
     // data. This impl just reads line by line through a file.
     SentenceProvider sentences = new FileSentenceProvider();
     sentences.setParameters(config);

     // KnownEntityProviders provide a seed list of known entities, such as
     // Barack Obama for person or Germany for location. These should be
     // prolific, unambiguous names.
     KnownEntityProvider seedEntities = new FileKnownEntityProvider();
     seedEntities.setParameters(config);

     // ModelGenerationValidators try to weed out bad hits produced by the
     // iterations of the name finder. Since this is a recursive process, the
     // name finder gets more and more greedy with each iteration if bad
     // entities are allowed in; this provides a mechanism for throwing out
     // obviously bad hits. A good impl may check that a location is actually
     // within a noun phrase, etc. Users can make this as specific as they need
     // for their data and their use case.
     ModelGenerationValidator hitValidator = new FileModelValidatorImpl();
     hitValidator.setParameters(config);

     // Modelables write and read the annotated sentences, as well as create
     // and write the NER models.
     Modelable modelStore = new ModelableImpl();
     modelStore.setParameters(config);

     // The model generator runs the process for a set number of iterations.
     // It could be improved by calculating the diff between runs and stopping
     // at a threshold, but for extremely large sentence sets that may be too
     // much.
     modelGenerator.build(sentences, seedEntities, hitValidator, modelStore, ITERATIONS);
   }

   /**
    * Builds the parameter map shared by all components in this example.
    *
    * @return a new mutable map holding the five example parameters
    */
   private static Map<String, String> buildParameters() {
     Map<String, String> config = new HashMap<String, String>();
     config.put("sentencesfile", PLACEHOLDER_PATH);
     config.put("knownentityfile", PLACEHOLDER_PATH);
     config.put("knownentitytype", "person");
     config.put("blacklistfile", PLACEHOLDER_PATH);
     config.put("modelablepath", PLACEHOLDER_PATH);
     return config;
   }
 }
	/*
	* To change this template, choose Tools | Templates
	* and open the template in the editor.
	*/
	package opennlp.modelbuilder.v2;

	import java.util.HashMap;
	import java.util.Map;
	import opennlp.modelbuilder.v2.impls.FileKnownEntityProvider;
	import opennlp.modelbuilder.v2.impls.FileModelValidatorImpl;
	import opennlp.modelbuilder.v2.impls.FileSentenceProvider;
	import opennlp.modelbuilder.v2.impls.ModelableImpl;

	/**
	 * Example entry point showing how the file-based implementations are
	 * configured and passed to a {@link SemiSupervisedModelGenerator}.
	 *
	 * <p>The file locations used here are placeholder values.
	 *
	 * @author Owner
	 */
	public class Example {

		/**
		 * Configures one instance of each component from a single shared
		 * parameter map and then runs the generator.
		 *
		 * @param args not used
		 */
		public static void main(String[] args) {

			final SemiSupervisedModelGenerator generator = new GenericModelGenerator();

			// Every component has a map as a place to receive params.
			// These entries are required for the current file-based impls.
			final Map<String, String> settings = new HashMap<String, String>();
			settings.put("sentencesfile", "/the/file");
			settings.put("knownentityfile", "/the/file");
			settings.put("knownentitytype", "person");
			settings.put("blacklistfile", "/the/file");
			settings.put("modelablepath", "/the/file");

			// Sentence providers feed this process with user-data-derived
			// sentences; this impl just reads line by line through a file.
			final SentenceProvider sentenceSource = new FileSentenceProvider();
			sentenceSource.setParameters(settings);

			// KnownEntityProviders provide a seed list of known entities, e.g.
			// Barack Obama for person or Germany for location. Ideally these
			// are prolific, non-ambiguous names.
			final KnownEntityProvider entitySource = new FileKnownEntityProvider();
			entitySource.setParameters(settings);

			// ModelGenerationValidators try to weed out bad hits from the
			// iterations of the name finder. Because the process is recursive,
			// the name finder becomes more and more greedy with each iteration
			// if bad entities are allowed in, so this is the mechanism for
			// throwing out obviously bad hits. A good impl may make sure a
			// location is actually within a noun phrase, etc.; users can make
			// this as specific as they need for their data and use case.
			final ModelGenerationValidator entityValidator = new FileModelValidatorImpl();
			entityValidator.setParameters(settings);

			// Modelables write and read the annotated sentences, and also
			// create and write the NER models.
			final Modelable modelWriter = new ModelableImpl();
			modelWriter.setParameters(settings);

			// The generator runs the process with a set number of iterations.
			// Calculating the diff between runs and stopping at a threshold
			// could be better, but may be too much for extremely large
			// sentence sets.
			final int iterations = 3;
			generator.build(sentenceSource, entitySource, entityValidator, modelWriter, iterations);
		}
	}