// blob: b4db28b4b0e601c33f72e8d12d211b29329ddddc [file] [log] [blame]
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package opennlp.modelbuilder.v2;
import java.util.HashMap;
import java.util.Map;
import opennlp.modelbuilder.v2.impls.FileKnownEntityProvider;
import opennlp.modelbuilder.v2.impls.FileModelValidatorImpl;
import opennlp.modelbuilder.v2.impls.FileSentenceProvider;
import opennlp.modelbuilder.v2.impls.ModelableImpl;
/**
 * Minimal walk-through showing how the file-based component implementations
 * are wired into a {@link SemiSupervisedModelGenerator} and run.
 *
 * @author Owner
 */
public class Example {

    /**
     * Creates one shared parameter map, hands it to each file-based component,
     * and then runs the model generator for three iterations.
     *
     * @param args command-line arguments; not used by this example
     */
    public static void main(String[] args) {
        SemiSupervisedModelGenerator generator = new GenericModelGenerator();

        // Every component accepts a map as the place to receive its parameters.
        Map<String, String> settings = fileBasedSettings();

        // Sentence providers feed the process with sentences derived from user
        // data; this implementation just reads a file line by line.
        SentenceProvider sentences = new FileSentenceProvider();
        sentences.setParameters(settings);

        // Known-entity providers supply a seed list of known entities, such as
        // "Barack Obama" for person or "Germany" for location. Seeds should be
        // prolific, unambiguous names.
        KnownEntityProvider seedEntities = new FileKnownEntityProvider();
        seedEntities.setParameters(settings);

        // Validators try to weed out bad hits produced by each iteration of the
        // name finder. Because the process feeds on its own output, the name
        // finder gets greedier with every iteration if bad entities are let in,
        // so this is the hook for throwing out obviously bad hits. A good
        // implementation might check that a location is actually inside a noun
        // phrase; users can make this as specific as their data and use case
        // require.
        ModelGenerationValidator hitValidator = new FileModelValidatorImpl();
        hitValidator.setParameters(settings);

        // Modelables write and read the annotated sentences, and also create and
        // write the NER models.
        Modelable modelStore = new ModelableImpl();
        modelStore.setParameters(settings);

        // The generator runs the process for a fixed number of iterations. It
        // could be improved by measuring the difference between runs and
        // stopping at a threshold, but for extremely large sentence sets that
        // may be too expensive.
        generator.build(sentences, seedEntities, hitValidator, modelStore, 3);
    }

    /**
     * Builds the parameter map required by the current file-based
     * implementations.
     *
     * <p>NOTE(review): the "/the/file" values look like placeholders to be
     * replaced with real locations - confirm before running.
     *
     * @return a new mutable map holding the example parameters
     */
    private static Map<String, String> fileBasedSettings() {
        Map<String, String> settings = new HashMap<String, String>();
        settings.put("sentencesfile", "/the/file");
        settings.put("knownentityfile", "/the/file");
        settings.put("knownentitytype", "person");
        settings.put("blacklistfile", "/the/file");
        settings.put("modelablepath", "/the/file");
        return settings;
    }
}