blob: eed4c1cc85e89b0f553f6ce7a678eee7542ccf08 [file] [log] [blame]
* To change this template, choose Tools | Templates
* and open the template in the editor.
package opennlp.modelbuilder.v2;
import java.util.HashMap;
import java.util.Map;
import opennlp.modelbuilder.v2.impls.FileKnownEntityProvider;
import opennlp.modelbuilder.v2.impls.FileModelValidatorImpl;
import opennlp.modelbuilder.v2.impls.FileSentenceProvider;
import opennlp.modelbuilder.v2.impls.ModelableImpl;
* @author Owner
public class Example {
public static void main(String[] args) {
GenericModelGenerator modelGenerator = new GenericModelGenerator();
//every component has a map as a place to receive params
//these are required for the current file-based impls
Map<String, String> params = new HashMap<String, String>();
params.put("sentencesfile", "/the/file");
params.put("knownentityfile", "/the/file");
params.put("knownentitytype", "person");
params.put("blacklistfile", "/the/file");
params.put("modelablepath", "/the/file");
* sentence providers feed this process with user data derived sentences
* this impl just reads line by line through a file
SentenceProvider sentenceProvider = new FileSentenceProvider();
* KnownEntityProviders provide a seed list of known entities, such as Barack Obama for person or Germany for location.
* These should be prolific, unambiguous names.
KnownEntityProvider knownEntityProvider = new FileKnownEntityProvider();
* ModelGenerationValidators try to weed out bad hits by the iterations of the name finder.
* Since this is a recursive process, with each iteration the namefinder will get more and more greedy if bad entities are allowed in
* this provides a mechanism for throwing out obviously bad hits.
* A good impl may be to make sure a location is actually within a noun phrase, etc. Users can make this as specific as they need for their data
* and their use case
ModelGenerationValidator validator = new FileModelValidatorImpl();
* Modelables write and read the annotated sentences, as well as create and write the NER models
Modelable modelable = new ModelableImpl();
* the modelGenerator actually runs the process with a set number of iterations... could be better by actually calculating the
* diff between runs and stopping based on a threshold, but for extremely large sentence sets this may be too much.
*/, knownEntityProvider, validator, modelable, 3);