blob: bf4d59f3c0ffb7eb35c82d160b88df3ee0e4d2d4 [file] [log] [blame]
/*
* Copyright 2013 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.modelbuilder.v2;
import java.util.HashMap;
import java.util.Map;
import opennlp.modelbuilder.v2.impls.FileKnownEntityProvider;
import opennlp.modelbuilder.v2.impls.FileModelValidatorImpl;
import opennlp.modelbuilder.v2.impls.FileSentenceProvider;
import opennlp.modelbuilder.v2.impls.ModelableImpl;
/**
*
* @author Owner
*/
public class Example {
public static void main(String[] args) {
SemiSupervisedModelGenerator modelGenerator = new GenericModelGenerator();
//every component has a map as a place to recieve params
//these are required for the current file-based impls
Map<String, String> params = new HashMap<String, String>();
params.put("knownentityfile", "C:\\apache\\entitylinker\\opennlp.geoentitylinker.countrycontext.txt");
params.put("sentencesfile", "C:\\apache\\modelbuilder\\sentences.txt");
params.put("knownentitytype", "location");
params.put("blacklistfile", "C:\\apache\\modelbuilder\\blacklist.txt");
params.put("modelablepath", "C:\\apache\\modelbuilder");
/**
* sentence providers feed this process with user data derived sentences
* this impl just reads line by line through a file
*/
SentenceProvider sentenceProvider = new FileSentenceProvider();
sentenceProvider.setParameters(params);
/**
*KnownEntityProviders provide a seed list of known entities... such as Barack Obama for person, or Germany for location
* obviously these would want to be prolific, non ambiguous names
*/
KnownEntityProvider knownEntityProvider = new FileKnownEntityProvider();
knownEntityProvider.setParameters(params);
/**
* ModelGenerationValidators try to weed out bad hits by the iterations of the name finder.
* Since this is a recursive process, with each iteration the namefinder will get more and more greedy if bad entities are allowed in
* this provides a mechanism for throwing out obviously bad hits.
* A good impl may be to make sure a location is actually within a noun phrase etc...users can make this as specific as they need for their dat
* and their use case
*/
ModelGenerationValidator validator = new FileModelValidatorImpl();
validator.setParameters(params);
/**
* Modelable's write and read the annotated sentences, as well as create and write the NER models
*/
Modelable modelable = new ModelableImpl();
modelable.setParameters(params);
/**
* the modelGenerator actually runs the process with a set number of iterations... could be better by actually calculating the
* diff between runs and stopping based on a thresh, but for extrememly large sentence sets this may be too much.
*/
modelGenerator.build(sentenceProvider, knownEntityProvider, validator, modelable, 2);
}
}