blob: 4fe9d89af0b8928286ea82f196ca2abe92bf2ac3 [file] [log] [blame]
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package opennlp.modelbuilder.v2;
import java.util.HashMap;
import java.util.Map;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.util.Span;
/**
 * Semi-supervised generator that alternates between two phases for a fixed
 * number of iterations:
 * <ol>
 * <li>annotate every sentence that contains an already known entity and build
 * a model from the annotated sentences;</li>
 * <li>run a {@link NameFinderME} backed by that model over the sentences and
 * feed every validated hit back in as a new known entity and a new annotated
 * sentence.</li>
 * </ol>
 * After the last iteration the annotated sentences are written and the model
 * is built one final time so that the hits of the last NER pass are included.
 */
public class GenericModelGenerator implements SemiSupervisedModelGenerator {

  /**
   * Parameters handed in through {@link #setParameters(Map)}. Never
   * {@code null}. This class stores them but does not read them.
   */
  private Map<String, String> params = new HashMap<String, String>();

  /**
   * Stores a private copy of the supplied parameters.
   *
   * @param params the parameters to remember; {@code null} is treated as an
   *        empty map. The map is copied, so later changes made by the caller
   *        do not affect this generator.
   */
  @Override
  public void setParameters(Map<String, String> params) {
    Map<String, String> copy = new HashMap<String, String>();
    if (params != null) {
      copy.putAll(params);
    }
    this.params = copy;
  }

  /**
   * Runs the annotate / build / find cycle {@code iterations} times and then
   * writes the annotated sentences and builds the model once more.
   *
   * @param sentenceProvider supplies the sentences that are scanned in both phases
   * @param knownEntityProvider supplies the known entities and their type, and
   *        receives the entities found by the name finder
   * @param validator decides which sentences are passed to the name finder and
   *        which of its hits are accepted
   * @param modelable annotates sentences, collects them, and builds the model
   * @param iterations number of cycles to run; for zero or a negative value
   *        only the final write-and-build step is executed
   */
  @Override
  public void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
          ModelGenerationValidator validator, Modelable modelable, int iterations) {
    for (int iteration = 0; iteration < iterations; iteration++) {
      System.out.println("ITERATION: " + iteration);

      System.out.println("\tPerfoming Known Entity Annotation");
      System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
      System.out.println("\t\treading data....: ");
      annotateKnownEntities(sentenceProvider, knownEntityProvider, modelable);

      System.out.println("\t\twriting annotated sentences....: ");
      writeAndBuild(knownEntityProvider, modelable);
      // A fresh finder per iteration so that it always wraps the model just built.
      NameFinderME nameFinder = new NameFinderME(modelable.getModel());
      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());

      System.out.println("\tPerforming NER");
      collectNamedEntities(sentenceProvider, knownEntityProvider, validator, modelable, nameFinder);
      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
      System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size());
    }
    // Fold the annotations added by the last NER pass into the final model.
    writeAndBuild(knownEntityProvider, modelable);
  }

  /**
   * Adds one annotated sentence for every (sentence, known entity) pair in
   * which the sentence contains the entity as a substring.
   */
  private void annotateKnownEntities(SentenceProvider sentenceProvider,
          KnownEntityProvider knownEntityProvider, Modelable modelable) {
    for (String sentence : sentenceProvider.getSentences()) {
      for (String knownEntity : knownEntityProvider.getKnownEntities()) {
        if (!sentence.contains(knownEntity)) {
          continue;
        }
        // Open question: a sentence with several hits is annotated once per
        // hit rather than once with all hits marked.
        modelable.addAnnotatedSentence(
                modelable.annotate(sentence, knownEntity, knownEntityProvider.getKnownEntitiesType()));
      }
    }
  }

  /**
   * Runs the name finder over every valid sentence and registers each valid
   * hit as a known entity and as a newly annotated sentence.
   */
  private void collectNamedEntities(SentenceProvider sentenceProvider,
          KnownEntityProvider knownEntityProvider, ModelGenerationValidator validator,
          Modelable modelable, NameFinderME nameFinder) {
    for (String sentence : sentenceProvider.getSentences()) {
      if (!validator.validSentence(sentence)) {
        continue;
      }
      String[] tokens = modelable.tokenizeSentenceToWords(sentence);
      Span[] spans = nameFinder.find(tokens);
      // Reset the adaptive data after each sentence so it does not carry over to the next one.
      nameFinder.clearAdaptiveData();
      for (String namedEntity : Span.spansToStrings(spans, tokens)) {
        if (validator.validNamedEntity(namedEntity)) {
          knownEntityProvider.addKnownEntity(namedEntity);
          modelable.addAnnotatedSentence(
                  modelable.annotate(sentence, namedEntity, knownEntityProvider.getKnownEntitiesType()));
        }
      }
    }
  }

  /**
   * Writes the annotated sentences collected so far and builds the model for
   * the provider's entity type.
   */
  private void writeAndBuild(KnownEntityProvider knownEntityProvider, Modelable modelable) {
    modelable.writeAnnotatedSentences();
    modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
  }
}