/* | |
* Copyright 2013 The Apache Software Foundation. | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
package opennlp.modelbuilder.v2; | |
import java.util.HashMap; | |
import java.util.Map; | |
import opennlp.tools.namefind.NameFinderME; | |
import opennlp.tools.util.Span; | |
/** | |
* | |
*Generic impl | |
*/ | |
public class GenericModelGenerator implements SemiSupervisedModelGenerator{ | |
private Map<String, String> params = new HashMap<String, String>(); | |
@Override | |
public void setParameters(Map<String, String> params) { | |
this.params = params; | |
} | |
@Override | |
public void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider, | |
ModelGenerationValidator validator, Modelable modelable, int iterations) { | |
for (int iteration = 0; iteration < iterations; iteration++) { | |
System.out.println("ITERATION: " + iteration); | |
System.out.println("\tPerfoming Known Entity Annotation"); | |
System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size()); | |
System.out.println("\t\treading data....: "); | |
for (String sentence : sentenceProvider.getSentences()) { | |
for (String knownEntity : knownEntityProvider.getKnownEntities()) { | |
if (sentence.contains(knownEntity)) { | |
//if the same sentence has multiple hits should they be annotated separately? | |
modelable.addAnnotatedSentence(modelable.annotate(sentence, knownEntity, knownEntityProvider.getKnownEntitiesType())); | |
} | |
} | |
} | |
System.out.println("\t\twriting annotated sentences....: "); | |
modelable.writeAnnotatedSentences(); | |
modelable.buildModel(knownEntityProvider.getKnownEntitiesType()); | |
NameFinderME nf = new NameFinderME(modelable.getModel()); | |
System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size()); | |
System.out.println("\tPerforming NER"); | |
for (String sentence : sentenceProvider.getSentences()) { | |
if (!validator.validSentence(sentence)) { | |
continue; | |
} | |
String[] tokens = modelable.tokenizeSentenceToWords(sentence); | |
Span[] find = nf.find(tokens); | |
nf.clearAdaptiveData(); | |
String[] namedEntities = Span.spansToStrings(find, tokens); | |
for (String namedEntity : namedEntities) { | |
if (validator.validNamedEntity(namedEntity)) { | |
knownEntityProvider.addKnownEntity(namedEntity); | |
modelable.addAnnotatedSentence(modelable.annotate(sentence, namedEntity, knownEntityProvider.getKnownEntitiesType())); | |
} | |
} | |
} | |
System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size()); | |
System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size()); | |
} | |
modelable.writeAnnotatedSentences(); | |
modelable.buildModel(knownEntityProvider.getKnownEntitiesType()); | |
} | |
} |