blob: 4e92cacecb9ae4390579a53ed9599c3ef303c7f4 [file] [log] [blame]
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package opennlp.modelbuilder.v2.impls;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import opennlp.modelbuilder.v2.ModelGenerationValidator;
/**
*
* @author Owner
*/
public class FileModelValidatorImpl implements ModelGenerationValidator {
private Set<String> badentities = new HashSet<String>();
private final double MIN_SCORE_FOR_TRAINING = 0.95d;
private Object validationData;
private Map<String, String> params = new HashMap<String, String>();
@Override
public void setParameters(Map<String, String> params) {
this.params = params;
}
@Override
public Boolean validSentence(String sentence) {
//returning true by default, because the sentence provider will return only "valid" sentences in this case
return true;
}
@Override
public Boolean validNamedEntity(String namedEntity) {
if (badentities.isEmpty()) {
getBlackList();
}
Pattern p = Pattern.compile("[0-9]", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
if (p.matcher(namedEntity).find()) {
return false;
}
Boolean b = true;
if (badentities.contains(namedEntity.toLowerCase())) {
b = false;
}
return b;
}
@Override
public Collection<String> getBlackList() {
if (!badentities.isEmpty()) {
try {
InputStream fis;
BufferedReader br;
String line;
fis = new FileInputStream(params.get("blacklistfile"));
br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
while ((line = br.readLine()) != null) {
badentities.add(line);
}
br.close();
br = null;
fis = null;
} catch (FileNotFoundException ex) {
Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
}
}
return badentities;
}
}