blob: 5b2952a6d42b4e8e78ae69a52d03f0002cd01778 [file] [log] [blame]
package opennlp.tools.disambiguator;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import net.sf.extjwnl.JWNLException;
import net.sf.extjwnl.data.POS;
import net.sf.extjwnl.dictionary.Dictionary;
import net.sf.extjwnl.dictionary.MorphologicalProcessor;
import opennlp.tools.cmdline.postag.POSModelLoader;
import opennlp.tools.lemmatizer.SimpleLemmatizer;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.InvalidFormatException;
/**
 * Loads and caches the NLP components (sentence detector, tokenizer, POS
 * tagger, name finder, lemmatizer) and the extJWNL WordNet dictionary used by
 * the disambiguator, together with in-memory lookup caches.
 *
 * <p>All state is static and shared process-wide. Components are created
 * lazily by the {@code getXxx()} accessors; constructing a {@code Loader}
 * eagerly initializes everything via {@link #load()}. Note: initialization is
 * not thread-safe — callers are expected to initialize from a single thread.
 */
public class Loader {

  // Base directory containing the model files. Forward slashes are accepted
  // by java.io.File on every platform (including Windows), unlike the
  // previous hard-coded backslashes which only worked on Windows.
  private static String modelsDir = "src/test/resources/opennlp/tools/disambiguator/";

  // Lazily created, process-wide NLP components.
  private static SentenceDetectorME sdetector;
  private static Tokenizer tokenizer;
  private static POSTaggerME tagger;
  private static NameFinderME nameFinder;
  private static SimpleLemmatizer lemmatizer;
  private static Dictionary dictionary;
  private static MorphologicalProcessor morph;

  // Local caches for faster lookup. For stopCache and relvCache only key
  // membership matters (values are null placeholders); stemCache maps each
  // WordNet POS key to a per-POS stem map.
  private static HashMap<String, Object> stemCache;
  private static HashMap<String, Object> stopCache;
  private static HashMap<String, Object> relvCache;

  /**
   * Creates a loader and eagerly initializes every component and cache.
   */
  public Loader() {
    super();
    load();
  }

  /**
   * Returns the cache of relevant POS tags, building it on first use from
   * {@code Constants.relevantPOS}. Values are null; only keys are meaningful.
   *
   * @return the relevant-POS lookup cache, never null
   */
  public static HashMap<String, Object> getRelvCache() {
    if (relvCache == null || relvCache.keySet().isEmpty()) {
      relvCache = new HashMap<String, Object>();
      for (String t : Constants.relevantPOS) {
        relvCache.put(t, null);
      }
    }
    return relvCache;
  }

  /**
   * Returns the stop-word cache, building it on first use from
   * {@code Constants.stopWords}. Values are null; only keys are meaningful.
   *
   * @return the stop-word lookup cache, never null
   */
  public static HashMap<String, Object> getStopCache() {
    if (stopCache == null || stopCache.keySet().isEmpty()) {
      stopCache = new HashMap<String, Object>();
      for (String s : Constants.stopWords) {
        stopCache.put(s, null);
      }
    }
    return stopCache;
  }

  /**
   * Returns the stem cache, building it on first use with one empty per-POS
   * map for every WordNet part of speech.
   *
   * @return the stem cache keyed by POS key, never null
   */
  public static HashMap<String, Object> getStemCache() {
    if (stemCache == null || stemCache.keySet().isEmpty()) {
      stemCache = new HashMap<String, Object>();
      for (Object pos : POS.getAllPOS()) {
        // Parameterized value map replaces the previous raw HashMap.
        stemCache.put(((POS) pos).getKey(), new HashMap<String, Object>());
      }
    }
    return stemCache;
  }

  /**
   * Returns the WordNet morphological processor, loading the dictionary
   * first if necessary.
   *
   * <p>Fix: previously this dereferenced {@code dictionary} directly and
   * threw a NullPointerException when called before {@link #load()}; it now
   * goes through {@link #getDictionary()}.
   *
   * @return the morphological processor, or null if the dictionary failed to load
   */
  public static MorphologicalProcessor getMorph() {
    if (morph == null) {
      Dictionary dict = getDictionary();
      if (dict != null) {
        morph = dict.getMorphologicalProcessor();
      }
    }
    return morph;
  }

  /**
   * Returns the default extJWNL WordNet dictionary, loading it on first use.
   *
   * @return the dictionary, or null if loading failed
   */
  public static Dictionary getDictionary() {
    if (dictionary == null) {
      try {
        dictionary = Dictionary.getDefaultResourceInstance();
      } catch (JWNLException e) {
        e.printStackTrace();
      }
    }
    return dictionary;
  }

  /**
   * Returns the lemmatizer backed by {@code en-lemmatizer.dict}, loading it
   * on first use.
   *
   * @return the lemmatizer, or null if the dictionary file could not be read
   */
  public static SimpleLemmatizer getLemmatizer() {
    if (lemmatizer == null) {
      try {
        lemmatizer = new SimpleLemmatizer(new FileInputStream(modelsDir + "en-lemmatizer.dict"));
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    return lemmatizer;
  }

  /**
   * Returns the person name finder backed by {@code en-ner-person.bin},
   * loading it on first use.
   *
   * @return the name finder, or null if the model could not be read
   */
  public static NameFinderME getNameFinder() {
    if (nameFinder == null) {
      try {
        TokenNameFinderModel nameFinderModel =
            new TokenNameFinderModel(new FileInputStream(modelsDir + "en-ner-person.bin"));
        nameFinder = new NameFinderME(nameFinderModel);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    return nameFinder;
  }

  /**
   * Returns the POS tagger backed by {@code en-pos-maxent.bin}, loading it
   * on first use. Note: {@code POSModelLoader} terminates the VM on load
   * failure rather than throwing, hence no try/catch here.
   *
   * @return the POS tagger
   */
  public static POSTaggerME getTagger() {
    if (tagger == null) {
      POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir + "en-pos-maxent.bin"));
      tagger = new POSTaggerME(posTaggerModel);
    }
    return tagger;
  }

  /**
   * Returns the sentence detector backed by {@code en-sent.bin}, loading it
   * on first use.
   *
   * @return the sentence detector, or null if the model could not be read
   */
  public static SentenceDetectorME getSDetector() {
    if (sdetector == null) {
      try {
        SentenceModel enSentModel = new SentenceModel(new FileInputStream(modelsDir + "en-sent.bin"));
        sdetector = new SentenceDetectorME(enSentModel);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    return sdetector;
  }

  /**
   * Returns the tokenizer backed by {@code en-token.bin}, loading it on
   * first use.
   *
   * @return the tokenizer, or null if the model could not be read
   */
  public static Tokenizer getTokenizer() {
    if (tokenizer == null) {
      try {
        TokenizerModel tokenizerModel = new TokenizerModel(new FileInputStream(modelsDir + "en-token.bin"));
        tokenizer = new TokenizerME(tokenizerModel);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
    return tokenizer;
  }

  /**
   * Reports whether the WordNet dictionary, morphological processor and all
   * lookup caches have been created. (Intentionally does not check the
   * OpenNLP components, matching the original contract.)
   *
   * @return true if dictionary, morph and all three caches are non-null
   */
  public static boolean isInitialized() {
    return (dictionary != null
        && morph != null
        && stemCache != null
        && stopCache != null
        && relvCache != null);
  }

  /**
   * Eagerly initializes every component and cache by delegating to the lazy
   * accessors (which handle and report their own I/O failures), then prints
   * whether initialization succeeded.
   *
   * <p>Fix: the previous implementation duplicated every accessor's
   * construction logic inline; delegating removes that duplication and the
   * redundant exception handling.
   */
  public void load() {
    getSDetector();
    getTokenizer();
    getTagger();
    getNameFinder();
    getLemmatizer();
    getDictionary();
    getMorph();
    getStemCache();
    getStopCache();
    getRelvCache();
    if (isInitialized()) {
      Constants.print("loading was successful");
    } else {
      Constants.print("loading was unsuccessful");
    }
  }

  /**
   * Closes the WordNet dictionary if it was opened.
   *
   * <p>Fix: previously threw a NullPointerException when called before the
   * dictionary had been loaded (or after a failed load).
   */
  public static void unload() {
    if (dictionary != null) {
      dictionary.close();
    }
  }
}