blob: dbbec1db87c0843ffd8f27978d80aa9e2322a910 [file] [log] [blame]
package opennlp.tools.apps.relevanceVocabs;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import opennlp.tools.coref.mention.Dictionary;
/**
 * Process-wide lemmatizer that combines a small table of hand-written special
 * cases with a {@link Dictionary} lookup.
 *
 * <p>
 * All lookups normalize the incoming word by trimming surrounding whitespace
 * and lower-casing it before consulting either source. If the underlying
 * dictionary could not be loaded, the special cases keep working and every
 * other lookup reports "no lemma" instead of failing.
 *
 * <p>
 * Obtain the shared instance through {@link #getInstance()}.
 */
public class WordDictionary {

    /**
     * Each row lists a lemma followed by the surface forms that map to it; the
     * first entry of a row is the lemma for every entry in that row.
     */
    private static final String[][] SPECIAL_CASES = { { "lens", "lenses" } };

    /**
     * Normalized words of this length or longer are never sent to the
     * dictionary; the lookup is skipped to avoid the dictionary hanging on
     * very long tokens.
     */
    private static final int LONG_WORD_THRESHOLD = 20;

    private static WordDictionary instance;

    /** Backing dictionary; {@code null} when loading failed in the constructor. */
    private final Dictionary dictionary;

    /** Maps each special-case surface form (and the lemma itself) to its lemma. */
    private final Map<String, String> specialCaseMap;

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return the singleton {@code WordDictionary}
     */
    public static synchronized WordDictionary getInstance() {
        if (instance == null) {
            instance = new WordDictionary();
        }
        return instance;
    }

    /**
     * Loads the backing dictionary and builds the special-case table. A failure
     * to load the dictionary is reported on {@code System.err} and otherwise
     * tolerated, so that the special cases remain usable.
     */
    private WordDictionary() {
        Dictionary loaded = null;
        try {
            // NOTE(review): the literal string "PROPERTIES_FILE" is passed here, not
            // a configured path; it looks like a leftover from a removed constant.
            // Confirm what TopJWNLDictionary expects.
            loaded = new TopJWNLDictionary("PROPERTIES_FILE");
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("Failed to load the WordNet database: " + e);
        }
        dictionary = loaded;
        specialCaseMap = buildSpecialCaseMap();
    }

    /**
     * Returns the lemma of {@code word} for the given part-of-speech tag, or
     * the normalized word itself when no lemma is found.
     *
     * @param word the word to lemmatize; may be {@code null}
     * @param type the part-of-speech tag (for example {@code "NN"}); may be
     *             {@code null}
     * @return the lemma, the trimmed lower-cased word when there is no lemma,
     *         or {@code null} when {@code word} is {@code null}
     */
    public String getLemmaOrWord(String word, String type) {
        if (word == null) {
            return null;
        }
        String lemma = getLemma(word, type);
        return (lemma != null) ? lemma : normalize(word);
    }

    /**
     * Returns the lemma of {@code word} for the given part-of-speech tag.
     *
     * <p>
     * The word is trimmed and lower-cased first. Special cases are consulted
     * before the dictionary. Words whose normalized length reaches
     * {@link #LONG_WORD_THRESHOLD} are returned in normalized form without a
     * dictionary lookup.
     *
     * @param word the word to lemmatize; may be {@code null}
     * @param type the part-of-speech tag; may be {@code null}
     * @return the lemma; the normalized word itself for over-long words; or
     *         {@code null} when {@code word} is {@code null} or blank, when no
     *         lemma is known, or when the dictionary is unavailable
     */
    public String getLemma(String word, String type) {
        if (word == null) {
            return null;
        }
        return lookupNormalized(normalize(word), type);
    }

    /**
     * Returns the lemma of a word whose part of speech is unknown, trying it as
     * a noun first and then as a verb, and falling back to the normalized word.
     *
     * @param word the word to lemmatize; may be {@code null}
     * @return the noun lemma if any, else the verb lemma if any, else the
     *         trimmed lower-cased word; {@code null} when {@code word} is
     *         {@code null}
     */
    public String getLemmaOrWord(String word) {
        if (word == null) {
            return null;
        }
        String normalized = normalize(word);
        String lemma = lookupNormalized(normalized, "NN");
        if (lemma == null) {
            lemma = lookupNormalized(normalized, "VB");
        }
        return (lemma != null) ? lemma : normalized;
    }

    /**
     * Performs the lemma lookup for an already normalized word.
     */
    private String lookupNormalized(String normalized, String type) {
        if (normalized.length() == 0) {
            return null;
        }
        String special = specialCaseMap.get(normalized);
        if (special != null) {
            return special;
        }
        // Skip over-long words: the dictionary lookup can stall on them.
        if (normalized.length() >= LONG_WORD_THRESHOLD) {
            return normalized;
        }
        // The dictionary failed to load; only the special cases are available.
        if (dictionary == null) {
            return null;
        }
        // JWNLDictionary has a bug, and we have to use a lower-case type.
        String lowerType = (type == null) ? null : type.toLowerCase(Locale.ENGLISH);
        String[] lemmas = dictionary.getLemmas(normalized, lowerType);
        if (lemmas == null || lemmas.length == 0) {
            return null;
        }
        return lemmas[0];
    }

    /**
     * Trims and lower-cases a non-null word. A fixed locale is used so that
     * the result does not depend on the JVM's default locale.
     */
    private static String normalize(String word) {
        return word.trim().toLowerCase(Locale.ENGLISH);
    }

    /**
     * Expands {@link #SPECIAL_CASES} into a form-to-lemma map; the lemma maps
     * to itself.
     */
    private static Map<String, String> buildSpecialCaseMap() {
        Map<String, String> formToLemma = new HashMap<String, String>();
        for (String[] forms : SPECIAL_CASES) {
            String lemma = forms[0];
            for (String form : forms) {
                formToLemma.put(form, lemma);
            }
        }
        return formToLemma;
    }

    /**
     * Small manual demo: prints the lemma found for a few sample words under
     * the {@code VB}, {@code NN} and {@code JJ} tags.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] verbs = { "is", "has", "were", "likes", "TaKen", "going" };
        String[] nouns = { "efficient", "Cars", "lens", "wives", "lenses",
                "photos" };
        String[] adverbs = { "would", "could", "should", "might" };

        WordDictionary dictionary = WordDictionary.getInstance();
        printLemmas(dictionary, verbs, "VB");
        printLemmas(dictionary, nouns, "NN");
        printLemmas(dictionary, adverbs, "JJ");
    }

    /**
     * Prints one {@code word ==> lemma} line per word for the given tag.
     */
    private static void printLemmas(WordDictionary dictionary, String[] words,
            String type) {
        for (String word : words) {
            System.out.println(word + " ==> " + dictionary.getLemma(word, type));
        }
    }
}