// blob: 2c2d4ee4beaf4ec98f40d9f48a3f941cfa1c86dc [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.coref.mention;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import net.didion.jwnl.JWNLException;
import net.didion.jwnl.data.Adjective;
import net.didion.jwnl.data.FileDictionaryElementFactory;
import net.didion.jwnl.data.IndexWord;
import net.didion.jwnl.data.POS;
import net.didion.jwnl.data.Pointer;
import net.didion.jwnl.data.PointerType;
import net.didion.jwnl.data.Synset;
import net.didion.jwnl.data.VerbFrame;
import net.didion.jwnl.dictionary.FileBackedDictionary;
import net.didion.jwnl.dictionary.MorphologicalProcessor;
import net.didion.jwnl.dictionary.file_manager.FileManager;
import net.didion.jwnl.dictionary.file_manager.FileManagerImpl;
import net.didion.jwnl.dictionary.morph.DefaultMorphologicalProcessor;
import net.didion.jwnl.dictionary.morph.DetachSuffixesOperation;
import net.didion.jwnl.dictionary.morph.LookupExceptionsOperation;
import net.didion.jwnl.dictionary.morph.LookupIndexWordOperation;
import net.didion.jwnl.dictionary.morph.Operation;
import net.didion.jwnl.dictionary.morph.TokenizerOperation;
import net.didion.jwnl.princeton.data.PrincetonWN17FileDictionaryElementFactory;
import net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile;
/**
 * An implementation of the {@link Dictionary} interface using the JWNL library.
 *
 * <p>Constructing an instance installs a JWNL file-backed dictionary that reads
 * its data from the given search directory. Lemma lookup honours the supplied
 * part-of-speech tag; the sense-related methods accept a {@code pos} argument
 * but always query the noun index.
 */
public class JWNLDictionary implements Dictionary {

  /** Shared zero-length result; safe to share because an empty array cannot be modified. */
  private static final String[] EMPTY = new String[0];

  private final net.didion.jwnl.dictionary.Dictionary dict;
  private final MorphologicalProcessor morphy;

  /**
   * Installs a JWNL file-backed dictionary rooted at the given directory and
   * keeps a reference to it and to its morphological processor.
   *
   * @param searchDirectory directory handed to JWNL's {@code FileManagerImpl}
   *        as the location of the dictionary files.
   * @throws IOException declared for the JWNL file manager set-up.
   * @throws JWNLException if JWNL fails to initialise.
   */
  public JWNLDictionary(String searchDirectory) throws IOException, JWNLException {
    // The statement order below is kept exactly as in the original set-up code.
    PointerType.initialize();
    Adjective.initialize();
    VerbFrame.initialize();

    // Suffix rewrite rules ({suffix, replacement}) used when detaching inflections.
    Map<POS, String[][]> suffixMap = new HashMap<POS, String[][]>();
    suffixMap.put(POS.NOUN,new String[][] {{"s",""},{"ses","s"},{"xes","x"},{"zes","z"},{"ches","ch"},{"shes","sh"},{"men","man"},{"ies","y"}});
    suffixMap.put(POS.VERB,new String[][] {{"s",""},{"ies","y"},{"es","e"},{"es",""},{"ed","e"},{"ed",""},{"ing","e"},{"ing",""}});
    suffixMap.put(POS.ADJECTIVE,new String[][] {{"er",""},{"est",""},{"er","e"},{"est","e"}});

    // Suffix detachment applied to the individual tokens of a multi-word entry.
    DetachSuffixesOperation tokDso = new DetachSuffixesOperation(suffixMap);
    tokDso.addDelegate(DetachSuffixesOperation.OPERATIONS,new Operation[] {new LookupIndexWordOperation(),new LookupExceptionsOperation()});

    // Tokenizer splitting on space and hyphen, delegating each token to the lookups above.
    TokenizerOperation tokOp = new TokenizerOperation(new String[] {" ","-"});
    tokOp.addDelegate(TokenizerOperation.TOKEN_OPERATIONS,new Operation[] {new LookupIndexWordOperation(),new LookupExceptionsOperation(),tokDso});

    // Suffix detachment applied to the word as a whole.
    DetachSuffixesOperation morphDso = new DetachSuffixesOperation(suffixMap);
    morphDso.addDelegate(DetachSuffixesOperation.OPERATIONS,new Operation[] {new LookupIndexWordOperation(),new LookupExceptionsOperation()});

    Operation[] operations = {new LookupExceptionsOperation(), morphDso , tokOp};
    MorphologicalProcessor processor = new DefaultMorphologicalProcessor(operations);

    FileManager manager = new FileManagerImpl(searchDirectory,PrincetonRandomAccessDictionaryFile.class);
    FileDictionaryElementFactory factory = new PrincetonWN17FileDictionaryElementFactory();
    FileBackedDictionary.install(manager, processor,factory,true);

    dict = net.didion.jwnl.dictionary.Dictionary.getInstance();
    // Use the processor as exposed by the installed dictionary for all later lookups.
    morphy = dict.getMorphologicalProcessor();
  }

  /**
   * Maps a part-of-speech tag to a JWNL {@link POS} by its first character:
   * N/n is noun, V/v is verb, J/a is adjective, R/r is adverb. Any other tag
   * is treated as a noun.
   */
  private static POS toPos(String tag) {
    if (tag.startsWith("N") || tag.startsWith("n")) {
      return POS.NOUN;
    }
    if (tag.startsWith("V") || tag.startsWith("v")) {
      return POS.VERB;
    }
    if (tag.startsWith("J") || tag.startsWith("a")) {
      return POS.ADJECTIVE;
    }
    if (tag.startsWith("R") || tag.startsWith("r")) {
      return POS.ADVERB;
    }
    return POS.NOUN;
  }

  /**
   * Returns all base forms JWNL's morphological processor finds for a word.
   *
   * @param word the surface form to look up.
   * @param tag part-of-speech tag; only its first character is examined.
   * @return the base forms, or {@code null} if JWNL raised an exception
   *         (the stack trace is printed to standard error).
   */
  public String[] getLemmas(String word, String tag) {
    try {
      POS pos = toPos(tag);
      // lookupAllBaseForms returns a raw List; its elements are used as Strings here.
      @SuppressWarnings("unchecked")
      List<String> lemmas = morphy.lookupAllBaseForms(pos,word);
      return lemmas.toArray(new String[lemmas.size()]);
    }
    catch (JWNLException e) {
      e.printStackTrace();
      return null;
    }
  }

  /**
   * Returns the synset offset of the given sense of a lemma as a string.
   *
   * @param lemma the lemma to look up in the noun index.
   * @param pos ignored; the noun index is always used.
   * @param sense zero-based sense index.
   * @return the synset offset as a string, or {@code null} if the lemma is not
   *         found, the sense index is out of range, or JWNL raised an exception.
   */
  public String getSenseKey(String lemma, String pos,int sense) {
    try {
      IndexWord iw = dict.getIndexWord(POS.NOUN,lemma);
      if (iw == null) {
        return null;
      }
      // An unknown sense is reported like an unknown lemma rather than by an
      // ArrayIndexOutOfBoundsException.
      if (sense < 0 || sense >= iw.getSenseCount()) {
        return null;
      }
      return String.valueOf(iw.getSynsetOffsets()[sense]);
    }
    catch (JWNLException e) {
      e.printStackTrace();
      return null;
    }
  }

  /**
   * Returns the number of senses recorded for a lemma.
   *
   * @param lemma the lemma to look up in the noun index.
   * @param pos ignored; the noun index is always used.
   * @return the sense count, or 0 if the lemma is not found or JWNL raised an
   *         exception (the stack trace is printed to standard error).
   */
  public int getNumSenses(String lemma, String pos) {
    try {
      IndexWord iw = dict.getIndexWord(POS.NOUN,lemma);
      if (iw == null){
        return 0;
      }
      return iw.getSenseCount();
    }
    catch (JWNLException e) {
      // Report the failure the same way the sibling lookups do instead of hiding it.
      e.printStackTrace();
      return 0;
    }
  }

  /**
   * Appends the offsets of all hypernym ancestors of a synset to a list,
   * following each hypernym pointer recursively. No de-duplication is done, so
   * an ancestor reachable along several paths is added once per path.
   *
   * @param synset the synset whose ancestors are collected.
   * @param parents the list receiving the ancestor offsets as strings.
   * @throws JWNLException if JWNL fails while resolving a pointer.
   */
  private void getParents(Synset synset, List<String> parents) throws JWNLException {
    Pointer[] pointers = synset.getPointers();
    for (int pi=0,pn=pointers.length;pi<pn;pi++) {
      if (pointers[pi].getType() == PointerType.HYPERNYM) {
        Synset parent = pointers[pi].getTargetSynset();
        parents.add(String.valueOf(parent.getOffset()));
        getParents(parent,parents);
      }
    }
  }

  /**
   * Returns the synset offsets of all hypernym ancestors of a lemma's sense.
   *
   * @param lemma the lemma to look up in the noun index.
   * @param pos ignored; the noun index is always used.
   * @param sense zero-based sense index (JWNL is queried with {@code sense + 1}).
   * @return the ancestor offsets as strings; an empty array if the lemma is not
   *         found or the sense index is out of range; {@code null} if JWNL
   *         raised an exception.
   */
  public String[] getParentSenseKeys(String lemma, String pos, int sense) {
    try {
      IndexWord iw = dict.getIndexWord(POS.NOUN,lemma);
      if (iw == null) {
        return EMPTY;
      }
      // An unknown sense has no parents; treat it like an unknown lemma.
      if (sense < 0 || sense >= iw.getSenseCount()) {
        return EMPTY;
      }
      Synset synset = iw.getSense(sense+1);
      List<String> parents = new ArrayList<String>();
      getParents(synset,parents);
      return parents.toArray(new String[parents.size()]);
    }
    catch (JWNLException e) {
      e.printStackTrace();
      return null;
    }
  }

  /**
   * Command-line check: prints, for every noun lemma and sense of the word
   * given as the first argument, the parent sense keys. The dictionary
   * location is read from the {@code WNSEARCHDIR} system property.
   *
   * @param args the word to look up as {@code args[0]}.
   * @throws IOException if the dictionary cannot be set up.
   * @throws JWNLException if JWNL fails to initialise.
   */
  public static void main(String[] args) throws IOException, JWNLException {
    String searchDir = System.getProperty("WNSEARCHDIR");
    System.err.println("searchDir="+searchDir);
    if (searchDir == null || args.length == 0) {
      System.err.println("Usage: java -DWNSEARCHDIR=<dictionary directory> " + JWNLDictionary.class.getName() + " <word>");
      return;
    }
    Dictionary dict = new JWNLDictionary(searchDir);
    String word = args[0];
    String[] lemmas = dict.getLemmas(word,"NN");
    if (lemmas == null) {
      // Lookup failed; getLemmas has already printed the stack trace.
      return;
    }
    for (int li=0,ln=lemmas.length;li<ln;li++) {
      for (int si=0,sn=dict.getNumSenses(lemmas[li],"NN");si<sn;si++) {
        String[] parentKeys = dict.getParentSenseKeys(lemmas[li],"NN",si);
        if (parentKeys == null) {
          // Lookup failed for this sense; the stack trace has already been printed.
          continue;
        }
        System.out.println(lemmas[li]+" ("+si+")\t"+java.util.Arrays.asList(parentKeys));
      }
    }
  }
}