/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package opennlp.tools.disambiguator.lesk;
import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import opennlp.tools.disambiguator.WSDHelper;
import opennlp.tools.disambiguator.SynNode;
import opennlp.tools.disambiguator.WSDParameters;
import opennlp.tools.disambiguator.WSDSample;
import opennlp.tools.disambiguator.WSDisambiguator;
import opennlp.tools.disambiguator.WordPOS;
import opennlp.tools.disambiguator.WordSense;
import opennlp.tools.disambiguator.mfs.MFS;
import net.sf.extjwnl.JWNLException;
import net.sf.extjwnl.data.Synset;
import net.sf.extjwnl.data.Word;
/**
* Implementation of the <b>Overlap Of Senses</b> approach originally proposed
* by Lesk. The main idea is to check for word overlaps between the sense
* definitions and the surrounding context. Two words overlap when they have
* equivalent stems. The more overlaps a sense has, the higher its score.
* Different variations of the approach are included in this class.
*
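* <p>
* A minimal usage sketch (the inputs are illustrative; the WordNet resources
* behind {@link WSDHelper} are assumed to be loaded beforehand):
* </p>
* <pre>{@code
* String[] tokens = {"The", "bank", "approved", "the", "loan"};
* String[] tags   = {"DT", "NN", "VBD", "DT", "NN"};
* String[] lemmas = {"the", "bank", "approve", "the", "loan"};
* Lesk lesk = new Lesk(); // uses the default LeskParameters
* // disambiguate the token at index 1 ("bank")
* String[] senses = lesk.disambiguate(tokens, tags, lemmas, 1);
* }</pre>
*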
*/
public class Lesk extends WSDisambiguator {
/**
* The Lesk-specific parameters.
*/
protected LeskParameters params;
/**
* List of filtered context words
*/
ArrayList<WordPOS> contextWords = new ArrayList<WordPOS>();
public Lesk() {
this(null);
}
/**
* Initializes the disambiguator and sets the input parameters.
*
* @param params
* the Lesk parameters; if {@code null}, defaults are used
* @throws InvalidParameterException if the parameters are invalid
*/
public Lesk(LeskParameters params) throws InvalidParameterException {
this.setParams(params);
}
/**
* If the parameters are null, sets the default ones; otherwise sets them only
* if they are valid. Invalid parameters cause an exception.
*
* @param params
* the parameters to set
* @throws InvalidParameterException if the parameters are invalid
*/
@Override
public void setParams(WSDParameters params) throws InvalidParameterException {
if (params == null) {
this.params = new LeskParameters();
} else {
if (params.isValid()) {
this.params = (LeskParameters) params;
} else {
throw new InvalidParameterException("Invalid Lesk parameters");
}
}
}
/**
* @return the parameter settings
*/
public LeskParameters getParams() {
return params;
}
/**
* The basic Lesk method, where the entire context is considered for overlaps.
*
* @param sample
* the word sample to disambiguate
* @return the list of word senses with their scores
*/
public ArrayList<WordSense> basic(WSDSample sample) {
// clear the context words from any previous call before collecting new ones
contextWords.clear();
WordPOS word = new WordPOS(sample.getTargetWord(), sample.getTargetTag());
ArrayList<Synset> synsets = word.getSynsets();
ArrayList<SynNode> nodes = new ArrayList<SynNode>();
for (int i = 0; i < sample.getSentence().length; i++) {
if (!WSDHelper.getStopCache().containsKey(sample.getSentence()[i])) {
if (WSDHelper.getRelvCache().containsKey(sample.getTags()[i])) {
contextWords.add(new WordPOS(sample.getSentence()[i], sample
.getTags()[i]));
}
}
}
for (Synset synset : synsets) {
SynNode node = new SynNode(synset, contextWords);
nodes.add(node);
}
ArrayList<WordSense> scoredSenses = SynNode.updateSenses(nodes);
for (WordSense wordSense : scoredSenses) {
wordSense.setWSDSample(sample);
int count = 0;
for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
for (WordPOS sentenceWordPOS : contextWords) {
if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
count = count + 1;
}
}
}
wordSense.setScore(count);
}
return scoredSenses;
}
/**
* The basic Lesk method, applied to the context window defined by the
* parameters.
*
* @param sample
* the word sample to disambiguate
* @return the list of word senses with their scores
*/
public ArrayList<WordSense> basicContextual(WSDSample sample) {
// clear the context words from any previous call before collecting new ones
contextWords.clear();
WordPOS word = new WordPOS(sample.getTargetWord(), sample.getTargetTag());
ArrayList<Synset> synsets = word.getSynsets();
ArrayList<SynNode> nodes = new ArrayList<SynNode>();
int index = sample.getTargetPosition();
for (int i = index - getParams().win_b_size; i <= index
+ getParams().win_f_size; i++) {
if (i >= 0 && i < sample.getSentence().length && i != index) {
if (!WSDHelper.getStopCache().containsKey(sample.getSentence()[i])) {
if (WSDHelper.getRelvCache().containsKey(sample.getTags()[i])) {
contextWords.add(new WordPOS(sample.getSentence()[i], sample
.getTags()[i]));
}
}
}
}
for (Synset synset : synsets) {
SynNode node = new SynNode(synset, contextWords);
nodes.add(node);
}
ArrayList<WordSense> scoredSenses = SynNode.updateSenses(nodes);
for (WordSense wordSense : scoredSenses) {
wordSense.setWSDSample(sample);
int count = 0;
for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
for (WordPOS sentenceWordPOS : contextWords) {
// TODO change to lemma check
if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
count = count + 1;
}
}
}
wordSense.setScore(count);
}
return scoredSenses;
}
/**
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps across the entire context. The
* scoring function uses linear weights.
*
* @param sample
* the word sample to disambiguate
* @return the array of WordSenses with their scores
*/
public ArrayList<WordSense> extended(WSDSample sample) {
params.setWin_b_size(0);
params.setWin_f_size(0);
return extendedContextual(sample);
}
/**
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps in a default context window. The
* scoring function uses linear weights.
*
* @param sample
* the word sample to disambiguate
* @return the array of WordSenses with their scores
*/
public ArrayList<WordSense> extendedContextual(WSDSample sample) {
ArrayList<WordSense> scoredSenses;
if (params.getWin_b_size() == 0 && params.getWin_f_size() == 0) {
scoredSenses = basic(sample);
} else {
scoredSenses = basicContextual(sample);
}
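// Feature flags: 0 = synonyms, 1 = hypernyms, 2 = hyponyms, 3 = meronyms,
// 4 = holonyms, 5 = entailments, 6 = coordinate terms, 7 = causes,
// 8 = attributes, 9 = pertainyms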
for (WordSense wordSense : scoredSenses) {
if (getParams().getFeatures()[0]) {
wordSense.setScore(wordSense.getScore() + getParams().depth_weight
* assessSynonyms(wordSense.getNode().getSynonyms(), contextWords));
}
if (getParams().getFeatures()[1]) {
fathomHypernyms(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[2]) {
fathomHyponyms(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[3]) {
fathomMeronyms(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[4]) {
fathomHolonyms(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[5]) {
fathomEntailments(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[6]) {
fathomCoordinateTerms(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[7]) {
fathomCauses(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[8]) {
fathomAttributes(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[9]) {
fathomPertainyms(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
}
return scoredSenses;
}
/**
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps across the entire context. The
* scoring function uses exponential weights.
*
* @param sample
* the word sample to disambiguate
* @return the list of word senses with their scores
*/
public ArrayList<WordSense> extendedExponential(WSDSample sample) {
params.setWin_b_size(0);
params.setWin_f_size(0);
return extendedExponentialContextual(sample);
}
/**
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps in a custom context window.
* The scoring function uses exponential weights.
*
* @param sample
* the word sample to disambiguate
* @return the array of WordSenses with their scores
*/
public ArrayList<WordSense> extendedExponentialContextual(WSDSample sample) {
ArrayList<WordSense> scoredSenses;
if (params.getWin_b_size() == 0 && params.getWin_f_size() == 0) {
scoredSenses = basic(sample);
} else {
scoredSenses = basicContextual(sample);
}
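// The feature indices map to the same lexical relations as in
// extendedContextual above.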
for (WordSense wordSense : scoredSenses) {
if (params.features[0]) {
wordSense.setScore(wordSense.getScore()
+ Math
.pow(
assessSynonyms(wordSense.getNode().getSynonyms(),
contextWords), params.iexp));
}
if (params.features[1]) {
fathomHypernymsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[2]) {
fathomHyponymsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[3]) {
fathomMeronymsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[4]) {
fathomHolonymsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[5]) {
fathomEntailmentsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[6]) {
fathomCoordinateTermsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[7]) {
fathomCausesExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[8]) {
fathomAttributesExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[9]) {
fathomPertainymsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
}
return scoredSenses;
}
/**
* Recursively scores the hypernym tree using the linear weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param depthScoreWeight the base of the per-level weight
*/
private void fathomHypernyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHypernyms();
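// each level adds overlap * depthScoreWeight^(maxDepth - depth + 1); with a
// weight below 1 this damps matches found deeper in the tree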
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getHypernyms(), relvWords));
for (Synset hypernym : childNode.getHypernyms()) {
fathomHypernyms(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
}
}
/**
* Recursively scores the hypernym tree using the exponential weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param intersectionExponent the exponent applied to the overlap count
* @param depthScoreExponent the exponent applied to the depth in the divisor
*/
private void fathomHypernymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHypernyms();
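// each level adds overlap^intersectionExponent / depth^depthScoreExponent,
// where depth is the remaining recursion depth at that level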
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getHypernyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset hypernym : childNode.getHypernyms()) {
fathomHypernymsExponential(wordSense, hypernym, relvGlossWords,
depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
}
}
/**
* Recursively scores the hyponym tree using the linear weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param depthScoreWeight the base of the per-level weight
*/
private void fathomHyponyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHyponyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getHyponyms(), relvWords));
for (Synset hyponym : childNode.getHyponyms()) {
fathomHyponyms(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
}
}
/**
* Recursively scores the hyponym tree using the exponential weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param intersectionExponent the exponent applied to the overlap count
* @param depthScoreExponent the exponent applied to the depth in the divisor
*/
private void fathomHyponymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHyponyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getHyponyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset hyponym : childNode.getHyponyms()) {
fathomHyponymsExponential(wordSense, hyponym, relvGlossWords, depth - 1,
maxDepth, intersectionExponent, depthScoreExponent);
}
}
/**
* Recursively scores the meronym tree using the linear weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param depthScoreWeight the base of the per-level weight
*/
private void fathomMeronyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setMeronyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getMeronyms(), relvWords));
for (Synset meronym : childNode.getMeronyms()) {
fathomMeronyms(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
}
}
/**
* Recursively scores the meronym tree using the exponential weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param intersectionExponent the exponent applied to the overlap count
* @param depthScoreExponent the exponent applied to the depth in the divisor
*/
private void fathomMeronymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setMeronyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getMeronyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset meronym : childNode.getMeronyms()) {
fathomMeronymsExponential(wordSense, meronym, relvGlossWords, depth - 1,
maxDepth, intersectionExponent, depthScoreExponent);
}
}
/**
* Recursively scores the holonym tree using the linear weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param depthScoreWeight the base of the per-level weight
*/
private void fathomHolonyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHolonyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getHolonyms(), relvWords));
for (Synset holonym : childNode.getHolonyms()) {
fathomHolonyms(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
}
}
/**
* Recursively scores the holonym tree using the exponential weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param intersectionExponent the exponent applied to the overlap count
* @param depthScoreExponent the exponent applied to the depth in the divisor
*/
private void fathomHolonymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHolonyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getHolonyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset holonym : childNode.getHolonyms()) {
fathomHolonymsExponential(wordSense, holonym, relvGlossWords, depth - 1,
maxDepth, intersectionExponent, depthScoreExponent);
}
}
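/**
* Recursively scores the entailments using the linear weighting scheme.
*/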
private void fathomEntailments(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setEntailements();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getEntailments(), relvWords));
for (Synset entailment : childNode.getEntailments()) {
fathomEntailments(wordSense, entailment, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
}
}
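/**
* Recursively scores the entailments using the exponential weighting scheme.
*/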
private void fathomEntailmentsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setEntailements();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getEntailments(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset entailment : childNode.getEntailments()) {
fathomEntailmentsExponential(wordSense, entailment, relvGlossWords,
depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
}
}
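/**
* Recursively scores the coordinate terms using the linear weighting scheme.
*/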
private void fathomCoordinateTerms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setCoordinateTerms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getCoordinateTerms(), relvWords));
for (Synset coordinate : childNode.getCoordinateTerms()) {
fathomCoordinateTerms(wordSense, coordinate, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
}
}
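/**
* Recursively scores the coordinate terms using the exponential weighting
* scheme.
*/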
private void fathomCoordinateTermsExponential(WordSense wordSense,
Synset child, ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setCoordinateTerms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getCoordinateTerms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset coordinate : childNode.getCoordinateTerms()) {
fathomCoordinateTermsExponential(wordSense, coordinate, relvGlossWords,
depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
}
}
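/**
* Recursively scores the causes using the linear weighting scheme.
*/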
private void fathomCauses(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setCauses();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getCauses(), relvWords));
for (Synset cause : childNode.getCauses()) {
fathomCauses(wordSense, cause, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
}
}
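/**
* Recursively scores the causes using the exponential weighting scheme.
*/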
private void fathomCausesExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setCauses();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getCauses(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset cause : childNode.getCauses()) {
fathomCausesExponential(wordSense, cause, relvGlossWords, depth - 1,
maxDepth, intersectionExponent, depthScoreExponent);
}
}
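/**
* Recursively scores the attributes using the linear weighting scheme.
*/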
private void fathomAttributes(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setAttributes();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getAttributes(), relvWords));
for (Synset attribute : childNode.getAttributes()) {
fathomAttributes(wordSense, attribute, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
}
}
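/**
* Recursively scores the attributes using the exponential weighting scheme.
*/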
private void fathomAttributesExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setAttributes();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getAttributes(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset attribute : childNode.getAttributes()) {
fathomAttributesExponential(wordSense, attribute, relvGlossWords,
depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
}
}
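/**
* Recursively scores the pertainyms using the linear weighting scheme.
*/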
private void fathomPertainyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setPertainyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getPertainyms(), relvWords));
for (Synset pertainym : childNode.getPertainyms()) {
fathomPertainyms(wordSense, pertainym, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
}
}
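/**
* Recursively scores the pertainyms using the exponential weighting scheme.
*/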
private void fathomPertainymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setPertainyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getPertainyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset pertainym : childNode.getPertainyms()) {
fathomPertainymsExponential(wordSense, pertainym, relvGlossWords,
depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
}
}
/**
* Counts the stem overlaps between the glosses of the given feature synsets
* and the relevant context words.
*
* @param featureSynsets the synsets reached through the feature relation
* @param relevantWords the relevant context words to match against
* @return the number of stem overlaps found
*/
private int assessFeature(ArrayList<Synset> featureSynsets,
ArrayList<WordPOS> relevantWords) {
int count = 0;
for (Synset synset : featureSynsets) {
SynNode subNode = new SynNode(synset, relevantWords);
String[] tokenizedSense = WSDHelper.getTokenizer().tokenize(
subNode.getGloss());
ArrayList<WordPOS> relvSenseWords = WSDHelper
.getAllRelevantWords(tokenizedSense);
for (WordPOS senseWord : relvSenseWords) {
for (WordPOS sentenceWord : relevantWords) {
if (sentenceWord.isStemEquivalent(senseWord)) {
count = count + 1;
}
}
}
}
return count;
}
/**
* Counts the stem overlaps between the given synonyms and the relevant
* context words.
*
* @param synonyms the synonyms of the sense
* @param relevantWords the relevant context words to match against
* @return the number of stem overlaps found
*/
private int assessSynonyms(ArrayList<WordPOS> synonyms,
ArrayList<WordPOS> relevantWords) {
int count = 0;
for (WordPOS synonym : synonyms) {
for (WordPOS sentenceWord : relevantWords) {
if (sentenceWord.isStemEquivalent(synonym)) {
count = count + 1;
}
}
}
return count;
}
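/**
* Disambiguates the target word of the sample using the Lesk variant selected
* in the parameters. Each returned entry has the form
* "source senseKey score". If no sense overlaps with the context, the most
* frequent sense is returned instead (with score -1).
*
* @param sample the word sample to disambiguate
* @return the scored sense keys, or null if the target POS tag is neither
* relevant nor covered by a predefined answer
*/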
@Override
public String[] disambiguate(WSDSample sample) {
// if the target POS tag is not relevant, return a predefined answer if one exists
if (!WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
String s = WSDParameters.SenseSource.WSDHELPER.name() + " "
+ sample.getTargetTag();
String[] sense = { s };
return sense;
} else {
return null;
}
}
ArrayList<WordSense> wsenses = null;
switch (this.params.leskType) {
case LESK_BASIC:
wsenses = basic(sample);
break;
case LESK_BASIC_CTXT:
wsenses = basicContextual(sample);
break;
case LESK_EXT:
wsenses = extended(sample);
break;
case LESK_EXT_CTXT:
wsenses = extendedContextual(sample);
break;
case LESK_EXT_EXP:
wsenses = extendedExponential(sample);
break;
case LESK_EXT_EXP_CTXT:
wsenses = extendedExponentialContextual(sample);
break;
default:
wsenses = extendedExponentialContextual(sample);
break;
}
Collections.sort(wsenses);
String[] senses;
if (!wsenses.isEmpty() && wsenses.get(0).getScore() > 0) { // if at least one overlap
List<Word> synsetWords;
senses = new String[wsenses.size()];
String senseKey = "?";
for (int i = 0; i < wsenses.size(); i++) {
synsetWords = wsenses.get(i).getNode().synset.getWords();
for (Word synWord : synsetWords) {
if (synWord.getLemma().equals(
sample.getLemmas()[sample.getTargetPosition()])) {
try {
senseKey = synWord.getSenseKey();
} catch (JWNLException e) {
e.printStackTrace();
}
break;
}
}
senses[i] = params.source.name() + " " + senseKey + " "
+ wsenses.get(i).getScore();
}
} else { // get the MFS if no overlaps
senses = new String[1];
senses[0] = MFS.getMostFrequentSense(sample) + " -1";
}
return senses;
}
@Override
public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
String[] lemmas, int ambiguousTokenIndex) {
return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
ambiguousTokenIndex));
}
}