/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package opennlp.tools.disambiguator.lesk;
import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import opennlp.tools.disambiguator.WSDHelper;
import opennlp.tools.disambiguator.SynNode;
import opennlp.tools.disambiguator.WSDParameters;
import opennlp.tools.disambiguator.WSDSample;
import opennlp.tools.disambiguator.WSDisambiguator;
import opennlp.tools.disambiguator.WordPOS;
import opennlp.tools.disambiguator.WordSense;
import opennlp.tools.disambiguator.mfs.MFS;
import net.sf.extjwnl.JWNLException;
import net.sf.extjwnl.data.Synset;
import net.sf.extjwnl.data.Word;
/**
* Implementation of the <b>Overlap Of Senses</b> approach originally proposed
* by Lesk. The main idea is to check for word overlaps between the sense
* definitions and the surrounding context. Two words overlap when they have
* equivalent stems. The more overlaps a sense has, the higher its score.
* Different variations of the approach are included in this class.
*
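* <p>
* A minimal usage sketch (the inputs are illustrative; the WordNet resources
* behind {@link WSDHelper} are assumed to be loaded beforehand):
* </p>
* <pre>{@code
* String[] tokens = {"The", "bank", "approved", "the", "loan"};
* String[] tags   = {"DT", "NN", "VBD", "DT", "NN"};
* String[] lemmas = {"the", "bank", "approve", "the", "loan"};
* Lesk lesk = new Lesk(); // uses the default LeskParameters
* // disambiguate the token at index 1 ("bank")
* String[] senses = lesk.disambiguate(tokens, tags, lemmas, 1);
* }</pre>
*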
*/
public class Lesk extends WSDisambiguator {
/**
* The Lesk-specific parameters.
*/
protected LeskParameters params;
/**
* List of filtered context words
*/
ArrayList<WordPOS> contextWords = new ArrayList<WordPOS>();
public Lesk() {
this(null);
}
/**
* Initializes the disambiguator and sets the input parameters.
*
* @param params
* the Lesk parameters; if {@code null}, defaults are used
* @throws InvalidParameterException if the parameters are invalid
*/
public Lesk(LeskParameters params) throws InvalidParameterException {
this.setParams(params);
}
/**
* If the parameters are null, sets the default ones; otherwise sets them only
* if they are valid. Invalid parameters cause an exception.
*
* @param params
* the parameters to set
* @throws InvalidParameterException if the parameters are invalid
*/
@Override
public void setParams(WSDParameters params) throws InvalidParameterException {
if (params == null) {
this.params = new LeskParameters();
} else {
if (params.isValid()) {
this.params = (LeskParameters) params;
} else {
throw new InvalidParameterException("Invalid Lesk parameters");
}
}
}
/**
* @return the parameter settings
*/
public LeskParameters getParams() {
return params;
}
/**
* The basic Lesk method, where the entire context is considered for overlaps.
*
* @param sample
* the word sample to disambiguate
* @return the list of word senses with their scores
*/
public ArrayList<WordSense> basic(WSDSample sample) {
// clear the context words from any previous call before collecting new ones
contextWords.clear();
WordPOS word = new WordPOS(sample.getTargetWord(), sample.getTargetTag());
ArrayList<Synset> synsets = word.getSynsets();
ArrayList<SynNode> nodes = new ArrayList<SynNode>();
for (int i = 0; i < sample.getSentence().length; i++) {
if (!WSDHelper.getStopCache().containsKey(sample.getSentence()[i])) {
if (WSDHelper.getRelvCache().containsKey(sample.getTags()[i])) {
contextWords.add(new WordPOS(sample.getSentence()[i], sample
.getTags()[i]));
}
}
}
for (Synset synset : synsets) {
SynNode node = new SynNode(synset, contextWords);
nodes.add(node);
}
ArrayList<WordSense> scoredSenses = SynNode.updateSenses(nodes);
for (WordSense wordSense : scoredSenses) {
wordSense.setWSDSample(sample);
int count = 0;
for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
for (WordPOS sentenceWordPOS : contextWords) {
if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
count = count + 1;
}
}
}
wordSense.setScore(count);
}
return scoredSenses;
}
/**
* The basic Lesk method, applied to the context window defined by the
* parameters.
*
* @param sample
* the word sample to disambiguate
* @return the list of word senses with their scores
*/
public ArrayList<WordSense> basicContextual(WSDSample sample) {
// clear the context words from any previous call before collecting new ones
contextWords.clear();
WordPOS word = new WordPOS(sample.getTargetWord(), sample.getTargetTag());
ArrayList<Synset> synsets = word.getSynsets();
ArrayList<SynNode> nodes = new ArrayList<SynNode>();
int index = sample.getTargetPosition();
for (int i = index - getParams().win_b_size; i <= index
+ getParams().win_f_size; i++) {
if (i >= 0 && i < sample.getSentence().length && i != index) {
if (!WSDHelper.getStopCache().containsKey(sample.getSentence()[i])) {
if (WSDHelper.getRelvCache().containsKey(sample.getTags()[i])) {
contextWords.add(new WordPOS(sample.getSentence()[i], sample
.getTags()[i]));
}
}
}
}
for (Synset synset : synsets) {
SynNode node = new SynNode(synset, contextWords);
nodes.add(node);
}
ArrayList<WordSense> scoredSenses = SynNode.updateSenses(nodes);
for (WordSense wordSense : scoredSenses) {
wordSense.setWSDSample(sample);
int count = 0;
for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
for (WordPOS sentenceWordPOS : contextWords) {
// TODO change to lemma check
if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
count = count + 1;
}
}
}
wordSense.setScore(count);
}
return scoredSenses;
}
/**
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps across the entire context. The
* scoring function uses linear weights.
*
* @param sample
* the word sample to disambiguate
* @return the array of WordSenses with their scores
*/
public ArrayList<WordSense> extended(WSDSample sample) {
params.setWin_b_size(0);
params.setWin_f_size(0);
return extendedContextual(sample);
}
/**
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps in a default context window. The
* scoring function uses linear weights.
*
* @param sample
* the word sample to disambiguate
* @return the array of WordSenses with their scores
*/
public ArrayList<WordSense> extendedContextual(WSDSample sample) {
ArrayList<WordSense> scoredSenses;
if (params.getWin_b_size() == 0 && params.getWin_f_size() == 0) {
scoredSenses = basic(sample);
} else {
scoredSenses = basicContextual(sample);
}
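// Feature flags: 0 = synonyms, 1 = hypernyms, 2 = hyponyms, 3 = meronyms,
// 4 = holonyms, 5 = entailments, 6 = coordinate terms, 7 = causes,
// 8 = attributes, 9 = pertainyms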
for (WordSense wordSense : scoredSenses) {
if (getParams().getFeatures()[0]) {
wordSense.setScore(wordSense.getScore() + getParams().depth_weight
* assessSynonyms(wordSense.getNode().getSynonyms(), contextWords));
}
if (getParams().getFeatures()[1]) {
fathomHypernyms(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[2]) {
fathomHyponyms(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[3]) {
fathomMeronyms(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[4]) {
fathomHolonyms(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[5]) {
fathomEntailments(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[6]) {
fathomCoordinateTerms(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[7]) {
fathomCauses(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[8]) {
fathomAttributes(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
if (getParams().getFeatures()[9]) {
fathomPertainyms(wordSense, wordSense.getNode().synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
}
return scoredSenses;
}
/**
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps across the entire context. The
* scoring function uses exponential weights.
*
* @param sample
* the word sample to disambiguate
* @return the list of word senses with their scores
*/
public ArrayList<WordSense> extendedExponential(WSDSample sample) {
params.setWin_b_size(0);
params.setWin_f_size(0);
return extendedExponentialContextual(sample);
}
/**
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps in a custom context window.
* The scoring function uses exponential weights.
*
* @param sample
* the word sample to disambiguate
* @return the array of WordSenses with their scores
*/
public ArrayList<WordSense> extendedExponentialContextual(WSDSample sample) {
ArrayList<WordSense> scoredSenses;
if (params.getWin_b_size() == 0 && params.getWin_f_size() == 0) {
scoredSenses = basic(sample);
} else {
scoredSenses = basicContextual(sample);
}
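// The feature indices map to the same lexical relations as in
// extendedContextual above.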
for (WordSense wordSense : scoredSenses) {
if (params.features[0]) {
wordSense.setScore(wordSense.getScore()
+ Math
.pow(
assessSynonyms(wordSense.getNode().getSynonyms(),
contextWords), params.iexp));
}
if (params.features[1]) {
fathomHypernymsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[2]) {
fathomHyponymsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[3]) {
fathomMeronymsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[4]) {
fathomHolonymsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[5]) {
fathomEntailmentsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[6]) {
fathomCoordinateTermsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[7]) {
fathomCausesExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[8]) {
fathomAttributesExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[9]) {
fathomPertainymsExponential(wordSense, wordSense.getNode().synset,
contextWords, params.depth, params.depth, params.iexp, params.dexp);
}
}
return scoredSenses;
}
/**
* Recursively scores the hypernym tree using the linear weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param depthScoreWeight the base of the per-level weight
*/
private void fathomHypernyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHypernyms();
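// each level adds overlap * depthScoreWeight^(maxDepth - depth + 1); with a
// weight below 1 this damps matches found deeper in the tree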
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getHypernyms(), relvWords));
for (Synset hypernym : childNode.getHypernyms()) {
fathomHypernyms(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
}
}
/**
* Recursively scores the hypernym tree using the exponential weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param intersectionExponent the exponent applied to the overlap count
* @param depthScoreExponent the exponent applied to the depth in the divisor
*/
private void fathomHypernymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHypernyms();
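// each level adds overlap^intersectionExponent / depth^depthScoreExponent,
// where depth is the remaining recursion depth at that level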
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getHypernyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset hypernym : childNode.getHypernyms()) {
fathomHypernymsExponential(wordSense, hypernym, relvGlossWords,
depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
}
}
/**
* Recursively scores the hyponym tree using the linear weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param depthScoreWeight the base of the per-level weight
*/
private void fathomHyponyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHyponyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getHyponyms(), relvWords));
for (Synset hyponym : childNode.getHyponyms()) {
fathomHyponyms(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
}
}
/**
* Recursively scores the hyponym tree using the exponential weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param intersectionExponent the exponent applied to the overlap count
* @param depthScoreExponent the exponent applied to the depth in the divisor
*/
private void fathomHyponymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHyponyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getHyponyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset hyponym : childNode.getHyponyms()) {
fathomHyponymsExponential(wordSense, hyponym, relvGlossWords, depth - 1,
maxDepth, intersectionExponent, depthScoreExponent);
}
}
/**
* Recursively scores the meronym tree using the linear weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param depthScoreWeight the base of the per-level weight
*/
private void fathomMeronyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setMeronyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getMeronyms(), relvWords));
for (Synset meronym : childNode.getMeronyms()) {
fathomMeronyms(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
}
}
/**
* Recursively scores the meronym tree using the exponential weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param intersectionExponent the exponent applied to the overlap count
* @param depthScoreExponent the exponent applied to the depth in the divisor
*/
private void fathomMeronymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setMeronyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getMeronyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset meronym : childNode.getMeronyms()) {
fathomMeronymsExponential(wordSense, meronym, relvGlossWords, depth - 1,
maxDepth, intersectionExponent, depthScoreExponent);
}
}
/**
* Recursively scores the holonym tree using the linear weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param depthScoreWeight the base of the per-level weight
*/
private void fathomHolonyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHolonyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getHolonyms(), relvWords));
for (Synset holonym : childNode.getHolonyms()) {
fathomHolonyms(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
}
}
/**
* Recursively scores the holonym tree using the exponential weighting scheme.
*
* @param wordSense the word sense being scored
* @param child the current synset in the tree
* @param relvWords the relevant context words to match against
* @param depth the remaining recursion depth
* @param maxDepth the maximum recursion depth
* @param intersectionExponent the exponent applied to the overlap count
* @param depthScoreExponent the exponent applied to the depth in the divisor
*/
private void fathomHolonymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setHolonyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getHolonyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset holonym : childNode.getHolonyms()) {
fathomHolonymsExponential(wordSense, holonym, relvGlossWords, depth - 1,
maxDepth, intersectionExponent, depthScoreExponent);
}
}
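/**
* Recursively scores the entailments using the linear weighting scheme.
*/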
private void fathomEntailments(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setEntailements();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getEntailments(), relvWords));
for (Synset entailment : childNode.getEntailments()) {
fathomEntailments(wordSense, entailment, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
}
}
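/**
* Recursively scores the entailments using the exponential weighting scheme.
*/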
private void fathomEntailmentsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setEntailements();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getEntailments(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset entailment : childNode.getEntailments()) {
fathomEntailmentsExponential(wordSense, entailment, relvGlossWords,
depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
}
}
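/**
* Recursively scores the coordinate terms using the linear weighting scheme.
*/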
private void fathomCoordinateTerms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setCoordinateTerms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getCoordinateTerms(), relvWords));
for (Synset coordinate : childNode.getCoordinateTerms()) {
fathomCoordinateTerms(wordSense, coordinate, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
}
}
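/**
* Recursively scores the coordinate terms using the exponential weighting
* scheme.
*/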
private void fathomCoordinateTermsExponential(WordSense wordSense,
Synset child, ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setCoordinateTerms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getCoordinateTerms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset coordinate : childNode.getCoordinateTerms()) {
fathomCoordinateTermsExponential(wordSense, coordinate, relvGlossWords,
depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
}
}
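/**
* Recursively scores the causes using the linear weighting scheme.
*/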
private void fathomCauses(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setCauses();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getCauses(), relvWords));
for (Synset cause : childNode.getCauses()) {
fathomCauses(wordSense, cause, relvGlossWords, depth - 1, maxDepth,
depthScoreWeight);
}
}
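/**
* Recursively scores the causes using the exponential weighting scheme.
*/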
private void fathomCausesExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setCauses();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getCauses(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset cause : childNode.getCauses()) {
fathomCausesExponential(wordSense, cause, relvGlossWords, depth - 1,
maxDepth, intersectionExponent, depthScoreExponent);
}
}
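/**
* Recursively scores the attributes using the linear weighting scheme.
*/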
private void fathomAttributes(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setAttributes();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getAttributes(), relvWords));
for (Synset attribute : childNode.getAttributes()) {
fathomAttributes(wordSense, attribute, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
}
}
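/**
* Recursively scores the attributes using the exponential weighting scheme.
*/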
private void fathomAttributesExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setAttributes();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getAttributes(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset attribute : childNode.getAttributes()) {
fathomAttributesExponential(wordSense, attribute, relvGlossWords,
depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
}
}
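/**
* Recursively scores the pertainyms using the linear weighting scheme.
*/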
private void fathomPertainyms(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double depthScoreWeight) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setPertainyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
* assessFeature(childNode.getPertainyms(), relvWords));
for (Synset pertainym : childNode.getPertainyms()) {
fathomPertainyms(wordSense, pertainym, relvGlossWords, depth - 1,
maxDepth, depthScoreWeight);
}
}
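/**
* Recursively scores the pertainyms using the exponential weighting scheme.
*/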
private void fathomPertainymsExponential(WordSense wordSense, Synset child,
ArrayList<WordPOS> relvWords, int depth, int maxDepth,
double intersectionExponent, double depthScoreExponent) {
if (depth == 0)
return;
String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
child.getGloss().toString());
ArrayList<WordPOS> relvGlossWords = WSDHelper
.getAllRelevantWords(tokenizedGloss);
SynNode childNode = new SynNode(child, relvGlossWords);
childNode.setPertainyms();
wordSense.setScore(wordSense.getScore()
+ Math.pow(assessFeature(childNode.getPertainyms(), relvWords),
intersectionExponent) / Math.pow(depth, depthScoreExponent));
for (Synset pertainym : childNode.getPertainyms()) {
fathomPertainymsExponential(wordSense, pertainym, relvGlossWords,
depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
}
}
/**
* Counts the stem overlaps between the glosses of the given feature synsets
* and the relevant context words.
*
* @param featureSynsets the synsets reached through the feature relation
* @param relevantWords the relevant context words to match against
* @return the number of stem overlaps found
*/
private int assessFeature(ArrayList<Synset> featureSynsets,
ArrayList<WordPOS> relevantWords) {
int count = 0;
for (Synset synset : featureSynsets) {
SynNode subNode = new SynNode(synset, relevantWords);
String[] tokenizedSense = WSDHelper.getTokenizer().tokenize(
subNode.getGloss());
ArrayList<WordPOS> relvSenseWords = WSDHelper
.getAllRelevantWords(tokenizedSense);
for (WordPOS senseWord : relvSenseWords) {
for (WordPOS sentenceWord : relevantWords) {
if (sentenceWord.isStemEquivalent(senseWord)) {
count = count + 1;
}
}
}
}
return count;
}
/**
* Counts the stem overlaps between the given synonyms and the relevant
* context words.
*
* @param synonyms the synonyms of the sense
* @param relevantWords the relevant context words to match against
* @return the number of stem overlaps found
*/
private int assessSynonyms(ArrayList<WordPOS> synonyms,
ArrayList<WordPOS> relevantWords) {
int count = 0;
for (WordPOS synonym : synonyms) {
for (WordPOS sentenceWord : relevantWords) {
if (sentenceWord.isStemEquivalent(synonym)) {
count = count + 1;
}
}
}
return count;
}
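/**
* Disambiguates the target word of the sample using the Lesk variant selected
* in the parameters. Each returned entry has the form
* "source senseKey score". If no sense overlaps with the context, the most
* frequent sense is returned instead (with score -1).
*
* @param sample the word sample to disambiguate
* @return the scored sense keys, or null if the target POS tag is neither
* relevant nor covered by a predefined answer
*/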
@Override
public String[] disambiguate(WSDSample sample) {
// if the target POS tag is not relevant, return a predefined answer if one exists
if (!WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
String s = WSDParameters.SenseSource.WSDHELPER.name() + " "
+ sample.getTargetTag();
String[] sense = { s };
return sense;
} else {
return null;
}
}
ArrayList<WordSense> wsenses = null;
switch (this.params.leskType) {
case LESK_BASIC:
wsenses = basic(sample);
break;
case LESK_BASIC_CTXT:
wsenses = basicContextual(sample);
break;
case LESK_EXT:
wsenses = extended(sample);
break;
case LESK_EXT_CTXT:
wsenses = extendedContextual(sample);
break;
case LESK_EXT_EXP:
wsenses = extendedExponential(sample);
break;
case LESK_EXT_EXP_CTXT:
wsenses = extendedExponentialContextual(sample);
break;
default:
wsenses = extendedExponentialContextual(sample);
break;
}
Collections.sort(wsenses);
String[] senses;
if (!wsenses.isEmpty() && wsenses.get(0).getScore() > 0) { // if at least one overlap
List<Word> synsetWords;
senses = new String[wsenses.size()];
String senseKey = "?";
for (int i = 0; i < wsenses.size(); i++) {
synsetWords = wsenses.get(i).getNode().synset.getWords();
for (Word synWord : synsetWords) {
if (synWord.getLemma().equals(
sample.getLemmas()[sample.getTargetPosition()])) {
try {
senseKey = synWord.getSenseKey();
} catch (JWNLException e) {
e.printStackTrace();
}
break;
}
}
senses[i] = params.source.name() + " " + senseKey + " "
+ wsenses.get(i).getScore();
}
} else { // get the MFS if no overlaps
senses = new String[1];
senses[0] = MFS.getMostFrequentSense(sample) + " -1";
}
return senses;
}
@Override
public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
String[] lemmas, int ambiguousTokenIndex) {
return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
ambiguousTokenIndex));
}
}