OPENNLP-843 - removed the unnecessary files

commit: f40736d15df35a293bc7bb88ec43b0b20c9c5295 [log] [tgz]
author: Anthony Beylerian <beylerian@apache.org> Tue Jun 07 09:26:31 2016 +0000
committer: Anthony Beylerian <beylerian@apache.org> Tue Jun 07 09:26:31 2016 +0000
tree: 1c5a436ab7a91e583443b668ce868dd97959815a
parent: 0f08de2f24ab14c52160dfbabcbc7c76852013b2 [diff]
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
deleted file mode 100644
index 71b928e..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
+++ /dev/null

@@ -1,165 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator.ims;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-
-import opennlp.tools.disambiguator.WSDHelper;
-import opennlp.tools.disambiguator.WSDSample;
-import opennlp.tools.disambiguator.ims.WTDIMS;
-
-/**
- * The default Context Generator of IMS
- */
-// TODO remove this class later
-public class DefaultIMSContextGenerator implements IMSContextGenerator {
-
-  public DefaultIMSContextGenerator() {
-  }
-
-  private String[] extractPosOfSurroundingWords(int index, String[] tags,
-    int windowSize) {
-
-    String[] windowTags = new String[2 * windowSize + 1];
-
-    int j = 0;
-
-    for (int i = index - windowSize; i < index + windowSize; i++) {
-      if (i < 0 || i >= tags.length) {
-        windowTags[j] = "null";
-      } else {
-        windowTags[j] = tags[i].toLowerCase();
-      }
-      j++;
-    }
-
-    return windowTags;
-  }
-
-  public String[] extractSurroundingWords(int index, String[] toks,
-    String[] lemmas, int windowSize) {
-
-    // TODO consider the windowSize
-    ArrayList<String> contextWords = new ArrayList<String>();
-
-    for (int i = 0; i < toks.length; i++) {
-      if (lemmas != null) {
-        if (!WSDHelper.stopWords.contains(toks[i].toLowerCase()) && (index
-          != i)) {
-
-          String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "")
-            .trim();
-
-          if (lemma.length() > 1) {
-            contextWords.add(lemma);
-          }
-
-        }
-      }
-    }
-
-    return contextWords.toArray(new String[contextWords.size()]);
-  }
-
-  private String[] extractLocalCollocations(int index, String[] sentence,
-    int ngram) {
-    /**
-     * Here the author used only 11 features of this type. the range was set to
-     * 3 (bigrams extracted in a way that they are at max separated by 1 word).
-     */
-
-    ArrayList<String> localCollocations = new ArrayList<String>();
-
-    for (int i = index - ngram; i <= index + ngram; i++) {
-
-      if (!(i < 0 || i > sentence.length - 2)) {
-        if ((i != index) && (i + 1 != index) && (i + 1 < index + ngram)) {
-          String lc = sentence[i] + " " + sentence[i + 1];
-          localCollocations.add(lc);
-        }
-        if ((i != index) && (i + 2 != index) && (i + 2 < index + ngram)) {
-          String lc = sentence[i] + " " + sentence[i + 2];
-          localCollocations.add(lc);
-        }
-      }
-
-    }
-    String[] res = new String[localCollocations.size()];
-    res = localCollocations.toArray(new String[localCollocations.size()]);
-
-    return res;
-  }
-
-  /**
-   * Get Context of a word To disambiguate
-   *
-   * @return The IMS context of the word to disambiguate
-   */
-  @Override public String[] getContext(int index, String[] toks, String[] tags,
-    String[] lemmas, int ngram, int windowSize, ArrayList<String> model) {
-
-    String[] posOfSurroundingWords = extractPosOfSurroundingWords(index, toks,
-      windowSize);
-
-    HashSet<String> surroundingWords = new HashSet<>();
-    surroundingWords.addAll(
-      Arrays.asList(extractSurroundingWords(index, toks, lemmas, windowSize)));
-
-    String[] localCollocations = extractLocalCollocations(index, toks, ngram);
-
-    String[] serializedFeatures = new String[posOfSurroundingWords.length
-      + localCollocations.length + model.size()];
-
-    int i = 0;
-
-    for (String feature : posOfSurroundingWords) {
-      serializedFeatures[i] = "F" + i + "=" + feature;
-      i++;
-    }
-
-    for (String feature : localCollocations) {
-      serializedFeatures[i] = "F" + i + "=" + feature;
-      i++;
-    }
-    for (String word : model) {
-
-      if (surroundingWords.contains(word.toString())) {
-        serializedFeatures[i] = "F" + i + "=1";
-      } else {
-        serializedFeatures[i] = "F" + i + "=0";
-      }
-      i++;
-
-    }
-
-    return serializedFeatures;
-
-  }
-
-  public String[] getContext(WSDSample sample, int ngram, int windowSize,
-    ArrayList<String> model) {
-
-    return getContext(sample.getTargetPosition(), sample.getSentence(),
-      sample.getTags(), sample.getLemmas(), ngram, windowSize, model);
-  }
-
-}

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSSequenceValidator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSSequenceValidator.java
deleted file mode 100644
index 535c30f..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSSequenceValidator.java
+++ /dev/null

@@ -1,54 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one or more

- * contributor license agreements.  See the NOTICE file distributed with

- * this work for additional information regarding copyright ownership.

- * The ASF licenses this file to You under the Apache License, Version 2.0

- * (the "License"); you may not use this file except in compliance with

- * the License. You may obtain a copy of the License at

- *

- *     http://www.apache.org/licenses/LICENSE-2.0

- *

- * Unless required by applicable law or agreed to in writing, software

- * distributed under the License is distributed on an "AS IS" BASIS,

- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- * See the License for the specific language governing permissions and

- * limitations under the License.

- */

-

-package opennlp.tools.disambiguator.ims;

-

-import opennlp.tools.util.SequenceValidator;

-

-// TODO remove this class later

-public class DefaultIMSSequenceValidator implements SequenceValidator<String> {

-

-  private boolean validOutcome(String outcome, String prevOutcome) {

-    if (outcome.startsWith("I-")) {

-      if (prevOutcome == null) {

-        return (false);

-      } else {

-        if (prevOutcome.equals("O")) {

-          return (false);

-        }

-        if (!prevOutcome.substring(2).equals(outcome.substring(2))) {

-          return (false);

-        }

-      }

-    }

-    return true;

-  }

-

-  protected boolean validOutcome(String outcome, String[] sequence) {

-    String prevOutcome = null;

-    if (sequence.length > 0) {

-      prevOutcome = sequence[sequence.length - 1];

-    }

-    return validOutcome(outcome, prevOutcome);

-  }

-

-  public boolean validSequence(int i, String[] sequence, String[] s,

-    String outcome) {

-    return validOutcome(outcome, s);

-  }

-

-}
\ No newline at end of file

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java
deleted file mode 100644
index 37405ef..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java
+++ /dev/null

@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator.ims;
-
-import java.util.ArrayList;
-
-import opennlp.tools.disambiguator.WSDSample;
-
-/**
- * Interface for {@link IMSME} context generators.
- */
-// TODO remove this class later
-public interface IMSContextGenerator {
-
-  String[] getContext(int index, String[] toks, String[] tags, String[] lemmas,
-    int ngram, int windowSize, ArrayList<String> model);
-
-  String[] getContext(WSDSample sample, int ngram, int windowSize,
-    ArrayList<String> model);
-}

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
deleted file mode 100644
index a7bd2f4..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
+++ /dev/null

@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.disambiguator.ims;
-
-import opennlp.tools.util.BaseToolFactory;
-import opennlp.tools.util.InvalidFormatException;
-import opennlp.tools.util.SequenceValidator;
-import opennlp.tools.util.ext.ExtensionLoader;
-
-// TODO remove this class later
-public class IMSFactory extends BaseToolFactory {
-
-  /**
-   * Creates a {@link IMSFactory} that provides the default implementation of
-   * the resources.
-   */
-  public IMSFactory() {
-
-  }
-
-  public static IMSFactory create(String subclassName)
-    throws InvalidFormatException {
-    if (subclassName == null) {
-      // will create the default factory
-      return new IMSFactory();
-    }
-    try {
-      IMSFactory theFactory = ExtensionLoader
-        .instantiateExtension(IMSFactory.class, subclassName);
-      return theFactory;
-    } catch (Exception e) {
-      String msg = "Could not instantiate the " + subclassName
-        + ". The initialization throw an exception.";
-      System.err.println(msg);
-      e.printStackTrace();
-      throw new InvalidFormatException(msg, e);
-    }
-  }
-
-  @Override public void validateArtifactMap() throws InvalidFormatException {
-    // no additional artifacts
-  }
-
-  public IMSContextGenerator getContextGenerator() {
-    return new DefaultIMSContextGenerator();
-  }
-
-  public SequenceValidator<String> getSequenceValidator() {
-    return new DefaultIMSSequenceValidator();
-  }
-}

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java
deleted file mode 100644
index 1755b33..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java
+++ /dev/null

@@ -1,228 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one or more

- * contributor license agreements.  See the NOTICE file distributed with

- * this work for additional information regarding copyright ownership.

- * The ASF licenses this file to You under the Apache License, Version 2.0

- * (the "License"); you may not use this file except in compliance with

- * the License. You may obtain a copy of the License at

- *

- *     http://www.apache.org/licenses/LICENSE-2.0

- *

- * Unless required by applicable law or agreed to in writing, software

- * distributed under the License is distributed on an "AS IS" BASIS,

- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- * See the License for the specific language governing permissions and

- * limitations under the License.

- */

-

-package opennlp.tools.disambiguator.ims;

-

-import java.io.File;

-import java.io.IOException;

-import java.util.ArrayList;

-import java.util.HashMap;

-

-import opennlp.tools.disambiguator.WSDHelper;

-import opennlp.tools.disambiguator.WSDSample;

-import opennlp.tools.disambiguator.WSDisambiguator;

-import opennlp.tools.disambiguator.MFS;

-import opennlp.tools.ml.EventTrainer;

-import opennlp.tools.ml.TrainerFactory;

-import opennlp.tools.ml.model.MaxentModel;

-import opennlp.tools.ml.model.Event;

-import opennlp.tools.util.InvalidFormatException;

-import opennlp.tools.util.ObjectStream;

-import opennlp.tools.util.ObjectStreamUtils;

-import opennlp.tools.util.TrainingParameters;

-

-public class IMSME extends WSDisambiguator {

-

-  protected IMSModel imsModel;

-

-  protected static IMSContextGenerator cg = new DefaultIMSContextGenerator();

-

-  public IMSME(IMSParameters params) {

-    this.params = params;

-  }

-

-  public IMSME(IMSModel model, IMSParameters params) {

-    this.imsModel = model;

-    this.params = params;

-  }

-

-  public IMSModel getModel() {

-    return imsModel;

-  }

-

-  public void setModel(IMSModel model) {

-    this.imsModel = model;

-  }

-

-  public void setParameters(IMSParameters parameters) {

-    this.params = parameters;

-  }

-

-  public static IMSModel train(String lang, ObjectStream<WSDSample> samples,

-      TrainingParameters mlParams, IMSParameters imsParams,

-      IMSFactory imsfactory) throws IOException {

-

-    ArrayList<String> surroundingWordModel = buildSurroundingWords(samples,

-        imsParams.getWindowSize());

-

-    HashMap<String, String> manifestInfoEntries = new HashMap<String, String>();

-

-    MaxentModel imsModel = null;

-

-    ArrayList<Event> events = new ArrayList<Event>();

-    ObjectStream<Event> es = null;

-

-    WSDSample sample = samples.read();

-    String wordTag = "";

-    if (sample != null) {

-      wordTag = sample.getTargetWordTag();

-      do {

-

-        String sense = sample.getSenseIDs()[0];

-

-        String[] context = cg.getContext(sample, imsParams.ngram,

-            imsParams.windowSize, surroundingWordModel);

-        Event ev = new Event(sense + "", context);

-

-        events.add(ev);

-

-      } while ((sample = samples.read()) != null);

-    }

-

-    es = ObjectStreamUtils.createObjectStream(events);

-

-    EventTrainer trainer = TrainerFactory

-        .getEventTrainer(mlParams.getSettings(), manifestInfoEntries);

-    imsModel = trainer.train(es);

-

-    return new IMSModel(lang, wordTag, imsParams.windowSize, imsParams.ngram,

-        imsModel, surroundingWordModel, manifestInfoEntries, imsfactory);

-  }

-

-  public static ArrayList<String> buildSurroundingWords(

-      ObjectStream<WSDSample> samples, int windowSize) throws IOException {

-    DefaultIMSContextGenerator imsCG = new DefaultIMSContextGenerator();

-    ArrayList<String> surroundingWordsModel = new ArrayList<String>();

-    WSDSample sample;

-    while ((sample = samples.read()) != null) {

-      String[] words = imsCG.extractSurroundingWords(sample.getTargetPosition(),

-          sample.getSentence(), sample.getLemmas(), windowSize);

-

-      if (words.length > 0) {

-        for (String word : words) {

-          surroundingWordsModel.add(word);

-        }

-      }

-    }

-    samples.reset();

-    return surroundingWordsModel;

-  }

-

-  @Override

-  public String disambiguate(WSDSample sample) {

-    if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {

-      String wordTag = sample.getTargetWordTag();

-

-      if (imsModel == null

-          || !imsModel.getWordTag().equals(sample.getTargetWordTag())) {

-

-        String trainingFile = ((IMSParameters) this.getParams())

-            .getTrainingDataDirectory() + sample.getTargetWordTag();

-

-        File file = new File(trainingFile + ".ims.model");

-        if (file.exists() && !file.isDirectory()) {

-          try {

-            setModel(new IMSModel(file));

-

-          } catch (InvalidFormatException e) {

-            // TODO Auto-generated catch block

-            e.printStackTrace();

-          } catch (IOException e) {

-            // TODO Auto-generated catch block

-            e.printStackTrace();

-          }

-

-          String outcome = "";

-

-          String[] context = cg.getContext(sample,

-              ((IMSParameters) this.params).ngram,

-              ((IMSParameters) this.params).windowSize,

-              imsModel.getSurroundingWords());

-

-          double[] outcomeProbs = imsModel.getIMSMaxentModel().eval(context);

-          outcome = imsModel.getIMSMaxentModel().getBestOutcome(outcomeProbs);

-

-          if (outcome != null && !outcome.equals("")) {

-

-            return this.getParams().getSenseSource().name() + " "

-                + wordTag.split("\\.")[0] + "%" + outcome;

-

-          } else {

-            MFS mfs = new MFS();

-            return mfs.disambiguate(wordTag);

-          }

-

-        } else {

-          MFS mfs = new MFS();

-          return mfs.disambiguate(wordTag);

-        }

-      } else {

-

-        String outcome = "";

-

-        String[] context = cg.getContext(sample,

-            ((IMSParameters) this.params).ngram,

-            ((IMSParameters) this.params).windowSize,

-            imsModel.getSurroundingWords());

-

-        double[] outcomeProbs = imsModel.getIMSMaxentModel().eval(context);

-        outcome = imsModel.getIMSMaxentModel().getBestOutcome(outcomeProbs);

-

-        if (outcome != null && !outcome.equals("")) {

-

-          return this.getParams().getSenseSource().name() + " "

-              + wordTag.split("\\.")[0] + "%" + outcome;

-

-        } else {

-

-          MFS mfs = new MFS();

-          return mfs.disambiguate(wordTag);

-        }

-      }

-    } else {

-

-      if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {

-        return IMSParameters.SenseSource.WSDHELPER.name() + " "

-            + sample.getTargetTag();

-      } else {

-        return null;

-      }

-

-    }

-

-  }

-

-  /**

-   * The IMS disambiguation method for a single word

-   * 

-   * @param tokenizedContext

-   *          : the text containing the word to disambiguate

-   * @param tokenTags

-   *          : the tags corresponding to the context

-   * @param lemmas

-   *          : the lemmas of ALL the words in the context

-   * @param index

-   *          : the index of the word to disambiguate

-   * @return an array of the senses of the word to disambiguate

-   */

-  public String disambiguate(String[] tokenizedContext, String[] tokenTags,

-      String[] lemmas, int index) {

-    return disambiguate(

-        new WSDSample(tokenizedContext, tokenTags, lemmas, index));

-  }

-

-}


diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSModel.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSModel.java
deleted file mode 100644
index 9bdfd45..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSModel.java
+++ /dev/null

@@ -1,178 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one or more

- * contributor license agreements.  See the NOTICE file distributed with

- * this work for additional information regarding copyright ownership.

- * The ASF licenses this file to You under the Apache License, Version 2.0

- * (the "License"); you may not use this file except in compliance with

- * the License. You may obtain a copy of the License at

- *

- *     http://www.apache.org/licenses/LICENSE-2.0

- *

- * Unless required by applicable law or agreed to in writing, software

- * distributed under the License is distributed on an "AS IS" BASIS,

- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- * See the License for the specific language governing permissions and

- * limitations under the License.

- */

-

-package opennlp.tools.disambiguator.ims;

-

-import java.io.File;

-import java.io.IOException;

-import java.io.InputStream;

-import java.util.ArrayList;

-import java.util.Arrays;

-import java.util.Map;

-import java.util.Properties;

-import java.net.URL;

-

-import org.apache.commons.lang3.StringUtils;

-

-import opennlp.tools.cmdline.CmdLineUtil;

-import opennlp.tools.ml.model.AbstractModel;

-import opennlp.tools.ml.model.MaxentModel;

-import opennlp.tools.ml.model.SequenceClassificationModel;

-import opennlp.tools.util.BaseToolFactory;

-import opennlp.tools.util.InvalidFormatException;

-import opennlp.tools.util.model.BaseModel;

-

-// TODO remove this class later

-public class IMSModel extends BaseModel {

-

-  private static final String COMPONENT_NAME = "IMSME";

-  private static final String IMS_MODEL_ENTRY_NAME = "IMS.model";

-

-  private static final String WORDTAG = "wordtag";

-  private static final String WINSIZE = "winsize";

-  private static final String NGRAM = "ngram";

-  private static final String SURROUNDINGS = "surroundings";

-

-  private ArrayList<String> surroundingWords = new ArrayList<String>();

-  private String wordTag;

-

-  private int windowSize;

-  private int ngram;

-

-  public ArrayList<String> getSurroundingWords() {

-    return surroundingWords;

-  }

-

-  public int getWindowSize() {

-    return windowSize;

-  }

-

-  public void setWindowSize(int windowSize) {

-    this.windowSize = windowSize;

-  }

-

-  public int getNgram() {

-    return ngram;

-  }

-

-  public void setNgram(int ngram) {

-    this.ngram = ngram;

-  }

-

-  public void setSurroundingWords(ArrayList<String> surroundingWords) {

-    this.surroundingWords = surroundingWords;

-  }

-

-  public String getWordTag() {

-    return wordTag;

-  }

-

-  public void setWordTag(String wordTag) {

-    this.wordTag = wordTag;

-  }

-

-  public IMSModel(String languageCode, String wordTag, int windowSize,

-    int ngram, MaxentModel imsModel, ArrayList<String> surroundingWords,

-    Map<String, String> manifestInfoEntries, IMSFactory factory) {

-    super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);

-

-    artifactMap.put(IMS_MODEL_ENTRY_NAME, imsModel);

-    this.setManifestProperty(WORDTAG, wordTag);

-    this.setManifestProperty(WINSIZE, windowSize + "");

-    this.setManifestProperty(NGRAM, ngram + "");

-    this.setManifestProperty(SURROUNDINGS,

-      StringUtils.join(surroundingWords, ","));

-

-    this.surroundingWords = surroundingWords;

-    checkArtifactMap();

-  }

-

-  public IMSModel(String languageCode, String wordTag, int windowSize,

-    int ngram, MaxentModel imsModel, ArrayList<String> surroundingWords,

-    IMSFactory factory) {

-    this(languageCode, wordTag, windowSize, ngram, imsModel, surroundingWords,

-      null, factory);

-  }

-

-  public IMSModel(InputStream in) throws IOException, InvalidFormatException {

-    super(COMPONENT_NAME, in);

-    updateAttributes();

-  }

-

-  public IMSModel(File modelFile) throws IOException, InvalidFormatException {

-    super(COMPONENT_NAME, modelFile);

-    updateAttributes();

-    /*

-     * String modelPath = modelFile.getPath(); String surrPath =

-     * modelPath.substring(0, modelPath.length() - 6) + ".surr";

-     * 

-     * ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(

-     * new FileInputStream(surrPath))); try {

-     * this.setSurroundingWords((ArrayList<String>) ois.readObject()); } catch

-     * (ClassNotFoundException e) { // TODO Auto-generated catch block

-     * e.printStackTrace(); } finally { ois.close(); }

-     */

-  }

-

-  public IMSModel(URL modelURL) throws IOException, InvalidFormatException {

-    super(COMPONENT_NAME, modelURL);

-    updateAttributes();

-  }

-

-  // path must include the word.tag i.e. : write.v

-  public boolean writeModel(String path) {

-    File outFile = new File(path + ".ims.model");

-    CmdLineUtil.writeModel("ims model", outFile, this);

-    return true;

-  }

-

-  @Override protected void validateArtifactMap() throws InvalidFormatException {

-    super.validateArtifactMap();

-

-    if (!(artifactMap.get(IMS_MODEL_ENTRY_NAME) instanceof AbstractModel)) {

-      throw new InvalidFormatException("IMS model is incomplete!");

-    }

-  }

-

-  public MaxentModel getIMSMaxentModel() {

-    if (artifactMap.get(IMS_MODEL_ENTRY_NAME) instanceof MaxentModel) {

-      return (MaxentModel) artifactMap.get(IMS_MODEL_ENTRY_NAME);

-    } else {

-      return null;

-    }

-  }

-

-  public void updateAttributes() {

-    Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);

-    String surroundings = (String) manifest.get(SURROUNDINGS);

-

-    this.surroundingWords = new ArrayList(

-      Arrays.asList(surroundings.split(",")));

-    this.wordTag = (String) manifest.get(WORDTAG);

-    this.windowSize = Integer.parseInt((String) manifest.get(WINSIZE));

-    this.ngram = Integer.parseInt((String) manifest.get(NGRAM));

-  }

-

-  @Override protected Class<? extends BaseToolFactory> getDefaultFactory() {

-    return IMSFactory.class;

-  }

-

-  public IMSFactory getFactory() {

-    return (IMSFactory) this.toolFactory;

-  }

-

-}
\ No newline at end of file

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
deleted file mode 100644
index 6680335..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
+++ /dev/null

@@ -1,120 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator.ims;
-
-import java.io.File;
-import java.security.InvalidParameterException;
-
-import opennlp.tools.disambiguator.WSDParameters;
-
-/**
- * This class contains the parameters for the IMS approach as well as the
- * directories containing the files used
- */
-// TODO remove this class later
-public class IMSParameters extends WSDParameters {
-
-  protected String languageCode;
-  protected int windowSize;
-  protected int ngram;
-
-  protected String trainingDataDirectory;
-
-  protected static final int DFLT_WIN_SIZE = 3;
-  protected static final int DFLT_NGRAM = 2;
-  protected static final String DFLT_LANG_CODE = "En";
-  protected static final SenseSource DFLT_SOURCE = SenseSource.WORDNET;
-
-  /**
-   * This constructor takes only two parameters. The default language used is
-   * <i>English</i>
-   *
-   * @param windowSize  the size of the window used for the extraction of the features
-   *                    qualified of Surrounding Words
-   * @param ngram       the number words used for the extraction of features qualified of
-   *                    Local Collocations
-   * @param senseSource the source of the training data
-   */
-  public IMSParameters(int windowSize, int ngram, SenseSource senseSource,
-    String trainingDataDirectory) {
-
-    this.languageCode = DFLT_LANG_CODE;
-    this.windowSize = windowSize;
-    this.ngram = ngram;
-    this.senseSource = senseSource;
-    this.trainingDataDirectory = trainingDataDirectory;
-
-    File folder = new File(trainingDataDirectory);
-    if (!folder.exists())
-      folder.mkdirs();
-  }
-
-  public IMSParameters(String trainingDataDirectory) {
-    this(DFLT_WIN_SIZE, DFLT_NGRAM, DFLT_SOURCE, trainingDataDirectory);
-  }
-
-  public String getLanguageCode() {
-    return languageCode;
-  }
-
-  public void setLanguageCode(String languageCode) {
-    this.languageCode = languageCode;
-  }
-
-  public int getWindowSize() {
-    return windowSize;
-  }
-
-  public void setWindowSize(int windowSize) {
-    this.windowSize = windowSize;
-  }
-
-  public int getNgram() {
-    return ngram;
-  }
-
-  public void setNgram(int ngram) {
-    this.ngram = ngram;
-  }
-
-  void init() {
-  }
-
-  /**
-   * Creates the context generator of IMS
-   */
-  public IMSContextGenerator createContextGenerator() {
-    return new DefaultIMSContextGenerator();
-  }
-
-  public String getTrainingDataDirectory() {
-    return trainingDataDirectory;
-  }
-
-  public void setTrainingDataDirectory(String trainingDataDirectory) {
-    this.trainingDataDirectory = trainingDataDirectory;
-  }
-
-  @Override public boolean isValid() {
-    // TODO recheck this pattern switch to maps
-    return true;
-  }
-
-}

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
deleted file mode 100644
index 32bb5da..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
+++ /dev/null

@@ -1,200 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator.ims;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import net.sf.extjwnl.data.POS;
-import opennlp.tools.disambiguator.WSDHelper;
-import opennlp.tools.disambiguator.WSDSample;
-
-public class WTDIMS {
-
-  // Attributes related to the context
-  protected String[] sentence;
-  protected String[] posTags;
-  protected String[] lemmas;
-  protected int wordIndex;
-  protected int sense;
-  protected String[] senseIDs;
-
-  // Attributes related to IMS features
-  protected String[] posOfSurroundingWords;
-  protected String[] surroundingWords;
-  protected String[] localCollocations;
-  protected String[] features;
-
-  public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
-      int wordIndex) {
-    this.sentence = sentence;
-    this.posTags = posTags;
-    this.wordIndex = wordIndex;
-    this.lemmas = lemmas;
-  }
-
-  public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
-      int wordIndex, String[] senseIDs) {
-    this.sentence = sentence;
-    this.posTags = posTags;
-    this.wordIndex = wordIndex;
-    this.lemmas = lemmas;
-    this.senseIDs = senseIDs;
-
-  }
-
-  public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
-      String word, String[] senseIDs) {
-    super();
-
-    this.sentence = sentence;
-    this.posTags = posTags;
-    this.lemmas = lemmas;
-
-    for (int i = 0; i < sentence.length; i++) {
-      if (word.equals(sentence[i])) {
-        this.wordIndex = i;
-        break;
-      }
-    }
-
-    this.senseIDs = senseIDs;
-
-  }
-
-  public WTDIMS(WSDSample sample) {
-    this.sentence = sample.getSentence();
-    this.posTags = sample.getTags();
-    this.lemmas = sample.getLemmas();
-    this.wordIndex = sample.getTargetPosition();
-    this.senseIDs = sample.getSenseIDs();
-
-  }
-
-  public String[] getSentence() {
-    return sentence;
-  }
-
-  public void setSentence(String[] sentence) {
-    this.sentence = sentence;
-  }
-
-  public String[] getPosTags() {
-    return posTags;
-  }
-
-  public void setPosTags(String[] posTags) {
-    this.posTags = posTags;
-  }
-
-  public int getWordIndex() {
-    return wordIndex;
-  }
-
-  public void setWordIndex(int wordIndex) {
-    this.wordIndex = wordIndex;
-  }
-
-  public String[] getLemmas() {
-    return lemmas;
-  }
-
-  public void setLemmas(String[] lemmas) {
-    this.lemmas = lemmas;
-  }
-
-  public int getSense() {
-    return sense;
-  }
-
-  public void setSense(int sense) {
-    this.sense = sense;
-  }
-
-  public String[] getSenseIDs() {
-    return senseIDs;
-  }
-
-  public void setSenseIDs(String[] senseIDs) {
-    this.senseIDs = senseIDs;
-  }
-
-  public String getWord() {
-    return this.getSentence()[this.getWordIndex()];
-  }
-
-  public String getWordTag() {
-
-    String wordBaseForm = this.getLemmas()[this.getWordIndex()];
-
-    String ref = "";
-
-    if ((WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()]) != null)) {
-      if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
-          .equals(POS.VERB)) {
-        ref = wordBaseForm + ".v";
-      } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
-          .equals(POS.NOUN)) {
-        ref = wordBaseForm + ".n";
-      } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
-          .equals(POS.ADJECTIVE)) {
-        ref = wordBaseForm + ".a";
-      } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
-          .equals(POS.ADVERB)) {
-        ref = wordBaseForm + ".r";
-      }
-    }
-
-    return ref;
-  }
-
-  public String[] getPosOfSurroundingWords() {
-    return posOfSurroundingWords;
-  }
-
-  public void setPosOfSurroundingWords(String[] posOfSurroundingWords) {
-    this.posOfSurroundingWords = posOfSurroundingWords;
-  }
-
-  public String[] getSurroundingWords() {
-    return surroundingWords;
-  }
-
-  public void setSurroundingWords(String[] surroundingWords) {
-    this.surroundingWords = surroundingWords;
-  }
-
-  public String[] getLocalCollocations() {
-    return localCollocations;
-  }
-
-  public void setLocalCollocations(String[] localCollocations) {
-    this.localCollocations = localCollocations;
-  }
-
-  public String[] getFeatures() {
-    return this.features;
-  }
-
-  public void setFeatures(String[] features) {
-    this.features = features;
-  }
-
-}

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContextGenerator.java
deleted file mode 100644
index f7247c0..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContextGenerator.java
+++ /dev/null

@@ -1,112 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one

- * or more contributor license agreements.  See the NOTICE file

- * distributed with this work for additional information

- * regarding copyright ownership.  The ASF licenses this file

- * to you under the Apache License, Version 2.0 (the

- * "License"); you may not use this file except in compliance

- * with the License.  You may obtain a copy of the License at

- * 

- *   http://www.apache.org/licenses/LICENSE-2.0

- * 

- * Unless required by applicable law or agreed to in writing,

- * software distributed under the License is distributed on an

- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

- * KIND, either express or implied.  See the License for the

- * specific language governing permissions and limitations

- * under the License.

- */

-

-package opennlp.tools.disambiguator.oscc;

-

-import java.util.ArrayList;

-import java.util.Arrays;

-import java.util.HashSet;

-

-import net.sf.extjwnl.data.Synset;

-import opennlp.tools.disambiguator.WSDHelper;

-import opennlp.tools.disambiguator.WSDSample;

-import opennlp.tools.disambiguator.WordPOS;

-

-/**

- * The default Context Generator of IMS

- */

-// TODO remove this class later

-public class DefaultOSCCContextGenerator implements OSCCContextGenerator {

-

-  public DefaultOSCCContextGenerator() {

-  }

-

-  public String[] extractSurroundingContextClusters(int index, String[] toks,

-    String[] tags, String[] lemmas, int windowSize) {

-

-    ArrayList<String> contextClusters = new ArrayList<String>();

-

-    for (int i = 0; i < toks.length; i++) {

-      if (lemmas != null) {

-

-        if (!WSDHelper.stopWords.contains(toks[i].toLowerCase()) && (index

-          != i)) {

-

-          String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "")

-            .trim();

-

-          WordPOS word = new WordPOS(lemma, tags[i]);

-

-          if (lemma.length() > 1) {

-            try {

-              ArrayList<Synset> synsets = word.getSynsets();

-              if (synsets != null && synsets.size() > 0) {

-                for (Synset syn : synsets) {

-                  contextClusters.add(syn.getOffset() + "");

-                }

-              }

-            } catch (NullPointerException ex) {

-              // TODO tagger mistake add proper exception

-            }

-          }

-

-        }

-      }

-    }

-

-    return contextClusters.toArray(new String[contextClusters.size()]);

-

-  }

-

-  /**

-   * Get Context of a word To disambiguate

-   *

-   * @return The OSCC context of the word to disambiguate

-   */

-  @Override public String[] getContext(int index, String[] toks, String[] tags,

-    String[] lemmas, int windowSize, ArrayList<String> model) {

-

-    HashSet<String> surroundingContextClusters = new HashSet<>();

-    surroundingContextClusters.addAll(Arrays.asList(

-      extractSurroundingContextClusters(index, toks, tags, lemmas,

-        windowSize)));

-

-    String[] serializedFeatures = new String[model.size()];

-

-    int i = 0;

-    for (String word : model) {

-      if (surroundingContextClusters.contains(word.toString())) {

-        serializedFeatures[i] = "F" + i + "=1";

-      } else {

-        serializedFeatures[i] = "F" + i + "=0";

-      }

-      i++;

-    }

-

-    return serializedFeatures;

-  }

-

-  public String[] getContext(WSDSample sample, int windowSize,

-    ArrayList<String> model) {

-

-    return getContext(sample.getTargetPosition(), sample.getSentence(),

-      sample.getTags(), sample.getLemmas(), windowSize, model);

-  }

-

-}


diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerator.java
deleted file mode 100644
index fad17d5..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerator.java
+++ /dev/null

@@ -1,37 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one

- * or more contributor license agreements.  See the NOTICE file

- * distributed with this work for additional information

- * regarding copyright ownership.  The ASF licenses this file

- * to you under the Apache License, Version 2.0 (the

- * "License"); you may not use this file except in compliance

- * with the License.  You may obtain a copy of the License at

- * 

- *   http://www.apache.org/licenses/LICENSE-2.0

- * 

- * Unless required by applicable law or agreed to in writing,

- * software distributed under the License is distributed on an

- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

- * KIND, either express or implied.  See the License for the

- * specific language governing permissions and limitations

- * under the License.

- */

-

-package opennlp.tools.disambiguator.oscc;

-

-import java.util.ArrayList;

-

-import opennlp.tools.disambiguator.WSDSample;

-

-/**

- * Interface for {@link OSCCME} context generators.

- */

-// TODO remove this class later

-public interface OSCCContextGenerator {

-

-  String[] getContext(int index, String[] toks, String[] tags, String[] lemmas,

-    int windowSize, ArrayList<String> model);

-

-  String[] getContext(WSDSample sample, int windowSize,

-    ArrayList<String> model);

-}


diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCFactory.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCFactory.java
deleted file mode 100644
index 0f6ce53..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCFactory.java
+++ /dev/null

@@ -1,62 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one or more

- * contributor license agreements.  See the NOTICE file distributed with

- * this work for additional information regarding copyright ownership.

- * The ASF licenses this file to You under the Apache License, Version 2.0

- * (the "License"); you may not use this file except in compliance with

- * the License. You may obtain a copy of the License at

- *

- *     http://www.apache.org/licenses/LICENSE-2.0

- *

- * Unless required by applicable law or agreed to in writing, software

- * distributed under the License is distributed on an "AS IS" BASIS,

- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- * See the License for the specific language governing permissions and

- * limitations under the License.

- */

-

-package opennlp.tools.disambiguator.oscc;

-

-import opennlp.tools.util.BaseToolFactory;

-import opennlp.tools.util.InvalidFormatException;

-import opennlp.tools.util.ext.ExtensionLoader;

-

-// TODO remove this class later

-public class OSCCFactory extends BaseToolFactory {

-

-  /**

-   * Creates a {@link OSCCFactory} that provides the default implementation of

-   * the resources.

-   */

-  public OSCCFactory() {

-

-  }

-

-  public static OSCCFactory create(String subclassName)

-    throws InvalidFormatException {

-    if (subclassName == null) {

-      // will create the default factory

-      return new OSCCFactory();

-    }

-    try {

-      OSCCFactory theFactory = ExtensionLoader

-        .instantiateExtension(OSCCFactory.class, subclassName);

-      return theFactory;

-    } catch (Exception e) {

-      String msg = "Could not instantiate the " + subclassName

-        + ". The initialization throw an exception.";

-      System.err.println(msg);

-      e.printStackTrace();

-      throw new InvalidFormatException(msg, e);

-    }

-  }

-

-  @Override public void validateArtifactMap() throws InvalidFormatException {

-    // no additional artifacts

-  }

-

-  public OSCCContextGenerator getContextGenerator() {

-    return new DefaultOSCCContextGenerator();

-  }

-

-}


diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java
deleted file mode 100644
index f06f140..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java
+++ /dev/null

@@ -1,245 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one or more

- * contributor license agreements.  See the NOTICE file distributed with

- * this work for additional information regarding copyright ownership.

- * The ASF licenses this file to You under the Apache License, Version 2.0

- * (the "License"); you may not use this file except in compliance with

- * the License. You may obtain a copy of the License at

- *

- *     http://www.apache.org/licenses/LICENSE-2.0

- *

- * Unless required by applicable law or agreed to in writing, software

- * distributed under the License is distributed on an "AS IS" BASIS,

- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- * See the License for the specific language governing permissions and

- * limitations under the License.

- */

-

-package opennlp.tools.disambiguator.oscc;

-

-import java.io.File;

-import java.io.IOException;

-import java.util.ArrayList;

-import java.util.HashMap;

-

-import opennlp.tools.disambiguator.WSDHelper;

-import opennlp.tools.disambiguator.WSDSample;

-import opennlp.tools.disambiguator.WSDisambiguator;

-import opennlp.tools.disambiguator.MFS;

-import opennlp.tools.ml.EventTrainer;

-import opennlp.tools.ml.TrainerFactory;

-import opennlp.tools.ml.model.MaxentModel;

-import opennlp.tools.ml.model.Event;

-import opennlp.tools.util.InvalidFormatException;

-import opennlp.tools.util.ObjectStream;

-import opennlp.tools.util.ObjectStreamUtils;

-import opennlp.tools.util.TrainingParameters;

-

-/**

- * Maximum Entropy version of the <b>one sence per cluster</b> approach in

- * 

- * http://nlp.cs.rpi.edu/paper/wsd.pdf

- * 

- * The approach is a hybrid approach using unsupervised context clustering to

- * enhance disambiguation using a typical classifier.

- * 

- * The context clusters are considered a group of words representing an enriched

- * context of a target word.

- * 

- * The clusters can be formed by clustering techniques like K-means, or a

- * simpler version can use WordNet to get clusters simply from SynSets.

- * 

- * Please see {@link DefaultOSCCContextGenerator}

- * 

- * The approach finds the context clusters surrounding the target and uses a

- * classifier to judge on the best case.

- * 

- * Here an ME classifier is used.

- * 

- */

-public class OSCCME extends WSDisambiguator {

-

-  protected OSCCModel osccModel;

-

-  protected static OSCCContextGenerator cg = new DefaultOSCCContextGenerator();

-

-  public OSCCME(OSCCParameters params) {

-    this.params = params;

-  }

-

-  public OSCCME(OSCCModel model, OSCCParameters params) {

-    this.osccModel = model;

-    this.params = params;

-  }

-

-  public OSCCModel getModel() {

-    return osccModel;

-  }

-

-  public void setModel(OSCCModel model) {

-    this.osccModel = model;

-  }

-

-  public void setParameters(OSCCParameters parameters) {

-    this.params = parameters;

-  }

-

-  public static OSCCModel train(String lang, ObjectStream<WSDSample> samples,

-      TrainingParameters mlParams, OSCCParameters osccParams,

-      OSCCFactory osccFactory) throws IOException {

-

-    ArrayList<String> surroundingClusterModel = buildSurroundingClusters(

-        samples, osccParams.getWindowSize());

-

-    HashMap<String, String> manifestInfoEntries = new HashMap<String, String>();

-

-    MaxentModel osccModel = null;

-

-    ArrayList<Event> events = new ArrayList<Event>();

-    ObjectStream<Event> es = null;

-

-    WSDSample sample = samples.read();

-    String wordTag = "";

-    if (sample != null) {

-      wordTag = sample.getTargetWordTag();

-      do {

-        String sense = sample.getSenseIDs()[0];

-        String[] context = cg.getContext(sample, osccParams.windowSize,

-            surroundingClusterModel);

-        Event ev = new Event(sense + "", context);

-        events.add(ev);

-      } while ((sample = samples.read()) != null);

-    }

-

-    es = ObjectStreamUtils.createObjectStream(events);

-    EventTrainer trainer = TrainerFactory

-        .getEventTrainer(mlParams.getSettings(), manifestInfoEntries);

-

-    osccModel = trainer.train(es);

-

-    return new OSCCModel(lang, wordTag, osccParams.windowSize, osccModel,

-        surroundingClusterModel, manifestInfoEntries, osccFactory);

-  }

-

-  public static ArrayList<String> buildSurroundingClusters(

-      ObjectStream<WSDSample> samples, int windowSize) throws IOException {

-    // TODO modify to clusters

-    DefaultOSCCContextGenerator osccCG = new DefaultOSCCContextGenerator();

-    ArrayList<String> surroundingWordsModel = new ArrayList<String>();

-    WSDSample sample;

-    while ((sample = samples.read()) != null) {

-      String[] words = osccCG.extractSurroundingContextClusters(

-          sample.getTargetPosition(), sample.getSentence(), sample.getTags(),

-          sample.getLemmas(), windowSize);

-

-      if (words.length > 0) {

-        for (String word : words) {

-          surroundingWordsModel.add(word);

-        }

-      }

-    }

-    samples.reset();

-    return surroundingWordsModel;

-  }

-

-  @Override

-  public String disambiguate(WSDSample sample) {

-    if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {

-      String wordTag = sample.getTargetWordTag();

-

-      if (osccModel == null

-          || !osccModel.getWordTag().equals(sample.getTargetWordTag())) {

-

-        String trainingFile = ((OSCCParameters) this.getParams())

-            .getTrainingDataDirectory() + sample.getTargetWordTag();

-

-        File file = new File(trainingFile + ".oscc.model");

-        if (file.exists() && !file.isDirectory()) {

-          try {

-            setModel(new OSCCModel(file));

-

-          } catch (InvalidFormatException e) {

-            // TODO Auto-generated catch block

-            e.printStackTrace();

-          } catch (IOException e) {

-            // TODO Auto-generated catch block

-            e.printStackTrace();

-          }

-

-          String outcome = "";

-

-          String[] context = cg.getContext(sample,

-              ((OSCCParameters) this.params).windowSize,

-              osccModel.getContextClusters());

-

-          double[] outcomeProbs = osccModel.getOSCCMaxentModel().eval(context);

-          outcome = osccModel.getOSCCMaxentModel().getBestOutcome(outcomeProbs);

-

-          if (outcome != null && !outcome.equals("")) {

-

-            return this.getParams().getSenseSource().name() + " "

-                + wordTag.split("\\.")[0] + "%" + outcome;

-

-          } else {

-            MFS mfs = new MFS();

-            return mfs.disambiguate(wordTag);

-          }

-

-        } else {

-

-          MFS mfs = new MFS();

-          return mfs.disambiguate(wordTag);

-        }

-      } else {

-        String outcome = "";

-

-        String[] context = cg.getContext(sample,

-            ((OSCCParameters) this.params).windowSize,

-            osccModel.getContextClusters());

-

-        double[] outcomeProbs = osccModel.getOSCCMaxentModel().eval(context);

-        outcome = osccModel.getOSCCMaxentModel().getBestOutcome(outcomeProbs);

-

-        if (outcome != null && !outcome.equals("")) {

-

-          return this.getParams().getSenseSource().name() + " "

-              + wordTag.split("\\.")[0] + "%" + outcome;

-        } else {

-

-          MFS mfs = new MFS();

-          return mfs.disambiguate(wordTag);

-        }

-      }

-    } else {

-

-      if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {

-        return OSCCParameters.SenseSource.WSDHELPER.name() + " "

-            + sample.getTargetTag();

-      } else {

-        return null;

-      }

-

-    }

-

-  }

-

-  /**

-   * The IMS disambiguation method for a single word

-   * 

-   * @param tokenizedContext

-   *          : the text containing the word to disambiguate

-   * @param tokenTags

-   *          : the tags corresponding to the context

-   * @param lemmas

-   *          : the lemmas of ALL the words in the context

-   * @param index

-   *          : the index of the word to disambiguate

-   * @return an array of the senses of the word to disambiguate

-   */

-  public String disambiguate(String[] tokenizedContext, String[] tokenTags,

-      String[] lemmas, int index) {

-    return disambiguate(

-        new WSDSample(tokenizedContext, tokenTags, lemmas, index));

-  }

-

-}


diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java
deleted file mode 100644
index 65495c2..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java
+++ /dev/null

@@ -1,155 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one or more

- * contributor license agreements.  See the NOTICE file distributed with

- * this work for additional information regarding copyright ownership.

- * The ASF licenses this file to You under the Apache License, Version 2.0

- * (the "License"); you may not use this file except in compliance with

- * the License. You may obtain a copy of the License at

- *

- *     http://www.apache.org/licenses/LICENSE-2.0

- *

- * Unless required by applicable law or agreed to in writing, software

- * distributed under the License is distributed on an "AS IS" BASIS,

- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- * See the License for the specific language governing permissions and

- * limitations under the License.

- */

-

-package opennlp.tools.disambiguator.oscc;

-

-import java.io.File;

-import java.io.IOException;

-import java.io.InputStream;

-import java.util.ArrayList;

-import java.util.Arrays;

-import java.util.Map;

-import java.util.Properties;

-import java.net.URL;

-

-import org.apache.commons.lang3.StringUtils;

-

-import opennlp.tools.cmdline.CmdLineUtil;

-import opennlp.tools.ml.model.AbstractModel;

-import opennlp.tools.ml.model.MaxentModel;

-import opennlp.tools.util.BaseToolFactory;

-import opennlp.tools.util.InvalidFormatException;

-import opennlp.tools.util.model.BaseModel;

-

-// TODO remove this class later

-public class OSCCModel extends BaseModel {

-

-  private static final String COMPONENT_NAME = "OSCCME";

-  private static final String OSCC_MODEL_ENTRY_NAME = "OSCC.model";

-

-  private static final String WORDTAG = "wordtag";

-  private static final String WINSIZE = "winsize";

-  private static final String CONTEXTCLUSTERS = "contextclusters";

-

-  private ArrayList<String> contextClusters = new ArrayList<String>();

-  private String wordTag;

-  private int windowSize;

-

-  public ArrayList<String> getContextClusters() {

-    return contextClusters;

-  }

-

-  public int getWindowSize() {

-    return windowSize;

-  }

-

-  public void setWindowSize(int windowSize) {

-    this.windowSize = windowSize;

-  }

-

-  public void setContextClusters(ArrayList<String> contextClusters) {

-    this.contextClusters = contextClusters;

-  }

-

-  public String getWordTag() {

-    return wordTag;

-  }

-

-  public void setWordTag(String wordTag) {

-    this.wordTag = wordTag;

-  }

-

-  public OSCCModel(String languageCode, String wordTag, int windowSize,

-    MaxentModel osccModel, ArrayList<String> contextClusters,

-    Map<String, String> manifestInfoEntries, OSCCFactory factory) {

-    super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);

-

-    artifactMap.put(OSCC_MODEL_ENTRY_NAME, osccModel);

-    this.setManifestProperty(WORDTAG, wordTag);

-    this.setManifestProperty(WINSIZE, windowSize + "");

-

-    this.setManifestProperty(CONTEXTCLUSTERS,

-      StringUtils.join(contextClusters, ","));

-

-    this.contextClusters = contextClusters;

-    checkArtifactMap();

-  }

-

-  public OSCCModel(String languageCode, String wordTag, int windowSize,

-    int ngram, MaxentModel osccModel, ArrayList<String> contextClusters,

-    OSCCFactory factory) {

-    this(languageCode, wordTag, windowSize, osccModel, contextClusters, null,

-      factory);

-  }

-

-  public OSCCModel(InputStream in) throws IOException, InvalidFormatException {

-    super(COMPONENT_NAME, in);

-    updateAttributes();

-  }

-

-  public OSCCModel(File modelFile) throws IOException, InvalidFormatException {

-    super(COMPONENT_NAME, modelFile);

-    updateAttributes();

-  }

-

-  public OSCCModel(URL modelURL) throws IOException, InvalidFormatException {

-    super(COMPONENT_NAME, modelURL);

-    updateAttributes();

-  }

-

-  // path must include the word.tag i.e. : write.v

-  public boolean writeModel(String path) {

-    File outFile = new File(path + ".oscc.model");

-    CmdLineUtil.writeModel("oscc model", outFile, this);

-    return true;

-  }

-

-  @Override protected void validateArtifactMap() throws InvalidFormatException {

-    super.validateArtifactMap();

-

-    if (!(artifactMap.get(OSCC_MODEL_ENTRY_NAME) instanceof AbstractModel)) {

-      throw new InvalidFormatException("OSCC model is incomplete!");

-    }

-  }

-

-  public MaxentModel getOSCCMaxentModel() {

-    if (artifactMap.get(OSCC_MODEL_ENTRY_NAME) instanceof MaxentModel) {

-      return (MaxentModel) artifactMap.get(OSCC_MODEL_ENTRY_NAME);

-    } else {

-      return null;

-    }

-  }

-

-  public void updateAttributes() {

-    Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);

-    String contextClusters = (String) manifest.get(CONTEXTCLUSTERS);

-

-    this.contextClusters = new ArrayList(

-      Arrays.asList(contextClusters.split(",")));

-    this.wordTag = (String) manifest.get(WORDTAG);

-    this.windowSize = Integer.parseInt((String) manifest.get(WINSIZE));

-  }

-

-  @Override protected Class<? extends BaseToolFactory> getDefaultFactory() {

-    return OSCCFactory.class;

-  }

-

-  public OSCCFactory getFactory() {

-    return (OSCCFactory) this.toolFactory;

-  }

-

-}
\ No newline at end of file

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java
deleted file mode 100644
index 3f0eb2c..0000000
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java
+++ /dev/null

@@ -1,111 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one

- * or more contributor license agreements.  See the NOTICE file

- * distributed with this work for additional information

- * regarding copyright ownership.  The ASF licenses this file

- * to you under the Apache License, Version 2.0 (the

- * "License"); you may not use this file except in compliance

- * with the License.  You may obtain a copy of the License at

- * 

- *   http://www.apache.org/licenses/LICENSE-2.0

- * 

- * Unless required by applicable law or agreed to in writing,

- * software distributed under the License is distributed on an

- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

- * KIND, either express or implied.  See the License for the

- * specific language governing permissions and limitations

- * under the License.

- */

-

-package opennlp.tools.disambiguator.oscc;

-

-import java.io.File;

-

-import opennlp.tools.disambiguator.WSDParameters;

-

-/**

- * This class contains the parameters for the OSCC approach as well as the

- * directories containing the files used

- */

-// TODO remove this class later

-public class OSCCParameters extends WSDParameters {

-

-  protected String languageCode;

-  protected int windowSize;

-  protected String trainingDataDirectory;

-

-  protected static final int DFLT_WIN_SIZE = 3;

-  protected static final String DFLT_LANG_CODE = "En";

-  protected static final SenseSource DFLT_SOURCE = SenseSource.WORDNET;

-

-  /**

-   * This constructor takes only two parameters. The default language used is

-   * <i>English</i>

-   *

-   * @param windowSize  the size of the window used for the extraction of the features

-   *                    qualified of Surrounding Context Clusters

-   * @param senseSource the source of the training data

-   */

-  public OSCCParameters(int windowSize, SenseSource senseSource,

-    String trainingDataDirectory) {

-    this.languageCode = DFLT_LANG_CODE;

-    this.windowSize = windowSize;

-    this.senseSource = senseSource;

-    this.trainingDataDirectory = trainingDataDirectory;

-

-    File folder = new File(trainingDataDirectory);

-    if (!folder.exists())

-      folder.mkdirs();

-  }

-

-  public OSCCParameters(String trainingDataDirectory) {

-    this(DFLT_WIN_SIZE, DFLT_SOURCE, trainingDataDirectory);

-

-    File folder = new File(trainingDataDirectory);

-    if (!folder.exists())

-      folder.mkdirs();

-  }

-

-  public OSCCParameters() {

-    this(DFLT_WIN_SIZE, DFLT_SOURCE, null);

-  }

-

-  public OSCCParameters(int windowSize) {

-    this(windowSize, DFLT_SOURCE, null);

-  }

-

-  public String getLanguageCode() {

-    return languageCode;

-  }

-

-  public void setLanguageCode(String languageCode) {

-    this.languageCode = languageCode;

-  }

-

-  public int getWindowSize() {

-    return windowSize;

-  }

-

-  public void setWindowSize(int windowSize) {

-    this.windowSize = windowSize;

-  }

-

-  public OSCCContextGenerator createContextGenerator() {

-

-    return new DefaultOSCCContextGenerator();

-  }

-

-  public String getTrainingDataDirectory() {

-    return trainingDataDirectory;

-  }

-

-  public void setTrainingDataDirectory(String trainingDataDirectory) {

-    this.trainingDataDirectory = trainingDataDirectory;

-  }

-

-  @Override public boolean isValid() {

-    // TODO make validity check

-    return true;

-  }

-

-}


diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
deleted file mode 100644
index c5e63cf..0000000
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
+++ /dev/null

@@ -1,75 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one

- * or more contributor license agreements.  See the NOTICE file

- * distributed with this work for additional information

- * regarding copyright ownership.  The ASF licenses this file

- * to you under the Apache License, Version 2.0 (the

- * "License"); you may not use this file except in compliance

- * with the License.  You may obtain a copy of the License at

- * 

- *   http://www.apache.org/licenses/LICENSE-2.0

- * 

- * Unless required by applicable law or agreed to in writing,

- * software distributed under the License is distributed on an

- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

- * KIND, either express or implied.  See the License for the

- * specific language governing permissions and limitations

- * under the License.

- */

-

-package opennlp.tools.disambiguator;

-

-import java.util.ArrayList;

-

-import opennlp.tools.disambiguator.datareader.SensevalReader;

-import opennlp.tools.disambiguator.ims.IMSME;

-import opennlp.tools.disambiguator.ims.IMSParameters;

-

-import org.junit.Test;

-

-public class IMSEvaluatorTest {

-

-  static SensevalReader seReader = new SensevalReader();

-

-  @Test

-  public static void main(String[] args) {

-    

-    

-    WSDHelper.print("Evaluation Started");

-    

-    // TODO write unit test

-    String modelsDir = "src\\test\\resources\\models\\";

-    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");

-    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");

-    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");

-

-    IMSParameters imsParams = new IMSParameters("");

-    IMSME ims = new IMSME(imsParams);

-

-    ArrayList<String> words = seReader.getSensevalWords();

-

-    for (String word : words) {

-      WSDEvaluator evaluator = new WSDEvaluator(ims);

-

-      // don't take verbs because they are not from WordNet

-      if (!word.split("\\.")[1].equals("v")) {

-

-        ArrayList<WSDSample> instances = seReader.getSensevalData(word);

-        if (instances != null) {

-          WSDHelper.print("------------------" + word + "------------------");

-          for (WSDSample instance : instances) {

-            if (instance.getSenseIDs() != null

-                && !instance.getSenseIDs()[0].equals("null")) {

-              evaluator.evaluateSample(instance);

-            }

-          }

-          WSDHelper.print(evaluator.toString());

-        } else {

-          WSDHelper.print("null instances");

-        }

-      }

-

-    }

-

-  }

-}


diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
deleted file mode 100644
index 881de6a..0000000
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
+++ /dev/null

@@ -1,192 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-import static org.junit.Assert.*;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
-import opennlp.tools.disambiguator.ims.IMSFactory;
-import opennlp.tools.disambiguator.ims.IMSME;
-import opennlp.tools.disambiguator.ims.IMSModel;
-import opennlp.tools.disambiguator.ims.IMSParameters;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.Span;
-import opennlp.tools.util.TrainingParameters;
-
-/**
- * This is the test class for {@link IMSME}.
- * 
- * The scope of this test is to make sure that the IMS disambiguator code can be
- * executed. This test can not detect mistakes which lead to incorrect feature
- * generation or other mistakes which decrease the disambiguation performance of
- * the disambiguator.
- * 
- * In this test the {@link IMSME} is trained with Semcor and then the computed
- * model is used to predict sentences from the training sentences.
- */
-public class IMSMETester {
-  // TODO write more tests
-  // TODO modify when we fix the parameter model
-
-  static String modelsDir = "src\\test\\resources\\models\\";
-  static String trainingDataDirectory = "src\\test\\resources\\supervised\\models\\";
-
-  static IMSParameters IMSParams;
-  static IMSME ims;
-  static IMSFactory IMSFactory;
-  static IMSModel model;
-
-  static String test = "please.v";
-  static File outFile;
-
-  static String test1 = "We need to discuss an important topic, please write to me soon.";
-  static String test2 = "The component was highly radioactive to the point that"
-      + " it has been activated the second it touched water";
-  static String test3 = "The summer is almost over and I did not go to the beach even once";
-
-  static String[] sentence1;
-  static String[] sentence2;
-  static String[] sentence3;
-
-  static String[] tags1;
-  static String[] tags2;
-  static String[] tags3;
-
-  static String[] lemmas1;
-  static String[] lemmas2;
-  static String[] lemmas3;
-
-  /*
-   * Setup the testing variables
-   */
-  @BeforeClass
-  public static void setUpAndTraining() {
-    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
-    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
-    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
-
-    sentence1 = WSDHelper.getTokenizer().tokenize(test1);
-    sentence2 = WSDHelper.getTokenizer().tokenize(test2);
-    sentence3 = WSDHelper.getTokenizer().tokenize(test3);
-
-    tags1 = WSDHelper.getTagger().tag(sentence1);
-    tags2 = WSDHelper.getTagger().tag(sentence2);
-    tags3 = WSDHelper.getTagger().tag(sentence3);
-
-    List<String> tempLemmas1 = new ArrayList<String>();
-    for (int i = 0; i < sentence1.length; i++) {
-      tempLemmas1
-          .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));
-    }
-    lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
-
-    List<String> tempLemmas2 = new ArrayList<String>();
-    for (int i = 0; i < sentence2.length; i++) {
-      tempLemmas2
-          .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));
-    }
-    lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
-
-    List<String> tempLemmas3 = new ArrayList<String>();
-    for (int i = 0; i < sentence3.length; i++) {
-      tempLemmas3
-          .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));
-    }
-    lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
-
-    IMSParams = new IMSParameters("");
-    IMSParams.setTrainingDataDirectory(trainingDataDirectory);
-    IMSFactory = new IMSFactory();
-    TrainingParameters trainingParams = new TrainingParameters();
-    SemcorReaderExtended sr = new SemcorReaderExtended();
-    ObjectStream<WSDSample> sampleStream = sr.getSemcorDataStream(test);
-
-    IMSModel writeModel = null;
-    /*
-     * Tests training the disambiguator We test both writing and reading a model
-     * file trained by semcor
-     */
-
-    try {
-      writeModel = IMSME.train("en", sampleStream, trainingParams, IMSParams,
-          IMSFactory);
-      assertNotNull("Checking the model to be written", writeModel);
-      writeModel.writeModel(IMSParams.getTrainingDataDirectory() + test);
-      outFile = new File(
-          IMSParams.getTrainingDataDirectory() + test + ".ims.model");
-      model = new IMSModel(outFile);
-      assertNotNull("Checking the read model", model);
-      ims = new IMSME(model, IMSParams);
-      assertNotNull("Checking the disambiguator", ims);
-    } catch (IOException e1) {
-      e1.printStackTrace();
-      fail("Exception in training");
-    }
-  }
-
-  /*
-   * Tests disambiguating only one word : The ambiguous word "please"
-   */
-  @Test
-  public void testOneWordDisambiguation() {
-    String sense = ims.disambiguate(sentence1, tags1, lemmas1, 8);
-    assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);
-  }
-
-  /*
-   * Tests disambiguating a word Span In this case we test a mix of monosemous
-   * and polysemous words as well as words that do not need disambiguation such
-   * as determiners
-   */
-  @Test
-  public void testWordSpanDisambiguation() {
-    Span span = new Span(3, 7);
-    List<String> senses = ims.disambiguate(sentence2, tags2, lemmas2, span);
-
-    assertEquals("Check number of returned words", 5, senses.size());
-    assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",
-        senses.get(0));
-    assertEquals("Check 'radioactive' sense ID",
-        "WORDNET radioactive%3:00:00::", senses.get(1));
-    assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
-    assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
-  }
-
-  /*
-   * Tests disambiguating all the words
-   */
-  @Test
-  public void testAllWordsDisambiguation() {
-    List<String> senses = ims.disambiguate(sentence3, tags3, lemmas3);
-
-    assertEquals("Check number of returned words", 15, senses.size());
-    assertEquals("Check preposition", "WSDHELPER personal pronoun",
-        senses.get(6));
-  }
-
-}

diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java
deleted file mode 100644
index f46a58b..0000000
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java
+++ /dev/null

@@ -1,92 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one

- * or more contributor license agreements.  See the NOTICE file

- * distributed with this work for additional information

- * regarding copyright ownership.  The ASF licenses this file

- * to you under the Apache License, Version 2.0 (the

- * "License"); you may not use this file except in compliance

- * with the License.  You may obtain a copy of the License at

- * 

- *   http://www.apache.org/licenses/LICENSE-2.0

- * 

- * Unless required by applicable law or agreed to in writing,

- * software distributed under the License is distributed on an

- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

- * KIND, either express or implied.  See the License for the

- * specific language governing permissions and limitations

- * under the License.

- */

-

-package opennlp.tools.disambiguator;

-

-import java.io.IOException;

-import java.util.ArrayList;

-

-import opennlp.tools.disambiguator.datareader.SensevalReader;

-import opennlp.tools.disambiguator.oscc.OSCCFactory;

-import opennlp.tools.disambiguator.oscc.OSCCME;

-import opennlp.tools.disambiguator.oscc.OSCCModel;

-import opennlp.tools.disambiguator.oscc.OSCCParameters;

-import opennlp.tools.util.TrainingParameters;

-

-import org.junit.Test;

-

-public class OSCCEvaluatorTest {

-

-  static SensevalReader seReader = new SensevalReader();

-

-  @Test

-  public static void main(String[] args) {

-    

-    

-    WSDHelper.print("Evaluation Started");

-    

-    // TODO write unit test

-    String modelsDir = "src\\test\\resources\\models\\";

-    String trainingDataDirectory = "src\\test\\resources\\supervised\\models\\";

-    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");

-    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");

-    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");

-

-    OSCCParameters OSCCParams = new OSCCParameters("");

-    OSCCParams.setTrainingDataDirectory(trainingDataDirectory);

-    OSCCME oscc = new OSCCME(OSCCParams);

-    OSCCModel model = null;

-    ArrayList<String> words = seReader.getSensevalWords();

-

-    for (String word : words) {

-      // don't take verbs because they are not from WordNet

-      if (!word.split("\\.")[1].equals("v")) {

-      try {

-        model = OSCCME.train("en", seReader.getSensevalDataStream(word), new TrainingParameters(), OSCCParams,

-            new OSCCFactory());

-        model.writeModel(OSCCParams.getTrainingDataDirectory() + word);

-        oscc = new OSCCME(model, OSCCParams);

-        

-      } catch (IOException e) {

-        e.printStackTrace();

-        WSDHelper.print("skipped sample");

-      }

-      

-      WSDEvaluator evaluator = new WSDEvaluator(oscc);

-        ArrayList<WSDSample> instances = seReader.getSensevalData(word);

-        if (instances != null) {

-          WSDHelper.print("------------------" + word + "------------------");

-          for (WSDSample instance : instances) {

-            if (instance.getSenseIDs() != null

-                && !instance.getSenseIDs()[0].equals("null")) {

-              evaluator.evaluateSample(instance);

-            }else{

-              WSDHelper.print("skipped sample");

-            }

-          }

-          WSDHelper.print(evaluator.toString());

-        } else {

-          WSDHelper.print("null instances");

-        }

-      }

-

-    }

-

-  }

-}


diff --git a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java b/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
deleted file mode 100644
index 63fb07d..0000000
--- a/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
+++ /dev/null

@@ -1,195 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one

- * or more contributor license agreements.  See the NOTICE file

- * distributed with this work for additional information

- * regarding copyright ownership.  The ASF licenses this file

- * to you under the Apache License, Version 2.0 (the

- * "License"); you may not use this file except in compliance

- * with the License.  You may obtain a copy of the License at

- * 

- *   http://www.apache.org/licenses/LICENSE-2.0

- * 

- * Unless required by applicable law or agreed to in writing,

- * software distributed under the License is distributed on an

- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

- * KIND, either express or implied.  See the License for the

- * specific language governing permissions and limitations

- * under the License.

- */

-

-package opennlp.tools.disambiguator;

-

-import static org.junit.Assert.assertEquals;

-import static org.junit.Assert.assertNotNull;

-import static org.junit.Assert.fail;

-

-import java.io.File;

-import java.io.IOException;

-import java.util.ArrayList;

-import java.util.List;

-

-import org.junit.BeforeClass;

-import org.junit.Test;

-

-import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;

-import opennlp.tools.disambiguator.oscc.OSCCFactory;

-import opennlp.tools.disambiguator.oscc.OSCCME;

-import opennlp.tools.disambiguator.oscc.OSCCModel;

-import opennlp.tools.disambiguator.oscc.OSCCParameters;

-import opennlp.tools.util.ObjectStream;

-import opennlp.tools.util.Span;

-import opennlp.tools.util.TrainingParameters;

-

-/**

- * This is the test class for {@link OSCCME}.

- * 

- * The scope of this test is to make sure that the OSCC disambiguator code can

- * be executed. This test can not detect mistakes which lead to incorrect

- * feature generation or other mistakes which decrease the disambiguation

- * performance of the disambiguator.

- * 

- * In this test the {@link OSCCME} is trained with Semcor and then the computed

- * model is used to predict sentences from the training sentences.

- */

-public class OSCCMETester {

-  // TODO write more tests

-  // TODO modify when we fix the parameter model

-

-  static String modelsDir = "src\\test\\resources\\models\\";

-  static String trainingDataDirectory = "src\\test\\resources\\supervised\\models\\";

-

-  static OSCCParameters OSCCParams;

-  static OSCCME oscc;

-  static OSCCFactory osccFactory;

-  static OSCCModel model;

-

-  static String test = "please.v";

-  static File outFile;

-

-  static String test1 = "We need to discuss an important topic, please write to me soon.";

-  static String test2 = "The component was highly radioactive to the point that"

-      + " it has been activated the second it touched water";

-  static String test3 = "The summer is almost over and I did not go to the beach even once";

-

-  static String[] sentence1;

-  static String[] sentence2;

-  static String[] sentence3;

-

-  static String[] tags1;

-  static String[] tags2;

-  static String[] tags3;

-

-  static String[] lemmas1;

-  static String[] lemmas2;

-  static String[] lemmas3;

-

-  /*

-   * Setup the testing variables

-   */

-  @BeforeClass

-  public static void setUpAndTraining() {

-    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");

-    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");

-    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");

-

-    sentence1 = WSDHelper.getTokenizer().tokenize(test1);

-    sentence2 = WSDHelper.getTokenizer().tokenize(test2);

-    sentence3 = WSDHelper.getTokenizer().tokenize(test3);

-

-    tags1 = WSDHelper.getTagger().tag(sentence1);

-    tags2 = WSDHelper.getTagger().tag(sentence2);

-    tags3 = WSDHelper.getTagger().tag(sentence3);

-

-    List<String> tempLemmas1 = new ArrayList<String>();

-    for (int i = 0; i < sentence1.length; i++) {

-      tempLemmas1

-          .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));

-    }

-    lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);

-

-    List<String> tempLemmas2 = new ArrayList<String>();

-    for (int i = 0; i < sentence2.length; i++) {

-      tempLemmas2

-          .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));

-    }

-    lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);

-

-    List<String> tempLemmas3 = new ArrayList<String>();

-    for (int i = 0; i < sentence3.length; i++) {

-      tempLemmas3

-          .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));

-    }

-    lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);

-

-    OSCCParams = new OSCCParameters("");

-    OSCCParams.setTrainingDataDirectory(trainingDataDirectory);

-    osccFactory = new OSCCFactory();

-    TrainingParameters trainingParams = new TrainingParameters();

-    SemcorReaderExtended sr = new SemcorReaderExtended();

-    ObjectStream<WSDSample> sampleStream = sr.getSemcorDataStream(test);

-

-    OSCCModel writeModel = null;

-    /*

-     * Tests training the disambiguator We test both writing and reading a model

-     * file trained by semcor

-     */

-

-    try {

-      writeModel = OSCCME.train("en", sampleStream, trainingParams, OSCCParams,

-          osccFactory);

-      assertNotNull("Checking the model to be written", writeModel);

-      writeModel.writeModel(OSCCParams.getTrainingDataDirectory() + test);

-      outFile = new File(

-          OSCCParams.getTrainingDataDirectory() + test + ".oscc.model");

-      model = new OSCCModel(outFile);

-      assertNotNull("Checking the read model", model);

-      oscc = new OSCCME(model, OSCCParams);

-      assertNotNull("Checking the disambiguator", oscc);

-    } catch (IOException e1) {

-      e1.printStackTrace();

-      fail("Exception in training");

-    }

-  }

-

-  /*

-   * Tests disambiguating only one word : The ambiguous word "please"

-   */

-  @Test

-  public void testOneWordDisambiguation() {

-    String sense = oscc.disambiguate(sentence1, tags1, lemmas1, 8);

-    assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);

-  }

-

-  /*

-   * Tests disambiguating a word Span In this case we test a mix of monosemous

-   * and polysemous words as well as words that do not need disambiguation such

-   * as determiners

-   */

-  @Test

-  public void testWordSpanDisambiguation() {

-    Span span = new Span(3, 7);

-    List<String> senses = oscc.disambiguate(sentence2, tags2, lemmas2, span);

-

-

-    assertEquals("Check number of returned words", 5, senses.size());

-    assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",

-        senses.get(0));

-    assertEquals("Check 'radioactive' sense ID",

-        "WORDNET radioactive%3:00:00::", senses.get(1));

-    assertEquals("Check preposition", "WSDHELPER to", senses.get(2));

-    assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));

-  }

-

-  /*

-   * Tests disambiguating all the words

-   */

-  @Test

-  public void testAllWordsDisambiguation() {

-    List<String> senses = oscc.disambiguate(sentence3, tags3, lemmas3);

-

-    assertEquals("Check number of returned words", 15, senses.size());

-    assertEquals("Check preposition", "WSDHELPER personal pronoun",

-        senses.get(6));

-  }

-

-}
\ No newline at end of file
commit	f40736d15df35a293bc7bb88ec43b0b20c9c5295	[log] [tgz]
author	Anthony Beylerian <beylerian@apache.org>	Tue Jun 07 09:26:31 2016 +0000
committer	Anthony Beylerian <beylerian@apache.org>	Tue Jun 07 09:26:31 2016 +0000
tree	1c5a436ab7a91e583443b668ce868dd97959815a
parent	0f08de2f24ab14c52160dfbabcbc7c76852013b2 [diff]