OPENNLP-791: Reads the clustering files mentioned in the issue; the reader could later be switched to an ObjectStream. Thanks to Anthony Beylerian for providing a patch.

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
new file mode 100644
index 0000000..afc5084
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
@@ -0,0 +1,70 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one

+ * or more contributor license agreements.  See the NOTICE file

+ * distributed with this work for additional information

+ * regarding copyright ownership.  The ASF licenses this file

+ * to you under the Apache License, Version 2.0 (the

+ * "License"); you may not use this file except in compliance

+ * with the License.  You may obtain a copy of the License at

+ * 

+ *   http://www.apache.org/licenses/LICENSE-2.0

+ * 

+ * Unless required by applicable law or agreed to in writing,

+ * software distributed under the License is distributed on an

+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

+ * KIND, either express or implied.  See the License for the

+ * specific language governing permissions and limitations

+ * under the License.

+ */

+

+package opennlp.tools.disambiguator.contextclustering;

+

+import java.security.InvalidParameterException;

+

+import opennlp.tools.disambiguator.WSDParameters;

+import opennlp.tools.disambiguator.WSDisambiguator;

+import opennlp.tools.util.Span;

+

+/**
+ * Implementation of the <b>Context Clustering</b> approach, which uses n-gram
+ * based clusters.
+ *
+ * This implementation is based on
+ * <a href="http://nlp.cs.rpi.edu/paper/wsd.pdf">http://nlp.cs.rpi.edu/paper/wsd.pdf</a>.
+ *
+ * <p>
+ * Both {@code disambiguate} overloads are still unimplemented stubs and
+ * always return {@code null}.
+ */
+public class ContextClusterer implements WSDisambiguator {
+
+  /** Parameters currently in effect; stays {@code null} until {@link #setParams} is called. */
+  protected ContextClustererParameters params;
+
+  /**
+   * @return the parameters last installed through {@link #setParams}, or
+   *         {@code null} if none have been set
+   */
+  @Override
+  public WSDParameters getParams() {
+    return params;
+  }
+
+  /**
+   * Installs the parameters used by this disambiguator.
+   *
+   * @param params
+   *          the parameters to use; {@code null} installs a default-constructed
+   *          {@link ContextClustererParameters}
+   * @throws InvalidParameterException
+   *           if {@code params} is not a {@link ContextClustererParameters} or
+   *           if its {@code isValid()} check fails
+   */
+  @Override
+  public void setParams(WSDParameters params) throws InvalidParameterException {
+    if (params == null) {
+      // NOTE(review): a default-constructed ContextClustererParameters leaves
+      // ngram at 0, which its own isValid() rejects - confirm whether a
+      // usable default n-gram size should be supplied here.
+      this.params = new ContextClustererParameters();
+      return;
+    }
+
+    // Reject foreign parameter types explicitly instead of letting the cast
+    // below fail with a ClassCastException that callers do not expect.
+    if (!(params instanceof ContextClustererParameters)) {
+      throw new InvalidParameterException("wrong params: expected "
+          + ContextClustererParameters.class.getName() + " but got "
+          + params.getClass().getName());
+    }
+
+    if (!params.isValid()) {
+      throw new InvalidParameterException("wrong params");
+    }
+
+    this.params = (ContextClustererParameters) params;
+  }
+
+  /**
+   * Not implemented yet.
+   *
+   * @param tokenizedContext
+   *          the tokens of the context
+   * @param ambiguousTokenIndex
+   *          index into {@code tokenizedContext} of the token to disambiguate
+   * @return currently always {@code null}
+   */
+  @Override
+  public String[] disambiguate(String[] tokenizedContext,
+      int ambiguousTokenIndex) {
+    // TODO implement context-clustering disambiguation for a single token
+    return null;
+  }
+
+  /**
+   * Not implemented yet.
+   *
+   * @param tokenizedContext
+   *          the tokens of the context
+   * @param ambiguousTokenIndexSpans
+   *          spans identifying the tokens to disambiguate
+   * @return currently always {@code null}
+   */
+  @Override
+  public String[][] disambiguate(String[] tokenizedContext,
+      Span[] ambiguousTokenIndexSpans) {
+    // TODO implement context-clustering disambiguation for token spans
+    return null;
+  }
+
+}

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClustererParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClustererParameters.java
new file mode 100644
index 0000000..bb69fd7
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClustererParameters.java
@@ -0,0 +1,41 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one

+ * or more contributor license agreements.  See the NOTICE file

+ * distributed with this work for additional information

+ * regarding copyright ownership.  The ASF licenses this file

+ * to you under the Apache License, Version 2.0 (the

+ * "License"); you may not use this file except in compliance

+ * with the License.  You may obtain a copy of the License at

+ * 

+ *   http://www.apache.org/licenses/LICENSE-2.0

+ * 

+ * Unless required by applicable law or agreed to in writing,

+ * software distributed under the License is distributed on an

+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

+ * KIND, either express or implied.  See the License for the

+ * specific language governing permissions and limitations

+ * under the License.

+ */

+

+package opennlp.tools.disambiguator.contextclustering;

+

+import opennlp.tools.disambiguator.WSDParameters;

+

+/**
+ * Parameter holder for the context clustering disambiguator. It carries a
+ * single integer setting, {@code ngram} (presumably the n-gram size - confirm
+ * against the clustering code once it exists).
+ */
+public class ContextClustererParameters extends WSDParameters {
+
+  // Left at the int default of 0 until setNgram is called, so a freshly
+  // constructed instance does not pass isValid().
+  protected int ngram;
+
+  /**
+   * Reports whether these parameters are usable.
+   *
+   * @return {@code true} only when {@code ngram} is at least 1
+   */
+  @Override
+  public boolean isValid() {
+    final boolean positive = ngram >= 1;
+    return positive;
+  }
+
+  /**
+   * @return the current {@code ngram} value
+   */
+  public int getNgram() {
+    return this.ngram;
+  }
+
+  /**
+   * Stores a new {@code ngram} value; no validation happens here, see
+   * {@link #isValid()}.
+   *
+   * @param ngram
+   *          the value to store
+   */
+  public void setNgram(int ngram) {
+    this.ngram = ngram;
+  }
+
+}

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java
new file mode 100644
index 0000000..2b3fbf7
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java
@@ -0,0 +1,37 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one

+ * or more contributor license agreements.  See the NOTICE file

+ * distributed with this work for additional information

+ * regarding copyright ownership.  The ASF licenses this file

+ * to you under the Apache License, Version 2.0 (the

+ * "License"); you may not use this file except in compliance

+ * with the License.  You may obtain a copy of the License at

+ * 

+ *   http://www.apache.org/licenses/LICENSE-2.0

+ * 

+ * Unless required by applicable law or agreed to in writing,

+ * software distributed under the License is distributed on an

+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

+ * KIND, either express or implied.  See the License for the

+ * specific language governing permissions and limitations

+ * under the License.

+ */

+

+package opennlp.tools.disambiguator.datareader;

+

+/**
+ * Plain data holder tying a phrase to one cluster: the cluster's id, a
+ * similarity score, and the phrase both as a whole and split into words.
+ * All fields are public and mutable.
+ */
+public class ClusterMembership {
+
+  /** Identifier of the cluster. */
+  public int clusterID;
+  /** Similarity score paired with the cluster id (presumably similarity to the cluster centroid - confirm). */
+  public double centroidSimilarity;
+  /** The phrase this membership belongs to; not set by any constructor. */
+  public String phrase;
+  /** The individual words of {@link #phrase}; not set by any constructor. */
+  public String[] phraseWords;
+
+  /**
+   * Creates a membership with cluster id 0 and similarity 0.0.
+   */
+  public ClusterMembership() {
+    clusterID = 0;
+    centroidSimilarity = 0.0;
+  }
+
+  /**
+   * Creates a membership for the given cluster; {@code phrase} and
+   * {@code phraseWords} stay {@code null}.
+   *
+   * @param clusterID
+   *          identifier of the cluster
+   * @param centroidSimilarity
+   *          similarity score to store
+   */
+  public ClusterMembership(int clusterID, double centroidSimilarity) {
+    this.centroidSimilarity = centroidSimilarity;
+    this.clusterID = clusterID;
+  }
+}

diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java
new file mode 100644
index 0000000..e8b384e
--- /dev/null
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java
@@ -0,0 +1,84 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one

+ * or more contributor license agreements.  See the NOTICE file

+ * distributed with this work for additional information

+ * regarding copyright ownership.  The ASF licenses this file

+ * to you under the Apache License, Version 2.0 (the

+ * "License"); you may not use this file except in compliance

+ * with the License.  You may obtain a copy of the License at

+ * 

+ *   http://www.apache.org/licenses/LICENSE-2.0

+ * 

+ * Unless required by applicable law or agreed to in writing,

+ * software distributed under the License is distributed on an

+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

+ * KIND, either express or implied.  See the License for the

+ * specific language governing permissions and limitations

+ * under the License.

+ */

+

+package opennlp.tools.disambiguator.datareader;

+

+import java.io.BufferedReader;

+import java.io.File;

+import java.io.FileReader;

+import java.io.IOException;

+import java.util.ArrayList;

+import java.util.HashMap;

+

+/**
+ * Reads phrase-cluster files into a map keyed by phrase.
+ *
+ * <p>
+ * Each non-blank line is expected to hold a phrase followed by tab-separated
+ * pairs of cluster id (integer) and similarity (double):
+ * {@code phrase<TAB>id<TAB>similarity<TAB>id<TAB>similarity...}
+ *
+ * <p>
+ * The map is static, so everything read through any instance accumulates in
+ * the same structure. No accessor for the map is visible in this class yet.
+ */
+public class ClustersReader {
+
+  /**
+   * Directory scanned by {@link #getNgramClusters(String)}. Written with
+   * forward slashes, which {@link File} accepts on Windows as well as on
+   * Unix-like systems (the former backslash form only resolved on Windows).
+   */
+  public static String path = "src/test/resources/phraseclusters/";
+  private static final HashMap<String, ArrayList<ClusterMembership>> map = new HashMap<String, ArrayList<ClusterMembership>>();
+
+  /**
+   * Parses one cluster file and stores its memberships in the shared map,
+   * replacing any earlier entry for the same phrase.
+   *
+   * <p>
+   * Blank lines are skipped, and a trailing cluster id without a matching
+   * similarity value is ignored. I/O failures are reported through
+   * {@code printStackTrace()} and otherwise swallowed, so reading is
+   * best-effort.
+   *
+   * @param url
+   *          path of the file to read
+   * @throws NumberFormatException
+   *           if a cluster id or similarity field is not a valid number
+   */
+  public void readFile(String url) {
+
+    File file = new File(url);
+
+    try (BufferedReader clusterList = new BufferedReader(new FileReader(file))) {
+
+      String line;
+
+      while ((line = clusterList.readLine()) != null) {
+
+        // A blank or delimiter-only line carries no phrase; splitting it could
+        // even yield an empty array, so skip it up front.
+        if (line.trim().isEmpty()) {
+          continue;
+        }
+
+        String[] parts = line.split("\\t");
+        String phraseKey = parts[0];
+        String[] phraseWords = phraseKey.split("\\s");
+
+        ArrayList<ClusterMembership> memberships = new ArrayList<ClusterMembership>();
+
+        // Fields after the phrase come in (id, similarity) pairs; requiring
+        // i + 1 to be in range drops a dangling id instead of overrunning
+        // the array.
+        for (int i = 1; i + 1 < parts.length; i += 2) {
+          ClusterMembership membership = new ClusterMembership(
+              Integer.parseInt(parts[i].trim()),
+              Double.parseDouble(parts[i + 1]));
+          membership.phrase = phraseKey;
+          membership.phraseWords = phraseWords;
+
+          memberships.add(membership);
+        }
+        map.put(phraseKey, memberships);
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Reads every regular file found directly inside {@link #path}.
+   *
+   * @param word
+   *          currently unused
+   * @return {@code false} if {@link #path} is not a directory or its contents
+   *         cannot be listed, {@code true} otherwise
+   */
+  public boolean getNgramClusters(String word) {
+
+    File folder = new File(path);
+
+    // listFiles() returns null both when the path is not a directory and when
+    // an I/O error occurs, so this single check covers both cases.
+    File[] files = folder.listFiles();
+    if (files == null) {
+      return false;
+    }
+
+    for (File file : files) {
+      // Subdirectories cannot be opened as cluster files; skip them.
+      if (file.isFile()) {
+        readFile(file.getAbsolutePath());
+      }
+    }
+
+    return true;
+
+  }
+
+}