formatting fixed by applying template
OPENNLP-419
write a doc which will introduce potential users to the Component
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
index ee3cfb1..e84b4fc 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
@@ -32,11 +32,12 @@
 

 public class BingQueryRunner {

   protected static final String APP_ID = "DD4E2A5DF8B7E5801ED443E47DC600D5F3E62713";

-  //TODO user needs to have own APP_ID from Bing API

+  // TODO user needs to have own APP_ID from Bing API

 

   private float snapshotSimilarityThreshold = 0.4f;

 

-  private static final Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.BingQueryRunner");

+  private static final Logger LOG = Logger

+      .getLogger("opennlp.tools.similarity.apps.BingQueryRunner");

 

   public void setSnapshotSimilarityThreshold(float thr) {

     snapshotSimilarityThreshold = thr;

@@ -49,10 +50,11 @@
   public BingQueryRunner() {

 

   }

+

   /*

    * 

    */

-  

+

   private String constructBingUrl(String query, String domainWeb, String lang,

       int numbOfHits) throws Exception {

     String codedQuery = URLEncoder.encode(query, "UTF-8");

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingWebQueryRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingWebQueryRunner.java
index b51e773..c4e2a3e 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingWebQueryRunner.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingWebQueryRunner.java
@@ -35,26 +35,24 @@
 import org.json.JSONObject;

 

 public class BingWebQueryRunner {

-  private static final Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.BingWebQueryRunner");

+  private static final Logger LOG = Logger

+      .getLogger("opennlp.tools.similarity.apps.BingWebQueryRunner");

 

-  private String constructBingWebUrl(String query, int numbOfHits) throws Exception {

+  private String constructBingWebUrl(String query, int numbOfHits)

+      throws Exception {

     String codedQuery = URLEncoder.encode(query, "UTF-8");

 

     String yahooRequest = "http://api.search.live.net/json.aspx?Appid="

-        + BingQueryRunner.APP_ID + "&query=" + codedQuery // +

-        // "&sources=web"+

+        + BingQueryRunner.APP_ID + "&query=" + codedQuery 

         + "&Sources=Web"

         // Common request fields (optional)

         + "&Version=2.0" + "&Market=en-us&web.count=" + numbOfHits

-        // + "&Options=EnableHighlighting"

-

-        // News-specific request fields (optional)

+         // News-specific request fields (optional)

         + "&News.Offset=0";

 

     return yahooRequest;

   }

 

-  

   public BingResponse populateBingHit(String response) throws Exception {

     BingResponse resp = new BingResponse();

     JSONObject rootObject = new JSONObject(response);

@@ -70,25 +68,25 @@
       resp.setTotalHits(new Integer(count));

     } catch (Exception e) {

       e.printStackTrace();

-      LOG.severe("\nNo search results " +  e);

-      

+      LOG.severe("\nNo search results " + e);

+

     }

     if (resultSet != null) {

       for (int i = 0; i < resultSet.length(); i++) {

         try {

-			HitBase hit = new HitBase();

-			JSONObject singleResult = resultSet.getJSONObject(i);

-			hit.setAbstractText(singleResult.getString("Description"));

-			hit.setDate(singleResult.getString("DateTime"));

-			String title = StringUtils.replace(singleResult.getString("Title"),

-			    "", " ");

-			hit.setTitle(title);

-			hit.setUrl(singleResult.getString("Url"));

+          HitBase hit = new HitBase();

+          JSONObject singleResult = resultSet.getJSONObject(i);

+          hit.setAbstractText(singleResult.getString("Description"));

+          hit.setDate(singleResult.getString("DateTime"));

+          String title = StringUtils.replace(singleResult.getString("Title"),

+              "", " ");

+          hit.setTitle(title);

+          hit.setUrl(singleResult.getString("Url"));

 

-			resp.appendHits(hit);

-		} catch (Exception e) {

-			// incomplete search result: do not through exception

-		}

+          resp.appendHits(hit);

+        } catch (Exception e) {

+          // incomplete search result: do not throw exception

+        }

       }

     }

     return resp;

@@ -132,7 +130,6 @@
     return hits;

   }

 

-  

   public List<HitBase> runSearch(String query, int num) {

     BingResponse resp = null;

     try {

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
index 7d7a21a..47e0d04 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
@@ -24,167 +24,180 @@
 

 import org.apache.commons.lang.StringUtils;

 

-

 public class GeneratedSentenceProcessor {

-		public static String acceptableMinedSentence(String sent)

-		{

-			// if too many commas => seo text

+  public static String acceptableMinedSentence(String sent) {

+    // if too many commas => seo text

 

-			String[] commas = StringUtils.split(sent, ',');

-			String[] spaces = StringUtils.split(sent, ' ');

-			if ((float) commas.length / (float) spaces.length > 0.7)

-			{

-				System.out.println("Rejection: too many commas");

-				return null;

-			}

+    String[] commas = StringUtils.split(sent, ',');

+    String[] spaces = StringUtils.split(sent, ' ');

+    if ((float) commas.length / (float) spaces.length > 0.7) {

+      System.out.println("Rejection: too many commas");

+      return null;

+    }

 

-			String[] pipes = StringUtils.split(sent, '|');

-			if (StringUtils.split(sent, '|').length > 2 || StringUtils.split(sent, '>').length > 2)

-			{

-				System.out.println("Rejection: too many |s or >s ");

-				return null;

-			}

-			String sentTry = sent.toLowerCase();

-			// if too many long spaces

-			String sentSpaces = sentTry.replace("   ", "");

-			if (sentSpaces.length() - sentTry.length() > 10) // too many spaces -

-																// suspicious

-				return null;

+    String[] pipes = StringUtils.split(sent, '|');

+    if (StringUtils.split(sent, '|').length > 2

+        || StringUtils.split(sent, '>').length > 2) {

+      System.out.println("Rejection: too many |s or >s ");

+      return null;

+    }

+    String sentTry = sent.toLowerCase();

+    // if too many long spaces

+    String sentSpaces = sentTry.replace("   ", "");

+    if (sentSpaces.length() - sentTry.length() > 10) // too many spaces -

+      // suspicious

+      return null;

 

-			if (sentTry.indexOf("click here") > -1 || sentTry.indexOf(" wikip") > -1 || sentTry.indexOf("copyright") > -1

-				|| sentTry.indexOf("operating hours") > -1 || sentTry.indexOf("days per week") > -1

-				|| sentTry.indexOf("click for") > -1 || sentTry.indexOf("photos") > -1

-				|| sentTry.indexOf("find the latest") > -1 || sentTry.startsWith("subscribe")

-				|| sentTry.indexOf("Terms of Service") > -1 || sentTry.indexOf("clicking here") > -1

-				|| sentTry.indexOf("skip to") > -1 || sentTry.indexOf("sidebar") > -1 || sentTry.indexOf("Tags:") > -1

-				|| sentTry.startsWith("Posted by") || sentTry.indexOf("available online") > 0

-				|| sentTry.indexOf("get online") > 0 || sentTry.indexOf("buy online") > 0

-				|| sentTry.indexOf("not valid") > 0 || sentTry.indexOf("discount") > 0

-				|| sentTry.indexOf("official site") > 0 || sentTry.indexOf("this video") > 0 || sentTry.indexOf("this book") > 0

-				|| sentTry.indexOf("this product") > 0 || sentTry.indexOf("paperback") > 0 || sentTry.indexOf("hardcover") > 0 ||

-				sentTry.indexOf("audio cd") > 0

-				|| sentTry.indexOf("related searches") > 0 || sentTry.indexOf("permission is granted") > 0

-				|| sentTry.indexOf("[edit") > 0 || sentTry.indexOf("edit categories") > 0

-				|| sentTry.indexOf("free license") > 0 || sentTry.indexOf("permission is granted") > 0

-				|| sentTry.indexOf("under the terms") > 0 		|| sentTry.indexOf("rights reserved") > 0 	

-				|| sentTry.indexOf("wikipedia") > 0 || sentTry.endsWith("the") || sentTry.endsWith("the.")

-				|| sentTry.startsWith("below") 

-			

-			)

-				return null;

+    if (sentTry.indexOf("click here") > -1 || sentTry.indexOf(" wikip") > -1

+        || sentTry.indexOf("copyright") > -1

+        || sentTry.indexOf("operating hours") > -1

+        || sentTry.indexOf("days per week") > -1

+        || sentTry.indexOf("click for") > -1 || sentTry.indexOf("photos") > -1

+        || sentTry.indexOf("find the latest") > -1

+        || sentTry.startsWith("subscribe")

+        || sentTry.indexOf("Terms of Service") > -1

+        || sentTry.indexOf("clicking here") > -1

+        || sentTry.indexOf("skip to") > -1 || sentTry.indexOf("sidebar") > -1

+        || sentTry.indexOf("Tags:") > -1 || sentTry.startsWith("Posted by")

+        || sentTry.indexOf("available online") > 0

+        || sentTry.indexOf("get online") > 0

+        || sentTry.indexOf("buy online") > 0

+        || sentTry.indexOf("not valid") > 0 || sentTry.indexOf("discount") > 0

+        || sentTry.indexOf("official site") > 0

+        || sentTry.indexOf("this video") > 0

+        || sentTry.indexOf("this book") > 0

+        || sentTry.indexOf("this product") > 0

+        || sentTry.indexOf("paperback") > 0 || sentTry.indexOf("hardcover") > 0

+        || sentTry.indexOf("audio cd") > 0

+        || sentTry.indexOf("related searches") > 0

+        || sentTry.indexOf("permission is granted") > 0

+        || sentTry.indexOf("[edit") > 0

+        || sentTry.indexOf("edit categories") > 0

+        || sentTry.indexOf("free license") > 0

+        || sentTry.indexOf("permission is granted") > 0

+        || sentTry.indexOf("under the terms") > 0

+        || sentTry.indexOf("rights reserved") > 0

+        || sentTry.indexOf("wikipedia") > 0 || sentTry.endsWith("the")

+        || sentTry.endsWith("the.") || sentTry.startsWith("below")

 

-			// count symbols indicating wrong parts of page to mine for text

-			// if short and contains too many symbols indicating wrong area: reject

-			String sentWrongSym = sentTry.replace(">", "&&&").replace("�", "&&&").replace("|", "&&&").replace(":", "&&&")

-				.replace("/", "&&&").replace("-", "&&&").replace("%", "&&&");

-			if ((sentWrongSym.length() - sentTry.length()) >= 4 && sentTry.length()<200) // twice ot more

-				return null;

+    )

+      return null;

 

-			sent = sent.replace('[', ' ').replace(']', ' ').replace("_should_find_orig_", "").replace(".   .", ". ")

-				.replace("amp;", " ").replace("1.", " ").replace("2.", " ").replace("3.", " ").replace("4.", " ")

-				.replace("2009", "2011").replace("2008", "2011").replace("2006", "2011").replace("2007", "2011").

-				replace("VIDEO:", " ").replace("Video:", " ").replace("no comments", " ")

-				.replace("  ", " ").replace("  ", " ").replace("(more.)", "").replace("more.", "").replace("<more>", "").

-				replace("[more]", "").replace(".,",".").replace("&lt;", "").replace("p&gt;","" ).

-				replace("product description", "");

-		

-				// TODO .replace("a.", ".");

+    // count symbols indicating wrong parts of page to mine for text

+    // if short and contains too many symbols indicating wrong area: reject

+    String sentWrongSym = sentTry.replace(">", "&&&").replace("�", "&&&")

+        .replace("|", "&&&").replace(":", "&&&").replace("/", "&&&")

+        .replace("-", "&&&").replace("%", "&&&");

+    if ((sentWrongSym.length() - sentTry.length()) >= 4

+        && sentTry.length() < 200) // twice or more

+      return null;

 

-			int endIndex = sent.indexOf(" posted");

-			if (endIndex > 0)

-				sent = sent.substring(0, endIndex);

+    sent = sent.replace('[', ' ').replace(']', ' ')

+        .replace("_should_find_orig_", "").replace(".   .", ". ")

+        .replace("amp;", " ").replace("1.", " ").replace("2.", " ")

+        .replace("3.", " ").replace("4.", " ").replace("2009", "2011")

+        .replace("2008", "2011").replace("2006", "2011")

+        .replace("2007", "2011").replace("VIDEO:", " ").replace("Video:", " ")

+        .replace("no comments", " ").replace("  ", " ").replace("  ", " ")

+        .replace("(more.)", "").replace("more.", "").replace("<more>", "")

+        .replace("[more]", "").replace(".,", ".").replace("&lt;", "")

+        .replace("p&gt;", "").replace("product description", "");

 

-			return sent;

-		}

+    // TODO .replace("a.", ".");

 

-		public static String processSentence(String pageSentence)

-		{

-			if (pageSentence == null)

-				return "";

-			pageSentence = Utils.fullStripHTML(pageSentence);

-			pageSentence = StringUtils.chomp(pageSentence, "..");

-			pageSentence = StringUtils.chomp(pageSentence, ". .");

-			pageSentence = StringUtils.chomp(pageSentence, " .");

-			pageSentence = StringUtils.chomp(pageSentence, ".");

-			pageSentence = StringUtils.chomp(pageSentence, "...");

-			pageSentence = StringUtils.chomp(pageSentence, " ....");

-			pageSentence = pageSentence.replace("::", ":").replace(".,", ". ").replace("(.)", "");

-			

-			pageSentence = pageSentence.trim();

-			pageSentence = pageSentence.replaceAll("\\s+", " "); // make single

-																	// spaces

-																	// everywhere

+    int endIndex = sent.indexOf(" posted");

+    if (endIndex > 0)

+      sent = sent.substring(0, endIndex);

 

-			String[] pipes = StringUtils.split(pageSentence, '|'); // removed

-																	// shorter part

-																	// of sentence

-																	// at the end

-																	// after pipe

-			if (pipes.length == 2 && ((float) pipes[0].length() / (float) pipes[1].length() > 3.0))

-			{

-				int pipePos = pageSentence.indexOf("|");

-				if (pipePos > -1)

-					pageSentence = pageSentence.substring(0, pipePos - 1).trim();

+    return sent;

+  }

 

-			}

+  public static String processSentence(String pageSentence) {

+    if (pageSentence == null)

+      return "";

+    pageSentence = Utils.fullStripHTML(pageSentence);

+    pageSentence = StringUtils.chomp(pageSentence, "..");

+    pageSentence = StringUtils.chomp(pageSentence, ". .");

+    pageSentence = StringUtils.chomp(pageSentence, " .");

+    pageSentence = StringUtils.chomp(pageSentence, ".");

+    pageSentence = StringUtils.chomp(pageSentence, "...");

+    pageSentence = StringUtils.chomp(pageSentence, " ....");

+    pageSentence = pageSentence.replace("::", ":").replace(".,", ". ")

+        .replace("(.)", "");

 

-			if (!StringUtils.contains(pageSentence, '.') && !StringUtils.contains(pageSentence, '?')

-				&& !StringUtils.contains(pageSentence, '!'))

-				pageSentence = pageSentence + ". ";

+    pageSentence = pageSentence.trim();

+    pageSentence = pageSentence.replaceAll("\\s+", " "); // make single

+    // spaces

+    // everywhere

 

-			pageSentence = pageSentence.replace(" .", ".").replace("..", ".").trim();

-			if (!pageSentence.endsWith("."))

-				pageSentence += ". ";

-			return pageSentence;

-		}

-		

-		public static void main(String[] args)

-		{

-			

-			String para = "About Albert Einstein     15 External links  16 Credits         Youth and schooling  Albert Einstein was born into a Jewish family";

-			para = "inventions of albert einstein                            what was albert einsteins invention                            invention of einstein                            what were albert einsteins inventions ";

-			

-			para = para.replaceAll("  [A-Z]", ". $0");

-			System.out.println(para);

-			

-			para = "Page 2 of 93";

-		    

-			System.exit(0);

-			RelatedSentenceFinder f = new RelatedSentenceFinder();

-			try

-			{

-				List<HitBase> hits = f

-					.findRelatedOpinionsForSentence(

-						"Give me a break, there is no reason why you can't retire in ten years if you had been a rational investor and not a crazy trader",

-						Arrays

-							.asList(new String[] { "Give me a break there is no reason why you can't retire in ten years if you had been a rational investor and not a crazy trader. For example you went to cash in 2008 and stay in cash until now you made nothing. Whereas people who rode out the storm are doing fine so let's quit focusing on the loser who think they are so smart and went to 100% cash and are wondering what happen. Its a market that always moves unlike your mattress.", }));

-				StringBuffer buf = new StringBuffer();

+    String[] pipes = StringUtils.split(pageSentence, '|'); // removed

+    // shorter part

+    // of sentence

+    // at the end

+    // after pipe

+    if (pipes.length == 2

+        && ((float) pipes[0].length() / (float) pipes[1].length() > 3.0)) {

+      int pipePos = pageSentence.indexOf("|");

+      if (pipePos > -1)

+        pageSentence = pageSentence.substring(0, pipePos - 1).trim();

 

-				for (HitBase h : hits)

-				{

-					List<Fragment> frags = h.getFragments();

-					for (Fragment fr : frags)

-					{

-						if (fr.getResultText() != null && fr.getResultText().length() > 3)

-							buf.append(fr.getResultText());

-					}

-				}

+    }

 

-			}

-			catch (Exception e)

-			{

-				// TODO Auto-generated catch block

-				e.printStackTrace();

-			}

+    if (!StringUtils.contains(pageSentence, '.')

+        && !StringUtils.contains(pageSentence, '?')

+        && !StringUtils.contains(pageSentence, '!'))

+      pageSentence = pageSentence + ". ";

 

-		}

+    pageSentence = pageSentence.replace(" .", ".").replace("..", ".").trim();

+    if (!pageSentence.endsWith("."))

+      pageSentence += ". ";

+    return pageSentence;

+  }

 

-		public static String normalizeForSentenceSplitting(String pageContent) {

-			pageContent.replace("Jan.", "January").replace("Feb.", "February").replace("Mar.", "March").replace("Apr.", "April").

-			replace("Jun.", "June").replace("Jul.", "July").replace("Aug.", "August").replace("Sep.", "September").

-			replace("Oct.", "October").replace("Nov.", "November").replace("Dec.", "December");

-			

-			return pageContent;

-			

-		}

-	}
\ No newline at end of file
+  public static void main(String[] args) {

+

+    String para = "About Albert Einstein     15 External links  16 Credits         Youth and schooling  Albert Einstein was born into a Jewish family";

+    para = "inventions of albert einstein                            what was albert einsteins invention                            invention of einstein                            what were albert einsteins inventions ";

+

+    para = para.replaceAll("  [A-Z]", ". $0");

+    System.out.println(para);

+

+    para = "Page 2 of 93";

+

+    System.exit(0);

+    RelatedSentenceFinder f = new RelatedSentenceFinder();

+    try {

+      List<HitBase> hits = f

+          .findRelatedOpinionsForSentence(

+              "Give me a break, there is no reason why you can't retire in ten years if you had been a rational investor and not a crazy trader",

+              Arrays

+                  .asList(new String[] { "Give me a break there is no reason why you can't retire in ten years if you had been a rational investor and not a crazy trader. For example you went to cash in 2008 and stay in cash until now you made nothing. Whereas people who rode out the storm are doing fine so let's quit focusing on the loser who think they are so smart and went to 100% cash and are wondering what happen. Its a market that always moves unlike your mattress.", }));

+      StringBuffer buf = new StringBuffer();

+

+      for (HitBase h : hits) {

+        List<Fragment> frags = h.getFragments();

+        for (Fragment fr : frags) {

+          if (fr.getResultText() != null && fr.getResultText().length() > 3)

+            buf.append(fr.getResultText());

+        }

+      }

+

+    } catch (Exception e) {

+      // TODO Auto-generated catch block

+      e.printStackTrace();

+    }

+

+  }

+

+  public static String normalizeForSentenceSplitting(String pageContent) {

+    pageContent.replace("Jan.", "January").replace("Feb.", "February")

+        .replace("Mar.", "March").replace("Apr.", "April")

+        .replace("Jun.", "June").replace("Jul.", "July")

+        .replace("Aug.", "August").replace("Sep.", "September")

+        .replace("Oct.", "October").replace("Nov.", "November")

+        .replace("Dec.", "December");

+

+    return pageContent;

+

+  }

+}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBase.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBase.java
index 6dfe189..c8d4d6a 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBase.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBase.java
@@ -25,252 +25,215 @@
 

 import org.apache.commons.lang.StringUtils;

 

-public class HitBase

-{

-	private static final Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.HitBase");

+public class HitBase {

+  private static final Logger LOG = Logger

+      .getLogger("opennlp.tools.similarity.apps.HitBase");

 

-	private String abstractText;

+  private String abstractText;

 

-	private String clickUrl;

+  private String clickUrl;

 

-	private String displayUrl;

+  private String displayUrl;

 

-	private String url;

+  private String url;

 

-	private String date;

+  private String date;

 

-	private String title;

+  private String title;

 

-	private Double generWithQueryScore;

+  private Double generWithQueryScore;

 

-	private String source;

+  private String source;

 

-	private List<String> originalSentences;

+  private List<String> originalSentences;

 

-	private String pageContent;

+  private String pageContent;

 

-	private List<Fragment> fragments;

+  private List<Fragment> fragments;

 

-	public HitBase()

-	{

-		super();

-	}

+  public HitBase() {

+    super();

+  }

 

-	public String getPageContent()

-	{

-		return pageContent;

-	}

+  public String getPageContent() {

+    return pageContent;

+  }

 

-	public HitBase(String orig, String[] generateds)

-	{

-		originalSentences = new ArrayList<String>();

-		originalSentences.add(orig);

+  public HitBase(String orig, String[] generateds) {

+    originalSentences = new ArrayList<String>();

+    originalSentences.add(orig);

 

-		fragments = new ArrayList<Fragment>();

-		for (String sent : generateds)

-		{

-			Fragment f = new Fragment(sent, 0.0);

-			fragments.add(f);

-		}

-		// the rest of params are null

-	}

+    fragments = new ArrayList<Fragment>();

+    for (String sent : generateds) {

+      Fragment f = new Fragment(sent, 0.0);

+      fragments.add(f);

+    }

+    // the rest of params are null

+  }

 

-	public void setPageContent(String pageContent)

-	{

-		this.pageContent = pageContent;

-	}

+  public void setPageContent(String pageContent) {

+    this.pageContent = pageContent;

+  }

 

-	public List<Fragment> getFragments()

-	{

-		return fragments;

-	}

+  public List<Fragment> getFragments() {

+    return fragments;

+  }

 

-	public void setFragments(List<Fragment> fragments)

-	{

-		this.fragments = fragments;

-	}

+  public void setFragments(List<Fragment> fragments) {

+    this.fragments = fragments;

+  }

 

-	public String getSource()

-	{

-		return source;

-	}

+  public String getSource() {

+    return source;

+  }

 

-	public void setSource(String source)

-	{

-		this.source = source;

-	}

+  public void setSource(String source) {

+    this.source = source;

+  }

 

-	public List<String> getOriginalSentences()

-	{

-		return originalSentences;

-	}

+  public List<String> getOriginalSentences() {

+    return originalSentences;

+  }

 

-	public void setOriginalSentences(List<String> originalSentences)

-	{

-		this.originalSentences = originalSentences;

-	}

+  public void setOriginalSentences(List<String> originalSentences) {

+    this.originalSentences = originalSentences;

+  }

 

-	public String getTitle()

-	{

-		return title;

-	}

+  public String getTitle() {

+    return title;

+  }

 

-	public void setTitle(String title)

-	{

-		this.title = title;

-	}

+  public void setTitle(String title) {

+    this.title = title;

+  }

 

-	public String getAbstractText()

-	{

-		return abstractText;

-	}

+  public String getAbstractText() {

+    return abstractText;

+  }

 

-	public void setAbstractText(String abstractText)

-	{

-		this.abstractText = abstractText;

-	}

+  public void setAbstractText(String abstractText) {

+    this.abstractText = abstractText;

+  }

 

-	public String getClickUrl()

-	{

-		return clickUrl;

-	}

+  public String getClickUrl() {

+    return clickUrl;

+  }

 

-	public void setClickUrl(String clickUrl)

-	{

-		this.clickUrl = clickUrl;

-	}

+  public void setClickUrl(String clickUrl) {

+    this.clickUrl = clickUrl;

+  }

 

-	public String getDisplayUrl()

-	{

-		return displayUrl;

-	}

+  public String getDisplayUrl() {

+    return displayUrl;

+  }

 

-	public void setDisplayUrl(String displayUrl)

-	{

-		this.displayUrl = displayUrl;

-	}

+  public void setDisplayUrl(String displayUrl) {

+    this.displayUrl = displayUrl;

+  }

 

-	public String getUrl()

-	{

-		return url;

-	}

+  public String getUrl() {

+    return url;

+  }

 

-	public void setUrl(String url)

-	{

-		this.url = url;

-	}

+  public void setUrl(String url) {

+    this.url = url;

+  }

 

-	public String getDate()

-	{

-		return date;

-	}

+  public String getDate() {

+    return date;

+  }

 

-	public void setDate(String date)

-	{

-		this.date = date;

-	}

+  public void setDate(String date) {

+    this.date = date;

+  }

 

-	public Double getGenerWithQueryScore()

-	{

-		return generWithQueryScore;

-	}

+  public Double getGenerWithQueryScore() {

+    return generWithQueryScore;

+  }

 

-	public void setGenerWithQueryScore(Double generWithQueryScore)

-	{

-		this.generWithQueryScore = generWithQueryScore;

-	}

+  public void setGenerWithQueryScore(Double generWithQueryScore) {

+    this.generWithQueryScore = generWithQueryScore;

+  }

 

-	public String toString()

-	{

-		// return "\n"+this.getUrl()+" | " +this.getTitle()+ " | "+ this.abstractText ;

-		if (this.getFragments() != null && this.getFragments().size() > 0)

-			return this.getFragments().toString();

-		else

-			return this.title;

-	}

+  public String toString() {

+    // return "\n"+this.getUrl()+" | " +this.getTitle()+ " | "+

+    // this.abstractText ;

+    if (this.getFragments() != null && this.getFragments().size() > 0)

+      return this.getFragments().toString();

+    else

+      return this.title;

+  }

 

-	public static String toString(List<HitBase> hits)

-	{

-		StringBuffer buf = new StringBuffer();

-		Boolean pBreak = true;

-		for (HitBase hit : hits)

-		{

-			String fragm = (hit.toString());

-			if (fragm.length() > 15)

-			{

-				if (pBreak)

-					buf.append(fragm + " | ");

-				else

-					buf.append(fragm + " | \n");

-				// switch to opposite

-				if (pBreak)

-					pBreak = false;

-				else

-					pBreak = true;

-			}

+  public static String toString(List<HitBase> hits) {

+    StringBuffer buf = new StringBuffer();

+    Boolean pBreak = true;

+    for (HitBase hit : hits) {

+      String fragm = (hit.toString());

+      if (fragm.length() > 15) {

+        if (pBreak)

+          buf.append(fragm + " | ");

+        else

+          buf.append(fragm + " | \n");

+        // switch to opposite

+        if (pBreak)

+          pBreak = false;

+        else

+          pBreak = true;

+      }

 

-		}

-		return buf.toString();

-	}

-	

-	public static String toResultantString(List<HitBase> hits)

-	{

-		StringBuffer buf = new StringBuffer();

-		Boolean pBreak = true;

-		for (HitBase hit : hits)

-		{

-			String fragm = hit.getFragments().toString();

-			if (fragm.length() > 15)

-			{

-				if (pBreak)

-					buf.append(fragm + " | 	");

-				else

-					buf.append(fragm + " | \n");

-				// switch to opposite

-				if (pBreak)

-					pBreak = false;

-				else

-					pBreak = true;

-			}

+    }

+    return buf.toString();

+  }

 

-		}

-		return buf.toString().replace("[", "").replace("]", "").replace(" | ", "").replace(".,",".").

-		replace(".\"", "\"").replace(". .", ".").replace(",.", ".");

-	}

+  public static String toResultantString(List<HitBase> hits) {

+    StringBuffer buf = new StringBuffer();

+    Boolean pBreak = true;

+    for (HitBase hit : hits) {

+      String fragm = hit.getFragments().toString();

+      if (fragm.length() > 15) {

+        if (pBreak)

+          buf.append(fragm + " | 	");

+        else

+          buf.append(fragm + " | \n");

+        // switch to opposite

+        if (pBreak)

+          pBreak = false;

+        else

+          pBreak = true;

+      }

 

-	public static List<HitBase> removeDuplicates(List<HitBase> hits)

-	{

-		StringDistanceMeasurer meas = new StringDistanceMeasurer();

-		double imageDupeThresh = 0.8; // if more similar, then considered dupes

-		List<Integer> idsToRemove = new ArrayList<Integer>();

-		List<HitBase> hitsDedup = new ArrayList<HitBase>();

-		try

-		{

-			for (int i = 0; i < hits.size(); i++)

-				for (int j = i + 1; j < hits.size(); j++)

-				{

-					String title1 = hits.get(i).getTitle();

-					String title2 = hits.get(j).getTitle();

-					if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))

-						continue;

-					if (meas.measureStringDistance(title1, title2) > imageDupeThresh)

-					{

-						idsToRemove.add(j); // dupes found, later list member to be deleted

-					}

-				}

-			for (int i = 0; i < hits.size(); i++)

-				if (!idsToRemove.contains(i))

-					hitsDedup.add(hits.get(i));

-			if (hitsDedup.size() < hits.size())

-			{

-				LOG.info("Removed duplicates from relevant search results, including "

-					+ hits.get(idsToRemove.get(0)).getTitle());

-			}

-		}

-		catch (Exception e)

-		{

-			LOG.severe("Problem removing duplicates from relevant images: " + e);

-		}

-		return hitsDedup;

-	}

+    }

+    return buf.toString().replace("[", "").replace("]", "").replace(" | ", "")

+        .replace(".,", ".").replace(".\"", "\"").replace(". .", ".")

+        .replace(",.", ".");

+  }

+

+  public static List<HitBase> removeDuplicates(List<HitBase> hits) {

+    StringDistanceMeasurer meas = new StringDistanceMeasurer();

+    double imageDupeThresh = 0.8; // if more similar, then considered dupes

+    List<Integer> idsToRemove = new ArrayList<Integer>();

+    List<HitBase> hitsDedup = new ArrayList<HitBase>();

+    try {

+      for (int i = 0; i < hits.size(); i++)

+        for (int j = i + 1; j < hits.size(); j++) {

+          String title1 = hits.get(i).getTitle();

+          String title2 = hits.get(j).getTitle();

+          if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))

+            continue;

+          if (meas.measureStringDistance(title1, title2) > imageDupeThresh) {

+            idsToRemove.add(j); // dupes found, later list member to be deleted

+          }

+        }

+      for (int i = 0; i < hits.size(); i++)

+        if (!idsToRemove.contains(i))

+          hitsDedup.add(hits.get(i));

+      if (hitsDedup.size() < hits.size()) {

+        LOG.info("Removed duplicates from relevant search results, including "

+            + hits.get(idsToRemove.get(0)).getTitle());

+      }

+    } catch (Exception e) {

+      LOG.severe("Problem removing duplicates from relevant images: " + e);

+    }

+    return hitsDedup;

+  }

 }
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBaseComparable.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBaseComparable.java
index 0508dce..1f1fcc6 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBaseComparable.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBaseComparable.java
@@ -2,9 +2,10 @@
 

 import java.util.Comparator;

 

-public class HitBaseComparable implements Comparator<HitBase>{

-	//@Override

-	public int compare(HitBase o1, HitBase o2) {

-		return (o1.getGenerWithQueryScore()>o2.getGenerWithQueryScore() ? -1 : (o1==o2 ? 0 : 1));

-	}

+public class HitBaseComparable implements Comparator<HitBase> {

+  // @Override

+  public int compare(HitBase o1, HitBase o2) {

+    return (o1.getGenerWithQueryScore() > o2.getGenerWithQueryScore() ? -1

+        : (o1 == o2 ? 0 : 1));

+  }

 }
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
index cd53d9f..8f0cb67 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
@@ -22,7 +22,6 @@
 import java.util.List;

 import java.util.logging.Logger;

 

-

 import opennlp.tools.similarity.apps.utils.PageFetcher;

 import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;

 import opennlp.tools.similarity.apps.utils.Utils;

@@ -43,571 +42,576 @@
  * 

  */

 

-public class RelatedSentenceFinder

-{

-	private static Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.RelatedSentenceFinder");

-	PageFetcher pFetcher = new PageFetcher();

+public class RelatedSentenceFinder {

+  private static Logger LOG = Logger

+      .getLogger("opennlp.tools.similarity.apps.RelatedSentenceFinder");

+  PageFetcher pFetcher = new PageFetcher();

 

-	private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

-	private ParseTreeChunk parseTreeChunk  = new ParseTreeChunk(); 

+  private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

+  private ParseTreeChunk parseTreeChunk = new ParseTreeChunk();

 

-	static StringDistanceMeasurer STRING_DISTANCE_MEASURER = new StringDistanceMeasurer();

+  static StringDistanceMeasurer STRING_DISTANCE_MEASURER = new StringDistanceMeasurer();

 

-	// used to indicate that a sentence is an opinion, so more appropriate

-	static List<String> MENTAL_VERBS = new ArrayList<String>(Arrays.asList(new String[] { "want", "know", "believe",

-			"appeal", "ask", "accept", "agree", "allow", "appeal", "ask", "assume", "believe", "check", "confirm",

-			"convince", "deny", "disagree", "explain", "ignore", "inform", "remind", "request", "suggest", "suppose",

-			"think", "threaten", "try", "understand" }));

+  // used to indicate that a sentence is an opinion, so more appropriate

+  static List<String> MENTAL_VERBS = new ArrayList<String>(

+      Arrays.asList(new String[] { "want", "know", "believe", "appeal", "ask",

+          "accept", "agree", "allow", "appeal", "ask", "assume", "believe",

+          "check", "confirm", "convince", "deny", "disagree", "explain",

+          "ignore", "inform", "remind", "request", "suggest", "suppose",

+          "think", "threaten", "try", "understand" }));

 

-	private static final int MAX_FRAGMENT_SENTS = 10;

+  private static final int MAX_FRAGMENT_SENTS = 10;

 

-	public RelatedSentenceFinder()

-	{

+  public RelatedSentenceFinder() {

 

-	}

+  }

 

-	public List<HitBase> findRelatedOpinionsForSentenceFastAndDummy(String word, List<String> sents) throws Exception

-	{

-		BingWebQueryRunner yrunner = new BingWebQueryRunner();

-		List<HitBase> searchResult = yrunner.runSearch(word);

-		return searchResult;

-	}

+  public List<HitBase> findRelatedOpinionsForSentenceFastAndDummy(String word,

+      List<String> sents) throws Exception {

+    BingWebQueryRunner yrunner = new BingWebQueryRunner();

+    List<HitBase> searchResult = yrunner.runSearch(word);

+    return searchResult;

+  }

 

+  public List<HitBase> findRelatedOpinionsForSentence(String sentence,

+      List<String> sents) throws Exception {

+    List<HitBase> opinionSentencesToAdd = new ArrayList<HitBase>();

+    System.out.println(" \n\n=== Sentence  = " + sentence);

+    List<String> nounPhraseQueries = buildSearchEngineQueryFromSentence(sentence);

 

+    BingWebQueryRunner yrunner = new BingWebQueryRunner();

+    for (String query : nounPhraseQueries) {

+      System.out.println("\nquery = " + query);

+      // query += " "+join(MENTAL_VERBS, " OR ") ;

+      List<HitBase> searchResult = yrunner.runSearch(query);

+      if (searchResult != null) {

+        for (HitBase item : searchResult) { // got some text from .html

+          if (item.getAbstractText() != null

+              && !(item.getUrl().indexOf(".pdf") > 0)) { // exclude

+                                                         // pdf

+            opinionSentencesToAdd

+                .add(augmentWithMinedSentencesAndVerifyRelevance(item,

+                    sentence, sents));

+          }

+        }

+      }

+    }

 

-	public List<HitBase> findRelatedOpinionsForSentence(String sentence, List<String> sents) throws Exception

-	{

-		List<HitBase> opinionSentencesToAdd = new ArrayList<HitBase>();

-		System.out.println(" \n\n=== Sentence  = " + sentence);

-		List<String> nounPhraseQueries = buildSearchEngineQueryFromSentence(sentence);

+    opinionSentencesToAdd = removeDuplicatesFromResultantHits(opinionSentencesToAdd);

+    return opinionSentencesToAdd;

+  }

 

-		BingWebQueryRunner yrunner = new BingWebQueryRunner();

-		for (String query : nounPhraseQueries)

-		{

-			System.out.println("\nquery = " + query);

-			// query += " "+join(MENTAL_VERBS, " OR ") ;

-			List<HitBase> searchResult = yrunner.runSearch(query);

-			if (searchResult != null)

-			{

-				for (HitBase item : searchResult)

-				{ // got some text from .html

-					if (item.getAbstractText() != null && !(item.getUrl().indexOf(".pdf") > 0))

-					{ // exclude

-						// pdf

-						opinionSentencesToAdd.add(augmentWithMinedSentencesAndVerifyRelevance(item, sentence, sents));

-					}

-				}

-			}

-		}

+  /**

+   * Main content generation function which takes a seed as a person, rock

+   * group, or other entity name and produce a list of text fragments by web

+   * mining for <br>

+   * 

+   * @param String

+   *          entity name

+   * @return List<HitBase> of text fragment structures which contain approved

+   *         (in terms of relevance) mined sentences, as well as original search

+   *         results objects such as doc titles, abstracts, and urls.

+   */

 

-		opinionSentencesToAdd = removeDuplicatesFromResultantHits(opinionSentencesToAdd);

-		return opinionSentencesToAdd;

-	}

-	

-	/**

-	   * Main content generation function which takes a seed as a person, rock group, or other entity name and produce a list of text fragments by web mining for

-	   *  <br>

-	   * @param String entity name

-	   * @return List<HitBase> of text fragment structures which contain approved (in terms of relevance) mined sentences, as well as original search results objects

-	   * such as doc titles, abstracts, and urls.

-	   */

+  public List<HitBase> generateContentAbout(String sentence) throws Exception {

+    List<HitBase> opinionSentencesToAdd = new ArrayList<HitBase>();

+    System.out.println(" \n=== Entity to write about = " + sentence);

+    List<String> nounPhraseQueries = new ArrayList<String>();

 

-	public List<HitBase> generateContentAbout(String sentence) throws Exception

-	{

-		List<HitBase> opinionSentencesToAdd = new ArrayList<HitBase>();

-		System.out.println(" \n=== Entity to write about = " + sentence);

-		List<String> nounPhraseQueries = new ArrayList<String>();

+    // nounPhraseQueries.add(sentence + frequentPerformingVerbs);

 

+    BingWebQueryRunner yrunner = new BingWebQueryRunner();

+    for (String verbAddition : StoryDiscourseNavigator.frequentPerformingVerbs) {

+      List<HitBase> searchResult = yrunner.runSearch(sentence + " "

+          + verbAddition);

+      if (searchResult != null) {

+        for (HitBase item : searchResult) { // got some text from .html

+          if (item.getAbstractText() != null

+              && !(item.getUrl().indexOf(".pdf") > 0)) { // exclude pdf

+            opinionSentencesToAdd

+                .add(augmentWithMinedSentencesAndVerifyRelevance(item,

+                    sentence, null));

+          }

+        }

+      }

+    }

 

-		//nounPhraseQueries.add(sentence + frequentPerformingVerbs);

+    opinionSentencesToAdd = removeDuplicatesFromResultantHits(opinionSentencesToAdd);

+    return opinionSentencesToAdd;

+  }

 

-		BingWebQueryRunner yrunner = new BingWebQueryRunner();

-		for (String verbAddition : StoryDiscourseNavigator.frequentPerformingVerbs)

-		{

-			List<HitBase> searchResult = yrunner.runSearch(sentence + " " + verbAddition);

-			if (searchResult != null)

-			{

-				for (HitBase item : searchResult)

-				{ // got some text from .html

-					if (item.getAbstractText() != null && !(item.getUrl().indexOf(".pdf") > 0))

-					{ // exclude pdf

-						opinionSentencesToAdd.add(augmentWithMinedSentencesAndVerifyRelevance(item, sentence, null));

-					}

-				}

-			}

-		}

+  /**

+   * Takes a sentence and extracts noun phrases and entity names to form search

+   * queries for finding relevant sentences on the web, which are then subject

+   * to relevance assessment by Similarity. Search queries should not be too

+   * general (irrelevant search results) or too specific (too few search

+   * results)

+   * 

+   * @param String

+   *          input sentence to form queries

+   * @return List<String> of search expressions

+   */

+  public static List<String> buildSearchEngineQueryFromSentence(String sentence) {

+    ParseTreeChunk matcher = new ParseTreeChunk();

+    ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor

+        .getInstance();

+    List<List<ParseTreeChunk>> sent1GrpLst = null;

 

-		opinionSentencesToAdd = removeDuplicatesFromResultantHits(opinionSentencesToAdd);

-		return opinionSentencesToAdd;

-	}

+    List<ParseTreeChunk> nPhrases = pos

+        .formGroupedPhrasesFromChunksForSentence(sentence).get(0);

+    List<String> queryArrayStr = new ArrayList<String>();

+    for (ParseTreeChunk ch : nPhrases) {

+      String query = "";

+      int size = ch.getLemmas().size();

 

-	/**

-	   * Takes a sentence and extracts noun phrases and entity names to from search queries for finding relevant sentences on the web, which are 

-	   * then subject to relevance assessment by Similarity. Search queries should not be too general (irrelevant search results) or too specific (too few 

-	   * search results)

-	   * @param String input sentence to form queries

-	   * @return List<String> of search expressions 

-	   */

-	public static List<String> buildSearchEngineQueryFromSentence(String sentence)

-	{

-		ParseTreeChunk matcher = new ParseTreeChunk();

-		ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor.getInstance();

-		List<List<ParseTreeChunk>> sent1GrpLst = null;

+      for (int i = 0; i < size; i++) {

+        if (ch.getPOSs().get(i).startsWith("N")

+            || ch.getPOSs().get(i).startsWith("J")) {

+          query += ch.getLemmas().get(i) + " ";

+        }

+      }

+      query = query.trim();

+      int len = query.split(" ").length;

+      if (len < 2 || len > 5)

+        continue;

+      if (len < 4) { // every word should start with capital

+        String[] qs = query.split(" ");

+        boolean bAccept = true;

+        for (String w : qs) {

+          if (w.toLowerCase().equals(w)) // idf only two words then

+            // has to be person name,

+            // title or geo location

+            bAccept = false;

+        }

+        if (!bAccept)

+          continue;

+      }

 

-		List<ParseTreeChunk> nPhrases = pos.formGroupedPhrasesFromChunksForSentence(sentence).get(0);

-		List<String> queryArrayStr = new ArrayList<String>();

-		for (ParseTreeChunk ch : nPhrases)

-		{

-			String query = "";

-			int size = ch.getLemmas().size();

+      query = query.trim().replace(" ", " +");

+      query = " +" + query;

 

-			for (int i = 0; i < size; i++)

-			{

-				if (ch.getPOSs().get(i).startsWith("N") || ch.getPOSs().get(i).startsWith("J"))

-				{

-					query += ch.getLemmas().get(i) + " ";

-				}

-			}

-			query = query.trim();

-			int len = query.split(" ").length;

-			if (len < 2 || len > 5)

-				continue;

-			if (len < 4)

-			{ // every word should start with capital

-				String[] qs = query.split(" ");

-				boolean bAccept = true;

-				for (String w : qs)

-				{

-					if (w.toLowerCase().equals(w)) // idf only two words then

-						// has to be person name,

-						// title or geo location

-						bAccept = false;

-				}

-				if (!bAccept)

-					continue;

-			}

+      queryArrayStr.add(query);

 

-			query = query.trim().replace(" ", " +");

-			query = " +" + query;

+    }

+    if (queryArrayStr.size() < 1) { // release constraints on NP down to 2

+                                    // keywords

+      for (ParseTreeChunk ch : nPhrases) {

+        String query = "";

+        int size = ch.getLemmas().size();

 

-			queryArrayStr.add(query);

+        for (int i = 0; i < size; i++) {

+          if (ch.getPOSs().get(i).startsWith("N")

+              || ch.getPOSs().get(i).startsWith("J")) {

+            query += ch.getLemmas().get(i) + " ";

+          }

+        }

+        query = query.trim();

+        int len = query.split(" ").length;

+        if (len < 2)

+          continue;

 

-		}

-		if (queryArrayStr.size() < 1)

-		{ // release constraints on NP down to 2

-			// keywords

-			for (ParseTreeChunk ch : nPhrases)

-			{

-				String query = "";

-				int size = ch.getLemmas().size();

+        query = query.trim().replace(" ", " +");

+        query = " +" + query;

 

-				for (int i = 0; i < size; i++)

-				{

-					if (ch.getPOSs().get(i).startsWith("N") || ch.getPOSs().get(i).startsWith("J"))

-					{

-						query += ch.getLemmas().get(i) + " ";

-					}

-				}

-				query = query.trim();

-				int len = query.split(" ").length;

-				if (len < 2)

-					continue;

+        queryArrayStr.add(query);

 

-				query = query.trim().replace(" ", " +");

-				query = " +" + query;

+      }

+    }

 

-				queryArrayStr.add(query);

+    queryArrayStr = removeDuplicatesFromQueries(queryArrayStr);

+    queryArrayStr.add(sentence);

 

-			}

-		}

+    return queryArrayStr;

 

-		queryArrayStr = removeDuplicatesFromQueries(queryArrayStr);

-		queryArrayStr.add(sentence);

+  }

 

-		return queryArrayStr;

+  /**

+   * remove dupes from queries to ease cleaning dupes and repetitive search

+   * afterwards

+   * 

+   * @param List

+   *          <String> of sentences (search queries, or search results

+   *          abstracts, or titles)

+   * @return List<String> of sentences where dupes are removed

+   */

+  public static List<String> removeDuplicatesFromQueries(List<String> hits) {

+    StringDistanceMeasurer meas = new StringDistanceMeasurer();

+    double dupeThresh = 0.8; // if more similar, then considered dupes was

+    // 0.7

+    List<Integer> idsToRemove = new ArrayList<Integer>();

+    List<String> hitsDedup = new ArrayList<String>();

+    try {

+      for (int i = 0; i < hits.size(); i++)

+        for (int j = i + 1; j < hits.size(); j++) {

+          String title1 = hits.get(i);

+          String title2 = hits.get(j);

+          if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))

+            continue;

+          if (meas.measureStringDistance(title1, title2) > dupeThresh) {

+            idsToRemove.add(j); // dupes found, later list member to

+            // be deleted

 

-	}

+          }

+        }

 

-	/** remove dupes from queries to easy cleaning dupes and repetitive search

-	 * afterwards

-	 * 

-	 * @param List<String> of sentences (search queries, or search results abstracts, or titles

-	 * @return List<String> of sentences where dupes are removed

-	 */

-	public static List<String> removeDuplicatesFromQueries(List<String> hits)

-	{

-		StringDistanceMeasurer meas = new StringDistanceMeasurer();

-		double dupeThresh = 0.8; // if more similar, then considered dupes was

-		// 0.7

-		List<Integer> idsToRemove = new ArrayList<Integer>();

-		List<String> hitsDedup = new ArrayList<String>();

-		try

-		{

-			for (int i = 0; i < hits.size(); i++)

-				for (int j = i + 1; j < hits.size(); j++)

-				{

-					String title1 = hits.get(i);

-					String title2 = hits.get(j);

-					if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))

-						continue;

-					if (meas.measureStringDistance(title1, title2) > dupeThresh)

-					{

-						idsToRemove.add(j); // dupes found, later list member to

-						// be deleted

+      for (int i = 0; i < hits.size(); i++)

+        if (!idsToRemove.contains(i))

+          hitsDedup.add(hits.get(i));

 

-					}

-				}

+      if (hitsDedup.size() < hits.size()) {

+        LOG.info("Removed duplicates from formed query, including "

+            + hits.get(idsToRemove.get(0)));

+      }

 

-			for (int i = 0; i < hits.size(); i++)

-				if (!idsToRemove.contains(i))

-					hitsDedup.add(hits.get(i));

+    } catch (Exception e) {

+      LOG.severe("Problem removing duplicates from query list");

+    }

 

-			if (hitsDedup.size() < hits.size())

-			{

-				LOG.info("Removed duplicates from formed query, including " + hits.get(idsToRemove.get(0)));

-			}

+    return hitsDedup;

 

-		}

-		catch (Exception e)

-		{

-			LOG.severe("Problem removing duplicates from query list");

-		}

+  }

 

-		return hitsDedup;

+  /**

+   * remove dupes from search results

+   * 

+   * @param List

+   *          <HitBase> of search results objects

+   * @return List<String> of search results objects where dupes are removed

+   */

+  public static List<HitBase> removeDuplicatesFromResultantHits(

+      List<HitBase> hits) {

+    StringDistanceMeasurer meas = new StringDistanceMeasurer();

+    double dupeThresh = // 0.8; // if more similar, then considered dupes was

+    0.7;

+    List<Integer> idsToRemove = new ArrayList<Integer>();

+    List<HitBase> hitsDedup = new ArrayList<HitBase>();

+    try {

+      for (int i = 0; i < hits.size(); i++)

+        for (int j = i + 1; j < hits.size(); j++) {

+          HitBase hit2 = hits.get(j);

+          List<Fragment> fragmList1 = hits.get(i).getFragments();

+          List<Fragment> fragmList2 = hits.get(j).getFragments();

+          List<Fragment> fragmList2Results = new ArrayList<Fragment>(fragmList2);

+          for (Fragment f1 : fragmList1)

+            for (Fragment f2 : fragmList2) {

+              String sf1 = f1.getResultText();

+              String sf2 = f2.getResultText();

+              if (StringUtils.isEmpty(sf1) || StringUtils.isEmpty(sf1))

+                continue;

+              if (meas.measureStringDistance(sf1, sf2) > dupeThresh) {

+                fragmList2Results.remove(f2);

+                LOG.info("Removed duplicates from formed fragments list: "

+                    + sf2);

+              }

+            }

 

-	}

+          hit2.setFragments(fragmList2Results);

+          hits.set(j, hit2);

+        }

+    } catch (Exception e) {

+      LOG.severe("Problem removing duplicates from list of fragment");

+    }

+    return hits;

+  }

 

-	/** remove dupes from search results

-	 * 

-	 * @param List<HitBase> of search results objects 

-	 * @return List<String> of search results objects  where dupes are removed

-	 */

-	public static List<HitBase> removeDuplicatesFromResultantHits(List<HitBase> hits)

-	{

-		StringDistanceMeasurer meas = new StringDistanceMeasurer();

-		double dupeThresh = //0.8; // if more similar, then considered dupes was

-		 0.7;

-		List<Integer> idsToRemove = new ArrayList<Integer>();

-		List<HitBase> hitsDedup = new ArrayList<HitBase>();

-		try

-		{

-			for (int i = 0; i < hits.size(); i++)

-				for (int j = i + 1; j < hits.size(); j++)

-				{

-					HitBase hit2 = hits.get(j);

-					List<Fragment> fragmList1 =  hits.get(i).getFragments();

-					List<Fragment> fragmList2 =  hits.get(j).getFragments();

-					List<Fragment> fragmList2Results = new ArrayList<Fragment>(fragmList2);

-					for(Fragment f1: fragmList1)

-						for(Fragment f2: fragmList2){

-							String sf1 = f1.getResultText();

-							String sf2 = f2.getResultText();

-							if (StringUtils.isEmpty(sf1) || StringUtils.isEmpty(sf1))

-								continue;

-							if (meas.measureStringDistance(sf1, sf2) > dupeThresh)

-							{

-								fragmList2Results.remove(f2);	

-								LOG.info("Removed duplicates from formed fragments list: " + sf2);

-							}

-						}

+  /**

+   * Takes single search result for an entity which is the subject of the essay

+   * to be written and forms essay sentences from the title, abstract, and

+   * possibly original page

+   * 

+   * @param HitBase

+   *          item : search result

+   * @param originalSentence

+   *          : seed for the essay to be written

+   * @param sentsAll

+   *          : list<String> of other sentences in the seed if it is

+   *          multi-sentence

+   * @return search result

+   */

 

-					hit2.setFragments(fragmList2Results);

-					hits.set(j, hit2 );

-				}

-		}

-		catch (Exception e)

-		{

-			LOG.severe("Problem removing duplicates from list of fragment");

-		}

-		return hits;

-	}

-	/**

-	 * Takes single search result for an entity which is the subject of the essay to be written and forms essey sentences 

-	 * from the title, abstract, and possibly original page

-	 * @param HitBase item : search result

-	 * @param originalSentence : seed for the essay to be written 

-	 * @param sentsAll: list<String> of other sentences in the seed if it is multi-sentence

-	 * @return search result 

-	 */

-	

-	public HitBase augmentWithMinedSentencesAndVerifyRelevance(HitBase item, String originalSentence,

-			List<String> sentsAll)

-	{

-		if (sentsAll==null)

-			sentsAll = new ArrayList<String>();

-		// put orig sentence in structure

-		List<String> origs = new ArrayList<String>();

-		origs.add(originalSentence);

-		item.setOriginalSentences(origs);

-		String title = item.getTitle().replace("<b>", " ").replace("</b>", " ").replace("  ", " ").replace("  ", " ");

-		// generation results for this sentence

-		List<Fragment> result = new ArrayList<Fragment>();

-		// form plain text from snippet

-		String snapshot = item.getAbstractText().replace("<b>", " ").replace("</b>", " ").replace("  ", " ")

-		.replace("  ", " ");

-

-		ParserChunker2MatcherProcessor sm = ParserChunker2MatcherProcessor.getInstance();

-		// fix a template expression which can be substituted by original if

-		// relevant

-		String snapshotMarked = snapshot.replace("...", " _should_find_orig_ . _should_find_orig_");

-		String[] fragments = sm.splitSentences(snapshotMarked);

-		List<String> allFragms = new ArrayList<String>();

-		allFragms.addAll(Arrays.asList(fragments));

-

-		String[] sents = null; String downloadedPage;

-		try

-		{

-			if (snapshotMarked.length() != snapshot.length())

-			{

-				downloadedPage = pFetcher.fetchPage(item.getUrl());

-				if (downloadedPage != null && downloadedPage.length() > 100)

-				{

-					item.setPageContent(downloadedPage);

-					String pageContent = Utils.fullStripHTML(item.getPageContent());

-					pageContent = GeneratedSentenceProcessor.normalizeForSentenceSplitting(pageContent);

-					pageContent = pageContent.trim().replaceAll("  [A-Z]", ". $0")//.replace("  ", ". ")

-					.replace("..", ".").replace(". . .", " ")

-					.trim(); // sometimes html breaks are converted into ' ' (two spaces), so we need to put '.'

-					sents = sm.splitSentences(snapshotMarked);;

-					sents = cleanListOfSents(sents);

-				}

-			}

-		}

-		catch (Exception e)

-		{

-			// TODO Auto-generated catch block

-			// e.printStackTrace();

-			System.err.println("Problem downloading  the page and splitting into sentences");

-			return item;

-		}

+  public HitBase augmentWithMinedSentencesAndVerifyRelevance(HitBase item,

+      String originalSentence, List<String> sentsAll) {

+    if (sentsAll == null)

+      sentsAll = new ArrayList<String>();

+    // put orig sentence in structure

+    List<String> origs = new ArrayList<String>();

+    origs.add(originalSentence);

+    item.setOriginalSentences(origs);

+    String title = item.getTitle().replace("<b>", " ").replace("</b>", " ")

+        .replace("  ", " ").replace("  ", " ");

+    // generation results for this sentence

+    List<Fragment> result = new ArrayList<Fragment>();

+    // form plain text from snippet

+    String snapshot = item.getAbstractText().replace("<b>", " ")

+        .replace("</b>", " ").replace("  ", " ").replace("  ", " ");

 

-		for (String fragment : allFragms)

-		{

-			String followSent = null;

-			if (fragment.length() < 50)

-				continue;

-			String pageSentence = "";

-			// try to find original sentence from webpage

-			if (fragment.indexOf("_should_find_orig_") > -1 && sents != null && sents.length > 0)

-				try

-			{

-					String[] mainAndFollowSent = getFullOriginalSentenceFromWebpageBySnippetFragment(

-							fragment.replace("_should_find_orig_", ""), sents);

-					pageSentence = mainAndFollowSent[0];

-					followSent = mainAndFollowSent[1];

+    ParserChunker2MatcherProcessor sm = ParserChunker2MatcherProcessor

+        .getInstance();

+    // fix a template expression which can be substituted by original if

+    // relevant

+    String snapshotMarked = snapshot.replace("...",

+        " _should_find_orig_ . _should_find_orig_");

+    String[] fragments = sm.splitSentences(snapshotMarked);

+    List<String> allFragms = new ArrayList<String>();

+    allFragms.addAll(Arrays.asList(fragments));

 

-			}

-			catch (Exception e)

-			{

+    String[] sents = null;

+    String downloadedPage;

+    try {

+      if (snapshotMarked.length() != snapshot.length()) {

+        downloadedPage = pFetcher.fetchPage(item.getUrl());

+        if (downloadedPage != null && downloadedPage.length() > 100) {

+          item.setPageContent(downloadedPage);

+          String pageContent = Utils.fullStripHTML(item.getPageContent());

+          pageContent = GeneratedSentenceProcessor

+              .normalizeForSentenceSplitting(pageContent);

+          pageContent = pageContent.trim().replaceAll("  [A-Z]", ". $0")// .replace("  ",

+                                                                        // ". ")

+              .replace("..", ".").replace(". . .", " ").trim(); // sometimes

+                                                                // html breaks

+                                                                // are converted

+                                                                // into ' ' (two

+                                                                // spaces), so

+                                                                // we need to

+                                                                // put '.'

+          sents = sm.splitSentences(snapshotMarked);

+          ;

+          sents = cleanListOfSents(sents);

+        }

+      }

+    } catch (Exception e) {

+      // TODO Auto-generated catch block

+      // e.printStackTrace();

+      System.err

+          .println("Problem downloading  the page and splitting into sentences");

+      return item;

+    }

 

-				// TODO Auto-generated catch block

-				e.printStackTrace();

-			}

-			else

-				// or get original snippet

-				pageSentence = fragment;

-			if (pageSentence != null)

-				pageSentence.replace("_should_find_orig_", "");

+    for (String fragment : allFragms) {

+      String followSent = null;

+      if (fragment.length() < 50)

+        continue;

+      String pageSentence = "";

+      // try to find original sentence from webpage

+      if (fragment.indexOf("_should_find_orig_") > -1 && sents != null

+          && sents.length > 0)

+        try {

+          String[] mainAndFollowSent = getFullOriginalSentenceFromWebpageBySnippetFragment(

+              fragment.replace("_should_find_orig_", ""), sents);

+          pageSentence = mainAndFollowSent[0];

+          followSent = mainAndFollowSent[1];

 

-			// resultant sentence SHOULD NOT be longer than twice the size of

-			// snippet fragment

-			if (pageSentence != null && (float) pageSentence.length() / (float) fragment.length() < 4.0)

-			{ // was 2.0, but since snippet sentences are rather short now...

-				try

-				{ // get score from syntactic match between sentence in

-					// original text and mined sentence

-					double measScore = 0.0, syntScore = 0.0, mentalScore = 0.0;

+        } catch (Exception e) {

 

-					SentencePairMatchResult matchRes = sm.assessRelevance(pageSentence + " " + title, originalSentence);

-					List<List<ParseTreeChunk>> match = matchRes.getMatchResult();

-					if (!matchRes.isVerbExists() || matchRes.isImperativeVerb())

-					{

-						System.out.println("Rejected Sentence : No verb OR Yes imperative verb :" + pageSentence);

-						continue;

-					}

+          // TODO Auto-generated catch block

+          e.printStackTrace();

+        }

+      else

+        // or get original snippet

+        pageSentence = fragment;

+      if (pageSentence != null)

+        pageSentence.replace("_should_find_orig_", "");

 

-					syntScore =parseTreeChunkListScorer.getParseTreeChunkListScore(match);

-					System.out.println(parseTreeChunk.listToString(match) + " " + syntScore

-							+ "\n pre-processed sent = '" + pageSentence);

+      // resultant sentence SHOULD NOT be longer than twice the size of

+      // snippet fragment

+      if (pageSentence != null

+          && (float) pageSentence.length() / (float) fragment.length() < 4.0) { // was

+                                                                                // 2.0,

+                                                                                // but

+                                                                                // since

+                                                                                // snippet

+                                                                                // sentences

+                                                                                // are

+                                                                                // rather

+                                                                                // short

+                                                                                // now...

+        try { // get score from syntactic match between sentence in

+              // original text and mined sentence

+          double measScore = 0.0, syntScore = 0.0, mentalScore = 0.0;

 

-					if (syntScore < 1.5)

-					{ // trying other sents

-						for (String currSent : sentsAll)

-						{

-							if (currSent.startsWith(originalSentence))

-								continue;

-							match = sm.assessRelevance(currSent, pageSentence).getMatchResult();

-							double syntScoreCurr = parseTreeChunkListScorer.getParseTreeChunkListScore(match);

-							if (syntScoreCurr > syntScore)

-							{

-								syntScore = syntScoreCurr;

-							}

-						}

-						if (syntScore > 1.5)

-						{

-							System.out.println("Got match with other sent: " + parseTreeChunk.listToString(match) + " "

-									+ syntScore);

-						}

-					}

+          SentencePairMatchResult matchRes = sm.assessRelevance(pageSentence

+              + " " + title, originalSentence);

+          List<List<ParseTreeChunk>> match = matchRes.getMatchResult();

+          if (!matchRes.isVerbExists() || matchRes.isImperativeVerb()) {

+            System.out

+                .println("Rejected Sentence : No verb OR Yes imperative verb :"

+                    + pageSentence);

+            continue;

+          }

 

-					measScore = STRING_DISTANCE_MEASURER.measureStringDistance(originalSentence, pageSentence);

+          syntScore = parseTreeChunkListScorer

+              .getParseTreeChunkListScore(match);

+          System.out.println(parseTreeChunk.listToString(match) + " "

+              + syntScore + "\n pre-processed sent = '" + pageSentence);

 

-					// now possibly increase score by finding mental verbs

-					// indicating opinions

-					for (String s : MENTAL_VERBS)

-					{

-						if (pageSentence.indexOf(s) > -1)

-						{

-							mentalScore += 0.3;

-							break;

-						}

-					}

+          if (syntScore < 1.5) { // trying other sents

+            for (String currSent : sentsAll) {

+              if (currSent.startsWith(originalSentence))

+                continue;

+              match = sm.assessRelevance(currSent, pageSentence)

+                  .getMatchResult();

+              double syntScoreCurr = parseTreeChunkListScorer

+                  .getParseTreeChunkListScore(match);

+              if (syntScoreCurr > syntScore) {

+                syntScore = syntScoreCurr;

+              }

+            }

+            if (syntScore > 1.5) {

+              System.out.println("Got match with other sent: "

+                  + parseTreeChunk.listToString(match) + " " + syntScore);

+            }

+          }

 

-					if ((syntScore > 1.5 || measScore > 0.5 || mentalScore > 0.5) && measScore < 0.8

-							&& pageSentence.length() > 40) // >70

-					{

-						String pageSentenceProc = GeneratedSentenceProcessor.acceptableMinedSentence(pageSentence);

-						if (pageSentenceProc != null)

-						{

-							pageSentenceProc = GeneratedSentenceProcessor.processSentence(pageSentenceProc);

-							if (followSent != null)

-							{

-								pageSentenceProc += " " + GeneratedSentenceProcessor.processSentence(followSent);

-							}

+          measScore = STRING_DISTANCE_MEASURER.measureStringDistance(

+              originalSentence, pageSentence);

 

-							pageSentenceProc = Utils.convertToASCII(pageSentenceProc);

-							Fragment f = new Fragment(pageSentenceProc, syntScore + measScore + mentalScore

-									+ (double) pageSentenceProc.length() / (double) 50);

-							f.setSourceURL(item.getUrl());

-							f.fragment = fragment;

-							result.add(f);

-							System.out.println("Accepted sentence: " + pageSentenceProc + "| with title= " + title);

-							System.out.println("For fragment = " + fragment);

-						}

-						else

-							System.out.println("Rejected sentence due to wrong area at webpage: " + pageSentence);

-					}

-					else

-						System.out.println("Rejected sentence due to low score: " + pageSentence);

-					// }

-				}

-				catch (Throwable t)

-				{

-					t.printStackTrace();

-				}

-			}

-		}

-		item.setFragments(result);

-		return item;

-	}

+          // now possibly increase score by finding mental verbs

+          // indicating opinions

+          for (String s : MENTAL_VERBS) {

+            if (pageSentence.indexOf(s) > -1) {

+              mentalScore += 0.3;

+              break;

+            }

+          }

 

-	public static String[] cleanListOfSents(String[] sents)

-	{

-		List<String> sentsClean = new ArrayList<String>();

-		for (String s : sents)

-		{

-			if (s == null || s.trim().length() < 30 || s.length() < 20)

-				continue;

-			sentsClean.add(s);

-		}

-		return (String[]) sentsClean.toArray(new String[0]);

-	}

+          if ((syntScore > 1.5 || measScore > 0.5 || mentalScore > 0.5)

+              && measScore < 0.8 && pageSentence.length() > 40) // >70

+          {

+            String pageSentenceProc = GeneratedSentenceProcessor

+                .acceptableMinedSentence(pageSentence);

+            if (pageSentenceProc != null) {

+              pageSentenceProc = GeneratedSentenceProcessor

+                  .processSentence(pageSentenceProc);

+              if (followSent != null) {

+                pageSentenceProc += " "

+                    + GeneratedSentenceProcessor.processSentence(followSent);

+              }

 

-	// given a fragment from snippet, finds an original sentence at a webpage by optimizing alignmemt score

-	public static String[] getFullOriginalSentenceFromWebpageBySnippetFragment(String fragment, String[] sents)

-	{

-		if (fragment.trim().length() < 15)

-			return null;

+              pageSentenceProc = Utils.convertToASCII(pageSentenceProc);

+              Fragment f = new Fragment(pageSentenceProc, syntScore + measScore

+                  + mentalScore + (double) pageSentenceProc.length()

+                  / (double) 50);

+              f.setSourceURL(item.getUrl());

+              f.fragment = fragment;

+              result.add(f);

+              System.out.println("Accepted sentence: " + pageSentenceProc

+                  + "| with title= " + title);

+              System.out.println("For fragment = " + fragment);

+            } else

+              System.out

+                  .println("Rejected sentence due to wrong area at webpage: "

+                      + pageSentence);

+          } else

+            System.out.println("Rejected sentence due to low score: "

+                + pageSentence);

+          // }

+        } catch (Throwable t) {

+          t.printStackTrace();

+        }

+      }

+    }

+    item.setFragments(result);

+    return item;

+  }

 

-		StringDistanceMeasurer meas = new StringDistanceMeasurer();

-		Double dist = 0.0;

-		String result = null, followSent = null;

-		for (int i = 0; i < sents.length; i++)

-		{

-			String s = sents[i];

-			if (s == null || s.length() < 30)

-				continue;

-			Double distCurr = meas.measureStringDistance(s, fragment);

-			if (distCurr > dist && distCurr > 0.4)

-			{

-				result = s;

-				dist = distCurr;

-				if (i < sents.length - 1 && sents[i + 1].length() > 60)

-				{

-					followSent = sents[i + 1];

-				}

+  public static String[] cleanListOfSents(String[] sents) {

+    List<String> sentsClean = new ArrayList<String>();

+    for (String s : sents) {

+      if (s == null || s.trim().length() < 30 || s.length() < 20)

+        continue;

+      sentsClean.add(s);

+    }

+    return (String[]) sentsClean.toArray(new String[0]);

+  }

 

-			}

-		}

-		return new String[] { result, followSent };

-	}

+  // given a fragment from snippet, finds an original sentence at a webpage by

+  // optimizing alignment score

+  public static String[] getFullOriginalSentenceFromWebpageBySnippetFragment(

+      String fragment, String[] sents) {

+    if (fragment.trim().length() < 15)

+      return null;

 

-	// given a fragment from snippet, finds an original sentence at a webpage by optimizing alignmemt score

-	public static String[] getBestFullOriginalSentenceFromWebpageBySnippetFragment(String fragment, String[] sents)

-	{

-		if (fragment.trim().length() < 15)

-			return null;

-		int bestSentIndex = -1;

-		StringDistanceMeasurer meas = new StringDistanceMeasurer();

-		Double distBest = 10.0; // + sup

-		String result = null, followSent = null;

-		for (int i = 0; i < sents.length; i++)

-		{

-			String s = sents[i];

-			if (s == null || s.length() < 30)

-				continue;

-			Double distCurr = meas.measureStringDistance(s, fragment);

-			if (distCurr>distBest){

-				distBest = distCurr;

-				bestSentIndex = i;			

-			}

+    StringDistanceMeasurer meas = new StringDistanceMeasurer();

+    Double dist = 0.0;

+    String result = null, followSent = null;

+    for (int i = 0; i < sents.length; i++) {

+      String s = sents[i];

+      if (s == null || s.length() < 30)

+        continue;

+      Double distCurr = meas.measureStringDistance(s, fragment);

+      if (distCurr > dist && distCurr > 0.4) {

+        result = s;

+        dist = distCurr;

+        if (i < sents.length - 1 && sents[i + 1].length() > 60) {

+          followSent = sents[i + 1];

+        }

 

-		}

-		if (distBest > 0.4)

-		{

-			result = sents[bestSentIndex];

+      }

+    }

+    return new String[] { result, followSent };

+  }

 

-			if (bestSentIndex < sents.length - 1 && sents[bestSentIndex + 1].length() > 60)

-			{

-				followSent = sents[bestSentIndex + 1];

-			}

+  // given a fragment from snippet, finds an original sentence at a webpage by

+  // optimizing alignment score

+  public static String[] getBestFullOriginalSentenceFromWebpageBySnippetFragment(

+      String fragment, String[] sents) {

+    if (fragment.trim().length() < 15)

+      return null;

+    int bestSentIndex = -1;

+    StringDistanceMeasurer meas = new StringDistanceMeasurer();

+    Double distBest = 10.0; // + sup

+    String result = null, followSent = null;

+    for (int i = 0; i < sents.length; i++) {

+      String s = sents[i];

+      if (s == null || s.length() < 30)

+        continue;

+      Double distCurr = meas.measureStringDistance(s, fragment);

+      if (distCurr > distBest) {

+        distBest = distCurr;

+        bestSentIndex = i;

+      }

 

-		}

+    }

+    if (distBest > 0.4) {

+      result = sents[bestSentIndex];

 

-		return new String[] { result, followSent };

-	}

+      if (bestSentIndex < sents.length - 1

+          && sents[bestSentIndex + 1].length() > 60) {

+        followSent = sents[bestSentIndex + 1];

+      }

 

-	public static void main(String[] args)

-	{

-		RelatedSentenceFinder f = new RelatedSentenceFinder();

+    }

 

-		List<HitBase> hits = null; 

-		try

-		{

-			// uncomment the sentence you would like to serve as a seed sentence for content generation for an event description

+    return new String[] { result, followSent };

+  }

 

-			// uncomment the sentence you would like to serve as a seed sentence for content generation for an event description

-			hits = f.generateContentAbout(

-					"Albert Einstein"

-					//"Britney Spears - The Femme Fatale Tour"

-					// "Rush Time Machine",

-					// "Blue Man Group" ,

-					// "Belly Dance With Zaharah",

-					// "Hollander Musicology Lecture: Danielle Fosler-Lussier, Guest Lecturer",

-					// "Jazz Master and arguably the most famous jazz musician alive, trumpeter Wynton Marsalis",

-			);

-			System.out.println(HitBase.toString(hits));

-			System.out.println(HitBase.toResultantString(hits));

-			//WordFileGenerator.createWordDoc("Essey about Albert Einstein", hits.get(0).getTitle(), hits);

+  public static void main(String[] args) {

+    RelatedSentenceFinder f = new RelatedSentenceFinder();

 

+    List<HitBase> hits = null;

+    try {

+      // uncomment the sentence you would like to serve as a seed sentence for

+      // content generation for an event description

 

+      // uncomment the sentence you would like to serve as a seed sentence for

+      // content generation for an event description

+      hits = f.generateContentAbout("Albert Einstein"

+      // "Britney Spears - The Femme Fatale Tour"

+      // "Rush Time Machine",

+      // "Blue Man Group" ,

+      // "Belly Dance With Zaharah",

+      // "Hollander Musicology Lecture: Danielle Fosler-Lussier, Guest Lecturer",

+      // "Jazz Master and arguably the most famous jazz musician alive, trumpeter Wynton Marsalis",

+          );

+      System.out.println(HitBase.toString(hits));

+      System.out.println(HitBase.toResultantString(hits));

+      // WordFileGenerator.createWordDoc("Essey about Albert Einstein",

+      // hits.get(0).getTitle(), hits);

 

-		}

-		catch (Exception e)

-		{

-			e.printStackTrace();

-		}

+    } catch (Exception e) {

+      e.printStackTrace();

+    }

 

-	}

+  }

 

 }
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SearchResultsProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SearchResultsProcessor.java
index d81cc23..9886807 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SearchResultsProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SearchResultsProcessor.java
@@ -26,86 +26,88 @@
 import opennlp.tools.textsimilarity.SentencePairMatchResult;

 import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

 

-

 public class SearchResultsProcessor extends BingWebQueryRunner {

-	private static Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.SearchResultsProcessor");

-	private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

-	ParserChunker2MatcherProcessor sm ;

+  private static Logger LOG = Logger

+      .getLogger("opennlp.tools.similarity.apps.SearchResultsProcessor");

+  private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

+  ParserChunker2MatcherProcessor sm;

 

-	/*

-	 * Takes Bing API search results and calculates the parse tree similarity between the question and each snippet.

-	 * Ranks those snippets with higher similarity score up

-	 */

-	private	BingResponse calculateMatchScoreResortHits(BingResponse resp, String searchQuery){

-		// TODO

-		/*if query is multi-sentence, special handling

-		int indexDot = searchQuery.indexOf("."); 

-		if (indexDot>0 && indexDot<searchQuery.length()-1){

-			MultipleSentenceQueryAnswerer ans = new MultipleSentenceQueryAnswerer();

-			return ans.calculateMatchScoreResortHits(resp, searchQuery);		

-		} */

-		List<HitBase> newHitList =	new ArrayList<HitBase>();

-		sm = ParserChunker2MatcherProcessor.getInstance();

+  /*

+   * Takes Bing API search results and calculates the parse tree similarity

+   * between the question and each snippet. Ranks those snippets with higher

+   * similarity score up

+   */

+  private BingResponse calculateMatchScoreResortHits(BingResponse resp,

+      String searchQuery) {

+    // TODO

+    /*

+     * if query is multi-sentence, special handling int indexDot =

+     * searchQuery.indexOf("."); if (indexDot>0 &&

+     * indexDot<searchQuery.length()-1){ MultipleSentenceQueryAnswerer ans = new

+     * MultipleSentenceQueryAnswerer(); return

+     * ans.calculateMatchScoreResortHits(resp, searchQuery); }

+     */

+    List<HitBase> newHitList = new ArrayList<HitBase>();

+    sm = ParserChunker2MatcherProcessor.getInstance();

 

-		for(HitBase hit: resp.getHits()){

-			String snapshot = hit.getAbstractText().replace("<b>...</b>", ". ").replace("<b>", "").replace("</b>","");

-			snapshot=snapshot.replace("</B>", "").replace("<B>", "").replace("<br>", "").replace("</br>", "").replace("...", ". ").replace("|", " ").replace(">", " ");

-			snapshot+=" . "+hit.getTitle();

-			Double score = 0.0;

-			try {

-				SentencePairMatchResult matchRes = sm.assessRelevance(snapshot, searchQuery);

-				List<List<ParseTreeChunk>> match = matchRes.getMatchResult();

-				score = parseTreeChunkListScorer.getParseTreeChunkListScore(match);

-				LOG.finest(score + " | "+ snapshot );

-			}

-			catch (Exception e){

-				LOG.severe("Problem processing snapshot "+snapshot);

-				e.printStackTrace();

-			}

-			hit.setGenerWithQueryScore(score);

-			newHitList.add(hit);

-		}

-		Collections.sort(newHitList,new HitBaseComparable());

-		resp.setHits(newHitList);

-		LOG.info("\n\n ============= NEW ORDER ================= ");

-		for(HitBase hit: newHitList){

-			LOG.info(hit.toString());

-		}

+    for (HitBase hit : resp.getHits()) {

+      String snapshot = hit.getAbstractText().replace("<b>...</b>", ". ")

+          .replace("<b>", "").replace("</b>", "");

+      snapshot = snapshot.replace("</B>", "").replace("<B>", "")

+          .replace("<br>", "").replace("</br>", "").replace("...", ". ")

+          .replace("|", " ").replace(">", " ");

+      snapshot += " . " + hit.getTitle();

+      Double score = 0.0;

+      try {

+        SentencePairMatchResult matchRes = sm.assessRelevance(snapshot,

+            searchQuery);

+        List<List<ParseTreeChunk>> match = matchRes.getMatchResult();

+        score = parseTreeChunkListScorer.getParseTreeChunkListScore(match);

+        LOG.finest(score + " | " + snapshot);

+      } catch (Exception e) {

+        LOG.severe("Problem processing snapshot " + snapshot);

+        e.printStackTrace();

+      }

+      hit.setGenerWithQueryScore(score);

+      newHitList.add(hit);

+    }

+    Collections.sort(newHitList, new HitBaseComparable());

+    resp.setHits(newHitList);

+    LOG.info("\n\n ============= NEW ORDER ================= ");

+    for (HitBase hit : newHitList) {

+      LOG.info(hit.toString());

+    }

 

+    return resp;

+  }

 

-		return resp; 

-	}

-	

-	public void close(){

-		sm.close();

-	}

+  public void close() {

+    sm.close();

+  }

 

-	public List<HitBase> runSearch(String query) {

-		BingResponse resp = null, // obtained from bing

-		newResp = null; // re-sorted based on similarity

-		try {

-			List<String> resultList = search(query, "", "", 30);

-			resp = populateBingHit(resultList.get(0));

-			// now we apply our own relevance filter

-			newResp=calculateMatchScoreResortHits(resp, query);

+  public List<HitBase> runSearch(String query) {

+    BingResponse resp = null, // obtained from bing

+    newResp = null; // re-sorted based on similarity

+    try {

+      List<String> resultList = search(query, "", "", 30);

+      resp = populateBingHit(resultList.get(0));

+      // now we apply our own relevance filter

+      newResp = calculateMatchScoreResortHits(resp, query);

 

-		} catch (Exception e) {

-			// e.printStackTrace();

-			LOG.info("No search results for query '" + query);

-			e.printStackTrace();

-			return null;

-		}

-		// cast to super class

-		List<HitBase> hits = new ArrayList<HitBase>();

-		for (HitBase h : resp.getHits())

-			hits.add((HitBase) h);

+    } catch (Exception e) {

+      // e.printStackTrace();

+      LOG.info("No search results for query '" + query);

+      e.printStackTrace();

+      return null;

+    }

+    // cast to super class

+    List<HitBase> hits = new ArrayList<HitBase>();

+    for (HitBase h : resp.getHits())

+      hits.add((HitBase) h);

 

-		hits = removeDuplicates(hits, 0.9);

+    hits = removeDuplicates(hits, 0.9);

 

-		return hits;

-	}

-

-

-

+    return hits;

+  }

 

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessor.java
index ad02894..48eb9fe 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessor.java
@@ -27,129 +27,148 @@
 import opennlp.tools.textsimilarity.SentencePairMatchResult;

 import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

 

-

 public class SpeechRecognitionResultsProcessor extends BingWebQueryRunner {

-	private static Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.SpeechRecognitionResultsProcessor");

-	private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

-	ParserChunker2MatcherProcessor sm;

+  private static Logger LOG = Logger

+      .getLogger("opennlp.tools.similarity.apps.SpeechRecognitionResultsProcessor");

+  private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

+  ParserChunker2MatcherProcessor sm;

 

-	/**

-	 * Gets an expression and tries to find it on the web. If search results are syntactically similar to this phrase, then 

-	 * we conclude that this phrase is meaningful (makes sense, someone have said something similar. If search results ate not similar 

-	 * to this phrase, we conclude that the phrase is meaningless (does not make sense, nobody has ever said something like that)

-	 * @param resp BingResponse, search results for a phrase being assesses with respect to meaningfulness

-	 * @param searchQuery the phrase we are assessing

-	 * @return total similarity score for all search results

-	 */

-	private	double calculateTotalMatchScoreForHits(BingResponse resp, String searchQuery){

-		

-		sm = ParserChunker2MatcherProcessor.getInstance();

-		double totalMatchScore = 0;

-		for(HitBase hit: resp.getHits()){

-			String snapshot = hit.getAbstractText().replace("<b>...</b>", ". ").replace("<b>", "").replace("</b>","");

-			snapshot=snapshot.replace("</B>", "").replace("<B>", "").replace("<br>", "").replace("</br>", "").replace("...", ". ").replace("|", " ").replace(">", " ");

-			snapshot+=" . "+hit.getTitle();

-			Double score = 0.0;

-			try {

-				SentencePairMatchResult matchRes = sm.assessRelevance(snapshot, searchQuery);

-				List<List<ParseTreeChunk>> match = matchRes.getMatchResult();

-				score = parseTreeChunkListScorer.getParseTreeChunkListScore(match);

-				if (score > 1.5) {

-					LOG.info(score + " | "+ match );

-				}

-			}

-			catch (Exception e){

-				LOG.severe("Problem processing snapshot "+snapshot);

-				e.printStackTrace();

-			}

-			 totalMatchScore+=score;

-			

-		}

-		

-		return  totalMatchScore; 

-	}

-	

-	public void close(){

-		sm.close();

-	}

-	

-	/**

-	 * phrase meaningfulness assessment function which takes a list of phrases which are speech recognition results and 

-	 * re-ranks these phrases according to the meaningfulness score which is determined by 'calculateTotalMatchScoreForHits'

-	 * @param sents list of phrases which are speech recognition results

-	 * @return re-ranked list of phrases which are speech recognition results (from more meaningfulness to less meaningfulness)

-	 */

-	public List<SentenceMeaningfullnessScore> runSearchAndScoreMeaningfulness(List<String> sents) {

-		List<SentenceMeaningfullnessScore> res = new ArrayList<SentenceMeaningfullnessScore>();

-		double bestSentScore = -1; 

-		String bestSent = null;

-		for(String sentence : sents){

-			BingResponse resp = null, // obtained from bing

-			newResp = null; // re-sorted based on similarity

-			try {

-				List<String> resultList = search(sentence, "", "", 10);

-				resp = populateBingHit(resultList.get(0));

-				double scoreForSentence = calculateTotalMatchScoreForHits(resp, sentence);

-				System.out.println("Total meaningfulness score = "+scoreForSentence + " for sentence = "+sentence);

-				if (scoreForSentence > bestSentScore){

-					bestSentScore = scoreForSentence;

-					bestSent = sentence;

-				}

-				res.add(new SentenceMeaningfullnessScore(sentence, scoreForSentence));

-			} catch (Exception e) {

-				// e.printStackTrace();

-				LOG.info("No search results for query '" + sentence);

-				e.printStackTrace();

-				return null;

-			}

-		}

-		return res;

-		

-	}

+  /**

+   * Gets an expression and tries to find it on the web. If search results are

+   * syntactically similar to this phrase, then we conclude that this phrase is

+   * meaningful (makes sense, someone has said something similar). If search

+   * results are not similar to this phrase, we conclude that the phrase is

+   * meaningless (does not make sense, nobody has ever said something like that)

+   * 

+   * @param resp

+   *          BingResponse, search results for a phrase being assessed with

+   *          respect to meaningfulness

+   * @param searchQuery

+   *          the phrase we are assessing

+   * @return total similarity score for all search results

+   */

+  private double calculateTotalMatchScoreForHits(BingResponse resp,

+      String searchQuery) {

 

-	public class SentenceMeaningfullnessScore{

-		String sentence;

-		double score;

-		public SentenceMeaningfullnessScore(String sent, double sc){

-			sentence = sent;

-			score = sc;

-		}

-		public String toString(){

-			return "Total meaningfulness score = "+score + " for sentence = "+sentence +"\n";

-		}

-		public double getScore(){

-			return score;

-		}

-	}

- public static void main(String[] args){

-	 SpeechRecognitionResultsProcessor proc = new  SpeechRecognitionResultsProcessor();

-	 proc.runSearchAndScoreMeaningfulness( Arrays.asList(new String[]{

-			 "meeting with alex at you for not to come over to 8 pm",

-			 "meeting with alex at you for not to come over to eat",

-			 "meeting with alex at il fornaio tomorrow to 8 pm"

-	 }));

-	 

-	 proc.runSearchAndScoreMeaningfulness( Arrays.asList(new String[]{

-			 "remember to buy milk tomorrow for details",

-			 "remember to buy milk tomorrow from trader joes",

-			 "remember to buy milk tomorrow from 3 to jones",

-			 "remember to buy milk tomorrow for for details",

-			 "remember to buy milk tomorrow from third to joes",

-			 "remember to buy milk tomorrow from third to jones",

-			 "remember to buy milk tomorrow from for d jones"

-	 }));

-	 

-	 proc.runSearchAndScoreMeaningfulness( Arrays.asList(new String[]{

-	 "I'm off tomorrow to shop at trader joes",

-	 "number to get milk tomorrow trader joes",

-	 "number 2 finals tomorrow from trader joes",

-	 "number 2 finals tomorrow trader joes",

-	 "number to buy move tomorrow from trader joes",

-	 "number to buy move tomorrow trader joes",

-	 "define move tomorrow from trader joes",

-	 "define move tomorrow trader joes",

-	 }));

- }

+    sm = ParserChunker2MatcherProcessor.getInstance();

+    double totalMatchScore = 0;

+    for (HitBase hit : resp.getHits()) {

+      String snapshot = hit.getAbstractText().replace("<b>...</b>", ". ")

+          .replace("<b>", "").replace("</b>", "");

+      snapshot = snapshot.replace("</B>", "").replace("<B>", "")

+          .replace("<br>", "").replace("</br>", "").replace("...", ". ")

+          .replace("|", " ").replace(">", " ");

+      snapshot += " . " + hit.getTitle();

+      Double score = 0.0;

+      try {

+        SentencePairMatchResult matchRes = sm.assessRelevance(snapshot,

+            searchQuery);

+        List<List<ParseTreeChunk>> match = matchRes.getMatchResult();

+        score = parseTreeChunkListScorer.getParseTreeChunkListScore(match);

+        if (score > 1.5) {

+          LOG.info(score + " | " + match);

+        }

+      } catch (Exception e) {

+        LOG.severe("Problem processing snapshot " + snapshot);

+        e.printStackTrace();

+      }

+      totalMatchScore += score;

 

+    }

+

+    return totalMatchScore;

+  }

+

+  public void close() {

+    sm.close();

+  }

+

+  /**

+   * phrase meaningfulness assessment function which takes a list of phrases

+   * which are speech recognition results and re-ranks these phrases according

+   * to the meaningfulness score which is determined by

+   * 'calculateTotalMatchScoreForHits'

+   * 

+   * @param sents

+   *          list of phrases which are speech recognition results

+   * @return re-ranked list of phrases which are speech recognition results

+   *         (from more meaningfulness to less meaningfulness)

+   */

+  public List<SentenceMeaningfullnessScore> runSearchAndScoreMeaningfulness(

+      List<String> sents) {

+    List<SentenceMeaningfullnessScore> res = new ArrayList<SentenceMeaningfullnessScore>();

+    double bestSentScore = -1;

+    String bestSent = null;

+    for (String sentence : sents) {

+      BingResponse resp = null, // obtained from bing

+      newResp = null; // re-sorted based on similarity

+      try {

+        List<String> resultList = search(sentence, "", "", 10);

+        resp = populateBingHit(resultList.get(0));

+        double scoreForSentence = calculateTotalMatchScoreForHits(resp,

+            sentence);

+        System.out.println("Total meaningfulness score = " + scoreForSentence

+            + " for sentence = " + sentence);

+        if (scoreForSentence > bestSentScore) {

+          bestSentScore = scoreForSentence;

+          bestSent = sentence;

+        }

+        res.add(new SentenceMeaningfullnessScore(sentence, scoreForSentence));

+      } catch (Exception e) {

+        // e.printStackTrace();

+        LOG.info("No search results for query '" + sentence);

+        e.printStackTrace();

+        return null;

+      }

+    }

+    return res;

+

+  }

+

+  public class SentenceMeaningfullnessScore {

+    String sentence;

+    double score;

+

+    public SentenceMeaningfullnessScore(String sent, double sc) {

+      sentence = sent;

+      score = sc;

+    }

+

+    public String toString() {

+      return "Total meaningfulness score = " + score + " for sentence = "

+          + sentence + "\n";

+    }

+

+    public double getScore() {

+      return score;

+    }

+  }

+

+  public static void main(String[] args) {

+    SpeechRecognitionResultsProcessor proc = new SpeechRecognitionResultsProcessor();

+    proc.runSearchAndScoreMeaningfulness(Arrays.asList(new String[] {

+        "meeting with alex at you for not to come over to 8 pm",

+        "meeting with alex at you for not to come over to eat",

+        "meeting with alex at il fornaio tomorrow to 8 pm" }));

+

+    proc.runSearchAndScoreMeaningfulness(Arrays.asList(new String[] {

+        "remember to buy milk tomorrow for details",

+        "remember to buy milk tomorrow from trader joes",

+        "remember to buy milk tomorrow from 3 to jones",

+        "remember to buy milk tomorrow for for details",

+        "remember to buy milk tomorrow from third to joes",

+        "remember to buy milk tomorrow from third to jones",

+        "remember to buy milk tomorrow from for d jones" }));

+

+    proc.runSearchAndScoreMeaningfulness(Arrays.asList(new String[] {

+        "I'm off tomorrow to shop at trader joes",

+        "number to get milk tomorrow trader joes",

+        "number 2 finals tomorrow from trader joes",

+        "number 2 finals tomorrow trader joes",

+        "number to buy move tomorrow from trader joes",

+        "number to buy move tomorrow trader joes",

+        "define move tomorrow from trader joes",

+        "define move tomorrow trader joes", }));

+  }

 

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java
index 32b65d8..1347a2b 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java
@@ -18,32 +18,21 @@
 package opennlp.tools.similarity.apps;

 

 public class StoryDiscourseNavigator {

-	public static final String[] frequentPerformingVerbs = { 

-		" born raised meet learn ", 

-		" graduated enter discover",

-		" facts inventions life ", 

-		"accomplishments childhood timeline",

-		" acquire befriend encounter",

-		" achieve reache describe ",

-		" invent innovate improve ",

-		" impress outstanding award",

-		" curous sceptical pessimistic",

-		" spend enroll assume point",

-		" explain discuss dispute",

-		" learn teach study investigate",

-		" propose suggest indicate",

-		" pioneer explorer discoverer ",

-		" advance promote lead",

-		" direct control simulate ",

-		" guide lead assist ",

-		" inspire first initial",

-		" vision predict foresee",

-		" prediction inspiration achievement",

-		" approve agree confirm",

-		" deny argue disagree",

-		" emotional loud imagination",

-		" release announce celebrate discover", "introduce enjoy follow",

-		" open present show", "meet enjoy follow create", "discover continue produce" 

-		

-		};

+  public static final String[] frequentPerformingVerbs = {

+      " born raised meet learn ", " graduated enter discover",

+      " facts inventions life ", "accomplishments childhood timeline",

+      " acquire befriend encounter", " achieve reache describe ",

+      " invent innovate improve ", " impress outstanding award",

+      " curous sceptical pessimistic", " spend enroll assume point",

+      " explain discuss dispute", " learn teach study investigate",

+      " propose suggest indicate", " pioneer explorer discoverer ",

+      " advance promote lead", " direct control simulate ",

+      " guide lead assist ", " inspire first initial",

+      " vision predict foresee", " prediction inspiration achievement",

+      " approve agree confirm", " deny argue disagree",

+      " emotional loud imagination", " release announce celebrate discover",

+      "introduce enjoy follow", " open present show",

+      "meet enjoy follow create", "discover continue produce"

+

+  };

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/AriAdapter.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/AriAdapter.java
index fd4b67f..17b9eab 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/AriAdapter.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/AriAdapter.java
@@ -24,63 +24,70 @@
 import java.util.List;

 import java.util.Map;

 

-

 /**

- * This class makes it possible to use old prolog-files as the bases for taxonomy-learner.

- * It cleans the prolog files and returns with Strings which can be used for the taxonomy extender process.

- *

+ * This class makes it possible to use old prolog-files as the bases for

+ * taxonomy-learner. It cleans the prolog files and returns with Strings which

+ * can be used for the taxonomy extender process.

+ * 

  */

 public class AriAdapter {

-	//income_taks(state,company(cafeteria,_)):-do(71100).

-	Map<String, List<List<String>>> lemma_AssocWords = new HashMap<String, List<List<String>>>();

-	public void getChainsFromARIfile(String fileName) {

-		

-	    try {

-	        BufferedReader br = new BufferedReader( new InputStreamReader(new FileInputStream(fileName)));

-	        String line;

-	        while((line = br.readLine()) != null) {

-		       	if (line.length()<10 || line.startsWith("%") || line.startsWith(":"))

-		        	continue;

-	           String chain0 = line.replace("_,", "&").replace("_)", "&").replace(":-do(", "&").replace(":-var","&").

-	                    replace("taks","tax").

-	           			replace(":- do(", "&").replace("X=","&").replace(":-","&").replace("[X|_]","&").replace("nonvar","&").replace("var","&").

-	           					replace('(', '&').replace(')', '&').replace(',', '&').replace('.', '&').

-	           					replace("&&&","&").replace("&&","&").replace("&"," ");

-	           String[] chains = chain0.split(" ");

-	           List<String> chainList = new ArrayList<String>(); //Arrays.asList(chains);

-	           for(String word: chains){

-	        	   if (word!=null && word.length()>2 && word.indexOf("0")<0 && word.indexOf("1")<0 && word.indexOf("2")<0 

-	        			   && word.indexOf("3")<0 && word.indexOf("4")<0 && word.indexOf("5")<0 )

-	        		   chainList.add(word);

-	           }

-	           if (chains.length<1 || chainList.size()<1 || chainList.get(0).length()<3)

-	        	   continue;

-	           String entry = chainList.get(0);

-	           if (entry.length()<3)

-	           	  continue;

-	           chainList.remove(entry);

-	           List<List<String>> res =  lemma_AssocWords.get(entry);

-	           if (res==null){

-	        	   List<List<String>> resList = new ArrayList<List<String>>();

-	        	   resList.add(chainList);

-	        	   lemma_AssocWords.put(entry, resList);

-	           } else {

-	        	   res.add(chainList);

-	        	   lemma_AssocWords.put(entry, res);

-	           }

-	        }

-	     }catch (Exception e){

-	          e.printStackTrace();

+  // income_taks(state,company(cafeteria,_)):-do(71100).

+  Map<String, List<List<String>>> lemma_AssocWords = new HashMap<String, List<List<String>>>();

 

-	      }

-	  }

+  public void getChainsFromARIfile(String fileName) {

 

-	  public static void main(String[] args){

-		  

-		  AriAdapter ad = new AriAdapter();

-	      ad.getChainsFromARIfile("src/test/resources/taxonomies/irs_dom.ari");

-	      System.out.println(ad.lemma_AssocWords);

-	      

-	  }

+    try {

+      BufferedReader br = new BufferedReader(new InputStreamReader(

+          new FileInputStream(fileName)));

+      String line;

+      while ((line = br.readLine()) != null) {

+        if (line.length() < 10 || line.startsWith("%") || line.startsWith(":"))

+          continue;

+        String chain0 = line.replace("_,", "&").replace("_)", "&")

+            .replace(":-do(", "&").replace(":-var", "&").replace("taks", "tax")

+            .replace(":- do(", "&").replace("X=", "&").replace(":-", "&")

+            .replace("[X|_]", "&").replace("nonvar", "&").replace("var", "&")

+            .replace('(', '&').replace(')', '&').replace(',', '&')

+            .replace('.', '&').replace("&&&", "&").replace("&&", "&")

+            .replace("&", " ");

+        String[] chains = chain0.split(" ");

+        List<String> chainList = new ArrayList<String>(); // Arrays.asList(chains);

+        for (String word : chains) {

+          if (word != null && word.length() > 2 && word.indexOf("0") < 0

+              && word.indexOf("1") < 0 && word.indexOf("2") < 0

+              && word.indexOf("3") < 0 && word.indexOf("4") < 0

+              && word.indexOf("5") < 0)

+            chainList.add(word);

+        }

+        if (chains.length < 1 || chainList.size() < 1

+            || chainList.get(0).length() < 3)

+          continue;

+        String entry = chainList.get(0);

+        if (entry.length() < 3)

+          continue;

+        chainList.remove(entry);

+        List<List<String>> res = lemma_AssocWords.get(entry);

+        if (res == null) {

+          List<List<String>> resList = new ArrayList<List<String>>();

+          resList.add(chainList);

+          lemma_AssocWords.put(entry, resList);

+        } else {

+          res.add(chainList);

+          lemma_AssocWords.put(entry, res);

+        }

+      }

+    } catch (Exception e) {

+      e.printStackTrace();

+

+    }

+  }

+

+  public static void main(String[] args) {

+

+    AriAdapter ad = new AriAdapter();

+    ad.getChainsFromARIfile("src/test/resources/taxonomies/irs_dom.ari");

+    System.out.println(ad.lemma_AssocWords);

+

+  }

 

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/Languages.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/Languages.java
deleted file mode 100644
index 0467150..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/Languages.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*

- * Licensed to the Apache Software Foundation (ASF) under one or more

- * contributor license agreements.  See the NOTICE file distributed with

- * this work for additional information regarding copyright ownership.

- * The ASF licenses this file to You under the Apache License, Version 2.0

- * (the "License"); you may not use this file except in compliance with

- * the License. You may obtain a copy of the License at

- *

- *     http://www.apache.org/licenses/LICENSE-2.0

- *

- * Unless required by applicable law or agreed to in writing, software

- * distributed under the License is distributed on an "AS IS" BASIS,

- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- * See the License for the specific language governing permissions and

- * limitations under the License.

- */

-package opennlp.tools.similarity.apps.taxo_builder;

-

-public enum Languages {

-	ENGLISH,SPANISH,GERMAN,FRENCH,ITALIAN

-}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
index 367dbe3..3c6fc59 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
@@ -120,11 +120,13 @@
 

   }

 

-  public void xmlWork (){

-    TaxoQuerySnapshotMatcher matcher = new TaxoQuerySnapshotMatcher("src/test/resources/taxonomies/irs_domTaxo.dat");

+  public void xmlWork() {

+    TaxoQuerySnapshotMatcher matcher = new TaxoQuerySnapshotMatcher(

+        "src/test/resources/taxonomies/irs_domTaxo.dat");

     XStream xStream = new XStream();

     FileHandler fileHandler = new FileHandler();

-    matcher.taxo = (TaxonomySerializer)xStream.fromXML(fileHandler.readFromTextFile("src/test/resources/taxo_English.xml"));

+    matcher.taxo = (TaxonomySerializer) xStream.fromXML(fileHandler

+        .readFromTextFile("src/test/resources/taxo_English.xml"));

   }

 

   public void close() {

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomySerializer.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomySerializer.java
index c1c8ddb..16e9fb2 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomySerializer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomySerializer.java
@@ -28,71 +28,78 @@
 

 /**

  * This class stores the taxonomy on the file-system

+ * 

  * @author Boris

- *

+ * 

  */

 public class TaxonomySerializer implements Serializable {

-	

-	private static final long serialVersionUID = 7431412616514648388L;

-	private Map<String, List<List<String>>> lemma_ExtendedAssocWords = new HashMap<String, List<List<String>>>();

-	private Map<List<String>, List<List<String>>> assocWords_ExtendedAssocWords = new HashMap<List<String>, List<List<String>>>();

-	

-	

-	public TaxonomySerializer(

-			Map<String, List<List<String>>> lemma_ExtendedAssocWords,

-			Map<List<String>, List<List<String>>> assocWords_ExtendedAssocWords) {

-		

-		this.lemma_ExtendedAssocWords = lemma_ExtendedAssocWords;

-		this.assocWords_ExtendedAssocWords = assocWords_ExtendedAssocWords;

-	}

-	public TaxonomySerializer() {

-		// TODO Auto-generated constructor stub

-	}

-	public Map<List<String>, List<List<String>>> getAssocWords_ExtendedAssocWords() {

-		return assocWords_ExtendedAssocWords;

-	}

-	public Map<String, List<List<String>>> getLemma_ExtendedAssocWords() {

-		return lemma_ExtendedAssocWords;

-	}

-	public void setLemma_ExtendedAssocWords(

-			Map<String, List<List<String>>> lemma_ExtendedAssocWords) {

-		this.lemma_ExtendedAssocWords = lemma_ExtendedAssocWords;

-	}

-	public void setAssocWords_ExtendedAssocWords(

-			Map<List<String>, List<List<String>>> assocWords_ExtendedAssocWords) {

-		this.assocWords_ExtendedAssocWords = assocWords_ExtendedAssocWords;

-	}

-	

-	public void writeTaxonomy(String filename){

-		FileOutputStream fos = null;

-		ObjectOutputStream out = null;

-		try {

-		  fos = new FileOutputStream(filename);

-		  out = new ObjectOutputStream(fos);

-		  out.writeObject(this);

-		  out.close();

-		}

-		catch(IOException ex)   {     ex.printStackTrace();   }

 

-	}

-	

-	public static TaxonomySerializer readTaxonomy(String filename){

-		TaxonomySerializer data = null;

-		FileInputStream fis = null;

-	    ObjectInputStream in = null;

-		try

-		{

-		   fis = new FileInputStream(filename);

-		   in = new ObjectInputStream(fis);

-		   data = (TaxonomySerializer)in.readObject();

-		   in.close();

-		}

-		catch(IOException ex) {  ex.printStackTrace();  }

-		catch(ClassNotFoundException ex) {  ex.printStackTrace();  }

-		

-		//System.out.print(data.lemma_ExtendedAssocWords);

-		

-		return data;

+  private static final long serialVersionUID = 7431412616514648388L;

+  private Map<String, List<List<String>>> lemma_ExtendedAssocWords = new HashMap<String, List<List<String>>>();

+  private Map<List<String>, List<List<String>>> assocWords_ExtendedAssocWords = new HashMap<List<String>, List<List<String>>>();

 

-	}

+  public TaxonomySerializer(

+      Map<String, List<List<String>>> lemma_ExtendedAssocWords,

+      Map<List<String>, List<List<String>>> assocWords_ExtendedAssocWords) {

+

+    this.lemma_ExtendedAssocWords = lemma_ExtendedAssocWords;

+    this.assocWords_ExtendedAssocWords = assocWords_ExtendedAssocWords;

+  }

+

+  public TaxonomySerializer() {

+    // TODO Auto-generated constructor stub

+  }

+

+  public Map<List<String>, List<List<String>>> getAssocWords_ExtendedAssocWords() {

+    return assocWords_ExtendedAssocWords;

+  }

+

+  public Map<String, List<List<String>>> getLemma_ExtendedAssocWords() {

+    return lemma_ExtendedAssocWords;

+  }

+

+  public void setLemma_ExtendedAssocWords(

+      Map<String, List<List<String>>> lemma_ExtendedAssocWords) {

+    this.lemma_ExtendedAssocWords = lemma_ExtendedAssocWords;

+  }

+

+  public void setAssocWords_ExtendedAssocWords(

+      Map<List<String>, List<List<String>>> assocWords_ExtendedAssocWords) {

+    this.assocWords_ExtendedAssocWords = assocWords_ExtendedAssocWords;

+  }

+

+  public void writeTaxonomy(String filename) {

+    FileOutputStream fos = null;

+    ObjectOutputStream out = null;

+    try {

+      fos = new FileOutputStream(filename);

+      out = new ObjectOutputStream(fos);

+      out.writeObject(this);

+      out.close();

+    } catch (IOException ex) {

+      ex.printStackTrace();

+    }

+

+  }

+

+  public static TaxonomySerializer readTaxonomy(String filename) {

+    TaxonomySerializer data = null;

+    FileInputStream fis = null;

+    ObjectInputStream in = null;

+    try {

+      fis = new FileInputStream(filename);

+      in = new ObjectInputStream(fis);

+      data = (TaxonomySerializer) in.readObject();

+      in.close();

+    } catch (IOException ex) {

+      ex.printStackTrace();

+    } catch (ClassNotFoundException ex) {

+      ex.printStackTrace();

+    }

+

+    // System.out.print(data.lemma_ExtendedAssocWords);

+

+    return data;

+

+  }

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
index a075f69..adb5321 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
@@ -36,333 +36,351 @@
 import java.util.List;

 import org.apache.log4j.Logger;

 

-

 /**

- *This class responsible to save data to files as well as read out!

- *It is capable to handle text and binary files. 

+ * This class responsible to save data to files as well as read out! It is

+ * capable to handle text and binary files.

  */

 public class FileHandler {

-	

-	private static Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.utils.FileHandler");

-       

-	

-	public  void writeToTextFile(String data,String filepath,boolean append) throws IOException {

-		try{

-			BufferedWriter out = new BufferedWriter(new FileWriter(filepath, append));

-			out.write(data + "\n");

-            out.close();

-            } catch (IOException e) {

-                	LOG.error(e);

-                	e.printStackTrace();

-            }

-	}

-	/**

-	 * Writes data from an arrayList<String> to a text-file where each line of the text represented by an element in the list.

-	 * @param list

-	 * @param filePath

-	 * @param append

-	 * @throws Exception

-	 */

-	public  void writeToTextFile(ArrayList<String> list, String filePath, boolean append)	throws Exception {

-		FileWriter outFile = null;

-		Iterator<String> it = list.iterator();

-		if (!append) {

-			outFile = new FileWriter(filePath);

-			PrintWriter out = new PrintWriter(outFile);

-			while (it.hasNext()) {

-				out.println((String) it.next());

-			}

-			outFile.close();

-		} else {

-			int tmp = 0;

-			while (it.hasNext()) {

-				if (tmp == 0) {

-					appendtofile("\n" + (String) it.next(), filePath);

-				} else {

-					appendtofile((String) it.next(), filePath);

-				}

-				tmp++;

-			}

-		}

-	}

 

-     public  void writeObjectToFile(Object obj, String filepath, boolean append) {

-    	 	if(!isFileOrDirectoryExists(getDirPathfromFullPath(filepath))){

-    	 		createFolder(getDirPathfromFullPath(filepath));

-    	 	}

-    	 	ObjectOutputStream outputStream = null;

-         try {

-        	 outputStream = new ObjectOutputStream(new FileOutputStream(filepath));

-             outputStream.writeObject(obj);

-             } catch (IOException e) {

-            	 LOG.error(e);

-             }

+  private static Logger LOG = Logger

+      .getLogger("opennlp.tools.similarity.apps.utils.FileHandler");

+

+  public void writeToTextFile(String data, String filepath, boolean append)

+      throws IOException {

+    try {

+      BufferedWriter out = new BufferedWriter(new FileWriter(filepath, append));

+      out.write(data + "\n");

+      out.close();

+    } catch (IOException e) {

+      LOG.error(e);

+      e.printStackTrace();

     }

-    public  Object readObjectfromFile(String filePath){

-    	ObjectInputStream inputStream = null;

-    	try {

-    		//Construct the ObjectInputStream object

-            inputStream = new ObjectInputStream(new FileInputStream(filePath));

-            Object obj = null;

-            while ((obj = inputStream.readObject()) != null) {

-            	return  obj;

-            }

-        } catch (EOFException ex) { //This exception will be caught when EOF is reached

-        	LOG.error("End of file reached.",ex);

-        } catch (ClassNotFoundException ex) {

-        	LOG.error(ex);

-        } catch (FileNotFoundException ex) {

-        	LOG.error(ex);

-        } catch (IOException ex) {

-        	LOG.error(ex);

-        } finally {

-            //Close the ObjectInputStream

-            try {

-                if (inputStream != null) {

-                    inputStream.close();

-                }

-            } catch (IOException ex) {

-            	LOG.error(ex);

-            }

+  }

+

+  /**

+   * Writes data from an arrayList<String> to a text-file where each line of the

+   * text represented by an element in the list.

+   * 

+   * @param list

+   * @param filePath

+   * @param append

+   * @throws Exception

+   */

+  public void writeToTextFile(ArrayList<String> list, String filePath,

+      boolean append) throws Exception {

+    FileWriter outFile = null;

+    Iterator<String> it = list.iterator();

+    if (!append) {

+      outFile = new FileWriter(filePath);

+      PrintWriter out = new PrintWriter(outFile);

+      while (it.hasNext()) {

+        out.println((String) it.next());

+      }

+      outFile.close();

+    } else {

+      int tmp = 0;

+      while (it.hasNext()) {

+        if (tmp == 0) {

+          appendtofile("\n" + (String) it.next(), filePath);

+        } else {

+          appendtofile((String) it.next(), filePath);

         }

-             return null;

-         }

-    /**

-     * Creates a byte array from any object.

-     * 

-     * I wanted to use it when I write out object to files! (This is not in use right now, I may move it into other class)

-     * 

-     * @param obj

-     * @return

-     * @throws java.io.IOException

-     */

-    public  byte[] getBytes(Object obj) throws java.io.IOException{

-    	ByteArrayOutputStream bos = new ByteArrayOutputStream();

-        ObjectOutputStream oos = new ObjectOutputStream(bos);

-        oos.writeObject(obj);

-        oos.flush();

-        oos.close();

-        bos.close();

-        byte [] data = bos.toByteArray();

-        return data;

+        tmp++;

+      }

     }

+  }

 

-	/**

-	 * Fetches all content from a text file, and return it as a String.

-	 * @return

-	 */

-	 public String readFromTextFile(String filePath) {

-		StringBuilder contents = new StringBuilder();

-		// ...checks on aFile are edited

-		File aFile = new File(filePath);

+  public void writeObjectToFile(Object obj, String filepath, boolean append) {

+    if (!isFileOrDirectoryExists(getDirPathfromFullPath(filepath))) {

+      createFolder(getDirPathfromFullPath(filepath));

+    }

+    ObjectOutputStream outputStream = null;

+    try {

+      outputStream = new ObjectOutputStream(new FileOutputStream(filepath));

+      outputStream.writeObject(obj);

+    } catch (IOException e) {

+      LOG.error(e);

+    }

+  }

 

-		try {

-			// use buffering, reading one line at a time

-			// FileReader always assumes default encoding is OK!

-			// TODO be sure that the default encoding is OK!!!!! Otherwise

-			// change it

+  public Object readObjectfromFile(String filePath) {

+    ObjectInputStream inputStream = null;

+    try {

+      // Construct the ObjectInputStream object

+      inputStream = new ObjectInputStream(new FileInputStream(filePath));

+      Object obj = null;

+      while ((obj = inputStream.readObject()) != null) {

+        return obj;

+      }

+    } catch (EOFException ex) { // This exception will be caught when EOF is

+                                // reached

+      LOG.error("End of file reached.", ex);

+    } catch (ClassNotFoundException ex) {

+      LOG.error(ex);

+    } catch (FileNotFoundException ex) {

+      LOG.error(ex);

+    } catch (IOException ex) {

+      LOG.error(ex);

+    } finally {

+      // Close the ObjectInputStream

+      try {

+        if (inputStream != null) {

+          inputStream.close();

+        }

+      } catch (IOException ex) {

+        LOG.error(ex);

+      }

+    }

+    return null;

+  }

 

-			BufferedReader input = new BufferedReader(new FileReader(aFile));

-			try {

-				String line = null; // not declared within while loop

-				/*

-				 * readLine is a bit quirky : it returns the content of a line

-				 * MINUS the newline. it returns null only for the END of the

-				 * stream. it returns an empty String if two newlines appear in

-				 * a row.

-				 */

-				while ((line = input.readLine()) != null) {

-					contents.append(line);

-					contents.append(System.getProperty("line.separator"));

-				}

-			} finally {

-				input.close();

-			}

-		} catch (IOException ex) {

-			LOG.error("fileName: "+filePath,ex);

-		}

-		return contents.toString();

-	}

-	/**

-	 * Reads text file line-wise each line will be an element in the resulting list

-	 * @param filePath

-	 * @return

-	 */

-	public  List<String> readLinesFromTextFile(String filePath){

-		List<String> lines= new ArrayList<String>();

-		// ...checks on aFile are edited

-		File aFile = new File(filePath);

-		try {

-			// use buffering, reading one line at a time

-			// FileReader always assumes default encoding is OK!

-			// TODO be sure that the default encoding is OK!!!!! Otherwise

-			// change it

+  /**

+   * Creates a byte array from any object.

+   * 

+   * I wanted to use it when I write out object to files! (This is not in use

+   * right now, I may move it into other class)

+   * 

+   * @param obj

+   * @return

+   * @throws java.io.IOException

+   */

+  public byte[] getBytes(Object obj) throws java.io.IOException {

+    ByteArrayOutputStream bos = new ByteArrayOutputStream();

+    ObjectOutputStream oos = new ObjectOutputStream(bos);

+    oos.writeObject(obj);

+    oos.flush();

+    oos.close();

+    bos.close();

+    byte[] data = bos.toByteArray();

+    return data;

+  }

 

-			BufferedReader input = new BufferedReader(new FileReader(aFile));

-			try {

-				String line = null; // not declared within while loop

-				/*

-				 * readLine is a bit quirky : it returns the content of a line

-				 * MINUS the newline. it returns null only for the END of the

-				 * stream. it returns an empty String if two newlines appear in

-				 * a row.

-				 */

-				while ((line = input.readLine()) != null) {

-					lines.add(line);

-				}

-			} finally {

-				input.close();

-			}

-		} catch (IOException ex) {

-			LOG.error(ex);

-		}

-		return lines;

-	}

+  /**

+   * Fetches all content from a text file, and return it as a String.

+   * 

+   * @return

+   */

+  public String readFromTextFile(String filePath) {

+    StringBuilder contents = new StringBuilder();

+    // ...checks on aFile are edited

+    File aFile = new File(filePath);

 

-	

+    try {

+      // use buffering, reading one line at a time

+      // FileReader always assumes default encoding is OK!

+      // TODO be sure that the default encoding is OK!!!!! Otherwise

+      // change it

 

-	private  void appendtofile(String data, String filePath) {

-		try {

-			BufferedWriter out = new BufferedWriter(new FileWriter(filePath,true));

-			out.write(data + "\n");

-			out.close();

-		} catch (IOException e) {

-		}

-	}

-	public  void  createFolder(String path){

-		if(!isFileOrDirectoryExists(path)){

-			File file = new File(path);

-	    	 try{

-	    	 file.mkdirs();

-	    	 }catch (Exception e) {

-				LOG.error("Directory already exists or the file-system is read only",e);

-			}	

-		} 

-	}

-	public  boolean isFileOrDirectoryExists(String path){

-		File file=new File(path);

-		boolean exists = file.exists();

-		return exists;

-	}

-	/**

-	 * Separates the directory-path from a full file-path

-	 * @param filePath

-	 * @return

-	 */

-	private  String getDirPathfromFullPath(String filePath){

-		String dirPath="";

-		if(filePath!=null){

-			if(filePath!=""&&filePath.contains("\\"))

-			dirPath =filePath.substring(0,filePath.lastIndexOf("\\"));

-		}

-		return dirPath;

-	}

-	/**

-	 * Returns the file-names of the files in a folder (not paths only names) (Not recursive)

-	 * @param dirPath

-	 * @return

-	 */

-	public  ArrayList<String> getFileNamesInFolder(String dirPath){

-		ArrayList<String> fileNames= new ArrayList<String>();

-			

-			File folder = new File(dirPath);

-		    File[] listOfFiles = folder.listFiles();

+      BufferedReader input = new BufferedReader(new FileReader(aFile));

+      try {

+        String line = null; // not declared within while loop

+        /*

+         * readLine is a bit quirky : it returns the content of a line MINUS the

+         * newline. it returns null only for the END of the stream. it returns

+         * an empty String if two newlines appear in a row.

+         */

+        while ((line = input.readLine()) != null) {

+          contents.append(line);

+          contents.append(System.getProperty("line.separator"));

+        }

+      } finally {

+        input.close();

+      }

+    } catch (IOException ex) {

+      LOG.error("fileName: " + filePath, ex);

+    }

+    return contents.toString();

+  }

 

-		    for (int i = 0; i < listOfFiles.length; i++) {

-		      if (listOfFiles[i].isFile()) {

-		        fileNames.add(listOfFiles[i].getName());

-		      } else if (listOfFiles[i].isDirectory()) {

-		        //TODO if I want to use it recursive I should handle this case

-		      }

-			}

-		return fileNames;

-	}

-	

-	public void deleteAllfilesinDir(String dirName){

-		ArrayList<String> fileNameList=getFileNamesInFolder(dirName);

-		if(fileNameList!=null){

-		for(int i=0; i<fileNameList.size();i++){

-		try{

-			deleteFile(dirName+fileNameList.get(i));

-			}catch(IllegalArgumentException e){

-				LOG.error("No way to delete file: "+dirName+fileNameList.get(i),e);

-			}

-		}

-		}

-	}

-	public  void deleteFile(String filePath) throws IllegalArgumentException{

-		// A File object to represent the filename

-	    File f = new File(filePath);

-	    // Make sure the file or directory exists and isn't write protected

-	    if (!f.exists())

-	      throw new IllegalArgumentException(

-	          "Delete: no such file or directory: " + filePath);

+  /**

+   * Reads text file line-wise each line will be an element in the resulting

+   * list

+   * 

+   * @param filePath

+   * @return

+   */

+  public List<String> readLinesFromTextFile(String filePath) {

+    List<String> lines = new ArrayList<String>();

+    // ...checks on aFile are edited

+    File aFile = new File(filePath);

+    try {

+      // use buffering, reading one line at a time

+      // FileReader always assumes default encoding is OK!

+      // TODO be sure that the default encoding is OK!!!!! Otherwise

+      // change it

 

-	    if (!f.canWrite())

-	      throw new IllegalArgumentException("Delete: write protected: "

-	          + filePath);

-	    // If it is a directory, make sure it is empty

-	    if (f.isDirectory()) {

-	      String[] files = f.list();

-	      if (files.length > 0)

-	        throw new IllegalArgumentException(

-	            "Delete: directory not empty: " + filePath);

-	    }

-	    // Attempt to delete it

-	    boolean success = f.delete();

-	    if (!success)

-	      throw new IllegalArgumentException("Delete: deletion failed");

-	}

-	

-	public boolean deleteDirectory(File path) {

-	    if( path.exists() ) {

-	      File[] files = path.listFiles();

-	      for(int i=0; i<files.length; i++) {

-	         if(files[i].isDirectory()) {

-	           deleteDirectory(files[i]);

-	         }

-	         else {

-	           files[i].delete();

-	         }

-	      }

-	    }

-	    return( path.delete() );

-	  }

-	

-	/**

-	 * Returns the absolute-file-paths of the files in a directory (not recursive)

-	 * @param dirPath

-	 * @return

-	 */

-	public  ArrayList<String> getFilePathsInFolder(String dirPath){

-		ArrayList<String> filePaths= new ArrayList<String>();

-			

-			File folder = new File(dirPath);

-		    File[] listOfFiles = folder.listFiles();

-		    if(listOfFiles==null)

-		    	return null;

-		    for (int i = 0; i < listOfFiles.length; i++) {

-		      if (listOfFiles[i].isFile()) {

-		    	  filePaths.add(listOfFiles[i].getAbsolutePath());

-		      } else if (listOfFiles[i].isDirectory()) {

-		        //TODO if I want to use it recursive I should handle this case

-		      }

-			}

-		return filePaths;

-	}

-	/**

-	 * Returns the number of individual files in a directory (Not ercursive)

-	 * @param dirPath

-	 * @return

-	 */

-	public  int getFileNumInFolder(String dirPath){

-		int num=0;

-		try{

-			num=getFileNamesInFolder(dirPath).size();

-		}catch (Exception e) {

-			num=0;

-		}

-		return num;

-	}

+      BufferedReader input = new BufferedReader(new FileReader(aFile));

+      try {

+        String line = null; // not declared within while loop

+        /*

+         * readLine is a bit quirky : it returns the content of a line MINUS the

+         * newline. it returns null only for the END of the stream. it returns

+         * an empty String if two newlines appear in a row.

+         */

+        while ((line = input.readLine()) != null) {

+          lines.add(line);

+        }

+      } finally {

+        input.close();

+      }

+    } catch (IOException ex) {

+      LOG.error(ex);

+    }

+    return lines;

+  }

+

+  private void appendtofile(String data, String filePath) {

+    try {

+      BufferedWriter out = new BufferedWriter(new FileWriter(filePath, true));

+      out.write(data + "\n");

+      out.close();

+    } catch (IOException e) {

+    }

+  }

+

+  public void createFolder(String path) {

+    if (!isFileOrDirectoryExists(path)) {

+      File file = new File(path);

+      try {

+        file.mkdirs();

+      } catch (Exception e) {

+        LOG.error("Directory already exists or the file-system is read only", e);

+      }

+    }

+  }

+

+  public boolean isFileOrDirectoryExists(String path) {

+    File file = new File(path);

+    boolean exists = file.exists();

+    return exists;

+  }

+

+  /**

+   * Separates the directory-path from a full file-path

+   * 

+   * @param filePath

+   * @return

+   */

+  private String getDirPathfromFullPath(String filePath) {

+    String dirPath = "";

+    if (filePath != null) {

+      if (filePath != "" && filePath.contains("\\"))

+        dirPath = filePath.substring(0, filePath.lastIndexOf("\\"));

+    }

+    return dirPath;

+  }

+

+  /**

+   * Returns the file-names of the files in a folder (not paths only names) (Not

+   * recursive)

+   * 

+   * @param dirPath

+   * @return

+   */

+  public ArrayList<String> getFileNamesInFolder(String dirPath) {

+    ArrayList<String> fileNames = new ArrayList<String>();

+

+    File folder = new File(dirPath);

+    File[] listOfFiles = folder.listFiles();

+

+    for (int i = 0; i < listOfFiles.length; i++) {

+      if (listOfFiles[i].isFile()) {

+        fileNames.add(listOfFiles[i].getName());

+      } else if (listOfFiles[i].isDirectory()) {

+        // TODO if I want to use it recursive I should handle this case

+      }

+    }

+    return fileNames;

+  }

+

+  public void deleteAllfilesinDir(String dirName) {

+    ArrayList<String> fileNameList = getFileNamesInFolder(dirName);

+    if (fileNameList != null) {

+      for (int i = 0; i < fileNameList.size(); i++) {

+        try {

+          deleteFile(dirName + fileNameList.get(i));

+        } catch (IllegalArgumentException e) {

+          LOG.error("No way to delete file: " + dirName + fileNameList.get(i),

+              e);

+        }

+      }

+    }

+  }

+

+  public void deleteFile(String filePath) throws IllegalArgumentException {

+    // A File object to represent the filename

+    File f = new File(filePath);

+    // Make sure the file or directory exists and isn't write protected

+    if (!f.exists())

+      throw new IllegalArgumentException("Delete: no such file or directory: "

+          + filePath);

+

+    if (!f.canWrite())

+      throw new IllegalArgumentException("Delete: write protected: " + filePath);

+    // If it is a directory, make sure it is empty

+    if (f.isDirectory()) {

+      String[] files = f.list();

+      if (files.length > 0)

+        throw new IllegalArgumentException("Delete: directory not empty: "

+            + filePath);

+    }

+    // Attempt to delete it

+    boolean success = f.delete();

+    if (!success)

+      throw new IllegalArgumentException("Delete: deletion failed");

+  }

+

+  public boolean deleteDirectory(File path) {

+    if (path.exists()) {

+      File[] files = path.listFiles();

+      for (int i = 0; i < files.length; i++) {

+        if (files[i].isDirectory()) {

+          deleteDirectory(files[i]);

+        } else {

+          files[i].delete();

+        }

+      }

+    }

+    return (path.delete());

+  }

+

+  /**

+   * Returns the absolute-file-paths of the files in a directory (not recursive)

+   * 

+   * @param dirPath

+   * @return

+   */

+  public ArrayList<String> getFilePathsInFolder(String dirPath) {

+    ArrayList<String> filePaths = new ArrayList<String>();

+

+    File folder = new File(dirPath);

+    File[] listOfFiles = folder.listFiles();

+    if (listOfFiles == null)

+      return null;

+    for (int i = 0; i < listOfFiles.length; i++) {

+      if (listOfFiles[i].isFile()) {

+        filePaths.add(listOfFiles[i].getAbsolutePath());

+      } else if (listOfFiles[i].isDirectory()) {

+        // TODO if I want to use it recursive I should handle this case

+      }

+    }

+    return filePaths;

+  }

+

+  /**

+   * Returns the number of individual files in a directory (Not ercursive)

+   * 

+   * @param dirPath

+   * @return

+   */

+  public int getFileNumInFolder(String dirPath) {

+    int num = 0;

+    try {

+      num = getFileNamesInFolder(dirPath).size();

+    } catch (Exception e) {

+      num = 0;

+    }

+    return num;

+  }

 

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java
index 6fdbe3b..2ee288b 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java
@@ -29,7 +29,8 @@
 import org.apache.tika.exception.TikaException;

 

 public class PageFetcher {

-  private static final Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.utils.PageFetcher");

+  private static final Logger LOG = Logger

+      .getLogger("opennlp.tools.similarity.apps.utils.PageFetcher");

 

   private static int DEFAULT_TIMEOUT = 15000;

 

@@ -51,11 +52,11 @@
       pageContent = tika.parseToString(connection.getInputStream())

           .replace('\n', ' ').replace('\t', ' ');

     } catch (MalformedURLException e) {

-    	LOG.severe(e.getMessage() + "\n"+ e);

+      LOG.severe(e.getMessage() + "\n" + e);

     } catch (IOException e) {

-    	 LOG.severe(e.getMessage() + "\n"+ e);

+      LOG.severe(e.getMessage() + "\n" + e);

     } catch (TikaException e) {

-    	 LOG.severe(e.getMessage() + "\n"+ e);

+      LOG.severe(e.getMessage() + "\n" + e);

     }

     return pageContent;

   }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
index eb9be1d..18fc5f7 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
@@ -38,644 +38,689 @@
 

 public class Utils {

 

-	private static final Logger LOG =  Logger.getLogger("opennlp.tools.similarity.apps.utils.Utils");

+  private static final Logger LOG = Logger

+      .getLogger("opennlp.tools.similarity.apps.utils.Utils");

 

-	protected static final ArrayList<String[]> characterMappings = new ArrayList<String[]>();

+  protected static final ArrayList<String[]> characterMappings = new ArrayList<String[]>();

 

-	static {

-		characterMappings

-		.add(new String[] {

-				"[àáâãäå�?ăą�°]",

-		" " }); // was a

-		characterMappings

-		.add(new String[] {

-				"[À�?ÂÃÄÅĀĂĄ�?]",

-		"A" });

-		characterMappings.add(new String[] {

-				"[çćĉċ�?]", "c" });

-		characterMappings.add(new String[] {

-				"[ÇĆĈĊČ]", "C" });

-		characterMappings.add(new String[] { "[�?đ]", "d" });

-		characterMappings.add(new String[] {

-				"[�?�?]", "D" });

-		characterMappings

-		.add(new String[] {

-				"[èéêëæęēĕ�ęě]",

-		" " }); // was e

-		characterMappings

-		.add(new String[] {

-				"[ÈÉÊËÆĒ�ĖĘĚ]",

-		"'" }); // was E

-		characterMappings.add(new String[] {

-				"[�?ğġģ]", "g" });

-		characterMappings.add(new String[] {

-				"[ĜĞĠĢƓ]", "G" });

-		characterMappings.add(new String[] { "[ĥħ]", "h" });

-		characterMappings.add(new String[] { "[ĤĦ]", "H" });

-		characterMappings

-		.add(new String[] {

-				"[ìíîïĩīĭĮįıijĵ]",

-		"i" });

-		characterMappings

-		.add(new String[] {

-				"[Ì�?Î�?ĨĪĬİIJĴĵ]",

-		"I" });

-		characterMappings.add(new String[] { "[ķĸ]", "k" });

-		characterMappings.add(new String[] { "[Ķ]", "K" });

-		characterMappings

-		.add(new String[] {

-				"[øőðòóôõö�?�?őœơ]",

-		"o" });

-		characterMappings

-		.add(new String[] {

-				"[ÒÓ�ÕÖØŌŎ�?ŒƠ]",

-		"O" });

-		characterMappings.add(new String[] {

-				"[ñńņňʼnŋ]",

-		"n" });

-		characterMappings.add(new String[] {

-				"[ÑŃŅŇŊŋ]",

-		"N" });

-		characterMappings.add(new String[] {

-				"[ĺļľŀł]", "l" });

-		characterMappings.add(new String[] {

-				"[ĹĻĽĿ�?]", "L" });

-		characterMappings

-		.add(new String[] {

-				"[ùúûüũūŭůűųư]",

-		"u" });

-		characterMappings

-		.add(new String[] {

-				"[ÙÚÛÜŨŪŬŮŰŲƯ]",

-		"U" });

-		characterMappings.add(new String[] { "[ýÿŷ]", "y" });

-		characterMappings.add(new String[] { "[�?ŶŸ]",

-		"Y" });

-		characterMappings.add(new String[] {

-				"[ŕ�ř]", "r" });

-		characterMappings.add(new String[] {

-				"[�ŖŘ]", "R" });

-		characterMappings

-		.add(new String[] {

-				"[šś�?şšſ]",

-		"s" });

-		characterMappings.add(new String[] {

-				"[ŠŚŜŞŠſ]", "S" });

-		characterMappings.add(new String[] { "ß", "ss" });

-		characterMappings.add(new String[] { "Þ", "th" });

-		characterMappings.add(new String[] { "þ", "Th" });

-		characterMappings

-		.add(new String[] { "[ţťŧ]", "t" });

-		characterMappings

-		.add(new String[] { "[ŢŤŦ]", "T" });

-		characterMappings.add(new String[] { "[ŵ]", "w" });

-		characterMappings.add(new String[] { "[Å´]", "W" });

-		characterMappings.add(new String[] {

-				"[žźżžƶ]", "z" });

-		characterMappings.add(new String[] {

-				"[ŽŽŹŻŽƵ]", "Z" });

-		characterMappings.add(new String[] { "[’]", "'" });

-		characterMappings.add(new String[] { "[–]", "'" });

-		characterMappings.add(new String[] { "&#39;", "'" });

-		characterMappings.add(new String[] { "Âe", "«" });

-		characterMappings.add(new String[] { "'AG", "“" });

-		characterMappings.add(new String[] { "A�", " " });

-		characterMappings.add(new String[] { "&quot;", "\"" });

-		characterMappings.add(new String[] { "&amp;", "&" });

-		characterMappings.add(new String[] { "&nbsp;", " " });

-		characterMappings.add(new String[] { "", " " });

-		characterMappings.add(new String[] { "â„¢", " " });

-		characterMappings.add(new String[] { "�", "" });

-		characterMappings.add(new String[] { "’", "'" });

-	}

+  static {

+    characterMappings

+        .add(new String[] {

+            "[àáâãäå�?ăą�°]",

+            " " }); // was a

+    characterMappings

+        .add(new String[] {

+            "[À�?ÂÃÄÅĀĂĄ�?]",

+            "A" });

+    characterMappings

+        .add(new String[] {

+            "[çćĉċ�?]",

+            "c" });

+    characterMappings

+        .add(new String[] {

+            "[ÇĆĈĊČ]",

+            "C" });

+    characterMappings.add(new String[] {

+        "[�?đ]", "d" });

+    characterMappings

+        .add(new String[] {

+            "[�?�?]",

+            "D" });

+    characterMappings

+        .add(new String[] {

+            "[èéêëæęēĕ�ęě]",

+            " " }); // was e

+    characterMappings

+        .add(new String[] {

+            "[ÈÉÊËÆĒ�ĖĘĚ]",

+            "'" }); // was E

+    characterMappings

+        .add(new String[] {

+            "[�?ğġģ]",

+            "g" });

+    characterMappings

+        .add(new String[] {

+            "[ĜĞĠĢƓ]",

+            "G" });

+    characterMappings.add(new String[] {

+        "[ĥħ]", "h" });

+    characterMappings.add(new String[] {

+        "[ĤĦ]", "H" });

+    characterMappings

+        .add(new String[] {

+            "[ìíîïĩīĭĮįıijĵ]",

+            "i" });

+    characterMappings

+        .add(new String[] {

+            "[Ì�?Î�?ĨĪĬİIJĴĵ]",

+            "I" });

+    characterMappings.add(new String[] {

+        "[ķĸ]", "k" });

+    characterMappings.add(new String[] { "[Ķ]", "K" });

+    characterMappings

+        .add(new String[] {

+            "[øőðòóôõö�?�?őœơ]",

+            "o" });

+    characterMappings

+        .add(new String[] {

+            "[ÒÓ�ÕÖØŌŎ�?ŒƠ]",

+            "O" });

+    characterMappings

+        .add(new String[] {

+            "[ñńņňʼnŋ]",

+            "n" });

+    characterMappings

+        .add(new String[] {

+            "[ÑŃŅŇŊŋ]",

+            "N" });

+    characterMappings

+        .add(new String[] {

+            "[ĺļľŀł]",

+            "l" });

+    characterMappings

+        .add(new String[] {

+            "[ĹĻĽĿ�?]",

+            "L" });

+    characterMappings

+        .add(new String[] {

+            "[ùúûüũūŭůűųư]",

+            "u" });

+    characterMappings

+        .add(new String[] {

+            "[ÙÚÛÜŨŪŬŮŰŲƯ]",

+            "U" });

+    characterMappings.add(new String[] {

+        "[ýÿŷ]", "y" });

+    characterMappings

+        .add(new String[] {

+            "[�?ŶŸ]",

+            "Y" });

+    characterMappings

+        .add(new String[] {

+            "[ŕ�ř]",

+            "r" });

+    characterMappings

+        .add(new String[] {

+            "[�ŖŘ]",

+            "R" });

+    characterMappings

+        .add(new String[] {

+            "[šś�?şšſ]",

+            "s" });

+    characterMappings

+        .add(new String[] {

+            "[ŠŚŜŞŠſ]",

+            "S" });

+    characterMappings.add(new String[] { "ß", "ss" });

+    characterMappings.add(new String[] { "Þ", "th" });

+    characterMappings.add(new String[] { "þ", "Th" });

+    characterMappings

+        .add(new String[] {

+            "[ţťŧ]",

+            "t" });

+    characterMappings

+        .add(new String[] {

+            "[ŢŤŦ]",

+            "T" });

+    characterMappings.add(new String[] { "[ŵ]", "w" });

+    characterMappings.add(new String[] { "[Å´]", "W" });

+    characterMappings

+        .add(new String[] {

+            "[žźżžƶ]",

+            "z" });

+    characterMappings

+        .add(new String[] {

+            "[ŽŽŹŻŽƵ]",

+            "Z" });

+    characterMappings.add(new String[] {

+        "[’]", "'" });

+    characterMappings.add(new String[] {

+        "[–]", "'" });

+    characterMappings.add(new String[] { "&#39;", "'" });

+    characterMappings.add(new String[] { "Âe", "«" });

+    characterMappings.add(new String[] { "'AG", "“" });

+    characterMappings.add(new String[] { "A�", " " });

+    characterMappings.add(new String[] { "&quot;", "\"" });

+    characterMappings.add(new String[] { "&amp;", "&" });

+    characterMappings.add(new String[] { "&nbsp;", " " });

+    characterMappings.add(new String[] {

+        "", " " });

+    characterMappings.add(new String[] { "â„¢",

+        " " });

+    characterMappings.add(new String[] {

+        "�", "" });

+    characterMappings.add(new String[] { "’", "'" });

+  }

 

-	public static String stripNonAsciiChars(String s) {

-		StringBuffer b = new StringBuffer();

-		if (s != null) {

-			for (int i = 0; i < s.length(); i++) {

-				if (((int) s.charAt(i)) <= 256) {

-					b.append(s.charAt(i));

-				}

-			}

-		}

+  public static String stripNonAsciiChars(String s) {

+    StringBuffer b = new StringBuffer();

+    if (s != null) {

+      for (int i = 0; i < s.length(); i++) {

+        if (((int) s.charAt(i)) <= 256) {

+          b.append(s.charAt(i));

+        }

+      }

+    }

 

-		return b.toString().trim().replaceAll("\\s+", " "); // replace any multiple

-		// spaces with a single

-		// space

-	}

+    return b.toString().trim().replaceAll("\\s+", " "); // replace any multiple

+    // spaces with a single

+    // space

+  }

 

-	public static String convertToASCII(String s) {

-		s = s.replace("&amp", "");

-		s = s.replaceAll("’", "__apostrophe__");

-		String tmp = s;

-		if (tmp != null) {

-			for (String[] mapping : characterMappings) {

-				tmp = tmp.replaceAll(mapping[0], mapping[1]);

-			}

-		}

-		return stripNonAsciiChars(tmp.replaceAll("__apostrophe__", "'"));

-	}

+  public static String convertToASCII(String s) {

+    s = s.replace("&amp", "");

+    s = s.replaceAll("’", "__apostrophe__");

+    String tmp = s;

+    if (tmp != null) {

+      for (String[] mapping : characterMappings) {

+        tmp = tmp.replaceAll(mapping[0], mapping[1]);

+      }

+    }

+    return stripNonAsciiChars(tmp.replaceAll("__apostrophe__", "'"));

+  }

 

-	public static class KeyValue {

-		public Object key = null;

+  public static class KeyValue {

+    public Object key = null;

 

-		public float value = 0;

+    public float value = 0;

 

-		public KeyValue(Object o, Float i) {

-			this.key = o;

-			this.value = i;

-		}

+    public KeyValue(Object o, Float i) {

+      this.key = o;

+      this.value = i;

+    }

 

-		public static class SortByValue implements Comparator {

-			public int compare(Object obj1, Object obj2) {

-				float i1 = ((KeyValue) obj1).value;

-				float i2 = ((KeyValue) obj2).value;

+    public static class SortByValue implements Comparator {

+      public int compare(Object obj1, Object obj2) {

+        float i1 = ((KeyValue) obj1).value;

+        float i2 = ((KeyValue) obj2).value;

 

-				if (i1 < i2)

-					return 1;

-				return -1;

-			}

-		}

-	}

+        if (i1 < i2)

+          return 1;

+        return -1;

+      }

+    }

+  }

 

-	public static boolean createResizedCopy(String originalImage,

-			String newImage, int scaledWidth, int scaledHeight) {

-		boolean retVal = true;

-		try {

-			File o = new File(originalImage);

-			BufferedImage bsrc = ImageIO.read(o);

-			BufferedImage bdest = new BufferedImage(scaledWidth, scaledHeight,

-					BufferedImage.TYPE_INT_RGB);

+  public static boolean createResizedCopy(String originalImage,

+      String newImage, int scaledWidth, int scaledHeight) {

+    boolean retVal = true;

+    try {

+      File o = new File(originalImage);

+      BufferedImage bsrc = ImageIO.read(o);

+      BufferedImage bdest = new BufferedImage(scaledWidth, scaledHeight,

+          BufferedImage.TYPE_INT_RGB);

 

-			Graphics2D g = bdest.createGraphics();

-			AffineTransform at = AffineTransform.getScaleInstance(

-					(double) scaledWidth / bsrc.getWidth(),

-					(double) scaledHeight / bsrc.getHeight());

-			g.drawRenderedImage(bsrc, at);

-			ImageIO.write(bdest, "jpeg", new File(newImage));

+      Graphics2D g = bdest.createGraphics();

+      AffineTransform at = AffineTransform.getScaleInstance(

+          (double) scaledWidth / bsrc.getWidth(),

+          (double) scaledHeight / bsrc.getHeight());

+      g.drawRenderedImage(bsrc, at);

+      ImageIO.write(bdest, "jpeg", new File(newImage));

 

-		} catch (Exception e) {

-			retVal = false;

-			LOG.severe("Failed creating thumbnail for image: " + originalImage + e);

-		}

+    } catch (Exception e) {

+      retVal = false;

+      LOG.severe("Failed creating thumbnail for image: " + originalImage + e);

+    }

 

-		return retVal;

-	}

+    return retVal;

+  }

 

-	private static int minimum(int a, int b, int c) {

-		int mi;

+  private static int minimum(int a, int b, int c) {

+    int mi;

 

-		mi = a;

-		if (b < mi) {

-			mi = b;

-		}

-		if (c < mi) {

-			mi = c;

-		}

-		return mi;

+    mi = a;

+    if (b < mi) {

+      mi = b;

+    }

+    if (c < mi) {

+      mi = c;

+    }

+    return mi;

 

-	}

+  }

 

-	public static int computeEditDistance(String s, String t) {

-		int d[][]; // matrix

-		int n; // length of s

-		int m; // length of t

-		int i; // iterates through s

-		int j; // iterates through t

-		char s_i; // ith character of s

-		char t_j; // jth character of t

-		int cost; // cost

+  public static int computeEditDistance(String s, String t) {

+    int d[][]; // matrix

+    int n; // length of s

+    int m; // length of t

+    int i; // iterates through s

+    int j; // iterates through t

+    char s_i; // ith character of s

+    char t_j; // jth character of t

+    int cost; // cost

 

-		// Step 1

-		n = s.length();

-		m = t.length();

-		if (n == 0) {

-			return m;

-		}

-		if (m == 0) {

-			return n;

-		}

-		d = new int[n + 1][m + 1];

-		// Step 2

-		for (i = 0; i <= n; i++) {

-			d[i][0] = i;

-		}

-		for (j = 0; j <= m; j++) {

-			d[0][j] = j;

-		}

-		// Step 3

-		for (i = 1; i <= n; i++) {

-			s_i = s.charAt(i - 1);

-			// Step 4

-			for (j = 1; j <= m; j++) {

-				t_j = t.charAt(j - 1);

-				// Step 5

-				if (s_i == t_j) {

-					cost = 0;

-				} else {

-					cost = 1;

-				}

-				// Step 6

-				d[i][j] = minimum(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1]

-				                                                             + cost);

-			}

-		}

-		// Step 7

-		return d[n][m];

-	}

+    // Step 1

+    n = s.length();

+    m = t.length();

+    if (n == 0) {

+      return m;

+    }

+    if (m == 0) {

+      return n;

+    }

+    d = new int[n + 1][m + 1];

+    // Step 2

+    for (i = 0; i <= n; i++) {

+      d[i][0] = i;

+    }

+    for (j = 0; j <= m; j++) {

+      d[0][j] = j;

+    }

+    // Step 3

+    for (i = 1; i <= n; i++) {

+      s_i = s.charAt(i - 1);

+      // Step 4

+      for (j = 1; j <= m; j++) {

+        t_j = t.charAt(j - 1);

+        // Step 5

+        if (s_i == t_j) {

+          cost = 0;

+        } else {

+          cost = 1;

+        }

+        // Step 6

+        d[i][j] = minimum(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1]

+            + cost);

+      }

+    }

+    // Step 7

+    return d[n][m];

+  }

 

-	public static ArrayList<KeyValue> sortByValue(HashMap<Object, Float> h) {

-		ArrayList<KeyValue> res = new ArrayList<KeyValue>();

-		for (Object o : h.keySet()) {

-			// form a pair

-			res.add(new KeyValue(o, h.get(o)));

-		}

+  public static ArrayList<KeyValue> sortByValue(HashMap<Object, Float> h) {

+    ArrayList<KeyValue> res = new ArrayList<KeyValue>();

+    for (Object o : h.keySet()) {

+      // form a pair

+      res.add(new KeyValue(o, h.get(o)));

+    }

 

-		Collections.sort(res, new KeyValue.SortByValue());

+    Collections.sort(res, new KeyValue.SortByValue());

 

-		return res;

-	}

+    return res;

+  }

 

-	public static String convertKeyValueToString(ArrayList<KeyValue> l) {

-		StringBuffer retVal = new StringBuffer();

-		for (KeyValue kv : l) {

-			retVal.append(kv.key);

-			retVal.append("-");

-			retVal.append(kv.value);

-			retVal.append(",");

-		}

+  public static String convertKeyValueToString(ArrayList<KeyValue> l) {

+    StringBuffer retVal = new StringBuffer();

+    for (KeyValue kv : l) {

+      retVal.append(kv.key);

+      retVal.append("-");

+      retVal.append(kv.value);

+      retVal.append(",");

+    }

 

-		return retVal.toString();

-	}

+    return retVal.toString();

+  }

 

-	public static String convertStringArrayToString(ArrayList<String> l) {

-		StringBuffer b = new StringBuffer();

-		for (String s : l) {

-			b.append(s);

-			b.append(", ");

-		}

+  public static String convertStringArrayToString(ArrayList<String> l) {

+    StringBuffer b = new StringBuffer();

+    for (String s : l) {

+      b.append(s);

+      b.append(", ");

+    }

 

-		return b.toString();

-	}

+    return b.toString();

+  }

 

-	public static String convertStringArrayToPlainString(ArrayList<String> l) {

-		StringBuffer b = new StringBuffer();

-		for (String s : l) {

-			b.append(s);

-			b.append(" ");

-		}

+  public static String convertStringArrayToPlainString(ArrayList<String> l) {

+    StringBuffer b = new StringBuffer();

+    for (String s : l) {

+      b.append(s);

+      b.append(" ");

+    }

 

-		return b.toString();

-	}

+    return b.toString();

+  }

 

-	public static boolean noDomainInUrl(String siteUrl, String url) {

-		if (StringUtils.isEmpty(url)) {

-			return true;

-		}

-		if (!url.startsWith("http://")) {

-			return true;

-		}

-		return false;

-	}

+  public static boolean noDomainInUrl(String siteUrl, String url) {

+    if (StringUtils.isEmpty(url)) {

+      return true;

+    }

+    if (!url.startsWith("http://")) {

+      return true;

+    }

+    return false;

+  }

 

-	public static String addDomainToUrl(String siteUrl, String url) {

-		if (StringUtils.isEmpty(url)) {

-			return null; // should we return siteUrl here ??

-		}

-		if (!url.startsWith("http://")) {

-			String domain = StringUtils.substringBetween(siteUrl, "http://", "/");

-			if (domain == null) {

-				url = siteUrl + (url.startsWith("/") ? "" : "/") + url;

-			} else {

-				if (!url.startsWith("/")) {

-					int lastIndex = StringUtils.lastIndexOf(siteUrl, "/");

-					url = siteUrl.substring(0, lastIndex) + "/" + url;

-				} else {

-					url = "http://" + domain + url;

-				}

-			}

-		}

-		return url;

-	}

+  public static String addDomainToUrl(String siteUrl, String url) {

+    if (StringUtils.isEmpty(url)) {

+      return null; // should we return siteUrl here ??

+    }

+    if (!url.startsWith("http://")) {

+      String domain = StringUtils.substringBetween(siteUrl, "http://", "/");

+      if (domain == null) {

+        url = siteUrl + (url.startsWith("/") ? "" : "/") + url;

+      } else {

+        if (!url.startsWith("/")) {

+          int lastIndex = StringUtils.lastIndexOf(siteUrl, "/");

+          url = siteUrl.substring(0, lastIndex) + "/" + url;

+        } else {

+          url = "http://" + domain + url;

+        }

+      }

+    }

+    return url;

+  }

 

-	public static int countValues(Hashtable<String, Float> b1) {

-		int retVal = 0;

-		for (String s : b1.keySet()) {

-			retVal += b1.get(s);

-		}

+  public static int countValues(Hashtable<String, Float> b1) {

+    int retVal = 0;

+    for (String s : b1.keySet()) {

+      retVal += b1.get(s);

+    }

 

-		return retVal;

-	}

+    return retVal;

+  }

 

-	public static int countValues(HashMap<String, Integer> b1) {

-		int retVal = 0;

-		for (String s : b1.keySet()) {

-			retVal += b1.get(s);

-		}

+  public static int countValues(HashMap<String, Integer> b1) {

+    int retVal = 0;

+    for (String s : b1.keySet()) {

+      retVal += b1.get(s);

+    }

 

-		return retVal;

-	}

+    return retVal;

+  }

 

-	public static String convertHashMapToString(HashMap<String, Integer> m) {

-		StringBuffer s = new StringBuffer();

-		for (String x : m.keySet()) {

-			s.append(x);

-			s.append("-");

-			s.append(m.get(x));

-			s.append(",");

-		}

+  public static String convertHashMapToString(HashMap<String, Integer> m) {

+    StringBuffer s = new StringBuffer();

+    for (String x : m.keySet()) {

+      s.append(x);

+      s.append("-");

+      s.append(m.get(x));

+      s.append(",");

+    }

 

-		return s.toString();

-	}

+    return s.toString();

+  }

 

-	public static boolean isTokenAllDigitOrPunc(String token) {

-		for (int i = 0; i < token.length(); i++) {

-			if (java.lang.Character.isLetter(token.charAt(i))) {

-				return false;

-			}

-		}

-		return true;

-	}

+  public static boolean isTokenAllDigitOrPunc(String token) {

+    for (int i = 0; i < token.length(); i++) {

+      if (java.lang.Character.isLetter(token.charAt(i))) {

+        return false;

+      }

+    }

+    return true;

+  }

 

-	public static boolean containsDigit(String token) {

-		for (int i = 0; i < token.length(); i++) {

-			if (java.lang.Character.isDigit(token.charAt(i))) {

-				return true;

-			}

-		}

-		return false;

-	}

+  public static boolean containsDigit(String token) {

+    for (int i = 0; i < token.length(); i++) {

+      if (java.lang.Character.isDigit(token.charAt(i))) {

+        return true;

+      }

+    }

+    return false;

+  }

 

-	public static String CleanCharacter(String txt, int uValue) {

-		StringBuffer retVal = new StringBuffer();

-		for (int i = 0; i < txt.length(); i++) {

-			int uChar = (txt.charAt(i));

-			if (uChar != uValue) {

-				retVal.append(txt.charAt(i));

-			} else {

-				retVal.append(" ");

-			}

-		}

-		return retVal.toString();

-	}

+  public static String CleanCharacter(String txt, int uValue) {

+    StringBuffer retVal = new StringBuffer();

+    for (int i = 0; i < txt.length(); i++) {

+      int uChar = (txt.charAt(i));

+      if (uChar != uValue) {

+        retVal.append(txt.charAt(i));

+      } else {

+        retVal.append(" ");

+      }

+    }

+    return retVal.toString();

+  }

 

-	public static String removeHTMLTagsFromStr(String inputStr) {

-		String[] removeTags = StringUtils.substringsBetween(inputStr, "<", ">");

+  public static String removeHTMLTagsFromStr(String inputStr) {

+    String[] removeTags = StringUtils.substringsBetween(inputStr, "<", ">");

 

-		if (removeTags != null && removeTags.length > 0) {

-			for (String tag : removeTags) {

-				inputStr = StringUtils.remove(inputStr, "<" + tag + ">");

-			}

-		}

+    if (removeTags != null && removeTags.length > 0) {

+      for (String tag : removeTags) {

+        inputStr = StringUtils.remove(inputStr, "<" + tag + ">");

+      }

+    }

 

-		return inputStr;

-	}

+    return inputStr;

+  }

 

-	public static String unescapeHTML(String text) {

-		return org.apache.commons.lang.StringEscapeUtils.unescapeHtml(text);

-	}

+  public static String unescapeHTML(String text) {

+    return org.apache.commons.lang.StringEscapeUtils.unescapeHtml(text);

+  }

 

-	public static String stripHTML(String text) {

-		return text.replaceAll("\\<.*?>", "");

-	}

+  public static String stripHTML(String text) {

+    return text.replaceAll("\\<.*?>", "");

+  }

 

-	public static String stripScriptTags(String text) {

-		Pattern p = java.util.regex.Pattern.compile("\\<SCRIPT.*?</SCRIPT>",

-				Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

-		Matcher matcher = p.matcher(text);

-		String tmp = matcher.replaceAll("");

-		return tmp;

-	}

+  public static String stripScriptTags(String text) {

+    Pattern p = java.util.regex.Pattern.compile("\\<SCRIPT.*?</SCRIPT>",

+        Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

+    Matcher matcher = p.matcher(text);

+    String tmp = matcher.replaceAll("");

+    return tmp;

+  }

 

-	public static String stripNoScriptTags(String text) {

-		Pattern p = java.util.regex.Pattern.compile("\\<NOSCRIPT.*?</NOSCRIPT>",

-				Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

-		Matcher matcher = p.matcher(text);

-		String tmp = matcher.replaceAll("");

-		return tmp;

-	}

+  public static String stripNoScriptTags(String text) {

+    Pattern p = java.util.regex.Pattern.compile("\\<NOSCRIPT.*?</NOSCRIPT>",

+        Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

+    Matcher matcher = p.matcher(text);

+    String tmp = matcher.replaceAll("");

+    return tmp;

+  }

 

-	public static String stripHTMLMultiLine(String text,

-			HashSet<String> allowedHtmlTags, String escGtCh, String escLtCh) {

-		if (StringUtils.isNotEmpty(text)) {

+  public static String stripHTMLMultiLine(String text,

+      HashSet<String> allowedHtmlTags, String escGtCh, String escLtCh) {

+    if (StringUtils.isNotEmpty(text)) {

 

-			boolean hadAllowedHtmlTags = false;

+      boolean hadAllowedHtmlTags = false;

 

-			if (allowedHtmlTags != null) {

-				for (String htmlTag : allowedHtmlTags) {

-					String tmp = text.replaceAll("<" + htmlTag + ">", escLtCh + htmlTag

-							+ escGtCh);

-					tmp = tmp.replaceAll("</" + htmlTag + ">", escLtCh + "/" + htmlTag

-							+ escGtCh);

-					if (!tmp.equals(text)) {

-						text = tmp;

-						hadAllowedHtmlTags = true;

-					}

-				}

-			}

+      if (allowedHtmlTags != null) {

+        for (String htmlTag : allowedHtmlTags) {

+          String tmp = text.replaceAll("<" + htmlTag + ">", escLtCh + htmlTag

+              + escGtCh);

+          tmp = tmp.replaceAll("</" + htmlTag + ">", escLtCh + "/" + htmlTag

+              + escGtCh);

+          if (!tmp.equals(text)) {

+            text = tmp;

+            hadAllowedHtmlTags = true;

+          }

+        }

+      }

 

-			text = stripHTMLMultiLine(text);

+      text = stripHTMLMultiLine(text);

 

-			if (hadAllowedHtmlTags) {

-				text = text.replaceAll(escLtCh, "<");

-				text = text.replaceAll(escGtCh, ">");

-			}

-		}

+      if (hadAllowedHtmlTags) {

+        text = text.replaceAll(escLtCh, "<");

+        text = text.replaceAll(escGtCh, ">");

+      }

+    }

 

-		return text;

-	}

+    return text;

+  }

 

-	public static String stripHTMLMultiLine(String text) {

-		Pattern p = java.util.regex.Pattern.compile("\\<.*?>", Pattern.DOTALL);

-		Matcher matcher = p.matcher(text);

-		String tmp = matcher.replaceAll("");

-		return tmp;

-	}

+  public static String stripHTMLMultiLine(String text) {

+    Pattern p = java.util.regex.Pattern.compile("\\<.*?>", Pattern.DOTALL);

+    Matcher matcher = p.matcher(text);

+    String tmp = matcher.replaceAll("");

+    return tmp;

+  }

 

-	public static String stripHTMLCommentsMultiLine(String text) {

-		Pattern p = java.util.regex.Pattern.compile("\\<!--.*?-->", Pattern.DOTALL);

-		Matcher matcher = p.matcher(text);

-		String tmp = matcher.replaceAll("");

-		return tmp;

-	}

+  public static String stripHTMLCommentsMultiLine(String text) {

+    Pattern p = java.util.regex.Pattern.compile("\\<!--.*?-->", Pattern.DOTALL);

+    Matcher matcher = p.matcher(text);

+    String tmp = matcher.replaceAll("");

+    return tmp;

+  }

 

-	public static boolean isFlagSet(Integer flags, Integer flagToCheck) {

-		if (flags != null && flagToCheck != null) {

-			return ((flags & flagToCheck) == flagToCheck);

-		}

-		return false;

-	}

+  public static boolean isFlagSet(Integer flags, Integer flagToCheck) {

+    if (flags != null && flagToCheck != null) {

+      return ((flags & flagToCheck) == flagToCheck);

+    }

+    return false;

+  }

 

-	public static Integer updateFlag(Integer flags, Integer flagToCheck,

-			boolean shouldSet) {

-		if (shouldSet) {

-			return setFlag(flags, flagToCheck);

-		} else {

-			return resetFlag(flags, flagToCheck);

-		}

-	}

+  public static Integer updateFlag(Integer flags, Integer flagToCheck,

+      boolean shouldSet) {

+    if (shouldSet) {

+      return setFlag(flags, flagToCheck);

+    } else {

+      return resetFlag(flags, flagToCheck);

+    }

+  }

 

-	public static Integer setFlag(Integer flags, Integer flagToCheck) {

-		if (flags == null) {

-			flags = new Integer(0);

-		}

-		if (!isFlagSet(flags, flagToCheck)) {

-			flags = flags + flagToCheck;

-			;

-		}

-		return flags;

-	}

+  public static Integer setFlag(Integer flags, Integer flagToCheck) {

+    if (flags == null) {

+      flags = new Integer(0);

+    }

+    if (!isFlagSet(flags, flagToCheck)) {

+      flags = flags + flagToCheck;

+      ;

+    }

+    return flags;

+  }

 

-	public static Integer resetFlag(Integer flags, Integer flagToCheck) {

-		if (flags == null) {

-			// nothing to reset

-			flags = new Integer(0);

-			return flags;

-		}

+  public static Integer resetFlag(Integer flags, Integer flagToCheck) {

+    if (flags == null) {

+      // nothing to reset

+      flags = new Integer(0);

+      return flags;

+    }

 

-		if (isFlagSet(flags, flagToCheck)) {

-			flags = flags - flagToCheck;

-		}

-		return flags;

-	}

+    if (isFlagSet(flags, flagToCheck)) {

+      flags = flags - flagToCheck;

+    }

+    return flags;

+  }

 

-	public static String truncateOnSpace(String text, Integer length) {

-		String retVal = "";

-		if (text.length() <= length) {

-			retVal = text;

-		} else {

-			StringBuffer b = new StringBuffer();

-			for (int i = 0; i < text.length(); i++) {

-				if (b.length() >= length && Character.isWhitespace(text.charAt(i))) { // iterate

-					// until

-					// we

-					// hit

-					// whitespace

-					b.append("...");

-					break;

-				}

-				b.append(text.charAt(i));

-			}

-			retVal = b.toString();

-		}

+  public static String truncateOnSpace(String text, Integer length) {

+    String retVal = "";

+    if (text.length() <= length) {

+      retVal = text;

+    } else {

+      StringBuffer b = new StringBuffer();

+      for (int i = 0; i < text.length(); i++) {

+        if (b.length() >= length && Character.isWhitespace(text.charAt(i))) { // iterate

+          // until

+          // we

+          // hit

+          // whitespace

+          b.append("...");

+          break;

+        }

+        b.append(text.charAt(i));

+      }

+      retVal = b.toString();

+    }

 

-		return retVal.trim();

-	}

+    return retVal.trim();

+  }

 

-	public static String sanitizeString(String text) {

-		text = Utils.stripHTMLCommentsMultiLine(text);

-		text = Utils.stripHTMLMultiLine(text);

-		text = Utils.unescapeHTML(text);

-		text = StringUtils.trimToEmpty(text);

-		text = text.replaceAll("\\s+", " ");

-		return text;

-	}

+  public static String sanitizeString(String text) {

+    text = Utils.stripHTMLCommentsMultiLine(text);

+    text = Utils.stripHTMLMultiLine(text);

+    text = Utils.unescapeHTML(text);

+    text = StringUtils.trimToEmpty(text);

+    text = text.replaceAll("\\s+", " ");

+    return text;

+  }

 

-	public static String makeStringUrlSafe(String text) {

-		StringBuffer b = new StringBuffer();

-		for (int i = 0; i < text.length(); i++) {

-			if (StringUtils.isAlphanumericSpace(String.valueOf(text.charAt(i)))) {

-				b.append(text.charAt(i));

-			}

-		}

-		return Utils.convertToASCII(b.toString().replaceAll("\\s+", " "));

-	}

+  public static String makeStringUrlSafe(String text) {

+    StringBuffer b = new StringBuffer();

+    for (int i = 0; i < text.length(); i++) {

+      if (StringUtils.isAlphanumericSpace(String.valueOf(text.charAt(i)))) {

+        b.append(text.charAt(i));

+      }

+    }

+    return Utils.convertToASCII(b.toString().replaceAll("\\s+", " "));

+  }

 

-	public static String getEventIdFromNewsUrl(String url) {

-		String eventId = null;

-		String p = "news/([0-9]+)";

-		Pattern pattern = Pattern.compile(p);

-		Matcher matcher = pattern.matcher(url);

-		while (matcher.find()) {

-			// System.out.println("found: " + matcher.group(2));

-			eventId = matcher.group(1);

-		}

-		return eventId;

-	}

+  public static String getEventIdFromNewsUrl(String url) {

+    String eventId = null;

+    String p = "news/([0-9]+)";

+    Pattern pattern = Pattern.compile(p);

+    Matcher matcher = pattern.matcher(url);

+    while (matcher.find()) {

+      // System.out.println("found: " + matcher.group(2));

+      eventId = matcher.group(1);

+    }

+    return eventId;

+  }

 

-	public static String buildCommaSeparatedIds(List ids) {

+  public static String buildCommaSeparatedIds(List ids) {

 

-		if (ids != null && ids.size() > 0) {

-			StringBuffer sbuf = new StringBuffer();

+    if (ids != null && ids.size() > 0) {

+      StringBuffer sbuf = new StringBuffer();

 

-			for (int count = 0; count < ids.size(); count++) {

-				if (count > 0) {

-					sbuf.append(",");

-				}

-				sbuf.append(ids.get(count));

-			}

-			return sbuf.toString();

-		}

+      for (int count = 0; count < ids.size(); count++) {

+        if (count > 0) {

+          sbuf.append(",");

+        }

+        sbuf.append(ids.get(count));

+      }

+      return sbuf.toString();

+    }

 

-		return null;

-	}

+    return null;

+  }

 

-	public static float computeScoreForRanking(List<Float> scores,

-			int desiredRanking) {

-		float newScore = 0f;

+  public static float computeScoreForRanking(List<Float> scores,

+      int desiredRanking) {

+    float newScore = 0f;

 

-		if (desiredRanking == 1) {

-			newScore = scores.get(0) + 50000;

-		} else if (desiredRanking == scores.size()) {

-			newScore = scores.get(scores.size() - 1) - 1;

-		} else {

-			newScore = (scores.get(desiredRanking - 2) + scores

-					.get(desiredRanking - 1)) / 2;

-		}

+    if (desiredRanking == 1) {

+      newScore = scores.get(0) + 50000;

+    } else if (desiredRanking == scores.size()) {

+      newScore = scores.get(scores.size() - 1) - 1;

+    } else {

+      newScore = (scores.get(desiredRanking - 2) + scores

+          .get(desiredRanking - 1)) / 2;

+    }

 

-		return newScore;

-	}

+    return newScore;

+  }

 

-	public static String fullStripHTML(String text) {

-		text = Utils.stripScriptTags(text);

-		text = Utils.stripNoScriptTags(text);

-		text = Utils.stripStyleTags(text);

-		return text.replaceAll("\\<.*?>", "");

-	}

+  public static String fullStripHTML(String text) {

+    text = Utils.stripScriptTags(text);

+    text = Utils.stripNoScriptTags(text);

+    text = Utils.stripStyleTags(text);

+    return text.replaceAll("\\<.*?>", "");

+  }

 

-	public static String stripStyleTags(String text) {

-		Pattern p = java.util.regex.Pattern.compile("\\<STYLE.*?</STYLE>",

-				Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

-		Matcher matcher = p.matcher(text);

-		String tmp = matcher.replaceAll("");

-		return tmp;

-	}

+  public static String stripStyleTags(String text) {

+    Pattern p = java.util.regex.Pattern.compile("\\<STYLE.*?</STYLE>",

+        Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

+    Matcher matcher = p.matcher(text);

+    String tmp = matcher.replaceAll("");

+    return tmp;

+  }

 

-	public static boolean isLatinWord(String word) {

-		for (int i = 0; i < word.length(); i++) {

-			int asciiCode = (int) word.charAt(i);

-			if (asciiCode > 128)

-				return false;

-		}

+  public static boolean isLatinWord(String word) {

+    for (int i = 0; i < word.length(); i++) {

+      int asciiCode = (int) word.charAt(i);

+      if (asciiCode > 128)

+        return false;

+    }

 

-		return true;

-	}

+    return true;

+  }

 

-	static public void main(String[] args) {

-		System.out.println(isLatinWord("Performing Arts Center (SPAC)"));

-		System.out.println(isLatinWord("“Jazz Age�"));

+  static public void main(String[] args) {

+    System.out.println(isLatinWord("Performing Arts Center (SPAC)"));

+    System.out.println(isLatinWord("“Jazz Age�"));

 

-		System.out

-		.println(isLatinWord("デービッド・ã"));

-		System.out.println(isLatinWord("é ñçøåó"));

-		System.out.println(isLatinWord("ùìîä à øöé"));

-		System.out

-		.println(isLatinWord("陳港�, 陈港�"));

+    System.out

+        .println(isLatinWord("デービッド・ã"));

+    System.out

+        .println(isLatinWord("é ñçøåó"));

+    System.out

+        .println(isLatinWord("ùìîä à øöé"));

+    System.out

+        .println(isLatinWord("陳港�, 陈港�"));

 

-		System.out

-		.println(convertToASCII("Irvine Bay Hotel & Golf Club on Sunday, May 01 during Jazz on the Beach, Tobago Jazz Experience alongside The Jazz Singer"));

-		System.out

-		.println(convertToASCII("This year’s event, held again at the wonderful Saratoga Performing Arts Center (SPAC)"));

-		System.out

-		.println(convertToASCII("and the great saxophone playing of Sam Rogers                Rush Hour Blues 2010      .  "));

-		System.out

-		.println(convertToASCII("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Ron Carter is among the most original, prolific "));

-		System.out

-		.println(convertToASCII(" .             Ron Carter is among the most original, prolific. "));

-		// TODO deal with

-		// www.wmot.org/program-guide/program-listings/28th_annual_playboy_jazz_festiva_2006.htm

-		System.out

-		.println(convertToASCII("By the mid 1920’s,    during the period referred to as the “Jazz Age�, jazz music was heard    in most major cities from the East Coast"));

+    System.out

+        .println(convertToASCII("Irvine Bay Hotel & Golf Club on Sunday, May 01 during Jazz on the Beach, Tobago Jazz Experience alongside The Jazz Singer"));

+    System.out

+        .println(convertToASCII("This year’s event, held again at the wonderful Saratoga Performing Arts Center (SPAC)"));

+    System.out

+        .println(convertToASCII("and the great saxophone playing of Sam Rogers                Rush Hour Blues 2010      .  "));

+    System.out

+        .println(convertToASCII("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Ron Carter is among the most original, prolific "));

+    System.out

+        .println(convertToASCII(" .             Ron Carter is among the most original, prolific. "));

+    // TODO deal with

+    // www.wmot.org/program-guide/program-listings/28th_annual_playboy_jazz_festiva_2006.htm

+    System.out

+        .println(convertToASCII("By the mid 1920’s,    during the period referred to as the “Jazz Age�, jazz music was heard    in most major cities from the East Coast"));

 

-	}

+  }

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/EpistemicStatesTrainingSet.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/EpistemicStatesTrainingSet.java
index 556b64f..e67f9db 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/EpistemicStatesTrainingSet.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/EpistemicStatesTrainingSet.java
@@ -18,70 +18,96 @@
 

 import java.util.HashMap;

 

-public class EpistemicStatesTrainingSet

-{

+public class EpistemicStatesTrainingSet {

 

-	static public HashMap<String, String> class_setOfSentences = new HashMap<String, String>();

+  static public HashMap<String, String> class_setOfSentences = new HashMap<String, String>();

 

-	static

-	{

-		class_setOfSentences

-			.put(

-				"beginner",

-				"I'm fairly new to real cameras. "

-					+ "I am not a pro photographer. I am not a professional. "

-					+ "I have played around with friends digital cameras but never owned one myself. "

-					+ "First time buyer. I am a novice. Which camera is the most fool proof. I am a newbie. I am a beginner_xyz in cameras. I am just starting.");

+  static {

+    class_setOfSentences

+        .put(

+            "beginner",

+            "I'm fairly new to real cameras. "

+                + "I am not a pro photographer. I am not a professional. "

+                + "I have played around with friends digital cameras but never owned one myself. "

+                + "First time buyer. I am a novice. Which camera is the most fool proof. I am a newbie. I am a beginner_xyz in cameras. I am just starting.");

 

-		class_setOfSentences.put("normal user", "I am not looking to make money with photos. "

-			+ "Need a camera for family use .	The camera will be used mainly for taking pictures of kids and family. ");

+    class_setOfSentences

+        .put(

+            "normal user",

+            "I am not looking to make money with photos. "

+                + "Need a camera for family use .	The camera will be used mainly for taking pictures of kids and family. ");

 

-		class_setOfSentences.put("pro or semi pro user", "I am not looking for an entry level, more like semi-pro. "

-			+ "I am looking for an affordable professional camera. " + "looking for something professional. "

-			+ "I've shot a lot of film underwater using camera.");

+    class_setOfSentences.put("pro or semi pro user",

+        "I am not looking for an entry level, more like semi-pro. "

+            + "I am looking for an affordable professional camera. "

+            + "looking for something professional. "

+            + "I've shot a lot of film underwater using camera.");

 

-		class_setOfSentences.put("potential buyer", "I now want to get one of my own. "

-			+ "I need a camera that can handle conditions. " + "Which camera should I buy? "

-			+ "I would really like to get a camera with an optical viewfinder. " + "Need a camera for family use. "

-			+ "what camera would you recommend? " + "what camera should i buy? "

-			+ "I am looking for a camera that can serve a dual purpose. " + "Which camera is the most fool proof. "

-			+ "I am looking for a new camera to take with me to concerts. "

-			+ "I am looking for an affordable professional camera. " + "I want to buy a camera with features. "

-			+ "I am looking for a smaller camera. " + "what kind of camera should be purchased for the lab?  "

-			+ "I am looking to buy a mega zoom digital camera. " + "I was looking at a specific camera "

-			+ "what's the best compact camera? " + "I've been looking for a digital camera for my daughter. "

-			+ "I want a ultra zoom compact camera. " + "I need a new camera. "

-			+ "I am looking for a camera to take with me on vacation. "

-			+ "I still could not figure out what i should buy. ");

-		/*

-		 * I need a camera for Alaska trip I am looking for small camera for the night time I'm looking to upgrade to

-		 * something better. I need a replacement I am looking for one with better zoom and quality.

-		 */

+    class_setOfSentences

+        .put(

+            "potential buyer",

+            "I now want to get one of my own. "

+                + "I need a camera that can handle conditions. "

+                + "Which camera should I buy? "

+                + "I would really like to get a camera with an optical viewfinder. "

+                + "Need a camera for family use. "

+                + "what camera would you recommend? "

+                + "what camera should i buy? "

+                + "I am looking for a camera that can serve a dual purpose. "

+                + "Which camera is the most fool proof. "

+                + "I am looking for a new camera to take with me to concerts. "

+                + "I am looking for an affordable professional camera. "

+                + "I want to buy a camera with features. "

+                + "I am looking for a smaller camera. "

+                + "what kind of camera should be purchased for the lab?  "

+                + "I am looking to buy a mega zoom digital camera. "

+                + "I was looking at a specific camera "

+                + "what's the best compact camera? "

+                + "I've been looking for a digital camera for my daughter. "

+                + "I want a ultra zoom compact camera. "

+                + "I need a new camera. "

+                + "I am looking for a camera to take with me on vacation. "

+                + "I still could not figure out what i should buy. ");

+    /*

+     * I need a camera for Alaska trip I am looking for small camera for the

+     * night time I'm looking to upgrade to something better. I need a

+     * replacement I am looking for one with better zoom and quality.

+     */

 

-		// upgrade_xyz - required in matching expr; otherwise fail

-		class_setOfSentences.put("experienced buyer",

-			"I have read a lot of reviews but still have some questions on what camera is right for me. "

-				+ "I'm looking to upgrade_xyz to something better. " + "I need a replacement. I need a new camera!");

+    // upgrade_xyz - required in matching expr; otherwise fail

+    class_setOfSentences

+        .put(

+            "experienced buyer",

+            "I have read a lot of reviews but still have some questions on what camera is right for me. "

+                + "I'm looking to upgrade_xyz to something better. "

+                + "I need a replacement. I need a new camera!");

 

-		class_setOfSentences.put("open minded buyer", "I've been looking at some Canon models but am open to others. "

-			+ "I am open to all options just want a good quality camera. "

-			+ "I just cannot decide with all those cameras out there. " + "It comes down a few different canons. "

-			+ "There is just so many to choose from that I dont know what to pick. "

-			+ "what is the best compact camera? " + "i still could not figure out what i should buy. "

-			+ "I dont have brands that I like in particular. ");

+    class_setOfSentences

+        .put(

+            "open minded buyer",

+            "I've been looking at some Canon models but am open to others. "

+                + "I am open to all options just want a good quality camera. "

+                + "I just cannot decide with all those cameras out there. "

+                + "It comes down a few different canons. "

+                + "There is just so many to choose from that I dont know what to pick. "

+                + "what is the best compact camera? "

+                + "i still could not figure out what i should buy. "

+                + "I dont have brands that I like in particular. ");

 

-		class_setOfSentences.put("user with one brand in mind",

-			"No brand in particular but I have read that Canon makes good cameras. " + "I want to buy xyz camera. "

-				+ "Canon is my favorite brand. ");

+    class_setOfSentences.put("user with one brand in mind",

+        "No brand in particular but I have read that Canon makes good cameras. "

+            + "I want to buy xyz camera. " + "Canon is my favorite brand. ");

 

-		class_setOfSentences.put("already have a short list", "I am only looking at Nikon and Canon, maybe Sony. "

-			+ "I have narrowed my choice between these three cameras. " + "I am debating between these two. "

-			+ "Leaning toward Canon, Nikon, Sony but suggestions are welcome. "

-			+ "I'm looking at the camera and camera. " + "I have narrowed down my choices of camera. ");

-	}

+    class_setOfSentences.put("already have a short list",

+        "I am only looking at Nikon and Canon, maybe Sony. "

+            + "I have narrowed my choice between these three cameras. "

+            + "I am debating between these two. "

+            + "Leaning toward Canon, Nikon, Sony but suggestions are welcome. "

+            + "I'm looking at the camera and camera. "

+            + "I have narrowed down my choices of camera. ");

+  }

 

-	public EpistemicStatesTrainingSet()

-	{

-	}

+  public EpistemicStatesTrainingSet() {

+  }

 

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java
index c4ce29e..1dc100c 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java
@@ -19,7 +19,6 @@
 
 import java.util.List;
 
-
 import opennlp.tools.stemmer.PorterStemmer;
 
 public class LemmaFormManager {
@@ -70,7 +69,8 @@
     }
     try {
       if (ps != null) {
-        if (ps.stem(lemma1).toString().equalsIgnoreCase(ps.stem(lemma2).toString())) {
+        if (ps.stem(lemma1).toString()
+            .equalsIgnoreCase(ps.stem(lemma2).toString())) {
           return lemma1;
         }
       }
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
index d37f593..74c685c 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
@@ -20,436 +20,394 @@
 import java.util.ArrayList;

 import java.util.List;

 

-public class ParseTreeChunk

-{

-	private String mainPOS;

+public class ParseTreeChunk {

+  private String mainPOS;

 

-	private List<String> lemmas;

+  private List<String> lemmas;

 

-	private List<String> POSs;

+  private List<String> POSs;

 

-	private int startPos;

+  private int startPos;

 

-	private int endPos;

+  private int endPos;

 

-	private int size;

+  private int size;

 

-	private ParseTreeMatcher parseTreeMatcher;

+  private ParseTreeMatcher parseTreeMatcher;

 

-	private LemmaFormManager lemmaFormManager;

+  private LemmaFormManager lemmaFormManager;

 

-	private GeneralizationListReducer generalizationListReducer;

+  private GeneralizationListReducer generalizationListReducer;

 

-	public ParseTreeChunk()

-	{

-	}

+  public ParseTreeChunk() {

+  }

 

-	public ParseTreeChunk(List<String> lemmas, List<String> POSs, int startPos, int endPos)

-	{

-		this.lemmas = lemmas;

-		this.POSs = POSs;

-		this.startPos = startPos;

-		this.endPos = endPos;

+  public ParseTreeChunk(List<String> lemmas, List<String> POSs, int startPos,

+      int endPos) {

+    this.lemmas = lemmas;

+    this.POSs = POSs;

+    this.startPos = startPos;

+    this.endPos = endPos;

 

-		// phraseType.put(0, "np");

-	}

+    // phraseType.put(0, "np");

+  }

 

-	// constructor which takes lemmas and POS as lists so that phrases can be conveniently specified.

-	// usage: stand-alone runs

-	public ParseTreeChunk(String mPOS, String[] lemmas, String[] POSss)

-	{

-		this.mainPOS = mPOS;

-		this.lemmas = new ArrayList<String>();

-		for (String l : lemmas)

-		{

-			this.lemmas.add(l);

-		}

-		this.POSs = new ArrayList<String>();

-		for (String p : POSss)

-		{

-			this.POSs.add(p);

-		}

-	}

+  // constructor which takes lemmas and POS as lists so that phrases can be

+  // conveniently specified.

+  // usage: stand-alone runs

+  public ParseTreeChunk(String mPOS, String[] lemmas, String[] POSss) {

+    this.mainPOS = mPOS;

+    this.lemmas = new ArrayList<String>();

+    for (String l : lemmas) {

+      this.lemmas.add(l);

+    }

+    this.POSs = new ArrayList<String>();

+    for (String p : POSss) {

+      this.POSs.add(p);

+    }

+  }

 

-	// constructor which takes lemmas and POS as lists so that phrases can be conveniently specified.

-	// usage: stand-alone runs

-	public ParseTreeChunk(String mPOS, List<String> lemmas, List<String> POSss)

-	{

-		this.mainPOS = mPOS;

-		this.lemmas =  lemmas;

-		this.POSs = POSss;

-		

-	}

-	// Before:

-	// [0(S-At home we like to eat great pizza deals), 0(PP-At home), 0(IN-At), 3(NP-home), 3(NN-home), 8(NP-we),

-	// 8(PRP-we), 11(VP-like to eat great pizza deals), 11(VBP-like), 16(S-to eat great pizza deals), 16(VP-to eat great

-	// pizza deals),

-	// 16(TO-to), 19(VP-eat great pizza deals), 19(VB-eat), 23(NP-great pizza deals), 23(JJ-great), 29(NN-pizza),

-	// 35(NNS-deals)]

+  // constructor which takes lemmas and POS as lists so that phrases can be

+  // conveniently specified.

+  // usage: stand-alone runs

+  public ParseTreeChunk(String mPOS, List<String> lemmas, List<String> POSss) {

+    this.mainPOS = mPOS;

+    this.lemmas = lemmas;

+    this.POSs = POSss;

 

-	// After:

-	// [S [IN-At NP-home NP-we VBP-like ], PP [IN-At NP-home ], IN [IN-At ], NP [NP-home ], NN [NP-home ], NP [NP-we ],

-	// PRP [NP-we ], VP [VBP-like TO-to VB-eat JJ-great ], VBP [VBP-like ], S [TO-to VB-eat JJ-great NN-pizza ], VP

-	// [TO-to VB-eat JJ-great NN-pizza ], TO [TO-to ], VP [VB-eat JJ-great NN-pizza NNS-deals ],

-	// VB [VB-eat ], NP [JJ-great NN-pizza NNS-deals ], JJ [JJ-great ], NN [NN-pizza ], NNS [NNS-deals ]]

+  }

 

-	public List<ParseTreeChunk> buildChunks(List<LemmaPair> parseResults)

-	{

-		List<ParseTreeChunk> chunksResults = new ArrayList<ParseTreeChunk>();

-		for (LemmaPair chunk : parseResults)

-		{

-			String[] lemmasAr = chunk.getLemma().split(" ");

-			List<String> poss = new ArrayList<String>(), lems = new ArrayList<String>();

-			for (String lem : lemmasAr)

-			{

-				lems.add(lem);

-				// now looking for POSs for individual word

-				for (LemmaPair chunkCur : parseResults)

-				{

-					if (chunkCur.getLemma().equals(lem) &&

-					// check that this is a proper word in proper position

-						chunkCur.getEndPos() <= chunk.getEndPos() && chunkCur.getStartPos() >= chunk.getStartPos())

-					{

-						poss.add(chunkCur.getPOS());

-						break;

-					}

-				}

-			}

-			if (lems.size() != poss.size())

-			{

-				System.err.println("lems.size()!= poss.size()");

-			}

-			if (lems.size() < 2)

-			{ // single word phrase, nothing to match

-				continue;

-			}

-			ParseTreeChunk ch = new ParseTreeChunk(lems, poss, chunk.getStartPos(), chunk.getEndPos());

-			ch.setMainPOS(chunk.getPOS());

-			chunksResults.add(ch);

-		}

-		return chunksResults;

-	}

+  // Before:

+  // [0(S-At home we like to eat great pizza deals), 0(PP-At home), 0(IN-At),

+  // 3(NP-home), 3(NN-home), 8(NP-we),

+  // 8(PRP-we), 11(VP-like to eat great pizza deals), 11(VBP-like), 16(S-to eat

+  // great pizza deals), 16(VP-to eat great

+  // pizza deals),

+  // 16(TO-to), 19(VP-eat great pizza deals), 19(VB-eat), 23(NP-great pizza

+  // deals), 23(JJ-great), 29(NN-pizza),

+  // 35(NNS-deals)]

 

-	public List<List<ParseTreeChunk>> matchTwoSentencesGivenPairLists(List<LemmaPair> sent1Pairs,

-		List<LemmaPair> sent2Pairs)

-	{

+  // After:

+  // [S [IN-At NP-home NP-we VBP-like ], PP [IN-At NP-home ], IN [IN-At ], NP

+  // [NP-home ], NN [NP-home ], NP [NP-we ],

+  // PRP [NP-we ], VP [VBP-like TO-to VB-eat JJ-great ], VBP [VBP-like ], S

+  // [TO-to VB-eat JJ-great NN-pizza ], VP

+  // [TO-to VB-eat JJ-great NN-pizza ], TO [TO-to ], VP [VB-eat JJ-great

+  // NN-pizza NNS-deals ],

+  // VB [VB-eat ], NP [JJ-great NN-pizza NNS-deals ], JJ [JJ-great ], NN

+  // [NN-pizza ], NNS [NNS-deals ]]

 

-		List<ParseTreeChunk> chunk1List = buildChunks(sent1Pairs);

-		List<ParseTreeChunk> chunk2List = buildChunks(sent2Pairs);

+  public List<ParseTreeChunk> buildChunks(List<LemmaPair> parseResults) {

+    List<ParseTreeChunk> chunksResults = new ArrayList<ParseTreeChunk>();

+    for (LemmaPair chunk : parseResults) {

+      String[] lemmasAr = chunk.getLemma().split(" ");

+      List<String> poss = new ArrayList<String>(), lems = new ArrayList<String>();

+      for (String lem : lemmasAr) {

+        lems.add(lem);

+        // now looking for POSs for individual word

+        for (LemmaPair chunkCur : parseResults) {

+          if (chunkCur.getLemma().equals(lem)

+              &&

+              // check that this is a proper word in proper position

+              chunkCur.getEndPos() <= chunk.getEndPos()

+              && chunkCur.getStartPos() >= chunk.getStartPos()) {

+            poss.add(chunkCur.getPOS());

+            break;

+          }

+        }

+      }

+      if (lems.size() != poss.size()) {

+        System.err.println("lems.size()!= poss.size()");

+      }

+      if (lems.size() < 2) { // single word phrase, nothing to match

+        continue;

+      }

+      ParseTreeChunk ch = new ParseTreeChunk(lems, poss, chunk.getStartPos(),

+          chunk.getEndPos());

+      ch.setMainPOS(chunk.getPOS());

+      chunksResults.add(ch);

+    }

+    return chunksResults;

+  }

 

-		List<List<ParseTreeChunk>> sent1GrpLst = groupChunksAsParses(chunk1List);

-		List<List<ParseTreeChunk>> sent2GrpLst = groupChunksAsParses(chunk2List);

+  public List<List<ParseTreeChunk>> matchTwoSentencesGivenPairLists(

+      List<LemmaPair> sent1Pairs, List<LemmaPair> sent2Pairs) {

 

-		System.out.println("=== Grouped chunks 1 " + sent1GrpLst);

-		System.out.println("=== Grouped chunks 2 " + sent2GrpLst);

+    List<ParseTreeChunk> chunk1List = buildChunks(sent1Pairs);

+    List<ParseTreeChunk> chunk2List = buildChunks(sent2Pairs);

 

-		return matchTwoSentencesGroupedChunks(sent1GrpLst, sent2GrpLst);

-	}

+    List<List<ParseTreeChunk>> sent1GrpLst = groupChunksAsParses(chunk1List);

+    List<List<ParseTreeChunk>> sent2GrpLst = groupChunksAsParses(chunk2List);

 

-	// groups noun phrases, verb phrases, propos phrases etc. for separate match

+    System.out.println("=== Grouped chunks 1 " + sent1GrpLst);

+    System.out.println("=== Grouped chunks 2 " + sent2GrpLst);

 

-	public List<List<ParseTreeChunk>> groupChunksAsParses(List<ParseTreeChunk> parseResults)

-	{

-		List<ParseTreeChunk> np = new ArrayList<ParseTreeChunk>(), vp = new ArrayList<ParseTreeChunk>(), prp = new ArrayList<ParseTreeChunk>(), sbarp = new ArrayList<ParseTreeChunk>(), pp = new ArrayList<ParseTreeChunk>(), adjp = new ArrayList<ParseTreeChunk>(), whadvp = new ArrayList<ParseTreeChunk>(), restOfPhrasesTypes = new ArrayList<ParseTreeChunk>();

-		List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();

-		for (ParseTreeChunk ch : parseResults)

-		{

-			String mainPos = ch.getMainPOS().toLowerCase();

+    return matchTwoSentencesGroupedChunks(sent1GrpLst, sent2GrpLst);

+  }

 

-			if (mainPos.equals("s"))

-			{

-				continue;

-			}

-			if (mainPos.equals("np"))

-			{

-				np.add(ch);

-			}

-			else if (mainPos.equals("vp"))

-			{

-				vp.add(ch);

-			}

-			else if (mainPos.equals("prp"))

-			{

-				prp.add(ch);

-			}

-			else if (mainPos.equals("pp"))

-			{

-				pp.add(ch);

-			}

-			else if (mainPos.equals("adjp"))

-			{

-				adjp.add(ch);

-			}

-			else if (mainPos.equals("whadvp"))

-			{

-				whadvp.add(ch);

-			}

-			else if (mainPos.equals("sbar"))

-			{

-				sbarp.add(ch);

-			}

-			else

-			{

-				restOfPhrasesTypes.add(ch);

-			}

+  // groups noun phrases, verb phrases, propos phrases etc. for separate match

 

-		}

-		results.add(np);

-		results.add(vp);

-		results.add(prp);

-		results.add(pp);

-		results.add(adjp);

-		results.add(whadvp);

-		results.add(restOfPhrasesTypes);

+  public List<List<ParseTreeChunk>> groupChunksAsParses(

+      List<ParseTreeChunk> parseResults) {

+    List<ParseTreeChunk> np = new ArrayList<ParseTreeChunk>(), vp = new ArrayList<ParseTreeChunk>(), prp = new ArrayList<ParseTreeChunk>(), sbarp = new ArrayList<ParseTreeChunk>(), pp = new ArrayList<ParseTreeChunk>(), adjp = new ArrayList<ParseTreeChunk>(), whadvp = new ArrayList<ParseTreeChunk>(), restOfPhrasesTypes = new ArrayList<ParseTreeChunk>();

+    List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();

+    for (ParseTreeChunk ch : parseResults) {

+      String mainPos = ch.getMainPOS().toLowerCase();

 

-		return results;

+      if (mainPos.equals("s")) {

+        continue;

+      }

+      if (mainPos.equals("np")) {

+        np.add(ch);

+      } else if (mainPos.equals("vp")) {

+        vp.add(ch);

+      } else if (mainPos.equals("prp")) {

+        prp.add(ch);

+      } else if (mainPos.equals("pp")) {

+        pp.add(ch);

+      } else if (mainPos.equals("adjp")) {

+        adjp.add(ch);

+      } else if (mainPos.equals("whadvp")) {

+        whadvp.add(ch);

+      } else if (mainPos.equals("sbar")) {

+        sbarp.add(ch);

+      } else {

+        restOfPhrasesTypes.add(ch);

+      }

 

-	}

+    }

+    results.add(np);

+    results.add(vp);

+    results.add(prp);

+    results.add(pp);

+    results.add(adjp);

+    results.add(whadvp);

+    results.add(restOfPhrasesTypes);

 

-	// main function to generalize two expressions grouped by phrase types

-	// returns a list of generalizations for each phrase type with filtered sub-expressions

-	public List<List<ParseTreeChunk>> matchTwoSentencesGroupedChunks(List<List<ParseTreeChunk>> sent1,

-		List<List<ParseTreeChunk>> sent2)

-	{

-		List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();

-		// first irerate through component

-		for (int comp = 0; comp < 2 && // just np & vp

-			comp < sent1.size() && comp < sent2.size(); comp++)

-		{

-			List<ParseTreeChunk> resultComps = new ArrayList<ParseTreeChunk>();

-			// then iterate through each phrase in each component

-			for (ParseTreeChunk ch1 : sent1.get(comp))

-			{

-				for (ParseTreeChunk ch2 : sent2.get(comp))

-				{ // simpler version

-					ParseTreeChunk chunkToAdd = parseTreeMatcher

-						.generalizeTwoGroupedPhrasesRandomSelectHighestScoreWithTransforms(ch1, ch2);

+    return results;

 

-					if (!lemmaFormManager.mustOccurVerifier(ch1, ch2, chunkToAdd))

-					{

-						continue; // if the words which have to stay do not stay, proceed to other elements

-					}

-					Boolean alreadyThere = false;

-					for (ParseTreeChunk chunk : resultComps)

-					{

-						if (chunk.equalsTo(chunkToAdd))

-						{

-							alreadyThere = true;

-							break;

-						}

+  }

 

-						if (parseTreeMatcher.generalizeTwoGroupedPhrasesRandomSelectHighestScore(chunk, chunkToAdd)

-							.equalsTo(chunkToAdd))

-						{

-							alreadyThere = true;

-							break;

-						}

-					}

+  // main function to generalize two expressions grouped by phrase types

+  // returns a list of generalizations for each phrase type with filtered

+  // sub-expressions

+  public List<List<ParseTreeChunk>> matchTwoSentencesGroupedChunks(

+      List<List<ParseTreeChunk>> sent1, List<List<ParseTreeChunk>> sent2) {

+    List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();

+    // first irerate through component

+    for (int comp = 0; comp < 2 && // just np & vp

+        comp < sent1.size() && comp < sent2.size(); comp++) {

+      List<ParseTreeChunk> resultComps = new ArrayList<ParseTreeChunk>();

+      // then iterate through each phrase in each component

+      for (ParseTreeChunk ch1 : sent1.get(comp)) {

+        for (ParseTreeChunk ch2 : sent2.get(comp)) { // simpler version

+          ParseTreeChunk chunkToAdd = parseTreeMatcher

+              .generalizeTwoGroupedPhrasesRandomSelectHighestScoreWithTransforms(

+                  ch1, ch2);

 

-					if (!alreadyThere)

-					{

-						resultComps.add(chunkToAdd);

-					}

+          if (!lemmaFormManager.mustOccurVerifier(ch1, ch2, chunkToAdd)) {

+            continue; // if the words which have to stay do not stay, proceed to

+                      // other elements

+          }

+          Boolean alreadyThere = false;

+          for (ParseTreeChunk chunk : resultComps) {

+            if (chunk.equalsTo(chunkToAdd)) {

+              alreadyThere = true;

+              break;

+            }

 

-					List<ParseTreeChunk> resultCompsReduced = generalizationListReducer

-						.applyFilteringBySubsumption(resultComps);

-					// if (resultCompsReduced.size() != resultComps.size())

-					// System.out.println("reduction of gen list occurred");

-				}

-			}

-			results.add(resultComps);

-		}

+            if (parseTreeMatcher

+                .generalizeTwoGroupedPhrasesRandomSelectHighestScore(chunk,

+                    chunkToAdd).equalsTo(chunkToAdd)) {

+              alreadyThere = true;

+              break;

+            }

+          }

 

-		return results;

-	}

+          if (!alreadyThere) {

+            resultComps.add(chunkToAdd);

+          }

 

-	public Boolean equals(ParseTreeChunk ch)

-	{

-		List<String> lems = ch.getLemmas();

-		List<String> poss = ch.POSs;

+          List<ParseTreeChunk> resultCompsReduced = generalizationListReducer

+              .applyFilteringBySubsumption(resultComps);

+          // if (resultCompsReduced.size() != resultComps.size())

+          // System.out.println("reduction of gen list occurred");

+        }

+      }

+      results.add(resultComps);

+    }

 

-		if (this.lemmas.size() <= lems.size())

-			return false; // sub-chunk should be shorter than chunk

+    return results;

+  }

 

-		for (int i = 0; i < lems.size() && i < this.lemmas.size(); i++)

-		{

-			if (!(this.lemmas.get(i).equals(lems.get(i)) && this.POSs.get(i).equals(poss.get(i))))

-				return false;

-		}

-		return true;

-	}

+  public Boolean equals(ParseTreeChunk ch) {

+    List<String> lems = ch.getLemmas();

+    List<String> poss = ch.POSs;

 

-	// 'this' is super - chunk of ch, ch is sub-chunk of 'this'

-	public Boolean isASubChunk(ParseTreeChunk ch)

-	{

-		List<String> lems = ch.getLemmas();

-		List<String> poss = ch.POSs;

+    if (this.lemmas.size() <= lems.size())

+      return false; // sub-chunk should be shorter than chunk

 

-		if (this.lemmas.size() < lems.size())

-			return false; // sub-chunk should be shorter than chunk

+    for (int i = 0; i < lems.size() && i < this.lemmas.size(); i++) {

+      if (!(this.lemmas.get(i).equals(lems.get(i)) && this.POSs.get(i).equals(

+          poss.get(i))))

+        return false;

+    }

+    return true;

+  }

 

-		for (int i = 0; i < lems.size() && i < this.lemmas.size(); i++)

-		{

-			if (!(this.lemmas.get(i).equals(lems.get(i)) && this.POSs.get(i).equals(poss.get(i))))

-				return false;

-		}

-		return true;

-	}

+  // 'this' is super - chunk of ch, ch is sub-chunk of 'this'

+  public Boolean isASubChunk(ParseTreeChunk ch) {

+    List<String> lems = ch.getLemmas();

+    List<String> poss = ch.POSs;

 

-	public Boolean equalsTo(ParseTreeChunk ch)

-	{

-		List<String> lems = ch.getLemmas();

-		List<String> poss = ch.POSs;

-		if (this.lemmas.size() != lems.size() || this.POSs.size() != poss.size())

-			return false;

+    if (this.lemmas.size() < lems.size())

+      return false; // sub-chunk should be shorter than chunk

 

-		for (int i = 0; i < lems.size(); i++)

-		{

-			if (!(this.lemmas.get(i).equals(lems.get(i)) && this.POSs.get(i).equals(poss.get(i))))

-				return false;

-		}

+    for (int i = 0; i < lems.size() && i < this.lemmas.size(); i++) {

+      if (!(this.lemmas.get(i).equals(lems.get(i)) && this.POSs.get(i).equals(

+          poss.get(i))))

+        return false;

+    }

+    return true;

+  }

 

-		return true;

-	}

+  public Boolean equalsTo(ParseTreeChunk ch) {

+    List<String> lems = ch.getLemmas();

+    List<String> poss = ch.POSs;

+    if (this.lemmas.size() != lems.size() || this.POSs.size() != poss.size())

+      return false;

 

-	public String toString()

-	{

-		String buf = " [";

-		if (mainPOS != null)

-			buf = mainPOS + " [";

-		for (int i = 0; i < lemmas.size() && i < POSs.size() // && i<=3

-		; i++)

-		{

-			buf += POSs.get(i) + "-" + lemmas.get(i) + " ";

-		}

-		return buf + "]";

-	}

+    for (int i = 0; i < lems.size(); i++) {

+      if (!(this.lemmas.get(i).equals(lems.get(i)) && this.POSs.get(i).equals(

+          poss.get(i))))

+        return false;

+    }

 

-	public int compareTo(ParseTreeChunk o)

-	{

-		if (this.size > o.size)

-			return -1;

-		else

-			return 1;

+    return true;

+  }

 

-	}

+  public String toString() {

+    String buf = " [";

+    if (mainPOS != null)

+      buf = mainPOS + " [";

+    for (int i = 0; i < lemmas.size() && i < POSs.size() // && i<=3

+    ; i++) {

+      buf += POSs.get(i) + "-" + lemmas.get(i) + " ";

+    }

+    return buf + "]";

+  }

 

-	public String listToString(List<List<ParseTreeChunk>> chunks)

-	{

-		StringBuffer buf = new StringBuffer();

-		if (chunks.get(0).size() > 0)

-		{

-			buf.append(" np " + chunks.get(0).toString());

-		}

-		if (chunks.get(1).size() > 0)

-		{

-			buf.append(" vp " + chunks.get(1).toString());

-		}

-		if (chunks.size() < 3)

-		{

-			return buf.toString();

-		}

-		if (chunks.get(2).size() > 0)

-		{

-			buf.append(" prp " + chunks.get(2).toString());

-		}

-		if (chunks.get(3).size() > 0)

-		{

-			buf.append(" pp " + chunks.get(3).toString());

-		}

-		if (chunks.get(4).size() > 0)

-		{

-			buf.append(" adjp " + chunks.get(4).toString());

-		}

-		if (chunks.get(5).size() > 0)

-		{

-			buf.append(" whadvp " + chunks.get(5).toString());

-		}

-		/*

-		 * if (mainPos.equals("np")) np.add(ch); else if (mainPos.equals( "vp")) vp.add(ch); else if (mainPos.equals(

-		 * "prp")) prp.add(ch); else if (mainPos.equals( "pp")) pp.add(ch); else if (mainPos.equals( "adjp"))

-		 * adjp.add(ch); else if (mainPos.equals( "whadvp")) whadvp.add(ch);

-		 */

-		return buf.toString();

-	}

+  public int compareTo(ParseTreeChunk o) {

+    if (this.size > o.size)

+      return -1;

+    else

+      return 1;

 

-	public List<List<ParseTreeChunk>> obtainParseTreeChunkListByParsingList(String toParse)

-	{

-		List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();

-		// if (toParse.endsWith("]]]")){

-		// toParse = toParse.replace("[[","").replace("]]","");

-		// }

-		toParse = toParse.replace(" ]], [ [", "&");

-		String[] phraseTypeFragments = toParse.trim().split("&");

-		for (String toParseFragm : phraseTypeFragments)

-		{

-			toParseFragm = toParseFragm.replace("],  [", "#");

+  }

 

-			List<ParseTreeChunk> resultsPhraseType = new ArrayList<ParseTreeChunk>();

-			String[] indivChunks = toParseFragm.trim().split("#");

-			for (String expr : indivChunks)

-			{

-				List<String> lems = new ArrayList<String>(), poss = new ArrayList<String>();

-				expr = expr.replace("[", "").replace(" ]", "");

-				String[] pairs = expr.trim().split(" ");

-				for (String word : pairs)

-				{

-					word = word.replace("]]", "").replace("]", "");

-					String[] pos_lem = word.split("-");

-					lems.add(pos_lem[1].trim());

-					poss.add(pos_lem[0].trim());

-				}

-				ParseTreeChunk ch = new ParseTreeChunk();

-				ch.setLemmas(lems);

-				ch.setPOSs(poss);

-				resultsPhraseType.add(ch);

-			}

-			results.add(resultsPhraseType);

-		}

-		System.out.println(results);

-		return results;

+  public String listToString(List<List<ParseTreeChunk>> chunks) {

+    StringBuffer buf = new StringBuffer();

+    if (chunks.get(0).size() > 0) {

+      buf.append(" np " + chunks.get(0).toString());

+    }

+    if (chunks.get(1).size() > 0) {

+      buf.append(" vp " + chunks.get(1).toString());

+    }

+    if (chunks.size() < 3) {

+      return buf.toString();

+    }

+    if (chunks.get(2).size() > 0) {

+      buf.append(" prp " + chunks.get(2).toString());

+    }

+    if (chunks.get(3).size() > 0) {

+      buf.append(" pp " + chunks.get(3).toString());

+    }

+    if (chunks.get(4).size() > 0) {

+      buf.append(" adjp " + chunks.get(4).toString());

+    }

+    if (chunks.get(5).size() > 0) {

+      buf.append(" whadvp " + chunks.get(5).toString());

+    }

+    /*

+     * if (mainPos.equals("np")) np.add(ch); else if (mainPos.equals( "vp"))

+     * vp.add(ch); else if (mainPos.equals( "prp")) prp.add(ch); else if

+     * (mainPos.equals( "pp")) pp.add(ch); else if (mainPos.equals( "adjp"))

+     * adjp.add(ch); else if (mainPos.equals( "whadvp")) whadvp.add(ch);

+     */

+    return buf.toString();

+  }

 

-		// 2.1 | Vietnam <b>embassy</b> <b>in</b> <b>Israel</b>: information on how to get your <b>visa</b> at Vietnam

-		// <b>embassy</b> <b>in</b> <b>Israel</b>. <b>...</b> <b>Spain</b>. Scotland. Sweden. Slovakia. Switzerland. T

-		// [Top of Page] <b>...</b>

-		// [[ [NN-* IN-in NP-israel ], [NP-* IN-in NP-israel ], [NP-* IN-* TO-* NN-* ], [NN-visa IN-* NN-* IN-in ]], [

-		// [VB-get NN-visa IN-* NN-* IN-in .-* ], [VBD-* IN-* NN-* NN-* .-* ], [VB-* NP-* ]]]

+  public List<List<ParseTreeChunk>> obtainParseTreeChunkListByParsingList(

+      String toParse) {

+    List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();

+    // if (toParse.endsWith("]]]")){

+    // toParse = toParse.replace("[[","").replace("]]","");

+    // }

+    toParse = toParse.replace(" ]], [ [", "&");

+    String[] phraseTypeFragments = toParse.trim().split("&");

+    for (String toParseFragm : phraseTypeFragments) {

+      toParseFragm = toParseFragm.replace("],  [", "#");

 

-	}

+      List<ParseTreeChunk> resultsPhraseType = new ArrayList<ParseTreeChunk>();

+      String[] indivChunks = toParseFragm.trim().split("#");

+      for (String expr : indivChunks) {

+        List<String> lems = new ArrayList<String>(), poss = new ArrayList<String>();

+        expr = expr.replace("[", "").replace(" ]", "");

+        String[] pairs = expr.trim().split(" ");

+        for (String word : pairs) {

+          word = word.replace("]]", "").replace("]", "");

+          String[] pos_lem = word.split("-");

+          lems.add(pos_lem[1].trim());

+          poss.add(pos_lem[0].trim());

+        }

+        ParseTreeChunk ch = new ParseTreeChunk();

+        ch.setLemmas(lems);

+        ch.setPOSs(poss);

+        resultsPhraseType.add(ch);

+      }

+      results.add(resultsPhraseType);

+    }

+    System.out.println(results);

+    return results;

 

-	public void setMainPOS(String mainPOS)

-	{

-		this.mainPOS = mainPOS;

-	}

+    // 2.1 | Vietnam <b>embassy</b> <b>in</b> <b>Israel</b>: information on how

+    // to get your <b>visa</b> at Vietnam

+    // <b>embassy</b> <b>in</b> <b>Israel</b>. <b>...</b> <b>Spain</b>.

+    // Scotland. Sweden. Slovakia. Switzerland. T

+    // [Top of Page] <b>...</b>

+    // [[ [NN-* IN-in NP-israel ], [NP-* IN-in NP-israel ], [NP-* IN-* TO-* NN-*

+    // ], [NN-visa IN-* NN-* IN-in ]], [

+    // [VB-get NN-visa IN-* NN-* IN-in .-* ], [VBD-* IN-* NN-* NN-* .-* ], [VB-*

+    // NP-* ]]]

 

-	public String getMainPOS()

-	{

-		return mainPOS;

-	}

+  }

 

-	public List<String> getLemmas()

-	{

-		return lemmas;

-	}

+  public void setMainPOS(String mainPOS) {

+    this.mainPOS = mainPOS;

+  }

 

-	public void setLemmas(List<String> lemmas)

-	{

-		this.lemmas = lemmas;

-	}

+  public String getMainPOS() {

+    return mainPOS;

+  }

 

-	public List<String> getPOSs()

-	{

-		return POSs;

-	}

+  public List<String> getLemmas() {

+    return lemmas;

+  }

 

-	public void setPOSs(List<String> pOSs)

-	{

-		POSs = pOSs;

-	}

+  public void setLemmas(List<String> lemmas) {

+    this.lemmas = lemmas;

+  }

 

-	public ParseTreeMatcher getParseTreeMatcher()

-	{

-		return parseTreeMatcher;

-	}

+  public List<String> getPOSs() {

+    return POSs;

+  }

+

+  public void setPOSs(List<String> pOSs) {

+    POSs = pOSs;

+  }

+

+  public ParseTreeMatcher getParseTreeMatcher() {

+    return parseTreeMatcher;

+  }

 

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
index b0bd02b..a58b104 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
@@ -28,8 +28,11 @@
   private LemmaFormManager lemmaFormManager = new LemmaFormManager();

 

   private POSManager posManager = new POSManager();

+

   /**

-   * key matching function which takes two phrases, aligns them and finds a set of maximum common sub-phrase

+   * key matching function which takes two phrases, aligns them and finds a set

+   * of maximum common sub-phrase

+   * 

    * @param chunk1

    * @param chunk2

    * @return

@@ -202,13 +205,15 @@
     return results;

   }

 

-  /** main function to generalize two expressions grouped by phrase types

-   * returns a list of generalizations for each phrase type with filtered

+  /**

+   * main function to generalize two expressions grouped by phrase types returns

+   * a list of generalizations for each phrase type with filtered

    * sub-expressions

    * 

    * @param sent1

    * @param sent2

-   * @return  List<List<ParseTreeChunk>> list of list of POS-words pairs for each resultant matched / overlapped phrase

+   * @return List<List<ParseTreeChunk>> list of list of POS-words pairs for each

+   *         resultant matched / overlapped phrase

    */

   public List<List<ParseTreeChunk>> matchTwoSentencesGroupedChunksDeterministic(

       List<List<ParseTreeChunk>> sent1, List<List<ParseTreeChunk>> sent2) {

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParserChunker2MatcherOlderOpenNLP.java.txt b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParserChunker2MatcherOlderOpenNLP.java.txt
deleted file mode 100644
index 9960079..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParserChunker2MatcherOlderOpenNLP.java.txt
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.textsimilarity;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import opennlp.tools.lang.english.SentenceDetector;
-import opennlp.tools.lang.english.Tokenizer;
-import opennlp.tools.lang.english.TreebankParser;
-import opennlp.tools.parser.Parse;
-import opennlp.tools.parser.chunking.Parser;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.util.Span;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.springframework.beans.factory.annotation.Autowired;
-
-public class ParserChunker2MatcherOlderOpenNLP {
-  public static final String resourcesDir = (System.getProperty("os.name")
-      .toLowerCase().indexOf("win") > -1 ? "C:/workspace/ZSearch/resources_external"
-      : "/var/search/solr-1.2/resources");
-  static private ParserChunker2MatcherOlderOpenNLP m_SyntMatcher = null;
-
-  private static final Logger LOG = LoggerFactory.getLogger(ParserChunker2MatcherOlderOpenNLP.class);
-
-  private SentenceDetectorME sentenceDetectorME = null;
-
-  private Tokenizer tokenizer = null;
-
-  private Parser parser = null;
-
-  private final boolean useTagDict = true;
-
-  private final boolean useCaseInsensitiveTagDict = false;
-
-  private final int beamSize = Parser.defaultBeamSize;
-
-  private final double advancePercentage = Parser.defaultAdvancePercentage;
-
-  private Map<String, List<List<ParseTreeChunk>>> parsingsCache = new HashMap<String, List<List<ParseTreeChunk>>>();
-
-  private ParseTreeChunkListScorer parseTreeChunkListScorer;
-
-  private ParseTreeMatcherDeterministic parseTreeMatcherDeterministic = new ParseTreeMatcherDeterministic();
-
-  /**
-   * Get the StopList singleton instance.
-   * 
-   * @return The StopList
-   */
-  static public ParserChunker2MatcherOlderOpenNLP getInstance() {
-    String dir = resourcesDir + "/models";
-    if (m_SyntMatcher == null) {
-      m_SyntMatcher = new ParserChunker2MatcherOlderOpenNLP();
-
-      try {
-        m_SyntMatcher.loadOpenNLP(dir);
-      } catch (Exception e) {
-        LOG.error("Problem loading openNLP! ", 2);
-      }
-    }
-    return m_SyntMatcher;
-  }
-
-  static public ParserChunker2MatcherOlderOpenNLP getInstance(String resourceDirSpec) {
-    String dir = resourceDirSpec + "/models";
-    if (m_SyntMatcher == null) {
-      m_SyntMatcher = new ParserChunker2MatcherOlderOpenNLP();
-
-      try {
-        m_SyntMatcher.loadOpenNLP(dir);
-      } catch (Exception e) {
-        e.printStackTrace();
-        LOG.error("Problem loading openNLP! ", e);
-      }
-    }
-    return m_SyntMatcher;
-  }
-
-  public ParserChunker2MatcherOlderOpenNLP() {
-    /*
-     * try { loadOpenNLP(resourcesDir); } catch (IOException e) {
-     * LOG.error("Problem loading openNLP! ", e); }
-     */
-  }
-
-  public ParserChunker2MatcherOlderOpenNLP(String resourcesDir) {
-    try {
-      loadOpenNLP(resourcesDir);
-    } catch (IOException e) {
-      LOG.error("Problem loading openNLP! ", e);
-    }
-  }
-
-  public ParserChunker2MatcherOlderOpenNLP(String resourcesDir, String language) {
-    try {
-      loadOpenNLP(resourcesDir, language);
-    } catch (IOException e) {
-      LOG.error("Problem loading openNLP! ", e);
-    }
-  }
-
-  protected void loadOpenNLP(String dir) throws IOException {
-    sentenceDetectorME = new SentenceDetector(dir
-        + "/sentdetect/EnglishSD.bin.gz");
-    tokenizer = new Tokenizer(dir + "/tokenize/EnglishTok.bin.gz");
-    parser = (Parser) TreebankParser.getParser(dir + "/parser", useTagDict,
-        useCaseInsensitiveTagDict, beamSize, advancePercentage);
-
-  }
-
-  protected void loadOpenNLP(String dir, String lang) throws IOException {
-    if (lang.equalsIgnoreCase("es")) {
-      sentenceDetectorME = new SentenceDetector(dir
-          + "/sentdetect/EnglishSD.bin.gz");
-      tokenizer = new Tokenizer(dir + "/tokenize/EnglishTok.bin.gz");
-      parser = (Parser) TreebankParser.getParser(dir + "/parser", useTagDict,
-          useCaseInsensitiveTagDict, beamSize, advancePercentage);
-    }
-  }
-
-  // TODO is synchronized needed here?
-  public synchronized Parse[] parseLine(String line, Parser p, double confidence) {
-    String[] tokens = tokenizer.tokenize(line);
-    // tokens = TextProcessor.fastTokenize(line, false).toArray(new String[0]);
-
-    StringBuilder sb = new StringBuilder();
-    for (String t : tokens)
-      sb.append(t).append(" ");
-
-    Parse[] ps = null;
-    try {
-      ps = TreebankParser.parseLine(sb.toString(), parser, 2);
-    } catch (Exception e) {
-      System.out.println("Problem parsing " + sb.toString());
-      e.printStackTrace(); // unable to parse for whatever reason
-    }
-    int i = 1;
-    for (; i < ps.length; i++) {
-      if (ps[i - 1].getProb() - ps[i].getProb() > confidence)
-        break;
-    }
-    if (i < ps.length) {
-      Parse[] retp = new Parse[i];
-      for (int j = 0; j < i; j++)
-        retp[j] = ps[j];
-      return retp;
-    } else
-      return ps;
-  }
-
-  // TODO is synchronized needed here?
-  protected synchronized Double[] getPhrasingAcceptabilityData(String line) {
-    int nParsings = 5;
-    String[] tokens = tokenizer.tokenize(line);
-    int numWords = tokens.length;
-    StringBuilder sb = new StringBuilder();
-    for (String t : tokens)
-      sb.append(t).append(" ");
-    Double result[] = new Double[5];
-
-    Parse[] ps = null;
-    try {
-      ps = TreebankParser.parseLine(sb.toString(), parser, nParsings);
-    } catch (Exception e) {
-      // unable to parse for whatever reason
-      for (int i = 0; i < result.length; i++) {
-        result[i] = -20.0;
-      }
-    }
-
-    for (int i = 0; i < ps.length; i++) {
-      result[i] = Math.abs(ps[i].getProb() / (double) numWords);
-    }
-    return result;
-  }
-
-  protected boolean allChildNodesArePOSTags(Parse p) {
-    Parse[] subParses = p.getChildren();
-    for (int pi = 0; pi < subParses.length; pi++)
-      if (!((Parse) subParses[pi]).isPosTag())
-        return false;
-    return true;
-  }
-
-  protected ArrayList<String> getNounPhrases(Parse p) {
-    ArrayList<String> nounphrases = new ArrayList<String>();
-
-    Parse[] subparses = p.getChildren();
-    for (int pi = 0; pi < subparses.length; pi++) {
-      // System.out.println("Processing Label: " + subparses[pi].getLabel());
-      // System.out.println("Processing Type: " + subparses[pi].getType());
-      if (subparses[pi].getType().equals("NP")
-          && allChildNodesArePOSTags(subparses[pi]))// &&
-      // ((Parse)subparses[pi]).getLabel()
-      // == "NP")
-      {
-        // System.out.println("Processing: " + subparses[pi].getLabel() +
-        // " as Chunk...");
-        Span _span = subparses[pi].getSpan();
-        nounphrases
-            .add(p.getText().substring(_span.getStart(), _span.getEnd()));
-      } else if (!((Parse) subparses[pi]).isPosTag())
-        nounphrases.addAll(getNounPhrases(subparses[pi]));
-    }
-
-    return nounphrases;
-  }
-
-  public List<LemmaPair> getAllPhrasesTWPairs(Parse p) {
-    List<String> nounphrases = new ArrayList<String>();
-    List<LemmaPair> LemmaPairs = new ArrayList<LemmaPair>();
-
-    Parse[] subparses = p.getChildren();
-    for (int pi = 0; pi < subparses.length; pi++) {
-      Span _span = subparses[pi].getSpan();
-
-      nounphrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));
-      String expr = p.getText().substring(_span.getStart(), _span.getEnd());
-
-      // if (expr.indexOf(" ")>0)
-      LemmaPairs.add(new LemmaPair(subparses[pi].getType(), expr, _span
-          .getStart()));
-      if (!((Parse) subparses[pi]).isPosTag())
-        LemmaPairs.addAll(getAllPhrasesTWPairs(subparses[pi]));
-    }
-
-    return LemmaPairs;
-  }
-
-  protected List<List<ParseTreeChunk>> matchOrigSentences(String sent1,
-      String sent2) {
-    // with tokenizer now
-    Parse[] parses1 = parseLine(sent1, parser, 1);
-    Parse[] parses2 = parseLine(sent2, parser, 1);
-    List<LemmaPair> origChunks1 = getAllPhrasesTWPairs(parses1[0]);
-    List<LemmaPair> origChunks2 = getAllPhrasesTWPairs(parses2[0]);
-    System.out.println(origChunks1);
-    System.out.println(origChunks2);
-
-    ParseTreeChunk matcher = new ParseTreeChunk();
-    List<List<ParseTreeChunk>> matchResult = matcher
-        .matchTwoSentencesGivenPairLists(origChunks1, origChunks2);
-    return matchResult;
-  }
-
-  public List<List<ParseTreeChunk>> matchOrigSentencesCache(String sent1,
-      String sent2) {
-    sent1 = sent1.replace("'s", " 's").replace(":", " ");
-    sent2 = sent2.replace("'s", " 's").replace(":", " ");
-
-    ParseTreeChunk matcher = new ParseTreeChunk();
-    List<List<ParseTreeChunk>> sent1GrpLst = null, sent2GrpLst = null;
-
-    sent1GrpLst = parsingsCache.get(sent1);
-    if (sent1GrpLst == null) {
-      List<LemmaPair> origChunks1 = new ArrayList<LemmaPair>();
-      String[] sents1 = sentenceDetectorME.sentDetect(sent1);
-      for (String s1 : sents1) {
-        Parse[] parses1 = parseLine(s1, parser, 1);
-        origChunks1.addAll(getAllPhrasesTWPairs(parses1[0]));
-      }
-      List<ParseTreeChunk> chunk1List = matcher.buildChunks(origChunks1);
-      sent1GrpLst = matcher.groupChunksAsParses(chunk1List);
-      parsingsCache.put(sent1, sent1GrpLst);
-      System.out.println(origChunks1);
-      // System.out.println("=== Grouped chunks 1 "+ sent1GrpLst);
-    }
-    sent2GrpLst = parsingsCache.get(sent2);
-    if (sent2GrpLst == null) {
-      List<LemmaPair> origChunks2 = new ArrayList<LemmaPair>();
-      String[] sents2 = sentenceDetectorME.sentDetect(sent2);
-      for (String s2 : sents2) {
-        Parse[] parses2 = parseLine(s2, parser, 1);
-        origChunks2.addAll(getAllPhrasesTWPairs(parses2[0]));
-      }
-      List<ParseTreeChunk> chunk2List = matcher.buildChunks(origChunks2);
-      sent2GrpLst = matcher.groupChunksAsParses(chunk2List);
-      parsingsCache.put(sent2, sent2GrpLst);
-      System.out.println(origChunks2);
-      // System.out.println("=== Grouped chunks 2 "+ sent2GrpLst);
-    }
-
-    return parseTreeMatcherDeterministic
-        .matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst);
-
-  }
-
-  public SentencePairMatchResult assessRelevance(String minedSent1, String sent2) {
-    minedSent1 = minedSent1.replace("'s", " 's").replace(":", " ")
-        .replace("’s", " 's");
-    sent2 = sent2.replace("'s", " 's").replace(":", " ").replace("’s", " 's");
-
-    ParseTreeChunk matcher = new ParseTreeChunk();
-    List<List<ParseTreeChunk>> sent1GrpLst = null, sent2GrpLst = null;
-
-    // sent1GrpLst = parsingsCache.get(minedSent1);
-    // if (sent1GrpLst==null){
-    List<LemmaPair> origChunks1 = new ArrayList<LemmaPair>();
-    String[] sents1 = sentenceDetectorME.sentDetect(minedSent1);
-    for (String s1 : sents1) {
-      Parse[] parses1 = parseLine(s1, parser, 1);
-      origChunks1.addAll(getAllPhrasesTWPairs(parses1[0]));
-    }
-    List<ParseTreeChunk> chunk1List = matcher.buildChunks(origChunks1);
-    sent1GrpLst = matcher.groupChunksAsParses(chunk1List);
-    parsingsCache.put(minedSent1, sent1GrpLst);
-    // System.out.println(origChunks1);
-    // System.out.println("=== Grouped chunks 1 "+ sent1GrpLst);
-    // }
-    sent2GrpLst = parsingsCache.get(sent2);
-    if (sent2GrpLst == null) {
-      List<LemmaPair> origChunks2 = new ArrayList<LemmaPair>();
-      String[] sents2 = sentenceDetectorME.sentDetect(sent2);
-      for (String s2 : sents2) {
-        Parse[] parses2 = parseLine(s2, parser, 1);
-        origChunks2.addAll(getAllPhrasesTWPairs(parses2[0]));
-      }
-      List<ParseTreeChunk> chunk2List = matcher.buildChunks(origChunks2);
-      sent2GrpLst = matcher.groupChunksAsParses(chunk2List);
-      parsingsCache.put(sent2, sent2GrpLst);
-      // System.out.println(origChunks2);
-      // System.out.println("=== Grouped chunks 2 "+ sent2GrpLst);
-    }
-
-    ParseTreeMatcherDeterministic md = new ParseTreeMatcherDeterministic();
-    List<List<ParseTreeChunk>> res = md
-        .matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst);
-    return new SentencePairMatchResult(res, origChunks1);
-
-  }
-
-  public Map<String, List<LemmaPair>> findMappingBetweenSentencesOfAParagraphAndAClassReps(
-      String para1, String classStr) {
-    // profile of matches
-    List<List<List<ParseTreeChunk>>> matchResultPerSentence = new ArrayList<List<List<ParseTreeChunk>>>();
-
-    ParseTreeChunk matcher = new ParseTreeChunk();
-
-    String[] sents = sentenceDetectorME.sentDetect(para1);
-    String[] classSents = sentenceDetectorME.sentDetect(classStr);
-
-    List<List<LemmaPair>> parseSentList = new ArrayList<List<LemmaPair>>();
-    for (String s : sents) {
-      parseSentList.add(getAllPhrasesTWPairs((parseLine(s, parser, 1)[0])));
-    }
-
-    List<List<LemmaPair>> parseClassList = new ArrayList<List<LemmaPair>>();
-    for (String s : classSents) {
-      parseClassList.add(getAllPhrasesTWPairs((parseLine(s, parser, 1)[0])));
-    }
-
-    Map<String, List<LemmaPair>> sentence_bestClassRep = new HashMap<String, List<LemmaPair>>();
-    for (List<LemmaPair> chunksSent : parseSentList) {
-      Double maxScore = -1.0;
-      for (List<LemmaPair> chunksClass : parseClassList) {
-        List<List<ParseTreeChunk>> matchResult = matcher
-            .matchTwoSentencesGivenPairLists(chunksSent, chunksClass);
-        Double score = parseTreeChunkListScorer
-            .getParseTreeChunkListScore(matchResult);
-        if (score > maxScore) {
-          maxScore = score;
-          sentence_bestClassRep.put(chunksSent.toString(), chunksClass);
-        }
-      }
-    }
-    return sentence_bestClassRep;
-  }
-
-  public SentenceDetectorME getSentenceDetectorME() {
-    return sentenceDetectorME;
-  }
-
-  public Parser getParser() {
-    return parser;
-  }
-}
-
-// -Xms500M -Xmx500M
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java
index 9c47602..1a2dc75 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java
@@ -24,7 +24,8 @@
 

 public class SentencePairMatchResult {

   public List<List<ParseTreeChunk>> matchResult;

-  private static Logger LOG = Logger.getLogger("opennlp.tools.textsimilarity.SentencePairMatchResult");

+  private static Logger LOG = Logger

+      .getLogger("opennlp.tools.textsimilarity.SentencePairMatchResult");

 

   public List<List<ParseTreeChunk>> getMatchResult() {

     return matchResult;

@@ -69,7 +70,7 @@
     super();

     verbExists = false;

     imperativeVerb = false;

-    //LOG.info("Assessing sentence for inclusion " + resForMinedSent1);

+    // LOG.info("Assessing sentence for inclusion " + resForMinedSent1);

     this.matchResult = matchResult;

     this.resForMinedSent1 = resForMinedSent1;

     for (LemmaPair word : resForMinedSent1) {

@@ -77,19 +78,20 @@
           && StringUtils.isAlpha(word.getLemma())) {// ||

                                                     // word.getPOS().startsWith("VP"))

         verbExists = true;

-        //LOG.info("Found verb=" + word);

+        // LOG.info("Found verb=" + word);

       }

     }

     // various form of sales pitch: 'get something', or 'we offer'

-    if (resForMinedSent1.size()>2 && (resForMinedSent1.get(1).getLemma().startsWith("We")

-        || resForMinedSent1.get(2).getLemma().startsWith("We")))

+    if (resForMinedSent1.size() > 2

+        && (resForMinedSent1.get(1).getLemma().startsWith("We") || resForMinedSent1

+            .get(2).getLemma().startsWith("We")))

       imperativeVerb = true;

-    

+

     for (LemmaPair word : resForMinedSent1) {

       if (word.getPOS().startsWith("VB") && word.getStartPos() < 1

           && word.getEndPos() < 1) {

         imperativeVerb = true;

-        //LOG.info("Found imperative verb=" + word);

+        // LOG.info("Found imperative verb=" + word);

       }

     }

 

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
index d3526ef..37d83aa 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
@@ -36,10 +36,10 @@
 

 import org.apache.commons.lang.StringUtils;

 

-

 public class TextProcessor {

 

-  private static final Logger LOG = Logger.getLogger("opennlp.tools.textsimilarity.TextProcessor");

+  private static final Logger LOG = Logger

+      .getLogger("opennlp.tools.textsimilarity.TextProcessor");

 

   static final String[] abbrevs = { "mr.", "mrs.", "sen.", "rep.", "gov.",

       "miss.", "dr.", "oct.", "nov.", "jan.", "feb.", "mar.", "apr.", "may",

@@ -278,12 +278,13 @@
 

     return retVal;

   }

-  

-  public static String removePunctuation(String sentence){

-	  List<String> toks = fastTokenize( sentence, false);

-	  return toks.toString().replace('[', ' ').replace(']', ' ').replace(',', ' ').replace("  ", " ");

+

+  public static String removePunctuation(String sentence) {

+    List<String> toks = fastTokenize(sentence, false);

+    return toks.toString().replace('[', ' ').replace(']', ' ')

+        .replace(',', ' ').replace("  ", " ");

   }

-  

+

   public static ArrayList<String> fastTokenize(String txt, boolean retainPunc) {

     ArrayList<String> tokens = new ArrayList<String>();

     if (StringUtils.isEmpty(txt)) {

@@ -426,7 +427,7 @@
 

       return text.substring(start, end + 1);

     } catch (RuntimeException e) {

-      LOG.severe("RuntimeException "+ e);

+      LOG.severe("RuntimeException " + e);

       e.printStackTrace();

       return "";

     }

@@ -454,7 +455,7 @@
 

       return text.substring(start, end + 1);

     } catch (RuntimeException e) {

-      LOG.severe("RuntimeException "+ e);

+      LOG.severe("RuntimeException " + e);

       return "";

     }

   }

@@ -534,7 +535,7 @@
   public static String stemTerm(String term) {

     term = stripToken(term);

     PorterStemmer st = new PorterStemmer();

-    

+

     return st.stem(term).toString();

   }

 

@@ -546,12 +547,12 @@
       try {

         md = MessageDigest.getInstance("SHA"); // step 2

       } catch (NoSuchAlgorithmException e) {

-        LOG.severe("NoSuchAlgorithmException " +  2);

+        LOG.severe("NoSuchAlgorithmException " + 2);

       }

       try {

         md.update(s.getBytes("UTF-8")); // step 3

       } catch (UnsupportedEncodingException e) {

-        LOG.severe("UnsupportedEncodingException "+ e);

+        LOG.severe("UnsupportedEncodingException " + e);

       }

       byte raw[] = md.digest();

       hash = null; // (new BASE64Encoder()).encode(raw);

@@ -617,7 +618,7 @@
       try {

         md.update(hashString.getBytes("UTF-8")); // step 3

       } catch (UnsupportedEncodingException e) {

-        LOG.severe("UnsupportedEncodingException "+ e);

+        LOG.severe("UnsupportedEncodingException " + e);

         throw new Exception(e.getMessage());

       }

       byte raw[] = md.digest();

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextSimilarityBagOfWords.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextSimilarityBagOfWords.java
index 66aa9c0..5e7c316 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextSimilarityBagOfWords.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextSimilarityBagOfWords.java
@@ -21,878 +21,873 @@
 import java.util.Arrays;

 import java.util.List;

 

-public class TextSimilarityBagOfWords

-{

-	public double assessRelevanceAndGetScore(String para1, String para2)

-	{

-		List<String> wordsOfPara1 = TextProcessor.fastTokenize(para1, false); 

-		List<String> wordsOfPara2 = TextProcessor.fastTokenize(para2, false); 

-		List<String> overlap = new ArrayList<String>(wordsOfPara1);

-		overlap.retainAll(wordsOfPara2);

-		overlap.removeAll(Arrays.asList(stopList));

-		

-		return overlap.size();

-	}

-	

-	public String[] stopList = new String[]{

-			"a",

+public class TextSimilarityBagOfWords {

+  public double assessRelevanceAndGetScore(String para1, String para2) {

+    List<String> wordsOfPara1 = TextProcessor.fastTokenize(para1, false);

+    List<String> wordsOfPara2 = TextProcessor.fastTokenize(para2, false);

+    List<String> overlap = new ArrayList<String>(wordsOfPara1);

+    overlap.retainAll(wordsOfPara2);

+    overlap.removeAll(Arrays.asList(stopList));

 

-			"about",

+    return overlap.size();

+  }

 

-			"above",

+  public String[] stopList = new String[] { "a",

 

-			"across",

+  "about",

 

-			"after",

+  "above",

 

-			"again",

+  "across",

 

-			"against",

+  "after",

 

-			"all",

+  "again",

 

-			"almost",

+  "against",

 

-			"alone",

+  "all",

 

-			"along",

+  "almost",

 

-			"already",

+  "alone",

 

-			"also",

+  "along",

 

-			"although",

+  "already",

 

-			"always",

+  "also",

 

-			"among",

+  "although",

 

-			"an",

+  "always",

 

-			"and",

+  "among",

 

-			"another",

+  "an",

 

-			"any",

+  "and",

 

-			"anybody",

+  "another",

 

-			"anyone",

+  "any",

 

-			"anything",

+  "anybody",

 

-			"anywhere",

+  "anyone",

 

-			"are",

+  "anything",

 

-			"area",

+  "anywhere",

 

-			"areas",

+  "are",

 

-			"around",

+  "area",

 

-			"as",

+  "areas",

 

-			"ask",

+  "around",

 

-			"asked",

+  "as",

 

-			"asking",

+  "ask",

 

-			"asks",

+  "asked",

 

-			"at",

+  "asking",

 

-			"away",

+  "asks",

 

-			"b",

+  "at",

 

-			"back",

+  "away",

 

-			"backed",

+  "b",

 

-			"backing",

+  "back",

 

-			"backs",

+  "backed",

 

-			"be",

+  "backing",

 

-			"became",

+  "backs",

 

-			"because",

+  "be",

 

-			"become",

+  "became",

 

-			"becomes",

+  "because",

 

-			"been",

+  "become",

 

-			"before",

+  "becomes",

 

-			"began",

+  "been",

 

-			"behind",

+  "before",

 

-			"being",

+  "began",

 

-			"beings",

+  "behind",

 

-			"best",

+  "being",

 

-			"better",

+  "beings",

 

-			"between",

+  "best",

 

-			"big",

+  "better",

 

-			"both",

+  "between",

 

-			"but",

+  "big",

 

-			"by",

+  "both",

 

-			"c",

+  "but",

 

-			"came",

+  "by",

 

-			"can",

+  "c",

 

-			"cannot",

+  "came",

 

-			"case",

+  "can",

 

-			"cases",

+  "cannot",

 

-			"certain",

+  "case",

 

-			"certainly",

+  "cases",

 

-			"clear",

+  "certain",

 

-			"clearly",

+  "certainly",

 

-			"come",

+  "clear",

 

-			"could",

+  "clearly",

 

-			"d",

+  "come",

 

-			"did",

+  "could",

 

-			"differ",

+  "d",

 

-			"different",

+  "did",

 

-			"differently",

+  "differ",

 

-			"do",

+  "different",

 

-			"does",

+  "differently",

 

-			"done",

+  "do",

 

-			"down",

+  "does",

 

-			"down",

+  "done",

 

-			"downed",

+  "down",

 

-			"downing",

+  "down",

 

-			"downs",

+  "downed",

 

-			"during",

+  "downing",

 

-			"e",

+  "downs",

 

-			"each",

+  "during",

 

-			"early",

+  "e",

 

-			"either",

+  "each",

 

-			"end",

+  "early",

 

-			"ended",

+  "either",

 

-			"ending",

+  "end",

 

-			"ends",

+  "ended",

 

-			"enough",

+  "ending",

 

-			"even",

+  "ends",

 

-			"evenly",

+  "enough",

 

-			"ever",

+  "even",

 

-			"every",

+  "evenly",

 

-			"everybody",

+  "ever",

 

-			"everyone",

+  "every",

 

-			"everything",

+  "everybody",

 

-			"everywhere",

+  "everyone",

 

-			"f",

+  "everything",

 

-			"face",

+  "everywhere",

 

-			"faces",

+  "f",

 

-			"fact",

+  "face",

 

-			"facts",

+  "faces",

 

-			"far",

+  "fact",

 

-			"felt",

+  "facts",

 

-			"few",

+  "far",

 

-			"find",

+  "felt",

 

-			"finds",

+  "few",

 

-			"first",

+  "find",

 

-			"for",

+  "finds",

 

-			"four",

+  "first",

 

-			"from",

+  "for",

 

-			"full",

+  "four",

 

-			"fully",

+  "from",

 

-			"further",

+  "full",

 

-			"furthered",

+  "fully",

 

-			"furthering",

+  "further",

 

-			"furthers",

+  "furthered",

 

-			"g",

+  "furthering",

 

-			"gave",

+  "furthers",

 

-			"general",

+  "g",

 

-			"generally",

+  "gave",

 

-			"get",

+  "general",

 

-			"gets",

+  "generally",

 

-			"give",

+  "get",

 

-			"given",

+  "gets",

 

-			"gives",

+  "give",

 

-			"go",

+  "given",

 

-			"going",

+  "gives",

 

-			"good",

+  "go",

 

-			"goods",

+  "going",

 

-			"got",

+  "good",

 

-			"great",

+  "goods",

 

-			"greater",

+  "got",

 

-			"greatest",

+  "great",

 

-			"group",

+  "greater",

 

-			"grouped",

+  "greatest",

 

-			"grouping",

+  "group",

 

-			"groups",

+  "grouped",

 

-			"h",

+  "grouping",

 

-			"had",

+  "groups",

 

-			"has",

+  "h",

 

-			"have",

+  "had",

 

-			"having",

+  "has",

 

-			"he",

+  "have",

 

-			"her",

+  "having",

 

-			"here",

+  "he",

 

-			"herself",

+  "her",

 

-			"high",

+  "here",

 

-			"high",

+  "herself",

 

-			"high",

+  "high",

 

-			"higher",

+  "high",

 

-			"highest",

+  "high",

 

-			"him",

+  "higher",

 

-			"himself",

+  "highest",

 

-			"his",

+  "him",

 

-			"how",

+  "himself",

 

-			"however",

+  "his",

 

-			"i",

+  "how",

 

-			"if",

+  "however",

 

-			"important",

+  "i",

 

-			"in",

+  "if",

 

-			"interest",

+  "important",

 

-			"interested",

+  "in",

 

-			"interesting",

+  "interest",

 

-			"interests",

+  "interested",

 

-			"into",

+  "interesting",

 

-			"is",

+  "interests",

 

-			"it",

+  "into",

 

-			"its",

+  "is",

 

-			"itself",

+  "it",

 

-			"j",

+  "its",

 

-			"just",

+  "itself",

 

-			"k",

+  "j",

 

-			"keep",

+  "just",

 

-			"keeps",

+  "k",

 

-			"kind",

+  "keep",

 

-			"knew",

+  "keeps",

 

-			"know",

+  "kind",

 

-			"known",

+  "knew",

 

-			"knows",

+  "know",

 

-			"l",

+  "known",

 

-			"large",

+  "knows",

 

-			"largely",

+  "l",

 

-			"last",

+  "large",

 

-			"later",

+  "largely",

 

-			"latest",

+  "last",

 

-			"least",

+  "later",

 

-			"less",

+  "latest",

 

-			"let",

+  "least",

 

-			"lets",

+  "less",

 

-			"like",

+  "let",

 

-			"likely",

+  "lets",

 

-			"long",

+  "like",

 

-			"longer",

+  "likely",

 

-			"longest",

+  "long",

 

-			"m",

+  "longer",

 

-			"made",

+  "longest",

 

-			"make",

+  "m",

 

-			"making",

+  "made",

 

-			"man",

+  "make",

 

-			"many",

+  "making",

 

-			"may",

+  "man",

 

-			"me",

+  "many",

 

-			"member",

+  "may",

 

-			"members",

+  "me",

 

-			"men",

+  "member",

+

+  "members",

 

-			"might",

+  "men",

 

-			"more",

+  "might",

 

-			"most",

+  "more",

 

-			"mostly",

+  "most",

 

-			"mr",

+  "mostly",

 

-			"mrs",

+  "mr",

 

-			"much",

+  "mrs",

 

-			"must",

+  "much",

 

-			"my",

+  "must",

 

-			"myself",

+  "my",

 

-			"n",

+  "myself",

 

-			"necessary",

+  "n",

 

-			"need",

+  "necessary",

 

-			"needed",

+  "need",

 

-			"needing",

+  "needed",

 

-			"needs",

+  "needing",

 

-			"never",

+  "needs",

 

-			"new",

+  "never",

 

-			"new",

+  "new",

 

-			"newer",

+  "new",

 

-			"newest",

+  "newer",

 

-			"next",

+  "newest",

 

-			"no",

+  "next",

 

-			"nobody",

+  "no",

 

-			"non",

+  "nobody",

 

-			"noone",

+  "non",

 

-			"not",

+  "noone",

 

-			"nothing",

+  "not",

 

-			"now",

+  "nothing",

 

-			"nowhere",

+  "now",

 

-			"number",

+  "nowhere",

 

-			"numbers",

+  "number",

 

-			"o",

+  "numbers",

 

-			"of",

+  "o",

 

-			"off",

+  "of",

 

-			"often",

+  "off",

 

-			"old",

+  "often",

 

-			"older",

+  "old",

 

-			"oldest",

+  "older",

 

-			"on",

+  "oldest",

 

-			"once",

+  "on",

 

-			"one",

+  "once",

 

-			"only",

+  "one",

 

-			"open",

+  "only",

 

-			"opened",

+  "open",

 

-			"opening",

+  "opened",

 

-			"opens",

+  "opening",

 

-			"or",

+  "opens",

 

-			"order",

+  "or",

 

-			"ordered",

+  "order",

 

-			"ordering",

+  "ordered",

 

-			"orders",

+  "ordering",

 

-			"other",

+  "orders",

 

-			"others",

+  "other",

 

-			"our",

+  "others",

 

-			"out",

+  "our",

 

-			"over",

+  "out",

 

-			"p",

+  "over",

 

-			"part",

+  "p",

 

-			"parted",

+  "part",

 

-			"parting",

+  "parted",

 

-			"parts",

+  "parting",

 

-			"per",

+  "parts",

 

-			"perhaps",

+  "per",

 

-			"place",

+  "perhaps",

 

-			"places",

+  "place",

 

-			"point",

+  "places",

 

-			"pointed",

+  "point",

 

-			"pointing",

+  "pointed",

 

-			"points",

+  "pointing",

 

-			"possible",

+  "points",

 

-			"present",

+  "possible",

 

-			"presented",

+  "present",

 

-			"presenting",

+  "presented",

 

-			"presents",

+  "presenting",

 

-			"problem",

+  "presents",

 

-			"problems",

+  "problem",

 

-			"put",

+  "problems",

 

-			"puts",

+  "put",

 

-			"q",

+  "puts",

 

-			"quite",

+  "q",

 

-			"r",

+  "quite",

 

-			"rather",

+  "r",

 

-			"really",

+  "rather",

 

-			"right",

+  "really",

 

-			"right",

+  "right",

 

-			"room",

+  "right",

 

-			"rooms",

+  "room",

 

-			"s",

+  "rooms",

 

-			"said",

+  "s",

 

-			"same",

+  "said",

 

-			"saw",

+  "same",

 

-			"say",

+  "saw",

 

-			"says",

+  "say",

 

-			"second",

+  "says",

 

-			"seconds",

+  "second",

 

-			"see",

+  "seconds",

 

-			"seem",

+  "see",

 

-			"seemed",

+  "seem",

 

-			"seeming",

+  "seemed",

 

-			"seems",

+  "seeming",

 

-			"sees",

+  "seems",

 

-			"several",

+  "sees",

 

-			"shall",

+  "several",

 

-			"she",

+  "shall",

 

-			"should",

+  "she",

 

-			"show",

+  "should",

 

-			"showed",

+  "show",

 

-			"showing",

+  "showed",

 

-			"shows",

+  "showing",

 

-			"side",

+  "shows",

 

-			"sides",

+  "side",

 

-			"since",

+  "sides",

 

-			"small",

+  "since",

 

-			"smaller",

+  "small",

 

-			"smallest",

+  "smaller",

 

-			"so",

+  "smallest",

 

-			"some",

+  "so",

 

-			"somebody",

+  "some",

 

-			"someone",

+  "somebody",

 

-			"something",

+  "someone",

 

-			"somewhere",

+  "something",

 

-			"state",

+  "somewhere",

 

-			"states",

+  "state",

 

-			"still",

+  "states",

 

-			"still",

+  "still",

 

-			"such",

+  "still",

 

-			"sure",

+  "such",

 

-			"t",

+  "sure",

 

-			"take",

+  "t",

 

-			"taken",

+  "take",

 

-			"than",

+  "taken",

 

-			"that",

+  "than",

 

-			"the",

+  "that",

 

-			"their",

+  "the",

 

-			"them",

+  "their",

 

-			"then",

+  "them",

 

-			"there",

+  "then",

 

-			"therefore",

+  "there",

 

-			"these",

+  "therefore",

 

-			"they",

+  "these",

 

-			"thing",

+  "they",

 

-			"things",

+  "thing",

 

-			"think",

+  "things",

 

-			"thinks",

+  "think",

 

-			"this",

+  "thinks",

 

-			"those",

+  "this",

 

-			"though",

+  "those",

 

-			"thought",

+  "though",

 

-			"thoughts",

+  "thought",

 

-			"three",

+  "thoughts",

 

-			"through",

+  "three",

 

-			"thus",

+  "through",

 

-			"to",

+  "thus",

 

-			"today",

+  "to",

 

-			"together",

+  "today",

 

-			"too",

+  "together",

 

-			"took",

+  "too",

 

-			"toward",

+  "took",

 

-			"turn",

+  "toward",

 

-			"turned",

+  "turn",

 

-			"turning",

+  "turned",

 

-			"turns",

+  "turning",

 

-			"two",

+  "turns",

 

-			"u",

+  "two",

 

-			"under",

+  "u",

 

-			"until",

+  "under",

 

-			"up",

+  "until",

 

-			"upon",

+  "up",

 

-			"us",

+  "upon",

 

-			"use",

+  "us",

 

-			"used",

+  "use",

 

-			"uses",

+  "used",

 

-			"v",

+  "uses",

 

-			"very",

+  "v",

 

-			"w",

+  "very",

 

-			"want",

+  "w",

 

-			"wanted",

+  "want",

 

-			"wanting",

+  "wanted",

 

-			"wants",

+  "wanting",

 

-			"was",

+  "wants",

 

-			"way",

+  "was",

 

-			"ways",

+  "way",

 

-			"we",

+  "ways",

 

-			"well",

+  "we",

 

-			"wells",

+  "well",

 

-			"went",

+  "wells",

 

-			"were",

+  "went",

 

-			"what",

+  "were",

 

-			"when",

+  "what",

 

-			"where",

+  "when",

 

-			"whether",

+  "where",

 

-			"which",

+  "whether",

 

-			"while",

+  "which",

 

-			"who",

+  "while",

 

-			"whole",

+  "who",

 

-			"whose",

+  "whole",

 

-			"why",

+  "whose",

 

-			"will",

+  "why",

 

-			"with",

+  "will",

 

-			"within",

+  "with",

 

-			"without",

+  "within",

 

-			"work",

+  "without",

 

-			"worked",

+  "work",

 

-			"working",

+  "worked",

 

-			"works",

+  "working",

 

-			"would",

+  "works",

 

-			"x",

+  "would",

 

-			"y",

+  "x",

 

-			"year",

+  "y",

 

-			"years",

+  "year",

 

-			"yet",

+  "years",

 

-			"you",

+  "yet",

 

-			"young",

+  "you",

 

-			"younger",

+  "young",

 

-			"youngest",

+  "younger",

 

-			"your",

+  "youngest",

 

-			"yours",

+  "your",

 

-			"z" };

+  "yours",

 

-	

+  "z" };

 

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
index 1056d2f..4e8c195 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
@@ -51,7 +51,8 @@
 import au.com.bytecode.opencsv.CSVWriter;

 

 public class ParserCacheSerializer {

-  private static Logger LOG = Logger.getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserCacheSerializer");

+  private static Logger LOG = Logger

+      .getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserCacheSerializer");

   private static boolean javaObjectSerialization = false;

   private static String RESOURCE_DIR = "src/test/resources/";

   public static String parseCacheFileName = "sentence_parseObject.dat";

@@ -115,8 +116,8 @@
       List<String[]> lines = null;

 

       try {

-        reader = new CSVReader(

-            new FileReader(RESOURCE_DIR + parseCacheFileNameCSV), ',');

+        reader = new CSVReader(new FileReader(RESOURCE_DIR

+            + parseCacheFileNameCSV), ',');

         lines = reader.readAll();

       } catch (FileNotFoundException e) {

         e.printStackTrace();

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
index d14e785..4b18cd0 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
@@ -1,19 +1,4 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -68,731 +53,728 @@
 import opennlp.tools.tokenize.TokenizerModel;
 import opennlp.tools.util.Span;
 
-
 public class ParserChunker2MatcherProcessor {
-	protected static final int MIN_SENTENCE_LENGTH = 10;
-	private static final String MODEL_DIR_KEY = "nlp.models.dir";
-	// TODO config
-	// this is where resources should live
-	private static String MODEL_DIR, MODEL_DIR_REL = "src/test/resources/models111";
-	protected static ParserChunker2MatcherProcessor instance;
+  protected static final int MIN_SENTENCE_LENGTH = 10;
+  private static final String MODEL_DIR_KEY = "nlp.models.dir";
+  // TODO config
+  // this is where resources should live
+  private static String MODEL_DIR;
+  public static String MODEL_DIR_REL = "src/test/resources/models";
+  protected static ParserChunker2MatcherProcessor instance;
 
-	private SentenceDetector sentenceDetector;
-	private Tokenizer tokenizer;
-	private POSTagger posTagger;
-	private Parser parser;
-	private ChunkerME chunker;
-	private final int NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS = 5;
-	private static Logger LOG = Logger.getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor");
-	private Map<String,String[][]> sentence_parseObject = new HashMap<String,String[][]>();
+  private SentenceDetector sentenceDetector;
+  private Tokenizer tokenizer;
+  private POSTagger posTagger;
+  private Parser parser;
+  private ChunkerME chunker;
+  private final int NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS = 5;
+  private static Logger LOG = Logger
+      .getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor");
+  private Map<String, String[][]> sentence_parseObject = new HashMap<String, String[][]>();
 
-	   public SentenceDetector getSentenceDetector() {
-	        return sentenceDetector;
-	    }
+  public SentenceDetector getSentenceDetector() {
+    return sentenceDetector;
+  }
 
-	    public void setSentenceDetector(SentenceDetector sentenceDetector) {
-	        this.sentenceDetector = sentenceDetector;
-	    }
+  public void setSentenceDetector(SentenceDetector sentenceDetector) {
+    this.sentenceDetector = sentenceDetector;
+  }
 
-	    public Tokenizer getTokenizer() {
-	        return tokenizer;
-	    }
+  public Tokenizer getTokenizer() {
+    return tokenizer;
+  }
 
-	    public void setTokenizer(Tokenizer tokenizer) {
-	        this.tokenizer = tokenizer;
-	    }
+  public void setTokenizer(Tokenizer tokenizer) {
+    this.tokenizer = tokenizer;
+  }
 
-	    public ChunkerME getChunker() {
-	        return chunker;
-	    }
+  public ChunkerME getChunker() {
+    return chunker;
+  }
 
-	    public void setChunker(ChunkerME chunker) {
-	        this.chunker = chunker;
-	    }
-	@SuppressWarnings("unchecked")
-	protected ParserChunker2MatcherProcessor() {
-		try {
-			sentence_parseObject = (Map<String,String[][]>)ParserCacheSerializer.readObject();
-		} catch (Exception e) {
-			// this file might not exist initially
-			LOG.fine("parsing  cache file does not exist (but should be created)");
-			sentence_parseObject = new HashMap<String,String[][]>();
-		}
-		if (sentence_parseObject == null)
-			sentence_parseObject = new HashMap<String,String[][]>();
+  public void setChunker(ChunkerME chunker) {
+    this.chunker = chunker;
+  }
 
-		try {
-			MODEL_DIR = new File(".").getAbsolutePath().replace(".", "")+MODEL_DIR_REL;
-			initializeSentenceDetector();
-			initializeTokenizer();
-			initializePosTagger();
-			initializeParser();
-			initializeChunker();
-		} catch (Exception e) {
-			LOG.fine("model cant be read and we rely on cache");
-		}
-	}
-	
-	// closing the processor, clearing loaded ling models and serializing parsing cache 
-	public void close(){
-		instance=null;
-		ParserCacheSerializer.writeObject(sentence_parseObject);
-	}
+  @SuppressWarnings("unchecked")
+  protected ParserChunker2MatcherProcessor() {
+    try {
+      sentence_parseObject = (Map<String, String[][]>) ParserCacheSerializer
+          .readObject();
+    } catch (Exception e) {
+      // this file might not exist initially
+      LOG.fine("parsing  cache file does not exist (but should be created)");
+      sentence_parseObject = new HashMap<String, String[][]>();
+    }
+    if (sentence_parseObject == null)
+      sentence_parseObject = new HashMap<String, String[][]>();
 
+    try {
+      MODEL_DIR = new File(".").getAbsolutePath().replace(".", "")
+          + MODEL_DIR_REL;
+      initializeSentenceDetector();
+      initializeTokenizer();
+      initializePosTagger();
+      initializeParser();
+      initializeChunker();
+    } catch (Exception e) {
+      LOG.fine("model cant be read and we rely on cache");
+    }
+  }
 
-	/**
-	 * singleton method of instantiating the processor
-	 * @return the instance
-	 */
-	public synchronized static ParserChunker2MatcherProcessor getInstance() {
-		if (instance == null)
-			instance = new ParserChunker2MatcherProcessor();
+  // closing the processor, clearing loaded ling models and serializing parsing
+  // cache
+  public void close() {
+    instance = null;
+    ParserCacheSerializer.writeObject(sentence_parseObject);
+  }
 
-		return instance;
-	}
+  /**
+   * singleton method of instantiating the processor
+   * 
+   * @return the instance
+   */
+  public synchronized static ParserChunker2MatcherProcessor getInstance() {
+    if (instance == null)
+      instance = new ParserChunker2MatcherProcessor();
 
-	/**
-	 * General parsing function, which returns lists of parses for a portion of text
-	 * @param text to be parsed
-	 * @return lists of parses
-	 */
-	public List<List<Parse>> parseTextNlp(String text) {
-		if (text == null || text.trim().length() == 0)
-			return null;
+    return instance;
+  }
 
-		List<List<Parse>> textParses = new ArrayList<List<Parse>>(1);
+  /**
+   * General parsing function, which returns lists of parses for a portion of
+   * text
+   * 
+   * @param text
+   *          to be parsed
+   * @return lists of parses
+   */
+  public List<List<Parse>> parseTextNlp(String text) {
+    if (text == null || text.trim().length() == 0)
+      return null;
 
-		// parse paragraph by paragraph
-		String[] paragraphList = splitParagraph(text);
-		for (String paragraph : paragraphList) {
-			if (paragraph.length() == 0)
-				continue;
+    List<List<Parse>> textParses = new ArrayList<List<Parse>>(1);
 
-			List<Parse> paragraphParses = parseParagraphNlp(paragraph);
-			if (paragraphParses != null)
-				textParses.add(paragraphParses);
-		}
+    // parse paragraph by paragraph
+    String[] paragraphList = splitParagraph(text);
+    for (String paragraph : paragraphList) {
+      if (paragraph.length() == 0)
+        continue;
 
-		return textParses;
-	}
+      List<Parse> paragraphParses = parseParagraphNlp(paragraph);
+      if (paragraphParses != null)
+        textParses.add(paragraphParses);
+    }
 
-	public List<Parse> parseParagraphNlp(String paragraph) {
-		if (paragraph == null || paragraph.trim().length() == 0)
-			return null;
+    return textParses;
+  }
 
-		// normalize the text before parsing, otherwise, the sentences may not
-		// be
-		// separated correctly
+  public List<Parse> parseParagraphNlp(String paragraph) {
+    if (paragraph == null || paragraph.trim().length() == 0)
+      return null;
 
-		//paragraph = TextNormalizer.normalizeText(paragraph);
+    // normalize the text before parsing, otherwise, the sentences may not
+    // be
+    // separated correctly
 
-		// parse sentence by sentence
-		String[] sentences = splitSentences(paragraph);
-		List<Parse> parseList = new ArrayList<Parse>(sentences.length);
-		for (String sentence : sentences) {
-			sentence = sentence.trim();
-			if (sentence.length() == 0)
-				continue;
+    // paragraph = TextNormalizer.normalizeText(paragraph);
 
-			Parse sentenceParse = parseSentenceNlp(sentence, false);
-			if (sentenceParse != null)
-				parseList.add(sentenceParse);
-		}
+    // parse sentence by sentence
+    String[] sentences = splitSentences(paragraph);
+    List<Parse> parseList = new ArrayList<Parse>(sentences.length);
+    for (String sentence : sentences) {
+      sentence = sentence.trim();
+      if (sentence.length() == 0)
+        continue;
 
-		return parseList;
-	}
+      Parse sentenceParse = parseSentenceNlp(sentence, false);
+      if (sentenceParse != null)
+        parseList.add(sentenceParse);
+    }
 
-	public Parse parseSentenceNlp(String sentence) {
-		// if we parse an individual sentence, we want to normalize the text
-		// before parsing
-		return parseSentenceNlp(sentence, true);
-	}
+    return parseList;
+  }
 
-	public synchronized Parse parseSentenceNlp(String sentence,
-			boolean normalizeText) {
-		// don't try to parse very short sentence, not much info in it anyway,
-		// most likely a heading
-		if (sentence == null || sentence.trim().length() < MIN_SENTENCE_LENGTH)
-			return null;
+  public Parse parseSentenceNlp(String sentence) {
+    // if we parse an individual sentence, we want to normalize the text
+    // before parsing
+    return parseSentenceNlp(sentence, true);
+  }
 
-		//if (normalizeText)
-		//	sentence = TextNormalizer.normalizeText(sentence);
+  public synchronized Parse parseSentenceNlp(String sentence,
+      boolean normalizeText) {
+    // don't try to parse very short sentence, not much info in it anyway,
+    // most likely a heading
+    if (sentence == null || sentence.trim().length() < MIN_SENTENCE_LENGTH)
+      return null;
 
-		Parse[] parseArray = null;
-		try {
-			parseArray = ParserTool.parseLine(sentence, parser, 1);
-		} catch (Throwable t) {
-			LOG.log(Level.WARNING, "failed to parse the sentence : '"+sentence, t);
-			return null;
-		}
-		
-		//	Parse[] chunks = ChunkSampleStream.getInitialChunks(parseArray[0]) ;
+    // if (normalizeText)
+    // sentence = TextNormalizer.normalizeText(sentence);
 
-		// there should be only one result parse
-		if (parseArray != null && parseArray.length > 0)
-			return parseArray[0];
-		else
-			return null;
-	}
+    Parse[] parseArray = null;
+    try {
+      parseArray = ParserTool.parseLine(sentence, parser, 1);
+    } catch (Throwable t) {
+      LOG.log(Level.WARNING, "failed to parse the sentence : '" + sentence, t);
+      return null;
+    }
 
-	/**
-	 * 
-	 * @param para input text string which is assumed to be a paragraph and is split into sentences
-	 * @return a list of lists of phrases with their POS tags for each phrase type (noun, verb etc.)
-	 */
+    // Parse[] chunks = ChunkSampleStream.getInitialChunks(parseArray[0]) ;
 
-	public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(String para){
-		List<List<ParseTreeChunk>> listOfChunksAccum = new ArrayList<List<ParseTreeChunk>>();
-		String[] sentences = splitSentences(para);
-		for(String sent: sentences){
-			List<List<ParseTreeChunk>> singleSentChunks = formGroupedPhrasesFromChunksForSentence(sent); 
-			if (singleSentChunks==null)
-				continue;
-			if (listOfChunksAccum.size()<1 ){
-				listOfChunksAccum = new ArrayList<List<ParseTreeChunk>>(singleSentChunks);
-			} else 
-				for(int i= 0; i<NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS; i++){
-					//make sure not null
-					if (singleSentChunks == null || singleSentChunks.size()!=NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS)
-						break;
-					List<ParseTreeChunk> phraseI = singleSentChunks.get(i);
-					List<ParseTreeChunk> phraseIaccum  = listOfChunksAccum.get(i);
-					phraseIaccum.addAll(phraseI);
-					listOfChunksAccum.set(i, phraseIaccum);
-				}
-		}
-		return listOfChunksAccum;
-	}
-	
-	String[][] parseChunkSentence(String sentenceInp){
-		String[][] resToksTags = sentence_parseObject.get(sentenceInp);
-		if ( resToksTags!=null)
-			return resToksTags;
-		if(tokenizer == null)
-			return null;
-		
-		String sentence = TextProcessor.removePunctuation(sentenceInp);
-	
-		String[] toks = tokenizer.tokenize(sentence);
-		String[] tags = new String[toks.length]; //posTagger.tag(toks);
-		SentenceNode node  = parseSentenceNode(sentence);
-		if (node==null){
-			LOG.info("Problem parsing sentence '"+sentence);
-			return null;
-		}
-		List<String> POSlist = node.getOrderedPOSList();
-		
-		tags = POSlist.toArray(new String[0]);
-		if (toks.length != tags.length){
-			LOG.finest("disagreement between toks and tags; sent =  '"+sentence + "'\n tags = "+tags + 
-					"\n will now try this sentence in lower case" );
-			node  = parseSentenceNode(sentence.toLowerCase());
-			if (node==null){
-				LOG.finest("Problem parsing sentence '"+sentence);
-				return null;
-			}
-			POSlist = node.getOrderedPOSList();
-			tags = POSlist.toArray(new String[0]);
-			if (toks.length != tags.length){
-				LOG.finest("AGAIN: disagreement between toks and tags for lower case! ");
-				if (toks.length>tags.length){
-					String[] newToks = new String[tags.length];
-					for(int i = 0; i<tags.length; i++ ){
-						newToks[i] = toks[i];
-					}
-					toks = newToks;
-					
-				} else
-					return null;
-			}
-		} 
-		
-		String[] res = chunker.chunk(toks, tags);
-		String[][] resTagToks = new String[][] { res, tags, toks};
-		sentence_parseObject.put(sentenceInp,  resTagToks);		
-		return resTagToks;
-	}
-	
-	
+    // there should be only one result parse
+    if (parseArray != null && parseArray.length > 0)
+      return parseArray[0];
+    else
+      return null;
+  }
 
-	/**
-	 * 
-	 * @param para input text string which is assumed to be a sentence
-	 * @return a list of lists of phrases with their POS tags for each phrase type (noun, verb etc.)
-	 */
-	public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForSentence(String sentence) {
-		if (sentence == null || sentence.trim().length() < MIN_SENTENCE_LENGTH)
-			return null;
-   /*  
-		sentence = TextProcessor.removePunctuation(sentence);
+  /**
+   * 
+   * @param para
+   *          input text string which is assumed to be a paragraph and is split
+   *          into sentences
+   * @return a list of lists of phrases with their POS tags for each phrase type
+   *         (noun, verb etc.)
+   */
 
-		String[] toks = tokenizer.tokenize(sentence);
-		String[] tags = new String[toks.length]; //posTagger.tag(toks);
-		SentenceNode node  = parseSentenceNode(sentence);
-		if (node==null){
-			LOG.info("Problem parsing sentence '"+sentence);
-			return null;
-		}
-		List<String> POSlist = node.getOrderedPOSList();
-		
-		tags = POSlist.toArray(new String[0]);
-		if (toks.length != tags.length){
-			LOG.info("disagreement between toks and tags; sent =  '"+sentence + "'\n tags = "+tags + 
-					"\n will now try this sentence in lower case" );
-			node  = parseSentenceNode(sentence.toLowerCase());
-			if (node==null){
-				LOG.info("Problem parsing sentence '"+sentence);
-				return null;
-			}
-			POSlist = node.getOrderedPOSList();
-			tags = POSlist.toArray(new String[0]);
-			if (toks.length != tags.length){
-				LOG.info("AGAIN: disagreement between toks and tags for lower case! ");
-				if (toks.length>tags.length){
-					String[] newToks = new String[tags.length];
-					for(int i = 0; i<tags.length; i++ ){
-						newToks[i] = toks[i];
-					}
-					toks = newToks;
-					
-				} else
-					return null;
-			}
-		} 
-	*/	
-		String[][] resTagToks = parseChunkSentence(sentence);
-		if (resTagToks == null )
-			return null;
-		String[] res = resTagToks[0];
-		String[] tags = resTagToks[1];
-		String[] toks = resTagToks[2];
-		
-	//	String[] res = chunker.chunk(toks, tags);
-	
-		List<List<ParseTreeChunk>> listOfChunks = new ArrayList<List<ParseTreeChunk>>();
-		List<ParseTreeChunk> nounPhr = new ArrayList<ParseTreeChunk>(), 
-		prepPhr = new ArrayList<ParseTreeChunk>(), verbPhr  = new ArrayList<ParseTreeChunk>(), 
-		adjPhr  = new ArrayList<ParseTreeChunk>(), 
-		// to store the whole sentence
-		wholeSentence = new ArrayList<ParseTreeChunk>();
-		List<String> pOSsAll = new ArrayList<String>(), lemmasAll = new ArrayList<String>();
+  public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(
+      String para) {
+    List<List<ParseTreeChunk>> listOfChunksAccum = new ArrayList<List<ParseTreeChunk>>();
+    String[] sentences = splitSentences(para);
+    for (String sent : sentences) {
+      List<List<ParseTreeChunk>> singleSentChunks = formGroupedPhrasesFromChunksForSentence(sent);
+      if (singleSentChunks == null)
+        continue;
+      if (listOfChunksAccum.size() < 1) {
+        listOfChunksAccum = new ArrayList<List<ParseTreeChunk>>(
+            singleSentChunks);
+      } else
+        for (int i = 0; i < NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS; i++) {
+          // make sure not null
+          if (singleSentChunks == null
+              || singleSentChunks.size() != NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS)
+            break;
+          List<ParseTreeChunk> phraseI = singleSentChunks.get(i);
+          List<ParseTreeChunk> phraseIaccum = listOfChunksAccum.get(i);
+          phraseIaccum.addAll(phraseI);
+          listOfChunksAccum.set(i, phraseIaccum);
+        }
+    }
+    return listOfChunksAccum;
+  }
 
-		for(int i = 0; i< toks.length; i++){
-			pOSsAll.add(tags[i]);
-			lemmasAll.add(toks[i]);
-		}
-		wholeSentence.add(new ParseTreeChunk("SENTENCE", lemmasAll, pOSsAll));
+  String[][] parseChunkSentence(String sentenceInp) {
+    String[][] resToksTags = sentence_parseObject.get(sentenceInp);
+    if (resToksTags != null)
+      return resToksTags;
+    if (tokenizer == null)
+      return null;
 
-		boolean currPhraseClosed = false;
-		for(int i=0; i< res.length; i++){
-			String bi_POS = res[i];
-			currPhraseClosed = false;
-			if (bi_POS.startsWith("B-NP")){// beginning of a phrase
+    String sentence = TextProcessor.removePunctuation(sentenceInp);
 
-				List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>();
-				pOSs.add(tags[i]);
-				lemmas.add(toks[i]);
-				for(int j=i+1; j<res.length; j++){
-					if (res[j].startsWith("B-VP")){
-						nounPhr.add(new ParseTreeChunk("NP", lemmas, pOSs));
-						//LOG.info(i + " => " +lemmas);
-						currPhraseClosed = true;
-						break;
-					} else {
-						pOSs.add(tags[j]);
-						lemmas.add(toks[j]);
-					}
-				}
-				if (!currPhraseClosed){
-					nounPhr.add(new ParseTreeChunk("NP", lemmas, pOSs));
-					//LOG.fine(i + " => " + lemmas);
-				}
+    String[] toks = tokenizer.tokenize(sentence);
+    String[] tags = new String[toks.length]; // posTagger.tag(toks);
+    SentenceNode node = parseSentenceNode(sentence);
+    if (node == null) {
+      LOG.info("Problem parsing sentence '" + sentence);
+      return null;
+    }
+    List<String> POSlist = node.getOrderedPOSList();
 
-			} else if (bi_POS.startsWith("B-PP")){// beginning of a phrase
-				List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>();
-				pOSs.add(tags[i]);
-				lemmas.add(toks[i]);
+    tags = POSlist.toArray(new String[0]);
+    if (toks.length != tags.length) {
+      LOG.finest("disagreement between toks and tags; sent =  '" + sentence
+          + "'\n tags = " + tags
+          + "\n will now try this sentence in lower case");
+      node = parseSentenceNode(sentence.toLowerCase());
+      if (node == null) {
+        LOG.finest("Problem parsing sentence '" + sentence);
+        return null;
+      }
+      POSlist = node.getOrderedPOSList();
+      tags = POSlist.toArray(new String[0]);
+      if (toks.length != tags.length) {
+        LOG.finest("AGAIN: disagreement between toks and tags for lower case! ");
+        if (toks.length > tags.length) {
+          String[] newToks = new String[tags.length];
+          for (int i = 0; i < tags.length; i++) {
+            newToks[i] = toks[i];
+          }
+          toks = newToks;
+
+        } else
+          return null;
+      }
+    }
+
+    String[] res = chunker.chunk(toks, tags);
+    String[][] resTagToks = new String[][] { res, tags, toks };
+    sentence_parseObject.put(sentenceInp, resTagToks);
+    return resTagToks;
+  }
+
+  /**
+   * 
+   * @param sentence
+   *          input text string which is assumed to be a sentence
+   * @return a list of lists of phrases with their POS tags for each phrase type
+   *         (noun, verb etc.)
+   */
+  public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForSentence(
+      String sentence) {
+    if (sentence == null || sentence.trim().length() < MIN_SENTENCE_LENGTH)
+      return null;
+    /*
+     * sentence = TextProcessor.removePunctuation(sentence);
+     * 
+     * String[] toks = tokenizer.tokenize(sentence); String[] tags = new
+     * String[toks.length]; //posTagger.tag(toks); SentenceNode node =
+     * parseSentenceNode(sentence); if (node==null){
+     * LOG.info("Problem parsing sentence '"+sentence); return null; }
+     * List<String> POSlist = node.getOrderedPOSList();
+     * 
+     * tags = POSlist.toArray(new String[0]); if (toks.length != tags.length){
+     * LOG.info("disagreement between toks and tags; sent =  '"+sentence +
+     * "'\n tags = "+tags + "\n will now try this sentence in lower case" );
+     * node = parseSentenceNode(sentence.toLowerCase()); if (node==null){
+     * LOG.info("Problem parsing sentence '"+sentence); return null; } POSlist =
+     * node.getOrderedPOSList(); tags = POSlist.toArray(new String[0]); if
+     * (toks.length != tags.length){
+     * LOG.info("AGAIN: disagreement between toks and tags for lower case! ");
+     * if (toks.length>tags.length){ String[] newToks = new String[tags.length];
+     * for(int i = 0; i<tags.length; i++ ){ newToks[i] = toks[i]; } toks =
+     * newToks;
+     * 
+     * } else return null; } }
+     */
+    String[][] resTagToks = parseChunkSentence(sentence);
+    if (resTagToks == null)
+      return null;
+    String[] res = resTagToks[0];
+    String[] tags = resTagToks[1];
+    String[] toks = resTagToks[2];
+
+    // String[] res = chunker.chunk(toks, tags);
+
+    List<List<ParseTreeChunk>> listOfChunks = new ArrayList<List<ParseTreeChunk>>();
+    List<ParseTreeChunk> nounPhr = new ArrayList<ParseTreeChunk>(), prepPhr = new ArrayList<ParseTreeChunk>(), verbPhr = new ArrayList<ParseTreeChunk>(), adjPhr = new ArrayList<ParseTreeChunk>(),
+    // to store the whole sentence
+    wholeSentence = new ArrayList<ParseTreeChunk>();
+    List<String> pOSsAll = new ArrayList<String>(), lemmasAll = new ArrayList<String>();
+
+    for (int i = 0; i < toks.length; i++) {
+      pOSsAll.add(tags[i]);
+      lemmasAll.add(toks[i]);
+    }
+    wholeSentence.add(new ParseTreeChunk("SENTENCE", lemmasAll, pOSsAll));
+
+    boolean currPhraseClosed = false;
+    for (int i = 0; i < res.length; i++) {
+      String bi_POS = res[i];
+      currPhraseClosed = false;
+      if (bi_POS.startsWith("B-NP")) {// beginning of a phrase
+
+        List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>();
+        pOSs.add(tags[i]);
+        lemmas.add(toks[i]);
+        for (int j = i + 1; j < res.length; j++) {
+          if (res[j].startsWith("B-VP")) {
+            nounPhr.add(new ParseTreeChunk("NP", lemmas, pOSs));
+            // LOG.info(i + " => " +lemmas);
+            currPhraseClosed = true;
+            break;
+          } else {
+            pOSs.add(tags[j]);
+            lemmas.add(toks[j]);
+          }
+        }
+        if (!currPhraseClosed) {
+          nounPhr.add(new ParseTreeChunk("NP", lemmas, pOSs));
+          // LOG.fine(i + " => " + lemmas);
+        }
+
+      } else if (bi_POS.startsWith("B-PP")) {// beginning of a phrase
+        List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>();
+        pOSs.add(tags[i]);
+        lemmas.add(toks[i]);
+
+        for (int j = i + 1; j < res.length; j++) {
+          if (res[j].startsWith("B-VP")) {
+            prepPhr.add(new ParseTreeChunk("PP", lemmas, pOSs));
+            // LOG.fine(i + " => " + lemmas);
+            currPhraseClosed = true;
+            break;
+          } else {
+            pOSs.add(tags[j]);
+            lemmas.add(toks[j]);
+          }
+        }
+        if (!currPhraseClosed) {
+          prepPhr.add(new ParseTreeChunk("PP", lemmas, pOSs));
+          // LOG.fine(i + " => " + lemmas);
+        }
+      } else if (bi_POS.startsWith("B-VP")) {// beginning of a phrase
+        List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>();
+        pOSs.add(tags[i]);
+        lemmas.add(toks[i]);
+
+        for (int j = i + 1; j < res.length; j++) {
+          if (res[j].startsWith("B-VP")) {
+            verbPhr.add(new ParseTreeChunk("VP", lemmas, pOSs));
+            // LOG.fine(i + " => " +lemmas);
+            currPhraseClosed = true;
+            break;
+          } else {
+            pOSs.add(tags[j]);
+            lemmas.add(toks[j]);
+          }
+        }
+        if (!currPhraseClosed) {
+          verbPhr.add(new ParseTreeChunk("VP", lemmas, pOSs));
+          // LOG.fine(i + " => " + lemmas);
+        }
+      } else if (bi_POS.startsWith("B-ADJP")) {// beginning of a phrase
+        List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>();
+        pOSs.add(tags[i]);
+        lemmas.add(toks[i]);
 
-				for(int j=i+1; j<res.length; j++){
-					if (res[j].startsWith("B-VP")){
-						prepPhr.add(new ParseTreeChunk("PP", lemmas, pOSs));
-						//LOG.fine(i + " => " + lemmas);
-						currPhraseClosed = true;
-						break;
-					} else {
-						pOSs.add(tags[j]);
-						lemmas.add(toks[j]);
-					}
-				}
-				if (!currPhraseClosed){
-					prepPhr.add(new ParseTreeChunk("PP", lemmas, pOSs));
-					//LOG.fine(i + " => " + lemmas);
-				}
-			} else
-				if (bi_POS.startsWith("B-VP")){// beginning of a phrase
-					List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>();
-					pOSs.add(tags[i]);
-					lemmas.add(toks[i]);
+        for (int j = i + 1; j < res.length; j++) {
+          if (res[j].startsWith("B-VP")) {
+            adjPhr.add(new ParseTreeChunk("ADJP", lemmas, pOSs));
+            // LOG.fine(i + " => " +lemmas);
+            currPhraseClosed = true;
+            break;
+          } else {
+            pOSs.add(tags[j]);
+            lemmas.add(toks[j]);
+          }
+        }
+        if (!currPhraseClosed) {
+          adjPhr.add(new ParseTreeChunk("ADJP", lemmas, pOSs));
+          // LOG.fine(i + " => " + lemmas);
+        }
+      }
+    }
+    listOfChunks.add(nounPhr);
+    listOfChunks.add(verbPhr);
+    listOfChunks.add(prepPhr);
+    listOfChunks.add(adjPhr);
+    listOfChunks.add(wholeSentence);
 
-					for(int j=i+1; j<res.length; j++){
-						if (res[j].startsWith("B-VP")){
-							verbPhr.add(new ParseTreeChunk("VP", lemmas, pOSs));
-							//LOG.fine(i + " => " +lemmas);
-							currPhraseClosed = true;
-							break;
-						} else {
-							pOSs.add(tags[j]);
-							lemmas.add(toks[j]);
-						}
-					}
-					if (!currPhraseClosed){
-						verbPhr.add(new ParseTreeChunk("VP", lemmas, pOSs));
-						//LOG.fine(i + " => " + lemmas);
-					}
-				} else
-					if (bi_POS.startsWith("B-ADJP") ){// beginning of a phrase
-						List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>();
-						pOSs.add(tags[i]);
-						lemmas.add(toks[i]);
+    return listOfChunks;
+  }
 
-						for(int j=i+1; j<res.length; j++){
-							if (res[j].startsWith("B-VP")){
-								adjPhr.add(new ParseTreeChunk("ADJP", lemmas, pOSs));
-								//LOG.fine(i + " => " +lemmas);
-								currPhraseClosed = true;
-								break;
-							} else {
-								pOSs.add(tags[j]);
-								lemmas.add(toks[j]);
-							}
-						}
-						if (!currPhraseClosed){
-							adjPhr.add(new ParseTreeChunk("ADJP", lemmas, pOSs));
-							//LOG.fine(i + " => " + lemmas);
-						}
-					}
-		}
-		listOfChunks.add(nounPhr);
-		listOfChunks.add(verbPhr);
-		listOfChunks.add(prepPhr);
-		listOfChunks.add(adjPhr);
-		listOfChunks.add(wholeSentence);
+  public static List<List<SentenceNode>> textToSentenceNodes(
+      List<List<Parse>> textParses) {
+    if (textParses == null || textParses.size() == 0)
+      return null;
 
-		return listOfChunks;
-	}
+    List<List<SentenceNode>> textNodes = new ArrayList<List<SentenceNode>>(
+        textParses.size());
+    for (List<Parse> paragraphParses : textParses) {
+      List<SentenceNode> paragraphNodes = paragraphToSentenceNodes(paragraphParses);
 
-	public static List<List<SentenceNode>> textToSentenceNodes(
-			List<List<Parse>> textParses) {
-		if (textParses == null || textParses.size() == 0)
-			return null;
+      // append paragraph node if any
+      if (paragraphNodes != null && paragraphNodes.size() > 0)
+        textNodes.add(paragraphNodes);
+    }
 
-		List<List<SentenceNode>> textNodes = new ArrayList<List<SentenceNode>>(
-				textParses.size());
-		for (List<Parse> paragraphParses : textParses) {
-			List<SentenceNode> paragraphNodes = paragraphToSentenceNodes(paragraphParses);
+    if (textNodes.size() > 0)
+      return textNodes;
+    else
+      return null;
+  }
 
-			// append paragraph node if any
-			if (paragraphNodes != null && paragraphNodes.size() > 0)
-				textNodes.add(paragraphNodes);
-		}
+  public static List<SentenceNode> paragraphToSentenceNodes(
+      List<Parse> paragraphParses) {
+    if (paragraphParses == null || paragraphParses.size() == 0)
+      return null;
 
-		if (textNodes.size() > 0)
-			return textNodes;
-		else
-			return null;
-	}
+    List<SentenceNode> paragraphNodes = new ArrayList<SentenceNode>(
+        paragraphParses.size());
+    for (Parse sentenceParse : paragraphParses) {
+      SentenceNode sentenceNode = null;
+      try {
+        sentenceNode = sentenceToSentenceNode(sentenceParse);
+      } catch (Exception e) {
+        // don't fail the whole paragraph when a single sentence fails
+        LOG.severe("Failed to convert sentence to node. error: " + e);
+        sentenceNode = null;
+      }
 
-	public static List<SentenceNode> paragraphToSentenceNodes(
-			List<Parse> paragraphParses) {
-		if (paragraphParses == null || paragraphParses.size() == 0)
-			return null;
+      if (sentenceNode != null)
+        paragraphNodes.add(sentenceNode);
+    }
 
-		List<SentenceNode> paragraphNodes = new ArrayList<SentenceNode>(
-				paragraphParses.size());
-		for (Parse sentenceParse : paragraphParses) {
-			SentenceNode sentenceNode = null;
-			try {
-				sentenceNode = sentenceToSentenceNode(sentenceParse);
-			} catch (Exception e) {
-				// don't fail the whole paragraph when a single sentence fails
-				LOG.severe("Failed to convert sentence to node. error: " + e);
-				sentenceNode = null;
-			}
+    if (paragraphNodes.size() > 0)
+      return paragraphNodes;
+    else
+      return null;
+  }
 
-			if (sentenceNode != null)
-				paragraphNodes.add(sentenceNode);
-		}
+  public static SentenceNode sentenceToSentenceNode(Parse sentenceParse) {
+    if (sentenceParse == null)
+      return null;
 
-		if (paragraphNodes.size() > 0)
-			return paragraphNodes;
-		else
-			return null;
-	}
+    // convert the OpenNLP Parse to our own tree nodes
+    SyntacticTreeNode node = toSyntacticTreeNode(sentenceParse);
+    if ((node == null))
+      return null;
+    if (node instanceof SentenceNode)
+      return (SentenceNode) node;
+    else if (node instanceof PhraseNode) {
+      SentenceNode sn = new SentenceNode("sentence", node.getChildren());
+      return sn;
+    } else
+      return null;
+  }
 
-	public static SentenceNode sentenceToSentenceNode(Parse sentenceParse) {
-		if (sentenceParse == null)
-			return null;
+  public List<List<SentenceNode>> parseTextNode(String text) {
+    List<List<Parse>> textParseList = parseTextNlp(text);
+    return textToSentenceNodes(textParseList);
+  }
 
-		// convert the OpenNLP Parse to our own tree nodes
-		SyntacticTreeNode node = toSyntacticTreeNode(sentenceParse);
-		if ((node == null)) 			
-			return null;
-		if (node instanceof SentenceNode)
-			return (SentenceNode)node;
-		else if (node instanceof PhraseNode){
-			SentenceNode sn = new SentenceNode("sentence", node.getChildren()) ;
-			return sn;
-		} else return null;
-	}
+  public List<SentenceNode> parseParagraphNode(String paragraph) {
+    List<Parse> paragraphParseList = parseParagraphNlp(paragraph);
+    return paragraphToSentenceNodes(paragraphParseList);
+  }
 
-	public List<List<SentenceNode>> parseTextNode(String text) {
-		List<List<Parse>> textParseList = parseTextNlp(text);
-		return textToSentenceNodes(textParseList);
-	}
+  public SentenceNode parseSentenceNode(String sentence) {
+    return parseSentenceNode(sentence, true);
+  }
 
-	public List<SentenceNode> parseParagraphNode(String paragraph) {
-		List<Parse> paragraphParseList = parseParagraphNlp(paragraph);
-		return paragraphToSentenceNodes(paragraphParseList);
-	}
+  public synchronized SentenceNode parseSentenceNode(String sentence,
+      boolean normalizeText) {
+    Parse sentenceParse = parseSentenceNlp(sentence, normalizeText);
+    return sentenceToSentenceNode(sentenceParse);
+  }
 
-	public SentenceNode parseSentenceNode(String sentence) {
-		return parseSentenceNode(sentence, true);
-	}
+  public String[] splitParagraph(String text) {
+    String[] res = text.split("\n");
+    if (res == null || res.length <= 1)
+      return new String[] { text };
+    else
+      return res;
 
-	public synchronized SentenceNode parseSentenceNode(String sentence,
-			boolean normalizeText) {
-		Parse sentenceParse = parseSentenceNlp(sentence, normalizeText);
-		return sentenceToSentenceNode(sentenceParse);
-	}
+  }
 
-	public String[] splitParagraph(String text) {
-		String[] res = text.split("\n");
-		if (res == null || res.length<=1)
-			return new String[] {text};
-		else 
-			return res;
+  public String[] splitSentences(String text) {
+    if (text == null)
+      return null;
+    // if (sentenceDetector!=null)
+    // return sentenceDetector.sentDetect(text);
+    else {
+      List<String> sents = TextProcessor.splitToSentences(text);
+      return sents.toArray(new String[0]);
+    }
+  }
 
-	}
+  public String[] tokenizeSentence(String sentence) {
+    if (sentence == null)
+      return null;
 
-	public String[] splitSentences(String text) {
-		if (text == null)
-			return null;
-	//	if (sentenceDetector!=null)
-	//		return sentenceDetector.sentDetect(text);
-		else 
-		{
-			List<String> sents = TextProcessor.splitToSentences(text);
-			return sents.toArray(new String[0]);
-		}
-	}
+    return tokenizer.tokenize(sentence);
+  }
 
-	public String[] tokenizeSentence(String sentence) {
-		if (sentence == null)
-			return null;
+  protected void initializeSentenceDetector() {
+    InputStream is = null;
+    try {
+      is = new FileInputStream(MODEL_DIR + "/en-sent.bin"
 
-		return tokenizer.tokenize(sentence);
-	}
+      );
+      SentenceModel model = new SentenceModel(is);
+      sentenceDetector = new SentenceDetectorME(model);
+    } catch (IOException e) {
+      e.printStackTrace();
+    } finally {
+      if (is != null) {
+        try {
+          is.close();
+        } catch (IOException e) {
+          e.printStackTrace();
+        }
+      }
+    }
+  }
 
-	protected void initializeSentenceDetector() {
-		InputStream is = null;
-		try {
-			is = new FileInputStream(
-					MODEL_DIR + "/en-sent.bin"
+  protected void initializeTokenizer() {
+    InputStream is = null;
+    try {
+      is = new FileInputStream(MODEL_DIR + "/en-token.bin");
+      TokenizerModel model = new TokenizerModel(is);
+      tokenizer = new TokenizerME(model);
+    } catch (IOException e) {
+      e.printStackTrace();
+    } finally {
+      if (is != null) {
+        try {
+          is.close();
+        } catch (IOException e) {
+        }
+      }
+    }
+  }
 
-			);
-			SentenceModel model = new SentenceModel(is);
-			sentenceDetector = new SentenceDetectorME(model);
-		} catch (IOException e) {
-			e.printStackTrace();
-		} finally {
-			if (is != null) {
-				try {
-					is.close();
-				} catch (IOException e) {
-					e.printStackTrace();
-				}
-			}
-		}
-	}
+  protected void initializePosTagger() {
+    InputStream is = null;
+    try {
+      is = new FileInputStream(MODEL_DIR + "/en-pos-maxent.bin");
+      POSModel model = new POSModel(is);
+      posTagger = new POSTaggerME(model);
+    } catch (IOException e) {
+      e.printStackTrace();
+    } finally {
+      if (is != null) {
+        try {
+          is.close();
+        } catch (IOException e) {
+        }
+      }
+    }
+  }
 
-	protected void initializeTokenizer() {
-		InputStream is = null;
-		try {
-			is = new FileInputStream(
-					MODEL_DIR+ "/en-token.bin"
-			);
-			TokenizerModel model = new TokenizerModel(is);
-			tokenizer = new TokenizerME(model);
-		} catch (IOException e) {
-			e.printStackTrace();
-		} finally {
-			if (is != null) {
-				try {
-					is.close();
-				} catch (IOException e) {
-				}
-			}
-		}
-	}
+  protected void initializeParser() {
+    InputStream is = null;
+    try {
+      is = new FileInputStream(MODEL_DIR + "/en-parser-chunking.bin");
+      ParserModel model = new ParserModel(is);
+      parser = ParserFactory.create(model);
+    } catch (IOException e) {
+      e.printStackTrace();
+    } finally {
+      if (is != null) {
+        try {
+          is.close();
+        } catch (IOException e) {
+        }
+      }
+    }
+  }
 
-	protected void initializePosTagger() {
-		InputStream is = null;
-		try {
-			is = new FileInputStream(MODEL_DIR
-					+ "/en-pos-maxent.bin");
-			POSModel model = new POSModel(is);
-			posTagger = new POSTaggerME(model);
-		} catch (IOException e) {
-			e.printStackTrace();
-		} finally {
-			if (is != null) {
-				try {
-					is.close();
-				} catch (IOException e) {
-				}
-			}
-		}
-	}
+  private void initializeChunker() {
+    InputStream is = null;
+    try {
+      is = new FileInputStream(MODEL_DIR + "/en-chunker.bin");
+      ChunkerModel model = new ChunkerModel(is);
+      chunker = new ChunkerME(model);
+    } catch (IOException e) {
+      e.printStackTrace();
+    } finally {
+      if (is != null) {
+        try {
+          is.close();
+        } catch (IOException e) {
+        }
+      }
+    }
+  }
 
-	protected void initializeParser() {
-		InputStream is = null;
-		try {
-			is = new FileInputStream(MODEL_DIR
-					+ "/en-parser-chunking.bin");
-			ParserModel model = new ParserModel(is);
-			parser = ParserFactory.create(model);
-		} catch (IOException e) {
-			e.printStackTrace();
-		} finally {
-			if (is != null) {
-				try {
-					is.close();
-				} catch (IOException e) {
-				}
-			}
-		}
-	}
+  /**
+   * convert an instance of Parse to SyntacticTreeNode, by filtering out the
+   * unnecessary data and assigning the word for each node
+   * 
+   * @param parse the OpenNLP parse tree to convert (may be null)
+   */
+  private static SyntacticTreeNode toSyntacticTreeNode(Parse parse) {
+    if (parse == null)
+      return null;
 
-	private void initializeChunker() {
-		InputStream is = null;
-		try {
-			is = new FileInputStream(MODEL_DIR
-					+ "/en-chunker.bin");
-			ChunkerModel model = new ChunkerModel(is);
-			chunker = new ChunkerME(model);
-		} catch (IOException e) {
-			e.printStackTrace();
-		} finally {
-			if (is != null) {
-				try {
-					is.close();
-				} catch (IOException e) {
-				}
-			}
-		}
-	}
+    // check for junk types
+    String type = parse.getType();
+    if (SyntacticTreeNode.isJunkType(type, parse))
+      return null;
 
-	/**
-	 * convert an instance of Parse to SyntacticTreeNode, by filtering out the
-	 * unnecessary data and assigning the word for each node
-	 * 
-	 * @param parse
-	 */
-	private static SyntacticTreeNode toSyntacticTreeNode(Parse parse) {
-		if (parse == null)
-			return null;
+    String text = parse.getText();
+    ArrayList<SyntacticTreeNode> childrenNodeList = convertChildrenNodes(parse);
 
-		// check for junk types
-		String type = parse.getType();
-		if (SyntacticTreeNode.isJunkType(type, parse) )
-			return null;
+    // check sentence node, the node contained in the top node
+    if (type.equals(AbstractBottomUpParser.TOP_NODE)
+        && childrenNodeList != null && childrenNodeList.size() > 0) {
+      PhraseNode rootNode = (PhraseNode) childrenNodeList.get(0);
+      return new SentenceNode(text, rootNode.getChildren());
+    }
 
-		String text = parse.getText();
-		ArrayList<SyntacticTreeNode> childrenNodeList = convertChildrenNodes(parse);
+    // if this node contains children nodes, then it is a phrase node
+    if (childrenNodeList != null && childrenNodeList.size() > 0) {
+      // System.out.println("Found "+ type + " phrase = "+ childrenNodeList);
+      return new PhraseNode(type, childrenNodeList);
 
-		// check sentence node, the node contained in the top node
-		if (type.equals(AbstractBottomUpParser.TOP_NODE)
-				&& childrenNodeList != null && childrenNodeList.size() > 0) {
-			PhraseNode rootNode = (PhraseNode) childrenNodeList.get(0);
-			return new SentenceNode(text, rootNode.getChildren());
-		}
+    }
 
-		// if this node contains children nodes, then it is a phrase node
-		if (childrenNodeList != null && childrenNodeList.size() > 0) {
-			//System.out.println("Found "+ type + " phrase = "+ childrenNodeList);
-			return new PhraseNode(type, childrenNodeList);
-			
-		}
+    // otherwise, it is a word node
+    Span span = parse.getSpan();
+    String word = text.substring(span.getStart(), span.getEnd()).trim();
 
-		// otherwise, it is a word node
-		Span span = parse.getSpan();
-		String word = text.substring(span.getStart(), span.getEnd()).trim();
+    return new WordNode(type, word);
+  }
 
-		return new WordNode(type, word);
-	}
+  private static ArrayList<SyntacticTreeNode> convertChildrenNodes(Parse parse) {
+    if (parse == null)
+      return null;
 
-	private static ArrayList<SyntacticTreeNode> convertChildrenNodes(Parse parse) {
-		if (parse == null)
-			return null;
+    Parse[] children = parse.getChildren();
+    if (children == null || children.length == 0)
+      return null;
 
-		Parse[] children = parse.getChildren();
-		if (children == null || children.length == 0)
-			return null;
+    ArrayList<SyntacticTreeNode> childrenNodeList = new ArrayList<SyntacticTreeNode>();
+    for (Parse child : children) {
+      SyntacticTreeNode childNode = toSyntacticTreeNode(child);
+      if (childNode != null)
+        childrenNodeList.add(childNode);
+    }
 
-		ArrayList<SyntacticTreeNode> childrenNodeList = new ArrayList<SyntacticTreeNode>();
-		for (Parse child : children) {
-			SyntacticTreeNode childNode = toSyntacticTreeNode(child);
-			if (childNode != null)
-				childrenNodeList.add(childNode);
-		}
+    return childrenNodeList;
+  }
 
-		return childrenNodeList;
-	}
+  /**
+   * The key function of similarity component which takes two portions of text
+   * and does similarity assessment by finding the set of all maximum common
+   * subtrees of the set of parse trees for each portion of text
+   * 
+   * @param para1
+   *          input text 1
+   * @param para2
+   *          input text 2
+   * @return the matching results structure, which includes the similarity score
+   */
+  public SentencePairMatchResult assessRelevance(String para1, String para2) {
+    List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(para1), sent2GrpLst = formGroupedPhrasesFromChunksForPara(para2);
 
-	/**
-	 * The key function of similarity component which takes two portions of text and does similarity assessment by finding the set of all maximum common subtrees
-	 * of the set of parse trees for each portion of text
-	 * @param input text 1
-	 * @param input text 2
-	 * @return the matching results structure, which includes the similarity score
-	 */
-	public SentencePairMatchResult assessRelevance(String para1, String para2)
-	{
-		List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(para1), 
-		sent2GrpLst = formGroupedPhrasesFromChunksForPara(para2);
+    List<LemmaPair> origChunks1 = listListParseTreeChunk2ListLemmaPairs(sent1GrpLst);
 
-		List<LemmaPair> origChunks1 = listListParseTreeChunk2ListLemmaPairs(sent1GrpLst); 
+    ParseTreeMatcherDeterministic md = new ParseTreeMatcherDeterministic();
+    List<List<ParseTreeChunk>> res = md
+        .matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst);
+    return new SentencePairMatchResult(res, origChunks1);
 
+  }
 
-		ParseTreeMatcherDeterministic md = new ParseTreeMatcherDeterministic();
-		List<List<ParseTreeChunk>> res = md.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst);
-		return new SentencePairMatchResult(res, origChunks1);
+  protected List<LemmaPair> listListParseTreeChunk2ListLemmaPairs(
+      List<List<ParseTreeChunk>> sent1GrpLst) {
+    List<LemmaPair> results = new ArrayList<LemmaPair>();
+    if (sent1GrpLst == null || sent1GrpLst.size() < 1)
+      return results;
+    List<ParseTreeChunk> wholeSentence = sent1GrpLst
+        .get(sent1GrpLst.size() - 1); // whole sentence is last list in the list
+                                      // of lists
 
-	}
-	
-	protected List<LemmaPair> listListParseTreeChunk2ListLemmaPairs(
-			List<List<ParseTreeChunk>> sent1GrpLst) {
-		List<LemmaPair>  results = new ArrayList<LemmaPair>();
-		if (sent1GrpLst==null || sent1GrpLst.size() <1)
-			return  results;
-		List<ParseTreeChunk> wholeSentence = sent1GrpLst.get(sent1GrpLst.size()-1); // whole sentence is last list in the list of lists
-		
-		List<String> pOSs = wholeSentence.get(0).getPOSs();
-		List<String> lemmas = wholeSentence.get(0).getLemmas();
-		for(int i= 0; i< lemmas.size(); i++){
-			results.add(new LemmaPair( pOSs.get(i), lemmas.get(i), i  ));
-		}
+    List<String> pOSs = wholeSentence.get(0).getPOSs();
+    List<String> lemmas = wholeSentence.get(0).getLemmas();
+    for (int i = 0; i < lemmas.size(); i++) {
+      results.add(new LemmaPair(pOSs.get(i), lemmas.get(i), i));
+    }
 
-		return results;
-	}
+    return results;
+  }
 
-	public void printParseTree(String phrase1){
-		ParserChunker2MatcherProcessor p = ParserChunker2MatcherProcessor.getInstance();
-		List<List<SentenceNode>> nodeListList = p.parseTextNode(phrase1);
-		for (List<SentenceNode> nodeList : nodeListList) {
-			for (SentenceNode node : nodeList) {
-				System.out.println(node);
-			}
-		}
-	}
+  public void printParseTree(String phrase1) {
+    ParserChunker2MatcherProcessor p = ParserChunker2MatcherProcessor
+        .getInstance();
+    List<List<SentenceNode>> nodeListList = p.parseTextNode(phrase1);
+    for (List<SentenceNode> nodeList : nodeListList) {
+      for (SentenceNode node : nodeList) {
+        System.out.println(node);
+      }
+    }
+  }
 }
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserConstants.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserConstants.java
index 2719b38..7d02210 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserConstants.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserConstants.java
@@ -18,74 +18,74 @@
 package opennlp.tools.textsimilarity.chunker2matcher;
 
 public interface ParserConstants {
-	// added new POS types for infinitive phrase and participle phrase
-	public static final String TYPE_STP = "STP"; // infinitive phrase
-	public static final String TYPE_SGP = "SGP"; // present participle phrase
-	public static final String TYPE_SNP = "SNP"; // past participle phrase
+  // added new POS types for infinitive phrase and participle phrase
+  public static final String TYPE_STP = "STP"; // infinitive phrase
+  public static final String TYPE_SGP = "SGP"; // present participle phrase
+  public static final String TYPE_SNP = "SNP"; // past participle phrase
 
-	// below are the standard POS types,
-	// http://bulba.sdsu.edu/jeanette/thesis/PennTags.html
-	public static final String TYPE_ADJP = "ADJP";
-	public static final String TYPE_ADVP = "ADVP";
-	public static final String TYPE_CC = "CC";
-	public static final String TYPE_CD = "CD";
-	public static final String TYPE_CONJP = "CONJP";
-	public static final String TYPE_DT = "DT";
-	public static final String TYPE_EX = "EX";
-	public static final String TYPE_FRAG = "FRAG";
-	public static final String TYPE_FW = "FW";
-	public static final String TYPE_IN = "IN";
-	public static final String TYPE_INTJ = "INTJ";
-	public static final String TYPE_JJ = "JJ";
-	public static final String TYPE_JJR = "JJR";
-	public static final String TYPE_JJS = "JJS";
-	public static final String TYPE_LS = "LS";
-	public static final String TYPE_LST = "LST";
-	public static final String TYPE_MD = "MD";
-	public static final String TYPE_NAC = "NAC";
-	public static final String TYPE_NN = "NN";
-	public static final String TYPE_NNS = "NNS";
-	public static final String TYPE_NNP = "NNP";
-	public static final String TYPE_NNPS = "NNPS";
-	public static final String TYPE_NP = "NP";
-	public static final String TYPE_NX = "NX";
-	public static final String TYPE_PDT = "PDT";
-	public static final String TYPE_POS = "POS";
-	public static final String TYPE_PP = "PP";
-	public static final String TYPE_PRN = "PRN";
-	public static final String TYPE_PRP = "PRP";
-	public static final String TYPE_PRP$ = "PRP$";
-	public static final String TYPE_PRT = "PRT";
-	public static final String TYPE_QP = "QP";
-	public static final String TYPE_RB = "RB";
-	public static final String TYPE_RBR = "RBR";
-	public static final String TYPE_RBS = "RBS";
-	public static final String TYPE_RP = "RP";
-	public static final String TYPE_RRC = "RRC";
-	public static final String TYPE_S = "S";
-	public static final String TYPE_SBAR = "SBAR";
-	public static final String TYPE_SBARQ = "SBARQ";
-	public static final String TYPE_SINV = "SINV";
-	public static final String TYPE_SQ = "SQ";
-	public static final String TYPE_SYM = "SYM";
-	public static final String TYPE_TO = "TO";
-	public static final String TYPE_TOP = "TOP";
-	public static final String TYPE_UCP = "UCP";
-	public static final String TYPE_UH = "UH";
-	public static final String TYPE_VB = "VB";
-	public static final String TYPE_VBD = "VBD";
-	public static final String TYPE_VBG = "VBG";
-	public static final String TYPE_VBN = "VBN";
-	public static final String TYPE_VBP = "VBP";
-	public static final String TYPE_VBZ = "VBZ";
-	public static final String TYPE_VP = "VP";
-	public static final String TYPE_WDT = "WDT";
-	public static final String TYPE_WHADJP = "WHADJP";
-	public static final String TYPE_WHADVP = "WHADVP";
-	public static final String TYPE_WHNP = "WHNP";
-	public static final String TYPE_WHPP = "WHPP";
-	public static final String TYPE_WP = "WP";
-	public static final String TYPE_WP$ = "WP$";
-	public static final String TYPE_WRB = "WRB";
-	public static final String TYPE_X = "X";
+  // below are the standard POS types,
+  // http://bulba.sdsu.edu/jeanette/thesis/PennTags.html
+  public static final String TYPE_ADJP = "ADJP";
+  public static final String TYPE_ADVP = "ADVP";
+  public static final String TYPE_CC = "CC";
+  public static final String TYPE_CD = "CD";
+  public static final String TYPE_CONJP = "CONJP";
+  public static final String TYPE_DT = "DT";
+  public static final String TYPE_EX = "EX";
+  public static final String TYPE_FRAG = "FRAG";
+  public static final String TYPE_FW = "FW";
+  public static final String TYPE_IN = "IN";
+  public static final String TYPE_INTJ = "INTJ";
+  public static final String TYPE_JJ = "JJ";
+  public static final String TYPE_JJR = "JJR";
+  public static final String TYPE_JJS = "JJS";
+  public static final String TYPE_LS = "LS";
+  public static final String TYPE_LST = "LST";
+  public static final String TYPE_MD = "MD";
+  public static final String TYPE_NAC = "NAC";
+  public static final String TYPE_NN = "NN";
+  public static final String TYPE_NNS = "NNS";
+  public static final String TYPE_NNP = "NNP";
+  public static final String TYPE_NNPS = "NNPS";
+  public static final String TYPE_NP = "NP";
+  public static final String TYPE_NX = "NX";
+  public static final String TYPE_PDT = "PDT";
+  public static final String TYPE_POS = "POS";
+  public static final String TYPE_PP = "PP";
+  public static final String TYPE_PRN = "PRN";
+  public static final String TYPE_PRP = "PRP";
+  public static final String TYPE_PRP$ = "PRP$";
+  public static final String TYPE_PRT = "PRT";
+  public static final String TYPE_QP = "QP";
+  public static final String TYPE_RB = "RB";
+  public static final String TYPE_RBR = "RBR";
+  public static final String TYPE_RBS = "RBS";
+  public static final String TYPE_RP = "RP";
+  public static final String TYPE_RRC = "RRC";
+  public static final String TYPE_S = "S";
+  public static final String TYPE_SBAR = "SBAR";
+  public static final String TYPE_SBARQ = "SBARQ";
+  public static final String TYPE_SINV = "SINV";
+  public static final String TYPE_SQ = "SQ";
+  public static final String TYPE_SYM = "SYM";
+  public static final String TYPE_TO = "TO";
+  public static final String TYPE_TOP = "TOP";
+  public static final String TYPE_UCP = "UCP";
+  public static final String TYPE_UH = "UH";
+  public static final String TYPE_VB = "VB";
+  public static final String TYPE_VBD = "VBD";
+  public static final String TYPE_VBG = "VBG";
+  public static final String TYPE_VBN = "VBN";
+  public static final String TYPE_VBP = "VBP";
+  public static final String TYPE_VBZ = "VBZ";
+  public static final String TYPE_VP = "VP";
+  public static final String TYPE_WDT = "WDT";
+  public static final String TYPE_WHADJP = "WHADJP";
+  public static final String TYPE_WHADVP = "WHADVP";
+  public static final String TYPE_WHNP = "WHNP";
+  public static final String TYPE_WHPP = "WHPP";
+  public static final String TYPE_WP = "WP";
+  public static final String TYPE_WP$ = "WP$";
+  public static final String TYPE_WRB = "WRB";
+  public static final String TYPE_X = "X";
 }
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserPure2MatcherProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserPure2MatcherProcessor.java
index 5f34fc7..65abdef 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserPure2MatcherProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserPure2MatcherProcessor.java
@@ -43,114 +43,120 @@
 import opennlp.tools.textsimilarity.SentencePairMatchResult;
 import opennlp.tools.textsimilarity.TextProcessor;
 
-public class ParserPure2MatcherProcessor extends ParserChunker2MatcherProcessor{
-	protected static ParserPure2MatcherProcessor pinstance;
-	private static Logger LOG = Logger.getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserPure2MatcherProcessor");
+public class ParserPure2MatcherProcessor extends ParserChunker2MatcherProcessor {
+  protected static ParserPure2MatcherProcessor pinstance;
+  private static Logger LOG = Logger
+      .getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserPure2MatcherProcessor");
 
-	public synchronized static ParserPure2MatcherProcessor getInstance() {
-		if (pinstance == null)
-			pinstance = new ParserPure2MatcherProcessor();
+  public synchronized static ParserPure2MatcherProcessor getInstance() {
+    if (pinstance == null)
+      pinstance = new ParserPure2MatcherProcessor();
 
-		return pinstance;
-	}
-	
-	private ParserPure2MatcherProcessor() {
-		initializeSentenceDetector();
-		initializeTokenizer();
-		initializePosTagger();
-		initializeParser();
-	}
+    return pinstance;
+  }
 
-	public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForSentence(String sentence) {
-		if (sentence == null || sentence.trim().length() < MIN_SENTENCE_LENGTH)
-			return null;
+  private ParserPure2MatcherProcessor() {
+    initializeSentenceDetector();
+    initializeTokenizer();
+    initializePosTagger();
+    initializeParser();
+  }
 
-		sentence = TextProcessor.removePunctuation(sentence);
-		SentenceNode node  = parseSentenceNode(sentence);
-		if (node==null){
-			LOG.info("Problem parsing sentence '"+sentence);
-			return null;
-		}
-		List<ParseTreeChunk> ptcList = node.getParseTreeChunkList();
-		List<String> POSlist = node.getOrderedPOSList();
-		List<String> TokList = node.getOrderedLemmaList();
-	
-		List<List<ParseTreeChunk>> listOfChunks = new ArrayList<List<ParseTreeChunk>>();
-		List<ParseTreeChunk> nounPhr = new ArrayList<ParseTreeChunk>(), 
-		prepPhr = new ArrayList<ParseTreeChunk>(), verbPhr  = new ArrayList<ParseTreeChunk>(), 
-		adjPhr  = new ArrayList<ParseTreeChunk>(), 
-		// to store the whole sentence
-		wholeSentence = new ArrayList<ParseTreeChunk>();
+  public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForSentence(
+      String sentence) {
+    if (sentence == null || sentence.trim().length() < MIN_SENTENCE_LENGTH)
+      return null;
 
-		wholeSentence.add(new ParseTreeChunk("SENTENCE", TokList, POSlist));
-		for(ParseTreeChunk phr: ptcList){
-			String phrType = phr.getMainPOS();
-			if (phrType.startsWith("NP")){
-				nounPhr.add(phr);
-			} else if (phrType.startsWith("VP")){
-				verbPhr.add(phr);
-			} else if (phrType.startsWith("PP")){
-				prepPhr.add(phr);
-			} else if (phrType.endsWith("ADJP")){
-				adjPhr.add(phr);
-			} else {
-				//LOG.info("Unexpected phrase type found :"+ phr);				
-			}
-			
-		}
-	
-		listOfChunks.add(nounPhr);
-		listOfChunks.add(verbPhr);
-		listOfChunks.add(prepPhr);
-		listOfChunks.add(adjPhr);
-		listOfChunks.add(wholeSentence);
+    sentence = TextProcessor.removePunctuation(sentence);
+    SentenceNode node = parseSentenceNode(sentence);
+    if (node == null) {
+      LOG.info("Problem parsing sentence: '" + sentence + "'");
+      return null;
+    }
+    List<ParseTreeChunk> ptcList = node.getParseTreeChunkList();
+    List<String> POSlist = node.getOrderedPOSList();
+    List<String> TokList = node.getOrderedLemmaList();
 
-		return listOfChunks;
-	}
-	
-	public SentencePairMatchResult assessRelevance(String para1, String para2)
-	{
-	
-		List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(para1), 
-		sent2GrpLst = formGroupedPhrasesFromChunksForPara(para2);
+    List<List<ParseTreeChunk>> listOfChunks = new ArrayList<List<ParseTreeChunk>>();
+    List<ParseTreeChunk> nounPhr = new ArrayList<ParseTreeChunk>(), prepPhr = new ArrayList<ParseTreeChunk>(), verbPhr = new ArrayList<ParseTreeChunk>(), adjPhr = new ArrayList<ParseTreeChunk>(),
+    // to store the whole sentence
+    wholeSentence = new ArrayList<ParseTreeChunk>();
 
-		List<LemmaPair> origChunks1 = listListParseTreeChunk2ListLemmaPairs(sent1GrpLst); //TODO  need to populate it!
+    wholeSentence.add(new ParseTreeChunk("SENTENCE", TokList, POSlist));
+    for (ParseTreeChunk phr : ptcList) {
+      String phrType = phr.getMainPOS();
+      if (phrType.startsWith("NP")) {
+        nounPhr.add(phr);
+      } else if (phrType.startsWith("VP")) {
+        verbPhr.add(phr);
+      } else if (phrType.startsWith("PP")) {
+        prepPhr.add(phr);
+      } else if (phrType.endsWith("ADJP")) {
+        adjPhr.add(phr);
+      } else {
+        // LOG.info("Unexpected phrase type found :"+ phr);
+      }
 
+    }
 
-		ParseTreeMatcherDeterministic md = new ParseTreeMatcherDeterministic();
-		List<List<ParseTreeChunk>> res = md.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst);
-		return new SentencePairMatchResult(res, origChunks1);
+    listOfChunks.add(nounPhr);
+    listOfChunks.add(verbPhr);
+    listOfChunks.add(prepPhr);
+    listOfChunks.add(adjPhr);
+    listOfChunks.add(wholeSentence);
 
-	}
-	
+    return listOfChunks;
+  }
 
-	public static void main(String[] args) throws Exception {
-		ParserPure2MatcherProcessor parser = ParserPure2MatcherProcessor.getInstance();
-		String text = "Its classy design and the Mercedes name make it a very cool vehicle to drive. ";
+  public SentencePairMatchResult assessRelevance(String para1, String para2) {
 
-		List<List<ParseTreeChunk>> res = parser.formGroupedPhrasesFromChunksForPara(text);
-		System.out.println(res);
-		
-	//	System.exit(0);
+    List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(para1), sent2GrpLst = formGroupedPhrasesFromChunksForPara(para2);
 
-		
-		String phrase1 = "Its classy design and the Mercedes name make it a very cool vehicle to drive. "
-			+ "The engine makes it a powerful car. "
-			+ "The strong engine gives it enough power. "
-			+ "The strong engine gives the car a lot of power.";
-		String phrase2 = "This car has a great engine. "
-			+ "This car has an amazingly good engine. "
-			+ "This car provides you a very good mileage.";
-		String sentence = "Not to worry with the 2cv.";
+    List<LemmaPair> origChunks1 = listListParseTreeChunk2ListLemmaPairs(sent1GrpLst); // TODO
+                                                                                      // need
+                                                                                      // to
+                                                                                      // populate
+                                                                                      // it!
 
+    ParseTreeMatcherDeterministic md = new ParseTreeMatcherDeterministic();
+    List<List<ParseTreeChunk>> res = md
+        .matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst);
+    return new SentencePairMatchResult(res, origChunks1);
 
-		System.out.println(parser.assessRelevance(phrase1, phrase2).getMatchResult());
+  }
 
+  public static void main(String[] args) throws Exception {
+    ParserPure2MatcherProcessor parser = ParserPure2MatcherProcessor
+        .getInstance();
+    String text = "Its classy design and the Mercedes name make it a very cool vehicle to drive. ";
 
-		System.out.println(parser.formGroupedPhrasesFromChunksForSentence("Its classy design and the Mercedes name make it a very cool vehicle to drive. "));
-		System.out.println(parser.formGroupedPhrasesFromChunksForSentence("Sounds too good to be true but it actually is, the world's first flying car is finally here. "));
-		System.out.println(parser.formGroupedPhrasesFromChunksForSentence("UN Ambassador Ron Prosor repeated the Israeli position that the only way the Palestinians will get UN membership and statehood is through direct negotiations with the Israelis on a comprehensive peace agreement"));
+    List<List<ParseTreeChunk>> res = parser
+        .formGroupedPhrasesFromChunksForPara(text);
+    System.out.println(res);
 
+    // System.exit(0);
 
-	}
+    String phrase1 = "Its classy design and the Mercedes name make it a very cool vehicle to drive. "
+        + "The engine makes it a powerful car. "
+        + "The strong engine gives it enough power. "
+        + "The strong engine gives the car a lot of power.";
+    String phrase2 = "This car has a great engine. "
+        + "This car has an amazingly good engine. "
+        + "This car provides you a very good mileage.";
+    String sentence = "Not to worry with the 2cv.";
+
+    System.out.println(parser.assessRelevance(phrase1, phrase2)
+        .getMatchResult());
+
+    System.out
+        .println(parser
+            .formGroupedPhrasesFromChunksForSentence("Its classy design and the Mercedes name make it a very cool vehicle to drive. "));
+    System.out
+        .println(parser
+            .formGroupedPhrasesFromChunksForSentence("Sounds too good to be true but it actually is, the world's first flying car is finally here. "));
+    System.out
+        .println(parser
+            .formGroupedPhrasesFromChunksForSentence("UN Ambassador Ron Prosor repeated the Israeli position that the only way the Palestinians will get UN membership and statehood is through direct negotiations with the Israelis on a comprehensive peace agreement"));
+
+  }
 }
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNode.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNode.java
index 6d7446a..8469441 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNode.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNode.java
@@ -21,123 +21,123 @@
 import java.util.List;
 
 public class PhraseNode extends SyntacticTreeNode {
-	// children nodes within a phrase node
-	private List<SyntacticTreeNode> children;
+  // children nodes within a phrase node
+  private List<SyntacticTreeNode> children;
 
-	public PhraseNode(String type, List<SyntacticTreeNode> children) {
-		super(type);
-		setChildren(children);
-	}
+  public PhraseNode(String type, List<SyntacticTreeNode> children) {
+    super(type);
+    setChildren(children);
+  }
 
-	@Override
-	public List<SyntacticTreeNode> getChildren() {
-		return children;
-	}
+  @Override
+  public List<SyntacticTreeNode> getChildren() {
+    return children;
+  }
 
-	public void setChildren(List<SyntacticTreeNode> children) {
-		this.children = children;
+  public void setChildren(List<SyntacticTreeNode> children) {
+    this.children = children;
 
-		// set the parent of the children nodes
-		if (children != null && children.size() > 0) {
-			for (SyntacticTreeNode child : children) {
-				if (child != null)
-					child.setParentNode(this);
-			}
-		}
-	}
+    // set the parent of the children nodes
+    if (children != null && children.size() > 0) {
+      for (SyntacticTreeNode child : children) {
+        if (child != null)
+          child.setParentNode(this);
+      }
+    }
+  }
 
-	public void addChild(SyntacticTreeNode child) {
-		if (child == null)
-			return;
+  public void addChild(SyntacticTreeNode child) {
+    if (child == null)
+      return;
 
-		if (children == null) {
-			children = new ArrayList<SyntacticTreeNode>();
-		}
+    if (children == null) {
+      children = new ArrayList<SyntacticTreeNode>();
+    }
 
-		// set the parent of the child node
-		child.setParentNode(this);
+    // set the parent of the child node
+    child.setParentNode(this);
 
-		children.add(child);
-	}
+    children.add(child);
+  }
 
-	@Override
-	public String getText() {
-		return getText(false, false);
-	}
+  @Override
+  public String getText() {
+    return getText(false, false);
+  }
 
-	@Override
-	public String getLemma(boolean removeStopWord) {
-		return getText(true, removeStopWord);
-	}
+  @Override
+  public String getLemma(boolean removeStopWord) {
+    return getText(true, removeStopWord);
+  }
 
-	private String getText(boolean useLemma, boolean removeStopWord) {
-		if (children == null || children.size() == 0)
-			return null;
+  private String getText(boolean useLemma, boolean removeStopWord) {
+    if (children == null || children.size() == 0)
+      return null;
 
-		StringBuilder builder = new StringBuilder();
-		boolean first = true;
-		for (SyntacticTreeNode child : children) {
-			String childText = null;
-			if (useLemma)
-				childText = child.getLemma(removeStopWord);
-			else
-				childText = child.getText();
+    StringBuilder builder = new StringBuilder();
+    boolean first = true;
+    for (SyntacticTreeNode child : children) {
+      String childText = null;
+      if (useLemma)
+        childText = child.getLemma(removeStopWord);
+      else
+        childText = child.getText();
 
-			if (childText == null || childText.length() == 0)
-				continue;
+      if (childText == null || childText.length() == 0)
+        continue;
 
-			// add a leading space for children other than the first
-			if (first)
-				first = false;
-			else
-				builder.append(" ");
+      // add a leading space for children other than the first
+      if (first)
+        first = false;
+      else
+        builder.append(" ");
 
-			// add the child
-			builder.append(childText);
-		}
+      // add the child
+      builder.append(childText);
+    }
 
-		return builder.toString();
-	}
+    return builder.toString();
+  }
 
-	@Override
-	public String toStringIndented(int numTabs) {
-		StringBuilder builder = new StringBuilder();
+  @Override
+  public String toStringIndented(int numTabs) {
+    StringBuilder builder = new StringBuilder();
 
-		String indent = SyntacticTreeNode.getIndent(numTabs);
-		builder.append(indent).append("type = ").append(getType());
+    String indent = SyntacticTreeNode.getIndent(numTabs);
+    builder.append(indent).append("type = ").append(getType());
 
-		// output all the children
-		if (children != null && children.size() > 0) {
-			numTabs++;
-			for (SyntacticTreeNode child : children) {
-				builder.append("\n").append(child.toStringIndented(numTabs));
-			}
-		}
+    // output all the children
+    if (children != null && children.size() > 0) {
+      numTabs++;
+      for (SyntacticTreeNode child : children) {
+        builder.append("\n").append(child.toStringIndented(numTabs));
+      }
+    }
 
-		return builder.toString();
-	}
-	
-	@Override
-	public List<String> getOrderedPOSList(){
-		List<String> types = new ArrayList<String>(); 
-		if (children != null && children.size() > 0) {
-			for (SyntacticTreeNode child : children) {
-				types.addAll(child.getOrderedPOSList());
-			}
-		} else
-			types.add(getType());
-		return types;
-	}
-	
-	@Override
-	public List<String> getOrderedLemmaList(){
-		List<String> types = new ArrayList<String>(); 
-		if (children != null && children.size() > 0) {
-			for (SyntacticTreeNode child : children) {
-				types.addAll(child.getOrderedLemmaList());
-			}
-		} else
-			types.add(getType());
-		return types;
-	}
+    return builder.toString();
+  }
+
+  @Override
+  public List<String> getOrderedPOSList() {
+    List<String> types = new ArrayList<String>();
+    if (children != null && children.size() > 0) {
+      for (SyntacticTreeNode child : children) {
+        types.addAll(child.getOrderedPOSList());
+      }
+    } else
+      types.add(getType());
+    return types;
+  }
+
+  @Override
+  public List<String> getOrderedLemmaList() {
+    List<String> types = new ArrayList<String>();
+    if (children != null && children.size() > 0) {
+      for (SyntacticTreeNode child : children) {
+        types.addAll(child.getOrderedLemmaList());
+      }
+    } else
+      types.add(getType());
+    return types;
+  }
 }
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/SentenceNode.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/SentenceNode.java
index 6f38a22..6dd170a 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/SentenceNode.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/SentenceNode.java
@@ -27,85 +27,97 @@
  * 
  */
 public class SentenceNode extends PhraseNode {
-	private String sentence;
+  private String sentence;
 
-	public SentenceNode(String sentence, List<SyntacticTreeNode> children) {
-		super(ParserConstants.TYPE_S, children);
+  public SentenceNode(String sentence, List<SyntacticTreeNode> children) {
+    super(ParserConstants.TYPE_S, children);
 
-		this.sentence = sentence;
-	}
+    this.sentence = sentence;
+  }
 
-	@Override
-	public String getText() {
-		return sentence;
-	}
+  @Override
+  public String getText() {
+    return sentence;
+  }
 
-	public String getSentence() {
-		return sentence;
-	}
+  public String getSentence() {
+    return sentence;
+  }
 
-	public void setSentence(String sentence) {
-		this.sentence = sentence;
-	}
+  public void setSentence(String sentence) {
+    this.sentence = sentence;
+  }
 
-	@Override
-	public String toStringIndented(int numTabs) {
-		StringBuilder builder = new StringBuilder();
-		String indent = SyntacticTreeNode.getIndent(numTabs);
+  @Override
+  public String toStringIndented(int numTabs) {
+    StringBuilder builder = new StringBuilder();
+    String indent = SyntacticTreeNode.getIndent(numTabs);
 
-		// output the sentence
-		builder.append(indent).append(sentence).append("\n");
-		builder.append(super.toStringIndented(numTabs));
+    // output the sentence
+    builder.append(indent).append(sentence).append("\n");
+    builder.append(super.toStringIndented(numTabs));
 
-		return builder.toString();
-	}
-	
-	@Override
-	public List<String> getOrderedPOSList(){
-		List<String> types = new ArrayList<String>(); 
-		if (this.getChildren()!= null && this.getChildren().size() > 0) {
-			for (SyntacticTreeNode child : this.getChildren()) {
-				types.addAll(child.getOrderedPOSList());
-			}
-		}
-		return types;
-	}
-	
-	@Override
-	public List<String> getOrderedLemmaList(){
-		List<String> types = new ArrayList<String>(); 
-		if (this.getChildren()!= null && this.getChildren().size() > 0) {
-			for (SyntacticTreeNode child : this.getChildren()) {
-				types.addAll(child.getOrderedLemmaList());
-			}
-		}
-		return types;
-	}
-	
-	public List<ParseTreeChunk> getParseTreeChunkList(){
-		List<ParseTreeChunk> chunks = new ArrayList<ParseTreeChunk>();
-		
-		if (this.getChildren()!= null && this.getChildren().size() > 0) {
-			for (SyntacticTreeNode child : this.getChildren()) {
-			//	if (child.getType().endsWith("P"))
-					chunks.add(new ParseTreeChunk(child.getType(),  
-							child.getOrderedPOSList(), child.getOrderedLemmaList()));
-			}
-		}
-		return chunks;
-	}
-	
-	
-	
+    return builder.toString();
+  }
+
+  @Override
+  public List<String> getOrderedPOSList() {
+    List<String> types = new ArrayList<String>();
+    if (this.getChildren() != null && this.getChildren().size() > 0) {
+      for (SyntacticTreeNode child : this.getChildren()) {
+        types.addAll(child.getOrderedPOSList());
+      }
+    }
+    return types;
+  }
+
+  @Override
+  public List<String> getOrderedLemmaList() {
+    List<String> types = new ArrayList<String>();
+    if (this.getChildren() != null && this.getChildren().size() > 0) {
+      for (SyntacticTreeNode child : this.getChildren()) {
+        types.addAll(child.getOrderedLemmaList());
+      }
+    }
+    return types;
+  }
+
+  public List<ParseTreeChunk> getParseTreeChunkList() {
+    List<ParseTreeChunk> chunks = new ArrayList<ParseTreeChunk>();
+
+    if (this.getChildren() != null && this.getChildren().size() > 0) {
+      for (SyntacticTreeNode child : this.getChildren()) {
+        // if (child.getType().endsWith("P"))
+        chunks.add(new ParseTreeChunk(child.getType(), child
+            .getOrderedPOSList(), child.getOrderedLemmaList()));
+      }
+    }
+    return chunks;
+  }
+
 }
 
 /*
- * [[NP [PRP$-your NN-town NN-office CC-or NN-city NN-hall ], NP [PRP$-your NN-town NN-doesn NN-t ], 
- * NP [DT-an NN-office ], NP [DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], 
- * NP [DT-a NNP-Selectman ], NP [PRP-them IN-that PRP-you ], NP [PRP-you ], NP [DT-a CD-1040 NN-tax NN-form ], NP [PRP-I ], 
- * NP [DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [PRP$-my NNP-House WP-What MD-Can PRP-I ], 
- * NP [WP-What MD-Can PRP-I ], NP [PRP-I ], NP [NNP-Pine NNP-Tree NNP-Legal ]], 
+ * [[NP [PRP$-your NN-town NN-office CC-or NN-city NN-hall ], NP [PRP$-your
+ * NN-town NN-doesn NN-t ], NP [DT-an NN-office ], NP [DT-the NN-town NN-clerk
+ * CC-or DT-a NNP-Selectman ], NP [DT-a NNP-Selectman ], NP [PRP-them IN-that
+ * PRP-you ], NP [PRP-you ], NP [DT-a CD-1040 NN-tax NN-form ], NP [PRP-I ], NP
+ * [DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [PRP$-my
+ * NNP-House WP-What MD-Can PRP-I ], NP [WP-What MD-Can PRP-I ], NP [PRP-I ], NP
+ * [NNP-Pine NNP-Tree NNP-Legal ]],
  * 
- * [VP [VBP-do RB-I VB-apply ], VP [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], 
- * VP [VBP-have DT-an NN-office ], VP [VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], VP [VB-Tell PRP-them IN-that PRP-you ], VP [VBP-need DT-a CD-1040 NN-tax NN-form ], VP [MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], VP [VB-Do NNP-Pine NNP-Tree NNP-Legal ]], [PP [TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], PP [IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ]], [], [SENTENCE [WRB-Where VBP-do RB-I VB-apply ], SENTENCE [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], SENTENCE [IN-If PRP$-your NN-town NN-doesn NN-t VBP-have DT-an NN-office VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], SENTENCE [VB-Tell PRP-them IN-that PRP-you VBP-need DT-a CD-1040 NN-tax NN-form ], SENTENCE [PRP-I MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I VB-Do NNP-Pine NNP-Tree NNP-Legal ]]]
-*/
+ * [VP [VBP-do RB-I VB-apply ], VP [VB-Go TO-to PRP$-your NN-town NN-office
+ * CC-or NN-city NN-hall ], VP [VBP-have DT-an NN-office ], VP [VB-ask DT-the
+ * NN-town NN-clerk CC-or DT-a NNP-Selectman ], VP [VB-Tell PRP-them IN-that
+ * PRP-you ], VP [VBP-need DT-a CD-1040 NN-tax NN-form ], VP [MD-Can VB-t VB-Pay
+ * DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], VP [VB-Do
+ * NNP-Pine NNP-Tree NNP-Legal ]], [PP [TO-to PRP$-your NN-town NN-office CC-or
+ * NN-city NN-hall ], PP [IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ]], [],
+ * [SENTENCE [WRB-Where VBP-do RB-I VB-apply ], SENTENCE [VB-Go TO-to PRP$-your
+ * NN-town NN-office CC-or NN-city NN-hall ], SENTENCE [IN-If PRP$-your NN-town
+ * NN-doesn NN-t VBP-have DT-an NN-office VB-ask DT-the NN-town NN-clerk CC-or
+ * DT-a NNP-Selectman ], SENTENCE [VB-Tell PRP-them IN-that PRP-you VBP-need
+ * DT-a CD-1040 NN-tax NN-form ], SENTENCE [PRP-I MD-Can VB-t VB-Pay DT-the
+ * NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I VB-Do NNP-Pine
+ * NNP-Tree NNP-Legal ]]]
+ */
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/SyntacticTreeNode.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/SyntacticTreeNode.java
index eca3ef4..2b38d5f 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/SyntacticTreeNode.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/SyntacticTreeNode.java
@@ -24,138 +24,135 @@
 import opennlp.tools.parser.Parse;
 
 public abstract class SyntacticTreeNode {
-	// the POS type
-	private String type;
+  // the POS type
+  private String type;
 
-	// parent node, it is null for the root node
-	private PhraseNode parentNode;
+  // parent node, it is null for the root node
+  private PhraseNode parentNode;
 
-	public abstract List<SyntacticTreeNode> getChildren();
+  public abstract List<SyntacticTreeNode> getChildren();
 
-	public abstract String getText();
+  public abstract String getText();
 
-	public abstract String getLemma(boolean removeStopWord);
+  public abstract String getLemma(boolean removeStopWord);
 
-	public abstract String toStringIndented(int numTabs);
-	
-	public abstract List<String> getOrderedPOSList(); 
-	
-	public abstract List<String> getOrderedLemmaList(); 
+  public abstract String toStringIndented(int numTabs);
 
-	public SyntacticTreeNode(String type) {
-		this.type = type;
-	}
+  public abstract List<String> getOrderedPOSList();
 
-	public String getType() {
-		return type;
-	}
+  public abstract List<String> getOrderedLemmaList();
 
-	public void setType(String type) {
-		this.type = type;
-	}
+  public SyntacticTreeNode(String type) {
+    this.type = type;
+  }
 
-	public String getLemma() {
-		return getLemma(false);
-	}
+  public String getType() {
+    return type;
+  }
 
-	public PhraseNode getParentNode() {
-		return parentNode;
-	}
+  public void setType(String type) {
+    this.type = type;
+  }
 
-	public void setParentNode(PhraseNode parentNode) {
-		this.parentNode = parentNode;
-	}
+  public String getLemma() {
+    return getLemma(false);
+  }
 
-	public int getChildrenCount() {
-		List<SyntacticTreeNode> childrenList = getChildren();
-		if (childrenList == null)
-			return 0;
+  public PhraseNode getParentNode() {
+    return parentNode;
+  }
 
-		return childrenList.size();
-	}
+  public void setParentNode(PhraseNode parentNode) {
+    this.parentNode = parentNode;
+  }
 
-	public String toString() {
-		return toStringIndented(0);
-	}
+  public int getChildrenCount() {
+    List<SyntacticTreeNode> childrenList = getChildren();
+    if (childrenList == null)
+      return 0;
 
-	public static String getIndent(int numTabs) {
-		if (numTabs <= 0)
-			return "";
+    return childrenList.size();
+  }
 
-		StringBuilder builder = new StringBuilder();
-		for (int i = 0; i < numTabs; i++) {
-			builder.append("\t");
-		}
+  public String toString() {
+    return toStringIndented(0);
+  }
 
-		return builder.toString();
-	}
+  public static String getIndent(int numTabs) {
+    if (numTabs <= 0)
+      return "";
 
-	public static boolean isJunkType(String type, Parse parse) {
-		if (type == null)
-			return true;
+    StringBuilder builder = new StringBuilder();
+    for (int i = 0; i < numTabs; i++) {
+      builder.append("\t");
+    }
 
-		// the token node is useless
-		if (type.equals(AbstractBottomUpParser.TOK_NODE))
-			return true;
+    return builder.toString();
+  }
 
-		// the punctuation nodes are not useful, '.', '.', '?', '!', ';', etc
-		if ((type.equals(",") || type.equals(".") || type.equals("?")
-				|| type.equals("!") || type.equals(";")) 
-				// TODO : Parser gives type = '.' instead of VB
-				//&& ( parse.getHead().toString().length()<2 
-				)
-			return true;
+  public static boolean isJunkType(String type, Parse parse) {
+    if (type == null)
+      return true;
 
-		return false;
-	}
+    // the token node is useless
+    if (type.equals(AbstractBottomUpParser.TOK_NODE))
+      return true;
 
-	public static void replaceNode(SyntacticTreeNode nodeToReplace,
-			SyntacticTreeNode newNode) {
-		List<SyntacticTreeNode> newNodeList = null;
-		if (newNode != null) {
-			newNodeList = new ArrayList<SyntacticTreeNode>(1);
-			newNodeList.add(newNode);
-		}
+    // the punctuation nodes are not useful, '.', '.', '?', '!', ';', etc
+    if ((type.equals(",") || type.equals(".") || type.equals("?")
+        || type.equals("!") || type.equals(";"))
+    // TODO : Parser gives type = '.' instead of VB
+    // && ( parse.getHead().toString().length()<2
+    )
+      return true;
 
-		replaceNode(nodeToReplace, newNodeList);
-	}
+    return false;
+  }
 
-	public static void replaceNode(SyntacticTreeNode nodeToReplace,
-			List<SyntacticTreeNode> newNodeList) {
-		if (nodeToReplace == null)
-			throw new NullPointerException("The node to replace cannot be null");
+  public static void replaceNode(SyntacticTreeNode nodeToReplace,
+      SyntacticTreeNode newNode) {
+    List<SyntacticTreeNode> newNodeList = null;
+    if (newNode != null) {
+      newNodeList = new ArrayList<SyntacticTreeNode>(1);
+      newNodeList.add(newNode);
+    }
 
-		PhraseNode parentNode = nodeToReplace.getParentNode();
+    replaceNode(nodeToReplace, newNodeList);
+  }
 
-		if (parentNode == null) {
-			// the node to replace is the root node
-			// clear all children of the existing root node and use it as the
-			// new root node
-			if (nodeToReplace instanceof PhraseNode)
-				((PhraseNode) nodeToReplace).setChildren(newNodeList);
-			return;
-		}
+  public static void replaceNode(SyntacticTreeNode nodeToReplace,
+      List<SyntacticTreeNode> newNodeList) {
+    if (nodeToReplace == null)
+      throw new NullPointerException("The node to replace cannot be null");
 
-		List<SyntacticTreeNode> childrenNodes = parentNode.getChildren();
-		int index = childrenNodes.indexOf(nodeToReplace);
-		if (index >= 0) {
-			// remove the old node
-			childrenNodes.remove(index);
+    PhraseNode parentNode = nodeToReplace.getParentNode();
 
-			// put the new node list at the place of the old node if there are
-			// any
-			if (newNodeList != null && newNodeList.size() > 0) {
-				childrenNodes.addAll(index, newNodeList);
+    if (parentNode == null) {
+      // the node to replace is the root node
+      // clear all children of the existing root node and use it as the
+      // new root node
+      if (nodeToReplace instanceof PhraseNode)
+        ((PhraseNode) nodeToReplace).setChildren(newNodeList);
+      return;
+    }
 
-				// set the parent node of the new children
-				for (SyntacticTreeNode newNode : newNodeList) {
-					newNode.setParentNode(parentNode);
-				}
-			}
-		}
-	}
+    List<SyntacticTreeNode> childrenNodes = parentNode.getChildren();
+    int index = childrenNodes.indexOf(nodeToReplace);
+    if (index >= 0) {
+      // remove the old node
+      childrenNodes.remove(index);
 
+      // put the new node list at the place of the old node if there are
+      // any
+      if (newNodeList != null && newNodeList.size() > 0) {
+        childrenNodes.addAll(index, newNodeList);
 
+        // set the parent node of the new children
+        for (SyntacticTreeNode newNode : newNodeList) {
+          newNode.setParentNode(parentNode);
+        }
+      }
+    }
+  }
 
-	
 }
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/WordNode.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/WordNode.java
index 9dc9d35..59fd77e 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/WordNode.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/WordNode.java
@@ -21,71 +21,73 @@
 import java.util.List;
 
 public class WordNode extends SyntacticTreeNode {
-	// the word in the sentence
-	private String word;
-	private String lemma;
+  // the word in the sentence
+  private String word;
+  private String lemma;
 
-	public WordNode(String type, String word) {
-		super(type);
+  public WordNode(String type, String word) {
+    super(type);
 
-		setWord(word);
-	}
+    setWord(word);
+  }
 
-	public String getWord() {
-		return word;
-	}
+  public String getWord() {
+    return word;
+  }
 
-	public void setWord(String word) {
-		this.word = word;
+  public void setWord(String word) {
+    this.word = word;
 
-		// update lemma accordingly
-		this.lemma = null;
-		/*WordDictionary.getInstance().getLemmaOrWord(word,
-				getType()); */
-	}
+    // update lemma accordingly
+    this.lemma = null;
+    /*
+     * WordDictionary.getInstance().getLemmaOrWord(word, getType());
+     */
+  }
 
-	@Override
-	public String getLemma(boolean removeStopWord) {
-		if (removeStopWord) // && Feature.isStopWord(lemma, getType()))
-			return null;
+  @Override
+  public String getLemma(boolean removeStopWord) {
+    if (removeStopWord) // && Feature.isStopWord(lemma, getType()))
+      return null;
 
-		return lemma;
-	}
+    return lemma;
+  }
 
-	@Override
-	public List<SyntacticTreeNode> getChildren() {
-		// a word node is a leaf and has no children
-		return null;
-	}
+  @Override
+  public List<SyntacticTreeNode> getChildren() {
+    // a word node is a leaf and has no children
+    return null;
+  }
 
-	@Override
-	public String getText() {
-		return word;
-	}
+  @Override
+  public String getText() {
+    return word;
+  }
 
-	@Override
-	public String toStringIndented(int numTabs) {
-		String indent = SyntacticTreeNode.getIndent(numTabs);
-		StringBuilder builder = new StringBuilder();
-		builder.append(indent).append("type = ").append(getType())
-				.append(", word = ").append(word);
+  @Override
+  public String toStringIndented(int numTabs) {
+    String indent = SyntacticTreeNode.getIndent(numTabs);
+    StringBuilder builder = new StringBuilder();
+    builder.append(indent).append("type = ").append(getType())
+        .append(", word = ").append(word);
 
-		return builder.toString();
-	}
+    return builder.toString();
+  }
 
-	public static void main(String[] args) {
-	}
+  public static void main(String[] args) {
+  }
 
-	@Override
-	public List<String> getOrderedPOSList() {
-		List<String> types = new ArrayList<String>();
-		types.add(getType());
-		return types;
-	}
-	@Override
-	public List<String> getOrderedLemmaList() {
-		List<String> types = new ArrayList<String>();
-		types.add(this.getWord());
-		return types;
-	}
+  @Override
+  public List<String> getOrderedPOSList() {
+    List<String> types = new ArrayList<String>();
+    types.add(getType());
+    return types;
+  }
+
+  @Override
+  public List<String> getOrderedLemmaList() {
+    List<String> types = new ArrayList<String>();
+    types.add(this.getWord());
+    return types;
+  }
 }
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessorTest.java b/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessorTest.java
index ba15611..dc59a90 100644
--- a/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessorTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessorTest.java
@@ -26,24 +26,26 @@
 
 public class SpeechRecognitionResultsProcessorTest extends TestCase {
 
-	public void testRestaurantEntityInSpeechRecognitionResults(){
-		 SpeechRecognitionResultsProcessor proc = new  SpeechRecognitionResultsProcessor();
-		 List<SentenceMeaningfullnessScore> res = proc.runSearchAndScoreMeaningfulness( Arrays.asList(new String[]{
-				 "remember to buy milk tomorrow for details",
-				 "remember to buy milk tomorrow from trader joes",
-				 "remember to buy milk tomorrow from 3 to jones",
-				 "remember to buy milk tomorrow for for details",
-				 "remember to buy milk tomorrow from third to joes",
-				 "remember to buy milk tomorrow from third to jones",
-				 "remember to buy milk tomorrow from for d jones"
-		 }));
-		 
-		 assertTrue(res.get(1).getScore()> res.get(0).getScore()  && res.get(1).getScore()> res.get(2).getScore()  &&
-				 res.get(1).getScore()> res.get(3).getScore()  && res.get(1).getScore()> res.get(4).getScore()  &&
-				 res.get(1).getScore()> res.get(5).getScore()  && res.get(1).getScore()> res.get(6).getScore()  
-				 );
-		 proc.close();
-		 
-	 }
+  public void testRestaurantEntityInSpeechRecognitionResults() {
+    SpeechRecognitionResultsProcessor proc = new SpeechRecognitionResultsProcessor();
+    List<SentenceMeaningfullnessScore> res = proc
+        .runSearchAndScoreMeaningfulness(Arrays.asList(new String[] {
+            "remember to buy milk tomorrow for details",
+            "remember to buy milk tomorrow from trader joes",
+            "remember to buy milk tomorrow from 3 to jones",
+            "remember to buy milk tomorrow for for details",
+            "remember to buy milk tomorrow from third to joes",
+            "remember to buy milk tomorrow from third to jones",
+            "remember to buy milk tomorrow from for d jones" }));
+
+    assertTrue(res.get(1).getScore() > res.get(0).getScore()
+        && res.get(1).getScore() > res.get(2).getScore()
+        && res.get(1).getScore() > res.get(3).getScore()
+        && res.get(1).getScore() > res.get(4).getScore()
+        && res.get(1).getScore() > res.get(5).getScore()
+        && res.get(1).getScore() > res.get(6).getScore());
+    proc.close();
+
+  }
 
 }
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyBuildMatchTest.java b/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyBuildMatchTest.java
index 6c64059..cb55caa 100644
--- a/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyBuildMatchTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyBuildMatchTest.java
@@ -38,12 +38,15 @@
   }

 

   public void testTaxonomyMatch() {

-    TaxoQuerySnapshotMatcher matcher = new TaxoQuerySnapshotMatcher("src/test/resources/taxonomies/irs_domTaxo.dat");

-    int score = matcher.getTaxoScore("Can Form 1040 EZ be used to claim the earned income credit.",

-    "Can Form 1040EZ be used to claim the earned income credit? . Must I be entitled to claim a child as a dependent to claim the earned income credit based on the child being ");

+    TaxoQuerySnapshotMatcher matcher = new TaxoQuerySnapshotMatcher(

+        "src/test/resources/taxonomies/irs_domTaxo.dat");

+    int score = matcher

+        .getTaxoScore(

+            "Can Form 1040 EZ be used to claim the earned income credit.",

+            "Can Form 1040EZ be used to claim the earned income credit? . Must I be entitled to claim a child as a dependent to claim the earned income credit based on the child being ");

 

-    System.out.println("The score is: "+ score);

-    assertTrue(score>3);

+    System.out.println("The score is: " + score);

+    assertTrue(score > 3);

     matcher.close();

   }

 }

diff --git a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/GeneralizationListReducerTest.java b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/GeneralizationListReducerTest.java
index 7657a47..e2a1de3 100644
--- a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/GeneralizationListReducerTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/GeneralizationListReducerTest.java
@@ -22,8 +22,8 @@
 
 import junit.framework.TestCase;
 
-public class GeneralizationListReducerTest extends TestCase{
-  private GeneralizationListReducer generalizationListReducer = new  GeneralizationListReducer();
+public class GeneralizationListReducerTest extends TestCase {
+  private GeneralizationListReducer generalizationListReducer = new GeneralizationListReducer();
 
   public void notNull() {
     assertNotNull(generalizationListReducer);
@@ -61,16 +61,18 @@
     assertFalse(ch2.isASubChunk(ch1));
     assertFalse(ch5.isASubChunk(ch4));
     assertTrue(ch4.isASubChunk(ch5));
-    
+
     assertFalse(ch2.isASubChunk(ch3));
     assertFalse(ch3.isASubChunk(ch2));
-    
+
     assertFalse(ch5.isASubChunk(ch3));
     assertFalse(ch3.isASubChunk(ch5));
 
     List<ParseTreeChunk> res = generalizationListReducer
         .applyFilteringBySubsumption(inp);
-    assertEquals(res.toString(), "[VP [VB-run IN-around NP-tigers NP-zoo ], NP [DT-the NP-tigers ], NP [DT-the NN-* VBG-flying NN-car ]]");
+    assertEquals(
+        res.toString(),
+        "[VP [VB-run IN-around NP-tigers NP-zoo ], NP [DT-the NP-tigers ], NP [DT-the NN-* VBG-flying NN-car ]]");
     System.out.println(res);
 
   }
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/LemmaFormManagerTest.java b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/LemmaFormManagerTest.java
index 8ea60ca..dda18b2 100644
--- a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/LemmaFormManagerTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/LemmaFormManagerTest.java
@@ -23,23 +23,27 @@
 import org.junit.Test;
 import org.junit.runner.RunWith;
 
-public class LemmaFormManagerTest extends TestCase{
+public class LemmaFormManagerTest extends TestCase {
 
   private LemmaFormManager lemmaFormManager = new LemmaFormManager();
 
-
   public void testNotNull() {
     assertNotNull(lemmaFormManager);
   }
 
-
   public void testMatches() {
-	assertEquals(lemmaFormManager.matchLemmas(null, "loud", "loudness", "NN"), "loud" );
+    assertEquals(lemmaFormManager.matchLemmas(null, "loud", "loudness", "NN"),
+        "loud");
     assertEquals(lemmaFormManager.matchLemmas(null, "24", "12", "CD"), null);
-    assertEquals(lemmaFormManager.matchLemmas(null, "loud", "loudly", "NN"), "loud" );
-    assertEquals(lemmaFormManager.matchLemmas(null, "!upgrade", "upgrade", "NN"), "!upgrade" );
-    assertEquals(lemmaFormManager.matchLemmas(null, "!upgrade", "upgrades", "NN"), null );
-    assertEquals(lemmaFormManager.matchLemmas(null, "!upgrade", "get", "NN"), null);
+    assertEquals(lemmaFormManager.matchLemmas(null, "loud", "loudly", "NN"),
+        "loud");
+    assertEquals(
+        lemmaFormManager.matchLemmas(null, "!upgrade", "upgrade", "NN"),
+        "!upgrade");
+    assertEquals(
+        lemmaFormManager.matchLemmas(null, "!upgrade", "upgrades", "NN"), null);
+    assertEquals(lemmaFormManager.matchLemmas(null, "!upgrade", "get", "NN"),
+        null);
   }
 
 }
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java
index 24adef3..fd7961e 100644
--- a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java
@@ -25,7 +25,7 @@
 import org.junit.runner.RunWith;
 import junit.framework.TestCase;
 
-public class ParseTreeChunkListScorerTest extends TestCase{
+public class ParseTreeChunkListScorerTest extends TestCase {
   private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
   private ParseTreeChunk parseTreeChunk = new ParseTreeChunk();
 
@@ -35,7 +35,7 @@
             + " [ [VB-get NN-visa IN-* NN-* IN-in .-* ],  [VBD-* IN-* NN-* NN-* .-* ],  [VB-* NP-* ]]]");
 
     double sc = parseTreeChunkListScorer.getParseTreeChunkListScore(chs);
-    assertTrue(sc>1.90);
-    
+    assertTrue(sc > 1.90);
+
   }
 }
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkTest.java b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkTest.java
index 99330da..bc39669 100644
--- a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkTest.java
@@ -26,7 +26,7 @@
 import org.junit.Test;
 import org.junit.runner.RunWith;
 
-public class ParseTreeChunkTest extends TestCase{
+public class ParseTreeChunkTest extends TestCase {
   private ParseTreeMatcherDeterministic parseTreeMatcher = new ParseTreeMatcherDeterministic();
   private ParseTreeChunk parseTreeChunk = new ParseTreeChunk();
   private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/SyntMatcherTest.java b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/SyntMatcherTest.java
index 0466b3c..129e36e 100644
--- a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/SyntMatcherTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/SyntMatcherTest.java
@@ -32,88 +32,97 @@
 
 public class SyntMatcherTest extends TestCase {
 
-	private ParserChunker2MatcherProcessor parserChunker2Matcher;
+  private ParserChunker2MatcherProcessor parserChunker2Matcher;
 
-	private ParseTreeChunk parseTreeChunk = new ParseTreeChunk();
+  private ParseTreeChunk parseTreeChunk = new ParseTreeChunk();
 
-	public void notNullTest() {
-		parserChunker2Matcher = ParserChunker2MatcherProcessor.getInstance();
-		assertNotNull(parserChunker2Matcher);
-	}
+  public void notNullTest() {
+    parserChunker2Matcher = ParserChunker2MatcherProcessor.getInstance();
+    assertNotNull(parserChunker2Matcher);
+  }
 
-	public void testMatch() {
-		parserChunker2Matcher = ParserChunker2MatcherProcessor.getInstance();
-		List<List<ParseTreeChunk>> matchResult = parserChunker2Matcher
-		.assessRelevance(
-				// "Can I get auto focus lens for digital camera",
-				// "How can I get short focus zoom lens for digital camera"
-				"Pulitzer Prize-Winning Reporter is an Illegal Immigrant",
-				"Gay Pulitzer Prize-Winning Reporter Jose Antonio Vargas Comes Out as Undocumented " +
-				"Immigrant Jose Antonio Vargas, a gay journalist who won a Pulitzer Prize " +
-		"for his coverage of the Virginia Tech shootings in the Washington Post")
-		.getMatchResult();
+  public void testMatch() {
+    parserChunker2Matcher = ParserChunker2MatcherProcessor.getInstance();
+    List<List<ParseTreeChunk>> matchResult = parserChunker2Matcher
+        .assessRelevance(
+            // "Can I get auto focus lens for digital camera",
+            // "How can I get short focus zoom lens for digital camera"
+            "Pulitzer Prize-Winning Reporter is an Illegal Immigrant",
+            "Gay Pulitzer Prize-Winning Reporter Jose Antonio Vargas Comes Out as Undocumented "
+                + "Immigrant Jose Antonio Vargas, a gay journalist who won a Pulitzer Prize "
+                + "for his coverage of the Virginia Tech shootings in the Washington Post")
+        .getMatchResult();
 
-		System.out.println(matchResult);
-		assertEquals( "[[ [NNP-pulitzer NNP-prize NNP-winning NNP-reporter ],  [NN-immigrant ]], []]",
-				matchResult.toString());
-		System.out.println(parseTreeChunk.listToString(matchResult));
-		assertEquals(" np [ [NNP-pulitzer NNP-prize NNP-winning NNP-reporter ],  [NN-immigrant ]]",
-				parseTreeChunk.listToString(matchResult));
+    System.out.println(matchResult);
+    assertEquals(
+        "[[ [NNP-pulitzer NNP-prize NNP-winning NNP-reporter ],  [NN-immigrant ]], []]",
+        matchResult.toString());
+    System.out.println(parseTreeChunk.listToString(matchResult));
+    assertEquals(
+        " np [ [NNP-pulitzer NNP-prize NNP-winning NNP-reporter ],  [NN-immigrant ]]",
+        parseTreeChunk.listToString(matchResult));
 
-		matchResult = parserChunker2Matcher
-		.assessRelevance(
-				"Sounds too good to be true but it actually is, the world's first flying car is finally here. ",
-				"While it may seem like something straight out of a sci-fi " +
-		"movie, the  flying  car  might soon become a reality. ").getMatchResult();
+    matchResult = parserChunker2Matcher
+        .assessRelevance(
+            "Sounds too good to be true but it actually is, the world's first flying car is finally here. ",
+            "While it may seem like something straight out of a sci-fi "
+                + "movie, the  flying  car  might soon become a reality. ")
+        .getMatchResult();
 
-		// TODO: possibly problem in new POS tagger from Parser
-		System.out.println(matchResult);
-		// was  "[[ [DT-the NN-* VBG-flying NN-car ]], []]"
-		assertEquals("[[ [PRP-it ],  [DT-the NN-* NNS-* ]], [ [DT-the NN-* NNS-* ]]]",
-				matchResult.toString()
-		);
-		System.out.println(parseTreeChunk.listToString(matchResult));
-		assertEquals( " np [ [PRP-it ],  [DT-the NN-* NNS-* ]] vp [ [DT-the NN-* NNS-* ]]",
-				parseTreeChunk.listToString(matchResult));
-		
-		parserChunker2Matcher.close();
+    // TODO: possibly problem in new POS tagger from Parser
+    System.out.println(matchResult);
+    // was "[[ [DT-the NN-* VBG-flying NN-car ]], []]"
+    assertEquals(
+        "[[ [PRP-it ],  [DT-the NN-* NNS-* ]], [ [DT-the NN-* NNS-* ]]]",
+        matchResult.toString());
+    System.out.println(parseTreeChunk.listToString(matchResult));
+    assertEquals(
+        " np [ [PRP-it ],  [DT-the NN-* NNS-* ]] vp [ [DT-the NN-* NNS-* ]]",
+        parseTreeChunk.listToString(matchResult));
 
-	}
+    parserChunker2Matcher.close();
 
+  }
 
-	public void testMatchDigitalCamera() {
-		parserChunker2Matcher = ParserChunker2MatcherProcessor.getInstance();
-		List<List<ParseTreeChunk>> matchResult = parserChunker2Matcher.assessRelevance(       
-				"I am curious how to use the digital zoom of this camera for filming insects",
-		"How can I get short focus zoom lens for digital camera").getMatchResult();
+  public void testMatchDigitalCamera() {
+    parserChunker2Matcher = ParserChunker2MatcherProcessor.getInstance();
+    List<List<ParseTreeChunk>> matchResult = parserChunker2Matcher
+        .assessRelevance(
+            "I am curious how to use the digital zoom of this camera for filming insects",
+            "How can I get short focus zoom lens for digital camera")
+        .getMatchResult();
 
-		System.out.println(matchResult);
-		assertEquals("[[ [PRP-i ],  [NN-zoom NN-camera ],  [JJ-digital NN-* ],  [NN-* IN-for ],  [NN-camera ]], [ [JJ-digital NN-* ],  [NN-zoom NN-camera ],  [NN-* IN-for ]]]",
-				matchResult.toString());
-		System.out.println(parseTreeChunk.listToString(matchResult));
-		assertEquals(" np [ [PRP-i ],  [NN-zoom NN-camera ],  [JJ-digital NN-* ],  [NN-* IN-for ],  [NN-camera ]] vp [ [JJ-digital NN-* ],  [NN-zoom NN-camera ],  [NN-* IN-for ]]",
-				parseTreeChunk.listToString(matchResult));
-		parserChunker2Matcher.close();
-	}
-	
-	
-	public void testHighSimilarity() {
-		parserChunker2Matcher = ParserChunker2MatcherProcessor.getInstance();
-		List<List<ParseTreeChunk>>  matchResult = parserChunker2Matcher.assessRelevance(
-				"Can I get auto focus lens for digital camera",
-		"How can I get short focus zoom lens for digital camera").getMatchResult();
+    System.out.println(matchResult);
+    assertEquals(
+        "[[ [PRP-i ],  [NN-zoom NN-camera ],  [JJ-digital NN-* ],  [NN-* IN-for ],  [NN-camera ]], [ [JJ-digital NN-* ],  [NN-zoom NN-camera ],  [NN-* IN-for ]]]",
+        matchResult.toString());
+    System.out.println(parseTreeChunk.listToString(matchResult));
+    assertEquals(
+        " np [ [PRP-i ],  [NN-zoom NN-camera ],  [JJ-digital NN-* ],  [NN-* IN-for ],  [NN-camera ]] vp [ [JJ-digital NN-* ],  [NN-zoom NN-camera ],  [NN-* IN-for ]]",
+        parseTreeChunk.listToString(matchResult));
+    parserChunker2Matcher.close();
+  }
 
-		System.out.println(matchResult);
-		assertEquals( "[[ [PRP-i ],  [NN-focus NNS-* NNS-lens IN-for JJ-digital NN-camera ],  [JJ-digital NN-camera ]], [ [VB-get NN-focus NNS-* NNS-lens IN-for JJ-digital NN-camera ]]]",
-				matchResult.toString());
-		System.out.println(parseTreeChunk.listToString(matchResult));
-		assertEquals(" np [ [PRP-i ],  [NN-focus NNS-* NNS-lens IN-for JJ-digital NN-camera ],  [JJ-digital NN-camera ]] vp [ [VB-get NN-focus NNS-* NNS-lens IN-for JJ-digital NN-camera ]]",
-				parseTreeChunk.listToString(matchResult) );
-		parserChunker2Matcher.close();
-		}
-	
-	 public void testZClose(){
-		 ParserChunker2MatcherProcessor.getInstance().close();
-	 }
+  public void testHighSimilarity() {
+    parserChunker2Matcher = ParserChunker2MatcherProcessor.getInstance();
+    List<List<ParseTreeChunk>> matchResult = parserChunker2Matcher
+        .assessRelevance("Can I get auto focus lens for digital camera",
+            "How can I get short focus zoom lens for digital camera")
+        .getMatchResult();
+
+    System.out.println(matchResult);
+    assertEquals(
+        "[[ [PRP-i ],  [NN-focus NNS-* NNS-lens IN-for JJ-digital NN-camera ],  [JJ-digital NN-camera ]], [ [VB-get NN-focus NNS-* NNS-lens IN-for JJ-digital NN-camera ]]]",
+        matchResult.toString());
+    System.out.println(parseTreeChunk.listToString(matchResult));
+    assertEquals(
+        " np [ [PRP-i ],  [NN-focus NNS-* NNS-lens IN-for JJ-digital NN-camera ],  [JJ-digital NN-camera ]] vp [ [VB-get NN-focus NNS-* NNS-lens IN-for JJ-digital NN-camera ]]",
+        parseTreeChunk.listToString(matchResult));
+    parserChunker2Matcher.close();
+  }
+
+  public void testZClose() {
+    ParserChunker2MatcherProcessor.getInstance().close();
+  }
 
 }
diff --git a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessorTest.java b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessorTest.java
index da45a96..4ff1b67 100644
--- a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessorTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessorTest.java
@@ -1,3 +1,20 @@
+/*

+

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

 package opennlp.tools.textsimilarity.chunker2matcher;

 

 import java.util.List;

@@ -8,95 +25,110 @@
 import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;

 import opennlp.tools.textsimilarity.TextSimilarityBagOfWords;

 

-public class ParserChunker2MatcherProcessorTest extends TestCase{

-	private ParserChunker2MatcherProcessor parser;

-	private TextSimilarityBagOfWords parserBOW = new TextSimilarityBagOfWords ();

-	private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

+public class ParserChunker2MatcherProcessorTest extends TestCase {

+  private ParserChunker2MatcherProcessor parser;

+  private TextSimilarityBagOfWords parserBOW = new TextSimilarityBagOfWords();

+  private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

 

-	public void testGroupedPhrasesFormer(){

-		parser = ParserChunker2MatcherProcessor.getInstance();

-		String text = "Where do I apply? Go to your town office or city hall. If your town doesn't have an office, ask the town clerk or a Selectman. Tell them that you need a 1040 tax form . I Can 't Pay the Taxes on my House: What Can I Do?. Pine Tree Legal";

+  public void testGroupedPhrasesFormer() {

+    parser = ParserChunker2MatcherProcessor.getInstance();

+    String text = "Where do I apply? Go to your town office or city hall. If your town doesn't have an office, ask the town clerk or a Selectman. Tell them that you need a 1040 tax form . I Can 't Pay the Taxes on my House: What Can I Do?. Pine Tree Legal";

 

+    List<List<ParseTreeChunk>> res = parser

+        .formGroupedPhrasesFromChunksForPara(text);

+    System.out.println(res);

+    assertEquals(

+        "[[NP [PRP$-your NN-town NN-office CC-or NN-city NN-hall ], NP [PRP$-your NN-town NN-doesn NN-t ], NP [DT-an NN-office ], NP [DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], NP [DT-a NNP-Selectman ], NP [PRP-them IN-that PRP-you ], NP [PRP-you ], NP [DT-a CD-1040 NN-tax NN-form ], NP [PRP-I ], NP [DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [WP-What MD-Can PRP-I ], NP [PRP-I ], NP [NNP-Pine NNP-Tree NNP-Legal ]], [VP [VBP-do RB-I VB-apply ], VP [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], VP [VBP-have DT-an NN-office ], VP [VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], VP [VB-Tell PRP-them IN-that PRP-you ], VP [VBP-need DT-a CD-1040 NN-tax NN-form ], VP [MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], VP [VB-Do ]], [PP [TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], PP [IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ]], [], [SENTENCE [WRB-Where VBP-do RB-I VB-apply ], SENTENCE [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], SENTENCE [IN-If PRP$-your NN-town NN-doesn NN-t VBP-have DT-an NN-office VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], SENTENCE [VB-Tell PRP-them IN-that PRP-you VBP-need DT-a CD-1040 NN-tax NN-form ], SENTENCE [PRP-I MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I VB-Do ], SENTENCE [NNP-Pine NNP-Tree NNP-Legal ]]]",

+        // "[[NP [PRP$-your NN-town NN-office CC-or NN-city NN-hall ], NP [PRP$-your NN-town NN-doesn NN-t ], NP [DT-an NN-office ], NP [DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], NP [DT-a NNP-Selectman ], NP [PRP-them IN-that PRP-you ], NP [PRP-you ], NP [DT-a CD-1040 NN-tax NN-form ], NP [PRP-I ], NP [DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [WP-What MD-Can PRP-I ], NP [PRP-I ], NP [NNP-Pine NNP-Tree NNP-Legal ]], [VP [VBP-do RB-I VB-apply ], VP [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], VP [VBP-have DT-an NN-office ], VP [VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], VP [VB-Tell PRP-them IN-that PRP-you ], VP [VBP-need DT-a CD-1040 NN-tax NN-form ], VP [MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], VP [VB-Do NNP-Pine NNP-Tree NNP-Legal ]], [PP [TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], PP [IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ]], [], [SENTENCE [WRB-Where VBP-do RB-I VB-apply ], SENTENCE [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], SENTENCE [IN-If PRP$-your NN-town NN-doesn NN-t VBP-have DT-an NN-office VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], SENTENCE [VB-Tell PRP-them IN-that PRP-you VBP-need DT-a CD-1040 NN-tax NN-form ], SENTENCE [PRP-I MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I VB-Do NNP-Pine NNP-Tree NNP-Legal ]]]",

+        res.toString());

 

+    res = parser

+        .formGroupedPhrasesFromChunksForSentence("How can I get short focus zoom lens for digital camera");

+    assertEquals(

+        "[[NP [PRP-I ], NP [JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ], NP [JJ-digital NN-camera ]], [VP [VB-get JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ]], [PP [IN-for JJ-digital NN-camera ]], [], [SENTENCE [WRB-How MD-can PRP-I VB-get JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ]]]",

+        res.toString());

 

-		List<List<ParseTreeChunk>> res = parser.formGroupedPhrasesFromChunksForPara(text);

-		System.out.println(res);

-		assertEquals(

-			"[[NP [PRP$-your NN-town NN-office CC-or NN-city NN-hall ], NP [PRP$-your NN-town NN-doesn NN-t ], NP [DT-an NN-office ], NP [DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], NP [DT-a NNP-Selectman ], NP [PRP-them IN-that PRP-you ], NP [PRP-you ], NP [DT-a CD-1040 NN-tax NN-form ], NP [PRP-I ], NP [DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [WP-What MD-Can PRP-I ], NP [PRP-I ], NP [NNP-Pine NNP-Tree NNP-Legal ]], [VP [VBP-do RB-I VB-apply ], VP [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], VP [VBP-have DT-an NN-office ], VP [VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], VP [VB-Tell PRP-them IN-that PRP-you ], VP [VBP-need DT-a CD-1040 NN-tax NN-form ], VP [MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], VP [VB-Do ]], [PP [TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], PP [IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ]], [], [SENTENCE [WRB-Where VBP-do RB-I VB-apply ], SENTENCE [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], SENTENCE [IN-If PRP$-your NN-town NN-doesn NN-t VBP-have DT-an NN-office VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], SENTENCE [VB-Tell PRP-them IN-that PRP-you VBP-need DT-a CD-1040 NN-tax NN-form ], SENTENCE [PRP-I MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I VB-Do ], SENTENCE [NNP-Pine NNP-Tree NNP-Legal ]]]",

-				//	"[[NP [PRP$-your NN-town NN-office CC-or NN-city NN-hall ], NP [PRP$-your NN-town NN-doesn NN-t ], NP [DT-an NN-office ], NP [DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], NP [DT-a NNP-Selectman ], NP [PRP-them IN-that PRP-you ], NP [PRP-you ], NP [DT-a CD-1040 NN-tax NN-form ], NP [PRP-I ], NP [DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [WP-What MD-Can PRP-I ], NP [PRP-I ], NP [NNP-Pine NNP-Tree NNP-Legal ]], [VP [VBP-do RB-I VB-apply ], VP [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], VP [VBP-have DT-an NN-office ], VP [VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], VP [VB-Tell PRP-them IN-that PRP-you ], VP [VBP-need DT-a CD-1040 NN-tax NN-form ], VP [MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], VP [VB-Do NNP-Pine NNP-Tree NNP-Legal ]], [PP [TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], PP [IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ]], [], [SENTENCE [WRB-Where VBP-do RB-I VB-apply ], SENTENCE [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], SENTENCE [IN-If PRP$-your NN-town NN-doesn NN-t VBP-have DT-an NN-office VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], SENTENCE [VB-Tell PRP-them IN-that PRP-you VBP-need DT-a CD-1040 NN-tax NN-form ], SENTENCE [PRP-I MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I VB-Do NNP-Pine NNP-Tree NNP-Legal ]]]",

-				res.toString());

+    res = parser

+        .formGroupedPhrasesFromChunksForSentence("Its classy design and the Mercedes name make it a very cool vehicle to drive. ");

+    assertEquals(

+        "[[NP [PRP$-Its JJ-classy NN-design CC-and DT-the NNP-Mercedes NN-name ], NP [DT-the NNP-Mercedes NN-name ], NP [PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ], NP [DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ], NP [NN-drive ]], [VP [VBP-make PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ]], [PP [TO-to NN-drive ]], [], [SENTENCE [PRP$-Its JJ-classy NN-design CC-and DT-the NNP-Mercedes NN-name VBP-make PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ]]]",

+        res.toString());

+    res = parser

+        .formGroupedPhrasesFromChunksForSentence("Sounds too good to be true but it actually is, the world's first flying car is finally here. ");

+    assertEquals(

+        "[[NP [PRP-it RB-actually ], NP [DT-the NN-world NNS-s JJ-first NN-flying NN-car ]], [VP [VBZ-Sounds RB-too JJ-good ], VP [TO-to VB-be JJ-true CC-but PRP-it RB-actually ], VP [VBZ-is DT-the NN-world NNS-s JJ-first NN-flying NN-car ], VP [VBZ-is RB-finally RB-here ]], [], [ADJP [RB-too JJ-good ], ADJP [JJ-true CC-but PRP-it RB-actually ]], [SENTENCE [VBZ-Sounds RB-too JJ-good TO-to VB-be JJ-true CC-but PRP-it RB-actually VBZ-is DT-the NN-world NNS-s JJ-first NN-flying NN-car VBZ-is RB-finally RB-here ]]]",

+        res.toString());

+    res = parser

+        .formGroupedPhrasesFromChunksForSentence("UN Ambassador Ron Prosor repeated the Israeli position that the only way the Palestinians will get UN membership and statehood is through direct negotiations with the Israelis on a comprehensive peace agreement");

+    assertEquals(

+        "[[NP [NNP-UN NNP-Ambassador NNP-Ron NNP-Prosor ], NP [DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], NP [DT-the JJ-only NN-way DT-the NNPS-Palestinians ], NP [DT-the NNPS-Palestinians ], NP [NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [DT-a JJ-comprehensive NN-peace NN-agreement ]], [VP [VBD-repeated DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], VP [MD-will VB-get IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]], [PP [IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], PP [IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]], [], [SENTENCE [NNP-UN NNP-Ambassador NNP-Ron NNP-Prosor VBD-repeated DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians MD-will VB-get IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]]]",

+        res.toString());

+    parser.close();

+  }

 

-		res = parser.formGroupedPhrasesFromChunksForSentence("How can I get short focus zoom lens for digital camera");

-		assertEquals(

-				"[[NP [PRP-I ], NP [JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ], NP [JJ-digital NN-camera ]], [VP [VB-get JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ]], [PP [IN-for JJ-digital NN-camera ]], [], [SENTENCE [WRB-How MD-can PRP-I VB-get JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ]]]", 

-				res.toString());

+  public void testPrintParseTree() {

+    parser = ParserChunker2MatcherProcessor.getInstance();

+    try {

+      parser

+          .printParseTree("How can I get short focus zoom lens for digital camera");

+    } catch (Exception e) {

+      // when models does not read

+    }

+    parser.close();

+  }

 

-		res = parser.formGroupedPhrasesFromChunksForSentence("Its classy design and the Mercedes name make it a very cool vehicle to drive. ");

-		assertEquals(

-				"[[NP [PRP$-Its JJ-classy NN-design CC-and DT-the NNP-Mercedes NN-name ], NP [DT-the NNP-Mercedes NN-name ], NP [PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ], NP [DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ], NP [NN-drive ]], [VP [VBP-make PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ]], [PP [TO-to NN-drive ]], [], [SENTENCE [PRP$-Its JJ-classy NN-design CC-and DT-the NNP-Mercedes NN-name VBP-make PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ]]]",

-				res.toString());

-		res = parser.formGroupedPhrasesFromChunksForSentence("Sounds too good to be true but it actually is, the world's first flying car is finally here. ");

-		assertEquals(

-				"[[NP [PRP-it RB-actually ], NP [DT-the NN-world NNS-s JJ-first NN-flying NN-car ]], [VP [VBZ-Sounds RB-too JJ-good ], VP [TO-to VB-be JJ-true CC-but PRP-it RB-actually ], VP [VBZ-is DT-the NN-world NNS-s JJ-first NN-flying NN-car ], VP [VBZ-is RB-finally RB-here ]], [], [ADJP [RB-too JJ-good ], ADJP [JJ-true CC-but PRP-it RB-actually ]], [SENTENCE [VBZ-Sounds RB-too JJ-good TO-to VB-be JJ-true CC-but PRP-it RB-actually VBZ-is DT-the NN-world NNS-s JJ-first NN-flying NN-car VBZ-is RB-finally RB-here ]]]",

-				res.toString());

-		res = parser.formGroupedPhrasesFromChunksForSentence("UN Ambassador Ron Prosor repeated the Israeli position that the only way the Palestinians will get UN membership and statehood is through direct negotiations with the Israelis on a comprehensive peace agreement");

-		assertEquals(

-				"[[NP [NNP-UN NNP-Ambassador NNP-Ron NNP-Prosor ], NP [DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], NP [DT-the JJ-only NN-way DT-the NNPS-Palestinians ], NP [DT-the NNPS-Palestinians ], NP [NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [DT-a JJ-comprehensive NN-peace NN-agreement ]], [VP [VBD-repeated DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], VP [MD-will VB-get IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]], [PP [IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], PP [IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]], [], [SENTENCE [NNP-UN NNP-Ambassador NNP-Ron NNP-Prosor VBD-repeated DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians MD-will VB-get IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]]]",

-				res.toString());

-		parser.close();

-	}

+  public void testRelevanceAssessm() {

+    parser = ParserChunker2MatcherProcessor.getInstance();

+    String phrase1 = "Its classy design and the Mercedes name make it a very cool vehicle to drive. "

+        + "The engine makes it a powerful car. "

+        + "The strong engine gives it enough power. "

+        + "The strong engine gives the car a lot of power.";

+    String phrase2 = "This car has a great engine. "

+        + "This car has an amazingly good engine. "

+        + "This car provides you a very good mileage.";

 

-	public void testPrintParseTree(){

-		parser = ParserChunker2MatcherProcessor.getInstance();

-		try {

-			parser.printParseTree("How can I get short focus zoom lens for digital camera");

-		} catch (Exception e) {

-			// when models does not read

-		}

-		parser.close();

-	}

+    System.out.println(parser.assessRelevance(phrase1, phrase2)

+        .getMatchResult());

+    parser.close();

 

-	public void testRelevanceAssessm(){

-		parser = ParserChunker2MatcherProcessor.getInstance();

-		String phrase1 = "Its classy design and the Mercedes name make it a very cool vehicle to drive. "

-			+ "The engine makes it a powerful car. "

-			+ "The strong engine gives it enough power. "

-			+ "The strong engine gives the car a lot of power.";

-		String phrase2 = "This car has a great engine. "

-			+ "This car has an amazingly good engine. "

-			+ "This car provides you a very good mileage.";

+  }

 

-		System.out.println(parser.assessRelevance(phrase1, phrase2).getMatchResult());

-		parser.close();

+  public void testCompareRelevanceAssessmWithBagOfWords() {

+    parser = ParserChunker2MatcherProcessor.getInstance();

+    // we first demonstrate how similarity expression for DIFFERENT cases have

+    // too high score for bagOfWords

+    String phrase1 = "How to deduct rental expense from income ";

+    String phrase2 = "How to deduct repair expense from rental income.";

+    List<List<ParseTreeChunk>> matchResult = parser.assessRelevance(phrase1,

+        phrase2).getMatchResult();

+    assertEquals(

+        matchResult.toString(),

+        "[[ [NN-expense IN-from NN-income ],  [JJ-rental NN-* ],  [NN-income ]], [ [TO-to VB-deduct JJ-rental NN-* ],  [VB-deduct NN-expense IN-from NN-income ]]]");

+    System.out.println(matchResult);

+    double matchScore = parseTreeChunkListScorer

+        .getParseTreeChunkListScore(matchResult);

+    double bagOfWordsScore = parserBOW.assessRelevanceAndGetScore(phrase1,

+        phrase2);

+    assertTrue(matchScore + 2 < bagOfWordsScore);

+    System.out.println("MatchScore is adequate ( = " + matchScore

+        + ") and bagOfWordsScore = " + bagOfWordsScore + " is too high");

 

-	}

+    // we now demonstrate how similarity can be captured by POS and cannot be

+    // captured by bagOfWords

+    phrase1 = "Way to minimize medical expense for my daughter";

+    phrase2 = "Means to deduct educational expense for my son";

+    matchResult = parser.assessRelevance(phrase1, phrase2).getMatchResult();

+    assertEquals(

+        matchResult.toString(),

+        "[[ [JJ-* NN-expense IN-for PRP$-my NN-* ],  [PRP$-my NN-* ]], [ [TO-to VB-* JJ-* NN-expense IN-for PRP$-my NN-* ]]]");

+    System.out.println(matchResult);

+    matchScore = parseTreeChunkListScorer

+        .getParseTreeChunkListScore(matchResult);

+    bagOfWordsScore = parserBOW.assessRelevanceAndGetScore(phrase1, phrase2);

+    assertTrue(matchScore > 2 * bagOfWordsScore);

+    System.out.println("MatchScore is adequate ( = " + matchScore

+        + ") and bagOfWordsScore = " + bagOfWordsScore + " is too low");

+    parser.close();

 

-	public void testCompareRelevanceAssessmWithBagOfWords(){

-		parser = ParserChunker2MatcherProcessor.getInstance();

-		// we first demonstrate how similarity expression for DIFFERENT cases have too high score for bagOfWords

-		String phrase1 = "How to deduct rental expense from income ";

-		String phrase2 = "How to deduct repair expense from rental income.";

-		List<List<ParseTreeChunk>> matchResult  = parser.assessRelevance(phrase1, phrase2).getMatchResult();

-		assertEquals(matchResult.toString(), 

-				"[[ [NN-expense IN-from NN-income ],  [JJ-rental NN-* ],  [NN-income ]], [ [TO-to VB-deduct JJ-rental NN-* ],  [VB-deduct NN-expense IN-from NN-income ]]]"); 

-		System.out.println(matchResult);

-		double matchScore = parseTreeChunkListScorer.getParseTreeChunkListScore(matchResult);

-		double bagOfWordsScore = parserBOW.assessRelevanceAndGetScore(phrase1, phrase2);

-		assertTrue(matchScore+2 < bagOfWordsScore);

-		System.out.println("MatchScore is adequate ( = "+matchScore + ") and bagOfWordsScore = "+bagOfWordsScore+" is too high");

-

-		// we now demonstrate how similarity can be captured by POS and cannot be captured by bagOfWords

-		phrase1 = "Way to minimize medical expense for my daughter";

-		phrase2 = "Means to deduct educational expense for my son";

-		matchResult  = parser.assessRelevance(phrase1, phrase2).getMatchResult();

-		assertEquals(matchResult.toString(), 

-			"[[ [JJ-* NN-expense IN-for PRP$-my NN-* ],  [PRP$-my NN-* ]], [ [TO-to VB-* JJ-* NN-expense IN-for PRP$-my NN-* ]]]"); 

-		System.out.println(matchResult);

-		matchScore = parseTreeChunkListScorer.getParseTreeChunkListScore(matchResult);

-		bagOfWordsScore = parserBOW.assessRelevanceAndGetScore(phrase1, phrase2);

-		assertTrue(matchScore > 2*bagOfWordsScore);

-		System.out.println("MatchScore is adequate ( = "+matchScore + ") and bagOfWordsScore = "+bagOfWordsScore+" is too low");

-		parser.close();

-

-	}

+  }

 }

diff --git a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNodeTest.java b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNodeTest.java
index bf0d963..62b2cf8 100644
--- a/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNodeTest.java
+++ b/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNodeTest.java
@@ -1,30 +1,49 @@
+/*

+

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

 package opennlp.tools.textsimilarity.chunker2matcher;

 

 import java.util.List;

 

 import junit.framework.TestCase;

 

-public class PhraseNodeTest extends TestCase{

-	ParserChunker2MatcherProcessor proc = ParserChunker2MatcherProcessor.getInstance();

-    public void testPOSTagsExtraction(){

-    	

-    	SentenceNode node  = proc.parseSentenceNode("How can I get there");

-    	

-		try {

-			List<String> pOSlist = node.getOrderedPOSList();

-			assertEquals("[WRB, MD, PRP, VB, RB]", pOSlist.toString());

-			

-			node  = proc.parseSentenceNode("where do I apply");

-			pOSlist = node.getOrderedPOSList();

-			assertEquals("[WRB, VBP, PRP, RB]", pOSlist.toString());

-			

-			// should NOT start with upper case! last tag is missing

-			node  = proc.parseSentenceNode("Where do I apply");

-			pOSlist = node.getOrderedPOSList();

-			assertEquals("[WRB, VBP, PRP]", pOSlist.toString());

-		} catch (Exception e) { // for run without models, where init fails

-			assertEquals(node, null);

-		}

+public class PhraseNodeTest extends TestCase {

+  ParserChunker2MatcherProcessor proc = ParserChunker2MatcherProcessor

+      .getInstance();

+

+  public void testPOSTagsExtraction() {

+

+    SentenceNode node = proc.parseSentenceNode("How can I get there");

+

+    try {

+      List<String> pOSlist = node.getOrderedPOSList();

+      assertEquals("[WRB, MD, PRP, VB, RB]", pOSlist.toString());

+

+      node = proc.parseSentenceNode("where do I apply");

+      pOSlist = node.getOrderedPOSList();

+      assertEquals("[WRB, VBP, PRP, RB]", pOSlist.toString());

+

+      // should NOT start with upper case! last tag is missing

+      node = proc.parseSentenceNode("Where do I apply");

+      pOSlist = node.getOrderedPOSList();

+      assertEquals("[WRB, VBP, PRP]", pOSlist.toString());

+    } catch (Exception e) { // for run without models, where init fails

+      assertEquals(node, null);

     }

-    	

+  }

+

 }