OPENNLP-628
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
new file mode 100644
index 0000000..b712847
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
@@ -0,0 +1,54 @@
+package opennlp.tools.apps.contentgen.multithreaded;

+

+import java.util.ArrayList;

+import java.util.List;

+

+import opennlp.tools.similarity.apps.BingQueryRunner;

+import opennlp.tools.similarity.apps.HitBase;

+

+public class BingWebQueryRunnerThread extends BingQueryRunner implements Runnable{

+	

+	private String query;

+	private List<HitBase> results= new ArrayList<HitBase>();

+	public BingWebQueryRunnerThread(String Query){

+		super();

+		this.query=Query;

+	}

+	public void run(){

+		results=runSearch(query);

+		fireMyEvent(new MyEvent(this));

+	}

+	public List<HitBase> getResults() {

+		return results;

+	}

+	

+	public String getQuery() {

+		return query;

+	}

+	

+	// Create the listener list

+    protected javax.swing.event.EventListenerList listenerList = new javax.swing.event.EventListenerList();

+    // This method allows classes to register for MyEvents

+

+    public void addMyEventListener(MyEventListener listener) {

+        listenerList.add(MyEventListener.class, listener);

+    }

+    // This method allows classes to unregister for MyEvents

+

+    public void removeMyEventListener(MyEventListener listener) {

+        listenerList.remove(MyEventListener.class, listener);

+    }

+

+    void fireMyEvent(MyEvent evt) {

+        Object[] listeners = listenerList.getListenerList();

+        // Each listener occupies two elements - the first is the listener class

+        // and the second is the listener instance

+        for (int i = 0; i < listeners.length; i += 2) {

+            if (listeners[i] == MyEventListener.class) {

+                ((MyEventListener) listeners[i + 1]).MyEvent(evt);

+            }

+        }

+    }

+	

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java
new file mode 100644
index 0000000..328d95c
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java
@@ -0,0 +1,88 @@
+package opennlp.tools.apps.contentgen.multithreaded;

+

+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;

+

+

+public class Fragment {

+	

+		public String resultText;      // result

+		public double score;

+		public String fragment; // original

+		public String sourceURL;

+

+		Fragment(String text, double score) {

+			this.resultText = text;

+			this.score = score;

+		}

+		

+			

+		public String getResultText() {

+			return resultText;

+		}

+

+		public void setResultText(String resultText) {

+			this.resultText = resultText;

+		}

+

+

+

+		public double getScore() {

+			return score;

+		}

+

+

+

+		public void setScore(double score) {

+			this.score = score;

+		}

+

+

+

+		public String getFragment() {

+			return fragment;

+		}

+

+

+

+		public void setFragment(String fragment) {

+			this.fragment = fragment;

+		}

+

+		

+

+		public String getSourceURL() {

+			return sourceURL;

+		}

+

+

+		public void setSourceURL(String sourceURL) {

+			this.sourceURL = sourceURL;

+		}

+

+

+		public String toString(){

+			return this.resultText;

+		}

+

+		@Override

+		public boolean equals(Object o) {

+			if (this == o) return true;

+			if (o == null || getClass() != o.getClass()) return false;

+

+			Fragment fragment = (Fragment) o;

+

+			if (resultText == null && fragment.resultText == null) {

+				return true;

+			} else if ((resultText == null && fragment.resultText != null) || (resultText != null && fragment.resultText == null)) {

+				return false;

+			}

+

+			StringDistanceMeasurer sdm = new StringDistanceMeasurer();

+			return sdm.measureStringDistance(resultText, fragment.resultText) > 0.8;

+		}

+

+		@Override

+		public int hashCode() {

+			return resultText != null ? resultText.hashCode() : 0;

+		}

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java
new file mode 100644
index 0000000..14e7daa
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java
@@ -0,0 +1,12 @@
+package opennlp.tools.apps.contentgen.multithreaded;

+

+import java.util.EventObject;

+

+public class MyEvent extends EventObject {

+

+	public MyEvent(Object arg0) {

+		super(arg0);

+		// TODO Auto-generated constructor stub

+	}

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java
new file mode 100644
index 0000000..ecdced4
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java
@@ -0,0 +1,8 @@
+package opennlp.tools.apps.contentgen.multithreaded;

+

+import java.util.EventListener;

+

+

+public interface MyEventListener extends EventListener{

+	public void MyEvent(MyEvent evt);

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html
new file mode 100644
index 0000000..1c5dfb2
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html
@@ -0,0 +1,37 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"

+"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">

+ 

+<html xmlns='http://www.w3.org/1999/xhtml'>

+   <head >

+      <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>

+      <title >Submit Your Essay Writing request here</title>

+   </head>

+<body>

+<h1>Submit Your Essay Writing request here / Envíe su solicitud de ensayo escrito aquí</h1>

+ 

+<form id='sampleform' method='post' action='http://173.255.254.250:8983/solr/contentgen/?resourceDir=/home/solr/solr-4.4.0/example/src/test/resources&workDir=/home/solr/solr-4.4.0/example/solr-webapp/webapp/WEB-INF/lib&relevanceThreshold=0.5&bingKey=e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=' >

+   <p>

+   Topic for your essay/Tema de su ensayo: <input type='text' name='q' value='albert einstein' size='35' maxlength='100'/>

+   </p>

+   <p>

+   Email to receive your essay/para recibir su ensayo: <input type='text' name='email' />

+   </p>

+   

+   <p>

+   Select language/seleccionar el idioma: <select name="lang" >

+   		<option value="en-US"> English</option>

+ 		<option value="es-US"> Español</option>

+ 		<option value="de-DE"> German</option>

+	</select>

+	</p>

+	<p>

+   Number of Bing calls to write this essay: <input type='text' name='stepsNum' value='20' size='5' maxlength='10'/>

+   Number of Bing search results for each call to use for writing: <input type='text' name='searchResultsNum' value='100' size='5' maxlength='10'/>

+   </p>

+<p>

+   <input type='submit' name='Submit' value='Submit/presentar' />

+   </p>

+</form>

+ 

+</body>

+</html>

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html
new file mode 100644
index 0000000..2fbf1c9
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html
@@ -0,0 +1,47 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"

+"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">

+ 

+<html xmlns='http://www.w3.org/1999/xhtml'>

+   <head >

+      <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>

+      <title >Submit Your Code Writing request here</title>

+   </head>

+<body>

+<h1>Submit Your Code Writing request here</h1>

+ 

+<form id='sampleform' method='post' action='http://173.255.254.250:8983/solr/nlprog2code/?' >

+   <p>

+   Write what you want your program to do in natural language <input type='text' name='line' value='define a class named ...' size='35' maxlength='120'/>

+   </p>

+    <p>

+    <input type='text' name='line' value='define a function taking a string s1 and an integer i2 ' size='35' maxlength='150'/>

+   </p>

+   <p>

+     <input type='text' name='line' size='35' maxlength='200'/>

+   </p>

+   <p>

+    <input type='text' name='line' size='35' maxlength='200'/>

+   </p>

+   <p>

+    <input type='text' name='line' size='35' maxlength='200'/>

+   </p>

+   <p>

+     <input type='text' name='line' size='35' maxlength='200'/>

+   </p>

+   <p>

+    <input type='text' name='line' size='35' maxlength='200'/>

+   </p>

+   <p>

+    <input type='text' name='line' size='35' maxlength='200'/>

+   </p>

+   <p>

+    <input type='text' name='line' size='35' maxlength='200'/>

+   </p>

+   

+<p>

+   <input type='submit' name='Submit' value='Submit' />

+   </p>

+</form>

+ 

+</body>

+</html>

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
new file mode 100644
index 0000000..45dadf9
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
@@ -0,0 +1,74 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+public interface POStags {
+	// added new POS types for infinitive phrase and participle phrase
+	public static final String TYPE_STP = "STP"; // infinitive phrase
+	public static final String TYPE_SGP = "SGP"; // present participle phrase
+	public static final String TYPE_SNP = "SNP"; // past participle phrase
+
+	// below are the standard POS types,
+	// http://bulba.sdsu.edu/jeanette/thesis/PennTags.html
+	public static final String TYPE_ADJP = "ADJP";
+	public static final String TYPE_ADVP = "ADVP";
+	public static final String TYPE_CC = "CC";
+	public static final String TYPE_CD = "CD";
+	public static final String TYPE_CONJP = "CONJP";
+	public static final String TYPE_DT = "DT";
+	public static final String TYPE_EX = "EX";
+	public static final String TYPE_FRAG = "FRAG";
+	public static final String TYPE_FW = "FW";
+	public static final String TYPE_IN = "IN";
+	public static final String TYPE_INTJ = "INTJ";
+	public static final String TYPE_JJ = "JJ";
+	public static final String TYPE_JJR = "JJR";
+	public static final String TYPE_JJS = "JJS";
+	public static final String TYPE_LS = "LS";
+	public static final String TYPE_LST = "LST";
+	public static final String TYPE_MD = "MD";
+	public static final String TYPE_NAC = "NAC";
+	public static final String TYPE_NN = "NN";
+	public static final String TYPE_NNS = "NNS";
+	public static final String TYPE_NNP = "NNP";
+	public static final String TYPE_NNPS = "NNPS";
+	public static final String TYPE_NP = "NP";
+	public static final String TYPE_NX = "NX";
+	public static final String TYPE_PDT = "PDT";
+	public static final String TYPE_POS = "POS";
+	public static final String TYPE_PP = "PP";
+	public static final String TYPE_PRN = "PRN";
+	public static final String TYPE_PRP = "PRP";
+	public static final String TYPE_PRP$ = "PRP$";
+	public static final String TYPE_PRT = "PRT";
+	public static final String TYPE_QP = "QP";
+	public static final String TYPE_RB = "RB";
+	public static final String TYPE_RBR = "RBR";
+	public static final String TYPE_RBS = "RBS";
+	public static final String TYPE_RP = "RP";
+	public static final String TYPE_RRC = "RRC";
+	public static final String TYPE_S = "S";
+	public static final String TYPE_SBAR = "SBAR";
+	public static final String TYPE_SBARQ = "SBARQ";
+	public static final String TYPE_SINV = "SINV";
+	public static final String TYPE_SQ = "SQ";
+	public static final String TYPE_SYM = "SYM";
+	public static final String TYPE_TO = "TO";
+	public static final String TYPE_TOP = "TOP";
+	public static final String TYPE_UCP = "UCP";
+	public static final String TYPE_UH = "UH";
+	public static final String TYPE_VB = "VB";
+	public static final String TYPE_VBD = "VBD";
+	public static final String TYPE_VBG = "VBG";
+	public static final String TYPE_VBN = "VBN";
+	public static final String TYPE_VBP = "VBP";
+	public static final String TYPE_VBZ = "VBZ";
+	public static final String TYPE_VP = "VP";
+	public static final String TYPE_WDT = "WDT";
+	public static final String TYPE_WHADJP = "WHADJP";
+	public static final String TYPE_WHADVP = "WHADVP";
+	public static final String TYPE_WHNP = "WHNP";
+	public static final String TYPE_WHPP = "WHPP";
+	public static final String TYPE_WP = "WP";
+	public static final String TYPE_WP$ = "WP$";
+	public static final String TYPE_WRB = "WRB";
+	public static final String TYPE_X = "X";
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
new file mode 100644
index 0000000..ae2772b
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
@@ -0,0 +1,215 @@
+package opennlp.tools.apps.relevanceVocabs;

+

+import java.util.ArrayList;

+import java.util.Comparator;

+import java.util.List;

+

+import org.apache.commons.lang.StringUtils;

+

+import opennlp.tools.parser.Parse;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+import opennlp.tools.textsimilarity.TextProcessor;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+import opennlp.tools.util.Span;

+

+public class PhraseProcessor {

+	

+	private ParserChunker2MatcherProcessor nlProc = ParserChunker2MatcherProcessor.getInstance() ;

+	

+	public static boolean allChildNodesArePOSTags(Parse p)

+	{

+		Parse[] subParses = p.getChildren();

+		for (int pi = 0; pi < subParses.length; pi++)

+			if (!((Parse) subParses[pi]).isPosTag())

+				return false;

+		return true;

+	}

+	

+	public ArrayList<String> getNounPhrases(Parse p)

+	{

+		ArrayList<String> nounphrases = new ArrayList<String>();

+

+		Parse[] subparses = p.getChildren();

+		for (int pi = 0; pi < subparses.length; pi++)

+		{

+

+			if (subparses[pi].getType().equals("NP") && allChildNodesArePOSTags(subparses[pi]))

+			{

+				Span _span = subparses[pi].getSpan();

+				nounphrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));

+			}

+			else if (!((Parse) subparses[pi]).isPosTag())

+				nounphrases.addAll(getNounPhrases(subparses[pi]));

+		}

+

+		return nounphrases;

+	}

+	

+	public ArrayList<String> getVerbPhrases(Parse p)

+	{

+		ArrayList<String> verbPhrases = new ArrayList<String>();

+

+		Parse[] subparses = p.getChildren();

+		for (int pi = 0; pi < subparses.length; pi++)

+		{

+

+			if (subparses[pi].getType().startsWith("VB") && allChildNodesArePOSTags(subparses[pi]))

+			{

+				Span _span = subparses[pi].getSpan();

+				verbPhrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));

+			}

+			else if (!((Parse) subparses[pi]).isPosTag())

+				verbPhrases.addAll(getNounPhrases(subparses[pi]));

+		}

+

+		return verbPhrases;

+	}

+	

+	// forms phrases from text which are candidate expressions for events lookup

+			public List<ParseTreeChunk> getVerbPhrases(String sentence) {

+				if (sentence==null)

+					return null;

+				if (sentence.split(" ").length ==1) { // this is a word, return empty

+					//queryArrayStr.add( sentence);

+					return null;

+				}

+				if (sentence.length()>100)

+					return null ; // too long of a sentence to parse

+				

+				System.out.println("About to parse: "+sentence);

+				List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 

+				if (groupedChunks.size()<1)

+					return null;

+

+				List<ParseTreeChunk> vPhrases = groupedChunks.get(1);

+				

+				return vPhrases;

+			}

+

+			public List<List<ParseTreeChunk>> getPhrasesOfAllTypes(String sentence) {

+				if (sentence==null)

+					return null;

+				if (sentence.split(" ").length ==1) { // this is a word, return empty

+					//queryArrayStr.add( sentence);

+					return null;

+				}

+				if (sentence.length()>200)

+					return null ; // too long of a sentence to parse

+				

+				System.out.println("About to parse: "+sentence);

+				List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 

+				if (groupedChunks.size()<1)

+					return null;

+

+				return groupedChunks;

+			}

+	

+	// forms phrases from text which are candidate expressions for events lookup

+		public List<String> extractNounPhraseProductNameCandidate(String sentence) {

+			

+			List<String> queryArrayStr = new ArrayList<String>();

+			

+			if (sentence.split(" ").length ==1) { // this is a word, return empty

+				//queryArrayStr.add( sentence);

+				return queryArrayStr;

+			}

+			String quoted1 = StringUtils.substringBetween(sentence, "\"", "\"");

+			String quoted2 = StringUtils.substringBetween(sentence, "\'", "\'");

+			List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 

+			if (groupedChunks.size()<1)

+				return queryArrayStr;

+

+			List<ParseTreeChunk> nPhrases = groupedChunks.get(0);

+

+			for (ParseTreeChunk ch : nPhrases) {

+				String query = "";

+				int size = ch.getLemmas().size();

+				boolean phraseBeingFormed = false;

+				for (int i = 0; i < size; i++) {

+					if ((ch.getPOSs().get(i).startsWith("N") || ch.getPOSs().get(i)

+							.startsWith("J") || ch.getPOSs().get(i).startsWith("CD") ) )

+					//		&& StringUtils.isAlpha(ch.getLemmas().get(i)))

+					{

+						query += ch.getLemmas().get(i) + " ";

+						phraseBeingFormed = true;

+					} else 

+						if ((ch.getPOSs().get(i).startsWith("PR") || ch.getPOSs().get(i).startsWith("IN") || ch.getPOSs().get(i).startsWith("TO")  ) 

+								&& phraseBeingFormed )

+							break;

+						else if (ch.getPOSs().get(i).startsWith("DT") || ch.getPOSs().get(i).startsWith("CC"))

+						continue;

+				}

+				query = query.trim();

+				int len = query.split(" ").length;

+				if (len > 5 || len < 2) // too long or too short

+					continue;

+				

+	/*				

+				if (len < 4 && len>1) { // every word should start with capital

+					String[] qs = query.split(" ");

+					boolean bAccept = true;

+					for (String w : qs) {

+					if (w.toLowerCase().equals(w)) // if only two words then

+														// has to be person name,

+														// title or geo

+														// location

+							bAccept = false;

+					}

+					if (!bAccept)

+						continue;

+				}

+		*/		

+				 // individual word, possibly a frequent word

+				// if len==1 do nothing

+

+				query = query.trim();

+				queryArrayStr.add(query);

+

+			}

+	/*		

+			if (queryArrayStr.size() < 1) { // release constraints on NP down to 2

+											// keywords

+				for (ParseTreeChunk ch : nPhrases) {

+					String query = "";

+					int size = ch.getLemmas().size();

+

+					for (int i = 0; i < size; i++) {

+						if (ch.getPOSs().get(i).startsWith("N")

+								|| ch.getPOSs().get(i).startsWith("J")) {

+							query += ch.getLemmas().get(i) + " ";

+						}

+					}

+					query = query.trim();

+					int len = query.split(" ").length;

+					if (len < 2)

+						continue;

+

+					query = TextProcessor.fastTokenize(query.toLowerCase(), false)

+							.toString().replace('[', ' ').replace(']', ' ').trim();

+					if (query.length() > 6)

+						queryArrayStr.add(query);

+				}

+			}

+			//queryArrayStr = Utils

+			//		.removeDuplicatesFromQueries(queryArrayStr);

+			if (quoted1 != null

+					&& ((quoted1.length() > 5 && !stopList.isCommonWord(quoted1)) || quoted1

+							.length() > 10))

+				queryArrayStr.add(quoted1);

+			if (quoted2 != null

+					&& ((quoted2.length() > 5 && !stopList.isCommonWord(quoted2)) || quoted2

+							.length() > 10))

+				queryArrayStr.add(quoted2);

+		*/	return queryArrayStr;

+		}

+		

+

+	

+		

+		public static void main(String[] args){

+			String sent = "Appliances and Kitchen Gadgets - CNET Blogs";

+					//"The tablet phenomenon turns Silicon Valley upside down - SiliconValley.com";

+			List<String> res = new PhraseProcessor().extractNounPhraseProductNameCandidate(sent);

+			System.out.println(res);

+		}

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
new file mode 100644
index 0000000..150b3df
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
@@ -0,0 +1,199 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+
+public class SentimentVocab {
+	private static final String[] POSITIVE_ADJECTTIVE_LIST = { "accessible",
+			"advanced", "affordable", "amazing", "awesome", "beautiful",
+			"brilliant", "capable", "classic", "clear", "comfortable",
+			"convenient", "cool", "courteous", "cute", "decent", "delight",
+			"easy", "elegant", "enjoyable", "enough", "excellent",
+			"exceptional", "fabulous", "fancy", "fantastic", "fast",
+			"favorable", "fine", "friendly", "fun", "good", "great", "handy",
+			"happy", "hefty", "helpful", "high", "immaculate", "impressive",
+			"incredible", "interesting", "jealous", "lovely", "lucky",
+			"luxurious", "marvelous", "maximum", "memorable", "neat", "nice",
+			"outstanding", "perfect", "pleasant", "positive", "pretty",
+			"powerful", "quiet", "reasonable", "remarkable", "right", "safe",
+			"silky", "sleek", "slick", "stylish", "suitable", "superb",
+			"tasteful", "terrific", "top", "unbelievable", "useful",
+			"welcoming", "wonderful", "worthwhile" };
+
+	private static final String[] NEGATIVE_ADJECTTIVE_LIST = { "angry",
+			"annoyed", "annoying", "anxious", "arrogant", "ashamed", "awful",
+			"bad", "bored", "boring", "broke", "broken", "clumsy",
+			"complicate", "complicated", "confused", "cranky", "crazy",
+			"cumbersome", "defective", "depressed", "dead", "depressing",
+			"difficult", "dirty", "disappointed", "disappointing", "disgusted",
+			"disgusting", "disheartened", "disheartening", "dissatisfactory",
+			"dissatisfying", "distant", "disturbed", "dizzy", "doubtful",
+			"down", "drab", "dull", "dysfunctional", "embarrassed", "evil",
+			"exhausted", "fatal", "filthy", "flawed", "fragile", "frightened",
+			"frustrating", "goofy", "grieving", "hard", "horrific",
+			"horrifying", "harsh", "horrible", "impossible", "inconvenient",
+			"insane", "lack", "lacking", "lazy", "leaking", "leaky", "lonely",
+			"low", "mediocre", "messy", "mysterious", "nasty", "naughty",
+			"negative", "noisy", "nonclean", "nutty", "outdated", "outrageous",
+			"over priced", "pathetic", "poor", "premature", "pricey", "pricy",
+			"problematic", "putrid", "puzzled", "rickety", "ridiculous",
+			"ripped off", "rugged", "slow", "stinky", "strange", "stupid",
+			"sweaty", "tedious", "terrible", "tired", "tough", "toxic",
+			"trubled", "ugly", "unbearable", "unclean", "uncomfortable",
+			"unfortunate", "unhelpful", "uninviting", "unpleasent",
+			"unsanitary", "upseting", "unusable", "weird", "worn", "worn down",
+			"wretched", "wrong" };
+
+	private static final String[] POSITIVE_ADVERB_LIST = { "absolutely",
+			"amazingly", "completely", "definitely", "easily", "fairly",
+			"highly", "immensely", "incredibly", "nicely", "really", "rich",
+			"simply", "surprisingly", "tastefully", "totally", "truly", "very",
+			"well" };
+
+	private static final String[] NEGATIVE_ADVERB_LIST = { "badly",
+			"deceptfully", "down", "horribly", "oddly", "pathetically",
+			"terribly", "too", "unfortunately" };
+
+	private static final String[] POSITIVE_NOUN_LIST = { "ability", "benefit",
+			"character", "charm", "comfort", "discount", "dream", "elegance",
+			"favourite", "feature", "improvement", "luck", "luxury", "offer",
+			"pro", "quality", "requirement", "usability" };
+
+	private static final String[] NEGATIVE_NOUN_LIST = { "blocker",
+			"challenge", "complain", "complaint", "compromise", "con",
+			"concern", "crap", "disappointment", "disillusion", "doubt",
+			"downside", "drawback", "embarrassment", "error", "failure",
+			"fault", "garbage", "glitch", "inability", "issue", "junk",
+			"long line", "malfunction", "mess", "mistake", "nightmare",
+			"noise", "odor", "pain", "pitfall", "problem", "rip off", "roach",
+			"rude", "sacrifice", "shame", "shock", "stain", "threat",
+			"trouble", "urine", "worry" };
+
+	private static final String[] POSITIVE_VERB_LIST = { "admire", "amaze",
+			"assist", "disgust", "enjoy", "help", "guarantee", "impress",
+			"improve", "like", "love", "patronize", "prefer", "recommend",
+			"want" };
+
+	private static final String[] NEGATIVE_VERB_LIST = { "annoy", "appall",
+			"break", "complain", "confuse", "depress", "disappoint",
+			"dishearten", "dislike", "dissatisfy", "embarrass", "fail", "fear",
+			"flaw", "frustrate", "hate", "ruin", "scare", "stink", "suck",
+			"think twice", "thwart", "upset", "vomit" };
+
+	public static final int SENTIMENT_POSITIVE = 1;
+	public static final int SENTIMENT_UNKNOWN = 0;
+	public static final int SENTIMENT_NEGATIVE = -1;
+
+	private static SentimentVocab instance = new SentimentVocab();
+
+	// complete sentiment word map, key = word, value = sentiment object
+	private Map<String, Sentiment> sentimentMap = new HashMap<String, Sentiment>();
+
+	// sentiment word sets, key = POS type, value = word set
+	private Map<String, HashSet<String>> wordSetMap = new HashMap<String, HashSet<String>>();
+
+	public static class Sentiment {
+		public String posType;
+		public int sentimentType;
+
+		Sentiment(String posType, int sentimentType) {
+			this.posType = posType;
+			this.sentimentType = sentimentType;
+		}
+	}
+
+	public static SentimentVocab getInstance() {
+		return instance;
+	}
+
+	public Sentiment getSentiment(String word) {
+		if (word == null)
+			return null;
+
+		// get the normalized form of the word
+		//word = WordDictionary.getInstance().getLemmaOrWord(word);
+
+		return sentimentMap.get(word);
+	}
+
+	public Sentiment getSentiment(String word, String posType) {
+		if (word == null)
+			return null;
+
+		// get the normalized form of the word
+		word = WordDictionary.getInstance().getLemmaOrWord(word, posType);
+
+		return sentimentMap.get(word);
+	}
+
+	public boolean isSentimentWord(String word) {
+		return (getSentiment(word) != null);
+	}
+
+	public boolean isSentimentWord(String word, String posType) {
+		Sentiment sentiment = getSentiment(word, posType);
+		if (sentiment == null)
+			return false;
+
+		return sentiment.posType == posType;
+	}
+
+	public HashSet<String> getSentimentWordSet(String posType) {
+		if (posType == null)
+			return null;
+
+		return wordSetMap.get(posType);
+	}
+
+	public static String getSentimentName(int sentimentType) {
+		switch (sentimentType) {
+		case SENTIMENT_POSITIVE:
+			return "positive";
+		case SENTIMENT_NEGATIVE:
+			return "negative";
+		default:
+			return "unknown";
+		}
+	}
+
+	private SentimentVocab() {
+		// populate the sentiment map
+		addWordsToSentimentMap(POSITIVE_ADJECTTIVE_LIST,
+				POStags.TYPE_JJ, SENTIMENT_POSITIVE);
+		addWordsToSentimentMap(NEGATIVE_ADJECTTIVE_LIST,
+				POStags.TYPE_JJ, SENTIMENT_NEGATIVE);
+		addWordsToSentimentMap(POSITIVE_ADVERB_LIST, POStags.TYPE_RB,
+				SENTIMENT_POSITIVE);
+		addWordsToSentimentMap(NEGATIVE_ADVERB_LIST, POStags.TYPE_RB,
+				SENTIMENT_NEGATIVE);
+		addWordsToSentimentMap(POSITIVE_NOUN_LIST, POStags.TYPE_NN,
+				SENTIMENT_POSITIVE);
+		addWordsToSentimentMap(NEGATIVE_NOUN_LIST, POStags.TYPE_NN,
+				SENTIMENT_NEGATIVE);
+		addWordsToSentimentMap(POSITIVE_VERB_LIST, POStags.TYPE_VB,
+				SENTIMENT_POSITIVE);
+		addWordsToSentimentMap(NEGATIVE_VERB_LIST, POStags.TYPE_VB,
+				SENTIMENT_NEGATIVE);
+	}
+
+	private void addWordsToSentimentMap(String[] words, String posType,
+			int sentimentType) {
+
+		// add the word to the complete sentiment word map
+		for (String word : words) {
+			sentimentMap.put(word, new Sentiment(posType, sentimentType));
+		}
+
+		// add the word to the corresponding sentiment word set
+		HashSet<String> wordSet = wordSetMap.get(posType);
+		if (wordSet == null) {
+			wordSet = new HashSet<String>();
+			wordSetMap.put(posType, wordSet);
+		}
+		for (String word : words) {
+			wordSet.add(word);
+		}
+	}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
new file mode 100644
index 0000000..7c12c9a
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
@@ -0,0 +1,88 @@
+package opennlp.tools.apps.relevanceVocabs;

+

+import java.io.BufferedReader;

+import java.io.File;

+import java.io.FileInputStream;

+import java.io.FileNotFoundException;

+import java.io.FileOutputStream;

+import java.io.FileReader;

+import java.io.IOException;

+import java.io.InputStreamReader;

+import java.io.ObjectInputStream;

+import java.io.ObjectOutputStream;

+import java.io.Serializable;

+import java.net.URL;

+import java.net.URLConnection;

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.HashMap;

+import java.util.List;

+import java.util.Map;

+

+import org.slf4j.Logger;

+import org.slf4j.LoggerFactory;

+

+

+

+public class SynonymListFilter {

+	SynonymMap map=null;

+	

+	public SynonymListFilter(String dir){

+		dir = dir.replace("maps/analytics","");

+		try {

+			map = new SynonymMap( new FileInputStream(dir+"wn_s.pl"));

+		} catch (FileNotFoundException e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		} catch (IOException e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+	}

+	

+	protected static Map<String, List<String>> filteredKeyword_synonyms = new HashMap<String, List<String>>();

+

+	static public List<String> getFileLines(File aFile) {

+

+		List<String> items = new ArrayList<String>();

+

+		StringBuilder contents = new StringBuilder();		    

+		try {

+

+			BufferedReader input =  new BufferedReader(new FileReader(aFile));

+			try {

+				String line = null; //not declared within while loop

+				while (( line = input.readLine()) != null){

+					int endOfWord = line.indexOf(';');

+					if (endOfWord>2)

+						line = line.substring(1, endOfWord -1 );

+

+					items.add(line);

+

+				}

+			}

+			finally {

+				input.close();

+			}

+		}

+		catch (IOException ex){

+			ex.printStackTrace();

+		}

+

+		return items;

+	}

+	public String getSynonym (String word){

+			String[] synonyms = map.getSynonyms(word);

+			if (synonyms==null || synonyms.length<1)

+				return null;

+			int index = (int) Math.floor(Math.random()*(double)synonyms.length);

+			System.out.println("Found synonyms "+Arrays.asList(synonyms).toString()+ " | selected synonym = "+synonyms[index] +" | for the input = "+ word);

+			return synonyms[index];

+			

+	}	

+	public static void main(String[] args){

+		SynonymListFilter filter = new  SynonymListFilter("/src/test/resources");

+		String syn = filter.getSynonym("bring");

+		syn = filter.getSynonym("yell");

+	}

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
new file mode 100644
index 0000000..804fc2b
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
@@ -0,0 +1,379 @@
+package opennlp.tools.apps.relevanceVocabs;

+

+import java.io.IOException;

+  import java.io.InputStream;

+   import java.nio.ByteBuffer;

+   import java.nio.charset.Charset;

+   import java.util.ArrayList;

+   import java.util.Arrays;

+   import java.util.HashMap;

+   import java.util.Iterator;

+   import java.util.Map;

+   import java.util.TreeMap;

+   import java.util.TreeSet;

+   

+   /**

+    * Loads the <a target="_blank" 

+    * href="http://www.cogsci.princeton.edu/~wn/">WordNet </a> prolog file <a

+    * href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">wn_s.pl </a>

+    * into a thread-safe main-memory hash map that can be used for fast

+    * high-frequency lookups of synonyms for any given (lowercase) word string.

+    * <p>

+    * There holds: If B is a synonym for A (A -> B) then A is also a synonym for B (B -> A).

+    * There does not necessarily hold: A -> B, B -> C then A -> C.

+    * <p>

+    * Loading typically takes some 1.5 secs, so should be done only once per

+    * (server) program execution, using a singleton pattern. Once loaded, a

+    * synonym lookup via {@link #getSynonyms(String)}takes constant time O(1).

+    * A loaded default synonym map consumes about 10 MB main memory.

+    * An instance is immutable, hence thread-safe.

+    * <p>

+    * This implementation borrows some ideas from the Lucene Syns2Index demo that 

+    * Dave Spencer originally contributed to Lucene. Dave's approach

+    * involved a persistent Lucene index which is suitable for occasional

+    * lookups or very large synonym tables, but considered unsuitable for 

+    * high-frequency lookups of medium size synonym tables.

+    * <p>

+    * Example Usage:

+    * <pre>

+    * String[] words = new String[] { "hard", "woods", "forest", "wolfish", "xxxx"};

+    * SynonymMap map = new SynonymMap(new FileInputStream("samples/fulltext/wn_s.pl"));

+    * for (int i = 0; i &lt; words.length; i++) {

+    *     String[] synonyms = map.getSynonyms(words[i]);

+    *     System.out.println(words[i] + ":" + java.util.Arrays.asList(synonyms).toString());

+    * }

+    * 

+    * Example output:

+    * hard:[arduous, backbreaking, difficult, fermented, firmly, grueling, gruelling, heavily, heavy, intemperately, knockout, laborious, punishing, severe, severely, strong, toilsome, tough]

+    * woods:[forest, wood]

+   * forest:[afforest, timber, timberland, wood, woodland, woods]

+    * wolfish:[edacious, esurient, rapacious, ravening, ravenous, voracious, wolflike]

+    * xxxx:[]

+    * </pre>

+    *

+    * @see <a target="_blank"

+    *      href="http://www.cogsci.princeton.edu/~wn/man/prologdb.5WN.html">prologdb

+    *      man page </a>

+    * @see <a target="_blank" href="http://www.hostmon.com/rfc/advanced.jsp">Dave's synonym demo site</a>

+    */

public class SynonymMap {

  /** the index data; Map<String word, String[] synonyms> */
  private final HashMap<String,String[]> table;

  /** shared empty result, returned for unknown words */
  private static final String[] EMPTY = new String[0];

  private static final boolean DEBUG = false;

  /**
   * Constructs an instance, loading WordNet synonym data from the given input
   * stream. Finally closes the stream. The words in the stream must be in
   * UTF-8 or a compatible subset (for example ASCII, MacRoman, etc.).
   *
   * @param input
   *            the stream to read from (null indicates an empty synonym map)
   * @throws IOException
   *             if an error occurred while reading the stream.
   */
  public SynonymMap(InputStream input) throws IOException {
    this.table = input == null ? new HashMap<String,String[]>(0) : read(toByteArray(input));
  }

  /**
   * Returns the synonym set for the given word, sorted ascending.
   *
   * @param word
   *            the word to lookup (must be in lowercase).
   * @return the synonyms; a set of zero or more words, sorted ascending, each
   *         word containing lowercase characters that satisfy
   *         <code>Character.isLetter()</code>.
   */
  public String[] getSynonyms(String word) {
    String[] synonyms = table.get(word);
    if (synonyms == null) return EMPTY;
    // defensive copy so callers cannot mutate the shared index arrays
    return Arrays.copyOf(synonyms, synonyms.length);
  }

  /**
   * Returns a String representation of the index data for debugging purposes.
   *
   * @return a String representation
   */
  @Override
  public String toString() {
    StringBuilder buf = new StringBuilder();
    Iterator<String> iter = new TreeMap<String,String[]>(table).keySet().iterator();
    int count = 0;
    // histogram of synonym-set sizes 0..3
    int f0 = 0;
    int f1 = 0;
    int f2 = 0;
    int f3 = 0;

    while (iter.hasNext()) {
      String word = iter.next();
      buf.append(word + ":");
      String[] synonyms = getSynonyms(word);
      buf.append(Arrays.asList(synonyms));
      buf.append("\n");
      count += synonyms.length;
      if (synonyms.length == 0) f0++;
      if (synonyms.length == 1) f1++;
      if (synonyms.length == 2) f2++;
      if (synonyms.length == 3) f3++;
    }

    buf.append("\n\nkeys=" + table.size() + ", synonyms=" + count + ", f0=" + f0 +", f1=" + f1 + ", f2=" + f2 + ", f3=" + f3);
    return buf.toString();
  }

  /**
   * Analyzes/transforms the given word on input stream loading. This default
   * implementation simply lowercases the word. Override this method with a
   * custom stemming algorithm or similar, if desired.
   *
   * @param word
   *            the word to analyze
   * @return the same word, or a different word (or null to indicate that the
   *         word should be ignored)
   */
  protected String analyze(String word) {
    return word.toLowerCase();
  }

  /** Returns true iff every character of the string is a letter. */
  private static boolean isValid(String str) {
    for (int i=str.length(); --i >= 0; ) {
      if (!Character.isLetter(str.charAt(i))) return false;
    }
    return true;
  }

  /**
   * Parses the raw bytes of a WordNet prolog wn_s.pl file into the index.
   * Each relevant line looks like: s(synset_id,w_num,'word',ss_type,...).
   */
  private HashMap<String,String[]> read(byte[] data) {
    int WORDS  = (int) (76401 / 0.7); // presizing
    int GROUPS = (int) (88022 / 0.7); // presizing
    // Map<String word, int[] groups>
    HashMap<String,ArrayList<Integer>> word2Groups = new HashMap<String,ArrayList<Integer>>(WORDS);
    // Map<int group, String[] words>
    HashMap<Integer,ArrayList<String>> group2Words = new HashMap<Integer,ArrayList<String>>(GROUPS);
    // Map<String word, String word> — manual app-level interning
    HashMap<String,String> internedWords = new HashMap<String,String>(WORDS);

    Charset charset = Charset.forName("UTF-8");
    int lastNum = -1;
    Integer lastGroup = null;
    int len = data.length;
    int i=0;

    while (i < len) { // until EOF
      /* Part A: Parse a line */

      // scan to beginning of group
      while (i < len && data[i] != '(') i++;
      if (i >= len) break; // EOF
      i++;

      // parse group id (assumes ASCII digits up to the next ',')
      int num = 0;
      while (i < len && data[i] != ',') {
        num = 10*num + (data[i] - 48);
        i++;
      }
      i++;

      // scan to beginning of word (opening single quote)
      while (i < len && data[i] != '\'') i++;
      i++;

      // scan to end of word; word must end with "',"
      int start = i;
      do {
        while (i < len && data[i] != '\'') i++;
        i++;
      } while (i < len && data[i] != ',');

      if (i >= len) break; // EOF
      String word = charset.decode(ByteBuffer.wrap(data, start, i-start-1)).toString();

      /*
       * Part B: ignore phrases (with spaces and hyphens) and
       * non-alphabetic words, and let user customize word (e.g. do some
       * stemming)
       */
      if (!isValid(word)) continue; // ignore
      word = analyze(word);
      if (word == null || word.length() == 0) continue; // ignore

      /* Part C: Add (group,word) to tables */

      // ensure compact string representation, minimizing memory overhead
      String w = internedWords.get(word);
      if (w == null) {
        word = new String(word); // ensure compact string
        internedWords.put(word, word);
      } else {
        word = w;
      }

      // reuse the boxed group id while consecutive lines share the same synset
      Integer group = lastGroup;
      if (num != lastNum) {
        group = Integer.valueOf(num);
        lastGroup = group;
        lastNum = num;
      }

      // add word --> group
      ArrayList<Integer> groups = word2Groups.get(word);
      if (groups == null) {
        groups = new ArrayList<Integer>(1);
        word2Groups.put(word, groups);
      }
      groups.add(group);

      // add group --> word
      ArrayList<String> words = group2Words.get(group);
      if (words == null) {
        words = new ArrayList<String>(1);
        group2Words.put(group, words);
      }
      words.add(word);
    }

    /* Part D: compute index data structure */
    HashMap<String,String[]> word2Syns = createIndex(word2Groups, group2Words);

    /* Part E: minimize memory consumption by a factor 3 (or so) */
    word2Groups = null; // help gc
    group2Words = null; // help gc

    return optimize(word2Syns, internedWords);
  }

  /** Joins the two intermediate maps into the final word -> sorted synonyms index. */
  private HashMap<String,String[]> createIndex(Map<String,ArrayList<Integer>> word2Groups, Map<Integer,ArrayList<String>> group2Words) {
    HashMap<String,String[]> word2Syns = new HashMap<String,String[]>();

    for (final Map.Entry<String,ArrayList<Integer>> entry : word2Groups.entrySet()) { // for each word
      ArrayList<Integer> group = entry.getValue();
      String word = entry.getKey();

      TreeSet<String> synonyms = new TreeSet<String>();
      for (int i=group.size(); --i >= 0; ) { // for each groupID of word
        ArrayList<String> words = group2Words.get(group.get(i));
        for (int j=words.size(); --j >= 0; ) { // add all words
          String synonym = words.get(j); // note that synonym and word are interned
          if (synonym != word) { // a word is implicitly its own synonym
            synonyms.add(synonym);
          }
        }
      }

      int size = synonyms.size();
      if (size > 0) {
        String[] syns = new String[size];
        if (size == 1)
          syns[0] = synonyms.first();
        else
          synonyms.toArray(syns); // TreeSet iterates ascending, so syns is sorted
        word2Syns.put(word, syns);
      }
    }

    return word2Syns;
  }

  /**
   * Re-keys the index through a single shared backing string to reduce memory
   * consumption; behavior-neutral for lookups (keys compare by equals()).
   */
  private HashMap<String,String[]> optimize(HashMap<String,String[]> word2Syns, HashMap<String,String> internedWords) {
    if (DEBUG) {
      System.err.println("before gc");
      for (int i=0; i < 10; i++) System.gc();
      System.err.println("after gc");
    }

    // collect entries
    int len = 0;
    int size = word2Syns.size();
    String[][] allSynonyms = new String[size][];
    String[] words = new String[size];
    Iterator<Map.Entry<String,String[]>> iter = word2Syns.entrySet().iterator();
    for (int j=0; j < size; j++) {
      Map.Entry<String,String[]> entry = iter.next();
      allSynonyms[j] = entry.getValue();
      words[j] = entry.getKey();
      len += words[j].length();
    }

    // assemble large string containing all words
    StringBuilder buf = new StringBuilder(len);
    for (int j=0; j < size; j++) buf.append(words[j]);
    String allWords = new String(buf.toString()); // ensure compact string across JDK versions
    buf = null;

    // intern words at app level via memory-overlaid substrings
    for (int p=0, j=0; j < size; j++) {
      String word = words[j];
      internedWords.put(word, allWords.substring(p, p + word.length()));
      p += word.length();
    }

    // replace words with interned words
    for (int j=0; j < size; j++) {
      String[] syns = allSynonyms[j];
      for (int k=syns.length; --k >= 0; ) {
        syns[k] = internedWords.get(syns[k]);
      }
      word2Syns.remove(words[j]);
      word2Syns.put(internedWords.get(words[j]), syns);
    }

    if (DEBUG) {
      words = null;
      allSynonyms = null;
      internedWords = null;
      allWords = null;
      System.err.println("before gc");
      for (int i=0; i < 10; i++) System.gc();
      System.err.println("after gc");
    }
    return word2Syns;
  }

  /**
   * Reads the entire stream into a byte array and closes the stream.
   * Safe and fast even if input.available() behaves weird or buggy.
   * (Utility copied from the Apache-style Nux library — see http://dsd.lbl.gov/nux)
   */
  private static byte[] toByteArray(InputStream input) throws IOException {
    try {
      int len = Math.max(256, input.available());
      byte[] buffer = new byte[len];
      byte[] output = new byte[len];

      len = 0;
      int n;
      while ((n = input.read(buffer)) >= 0) {
        if (len + n > output.length) { // grow capacity
          byte tmp[] = new byte[Math.max(output.length << 1, len + n)];
          System.arraycopy(output, 0, tmp, 0, len);
          System.arraycopy(buffer, 0, tmp, len, n);
          buffer = output; // use larger buffer for future larger bulk reads
          output = tmp;
        } else {
          System.arraycopy(buffer, 0, output, len, n);
        }
        len += n;
      }

      if (len == output.length) return output;
      buffer = new byte[len];
      System.arraycopy(output, 0, buffer, 0, len);
      return buffer;
    } finally {
      if (input != null) input.close();
    }
  }

}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java
new file mode 100644
index 0000000..1505096
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.tools.coref.mention.Dictionary;
+
+import net.didion.jwnl.JWNL;
+import net.didion.jwnl.JWNLException;
+import net.didion.jwnl.data.Adjective;
+import net.didion.jwnl.data.IndexWord;
+import net.didion.jwnl.data.POS;
+import net.didion.jwnl.data.Pointer;
+import net.didion.jwnl.data.PointerType;
+import net.didion.jwnl.data.Synset;
+import net.didion.jwnl.data.VerbFrame;
+import net.didion.jwnl.dictionary.MapBackedDictionary;
+import net.didion.jwnl.dictionary.MorphologicalProcessor;
+import net.didion.jwnl.dictionary.morph.DefaultMorphologicalProcessor;
+import net.didion.jwnl.dictionary.morph.DetachSuffixesOperation;
+import net.didion.jwnl.dictionary.morph.LookupExceptionsOperation;
+import net.didion.jwnl.dictionary.morph.LookupIndexWordOperation;
+import net.didion.jwnl.dictionary.morph.Operation;
+import net.didion.jwnl.dictionary.morph.TokenizerOperation;
+import net.didion.jwnl.princeton.file.PrincetonObjectDictionaryFile;
+
+/**
+ * An implementation of the Dictionary interface using the JWNL library.
+ */
+public class TopJWNLDictionary implements Dictionary {
+
+	private net.didion.jwnl.dictionary.Dictionary dict;
+	private MorphologicalProcessor morphy;
+	private static String[] empty = new String[0];
+
+	public TopJWNLDictionary(String propertiesFile) throws IOException,
+			JWNLException {
+		JWNL.initialize(this.getClass().getResourceAsStream(propertiesFile));
+		dict = net.didion.jwnl.dictionary.Dictionary.getInstance();
+		morphy = dict.getMorphologicalProcessor();
+	}
+
+	@SuppressWarnings("unchecked")
+	public String[] getLemmas(String word, String tag) {
+		try {
+			POS pos;
+			if (tag.startsWith("N") || tag.startsWith("n")) {
+				pos = POS.NOUN;
+			} else if (tag.startsWith("N") || tag.startsWith("v")) {
+				pos = POS.VERB;
+			} else if (tag.startsWith("J") || tag.startsWith("a")) {
+				pos = POS.ADJECTIVE;
+			} else if (tag.startsWith("R") || tag.startsWith("r")) {
+				pos = POS.ADVERB;
+			} else {
+				pos = POS.NOUN;
+			}
+			List<String> lemmas = morphy.lookupAllBaseForms(pos, word);
+			return lemmas.toArray(new String[lemmas.size()]);
+		} catch (JWNLException e) {
+			e.printStackTrace();
+			return null;
+		}
+	}
+
+	public String getSenseKey(String lemma, String pos, int sense) {
+		try {
+			IndexWord iw = dict.getIndexWord(POS.NOUN, lemma);
+			if (iw == null) {
+				return null;
+			}
+			return String.valueOf(iw.getSynsetOffsets()[sense]);
+		} catch (JWNLException e) {
+			e.printStackTrace();
+			return null;
+		}
+
+	}
+
+	public int getNumSenses(String lemma, String pos) {
+		try {
+			IndexWord iw = dict.getIndexWord(POS.NOUN, lemma);
+			if (iw == null) {
+				return 0;
+			}
+			return iw.getSenseCount();
+		} catch (JWNLException e) {
+			return 0;
+		}
+	}
+
+	private void getParents(Synset synset, List<String> parents)
+			throws JWNLException {
+		Pointer[] pointers = synset.getPointers();
+		for (int pi = 0, pn = pointers.length; pi < pn; pi++) {
+			if (pointers[pi].getType() == PointerType.HYPERNYM) {
+				Synset parent = pointers[pi].getTargetSynset();
+				parents.add(String.valueOf(parent.getOffset()));
+				getParents(parent, parents);
+			}
+		}
+	}
+
+	public String[] getParentSenseKeys(String lemma, String pos, int sense) {
+		// System.err.println("JWNLDictionary.getParentSenseKeys: lemma="+lemma);
+		try {
+			IndexWord iw = dict.getIndexWord(POS.NOUN, lemma);
+			if (iw != null) {
+				Synset synset = iw.getSense(sense + 1);
+				List<String> parents = new ArrayList<String>();
+				getParents(synset, parents);
+				return parents.toArray(new String[parents.size()]);
+			} else {
+				return empty;
+			}
+		} catch (JWNLException e) {
+			e.printStackTrace();
+			return null;
+		}
+	}
+
+	public static void main(String[] args) throws IOException, JWNLException {
+		String searchDir = System.getProperty("WNSEARCHDIR");
+		System.err.println("searchDir=" + searchDir);
+		searchDir = "models/WordNet_2.1";
+		if (searchDir != null) {
+			Dictionary dict = new TopJWNLDictionary(
+					System.getProperty("WNSEARCHDIR"));
+			// Dictionary dict = new TopJWNLDictionary();
+			// String word = args[0];
+			String[] lemmas = dict.getLemmas("test", "NN");
+			for (int li = 0, ln = lemmas.length; li < ln; li++) {
+				for (int si = 0, sn = dict.getNumSenses(lemmas[li], "NN"); si < sn; si++) {
+					System.out.println(lemmas[li]
+							+ " ("
+							+ si
+							+ ")\t"
+							+ java.util.Arrays.asList(dict.getParentSenseKeys(
+									lemmas[li], "NN", si)));
+				}
+			}
+		}
+	}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
new file mode 100644
index 0000000..dbbec1d
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
@@ -0,0 +1,137 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.tools.coref.mention.Dictionary;
+
+public class WordDictionary {
+	private static final String[][] SPECIAL_CASES = { { "lens", "lenses" } };
+
+	//private static final String WORDNET_PROPERTITES_KEY = "wordnet.propertites.file";
+	//private static final String PROPERTIES_FILE = null;;
+
+	// private static final String DATA_DIR;
+	private static WordDictionary instance;
+
+	private Dictionary dictionary;
+	private Map<String, String> specialCaseMap;
+
+	/*static {
+		ConfigProperties config = ConfigFactory.getInstance()
+				.getConfigProperties(ConfigFactory.NLP_CONFIG_PATH);
+		PROPERTIES_FILE = config.getProperty(WORDNET_PROPERTITES_KEY);
+	}*/
+
+	public synchronized static WordDictionary getInstance() {
+		if (instance == null)
+			instance = new WordDictionary();
+
+		return instance;
+	}
+
+	private WordDictionary() {
+		// initialize the dictionary by loading the WordNet database
+		try {
+			dictionary = new TopJWNLDictionary("PROPERTIES_FILE");
+		} catch (Exception e) {
+			e.printStackTrace();
+			System.err.println("Failed to load the WordNet database: " + e);
+		}
+
+		// build the dictionary for special cases
+		specialCaseMap = buildSpecialCaseMap();
+	}
+
+	public String getLemmaOrWord(String word, String type) {
+		String lemma = getLemma(word, type);
+		if (lemma != null)
+			return lemma;
+		else
+			return (word == null) ? null : word.trim().toLowerCase();
+	}
+
+	public String getLemma(String word, String type) {
+		if (word == null)
+			return null;
+		// skip some long word,avoid dictionary getLemmas dead
+		if (word.length() >= 20)
+			return word;
+		word = word.trim().toLowerCase();
+		if (word.length() == 0)
+			return null;
+
+		// check special cases first
+		String lemma = specialCaseMap.get(word);
+		if (lemma != null)
+			return lemma;
+
+		// use the dictionary for general cases
+		// JWNLDictionary has a bug, and we have to use lower case type
+		type = (type == null) ? null : type.toLowerCase();
+		String[] lemmas = dictionary.getLemmas(word, type);
+		if (lemmas == null || lemmas.length == 0)
+			return null;
+
+		return lemmas[0];
+	}
+
+	/**
+	 * get the lemma for a word of unknown POS type return the word if no lemma
+	 * is found
+	 * 
+	 * @param word
+	 * @return
+	 */
+	public String getLemmaOrWord(String word) {
+		if (word == null)
+			return null;
+
+		// try noun first
+		String lemma = getLemma(word, "NN");
+		if (lemma != null)
+			return lemma;
+
+		// then try verb
+		lemma = getLemma(word, "VB");
+		if (lemma != null)
+			return lemma;
+
+		// return word now
+		return word.trim().toLowerCase();
+	}
+
+	private Map<String, String> buildSpecialCaseMap() {
+
+		Map<String, String> specialCaseMap = new HashMap<String, String>();
+		for (String[] wordList : SPECIAL_CASES) {
+			String lemma = wordList[0];
+			for (String word : wordList) {
+				specialCaseMap.put(word, lemma);
+			}
+		}
+
+		return specialCaseMap;
+	}
+
+	public static void main(String[] args) {
+		String[] verbs = { "is", "has", "were", "likes", "TaKen", "going" };
+		String[] nouns = { "efficient", "Cars", "lens", "wives", "lenses",
+				"photos" };
+		String[] adverbs = { "would", "could", "should", "might" };
+		WordDictionary dictionary = WordDictionary.getInstance();
+
+		for (String word : verbs) {
+			System.out
+					.println(word + " ==> " + dictionary.getLemma(word, "VB"));
+		}
+		for (String word : nouns) {
+			System.out
+					.println(word + " ==> " + dictionary.getLemma(word, "NN"));
+		}
+		for (String word : adverbs) {
+			System.out
+					.println(word + " ==> " + dictionary.getLemma(word, "JJ"));
+		}
+	}
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java
new file mode 100644
index 0000000..b1afe09
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java
@@ -0,0 +1,68 @@
+package opennlp.tools.apps.review_builder;

+

+import java.util.ArrayList;

+import java.util.List;

+

+import opennlp.tools.similarity.apps.BingQueryRunner;

+import opennlp.tools.similarity.apps.HitBase;

+

+import org.apache.commons.lang.StringUtils;

+

+public class BingAPIProductSearchManager {

+	BingQueryRunner search = new BingQueryRunner();

+

+	public List<HitBase> findProductByName(String name, int count){

+		List<HitBase> foundFBPages = search.runSearch("site:amazon.com"+" "+name + " reviews"

+				, 10);

+		List<HitBase> results = new ArrayList<HitBase>();

+		int ct=0;

+		for(HitBase h: foundFBPages){

+			if (ct>=count) break; ct++; 

+			String title = h.getTitle().toLowerCase();

+			if (h.getUrl().indexOf("amazon.com")<0)

+				continue;

+			String[] merchantWords = name.toLowerCase().split(" ");

+			int overlapCount=0;

+/*			for(String commonWord:merchantWords){

+				if (title.indexOf(commonWord+" ")>-1 || title.indexOf(" "+commonWord)>-1){

+					overlapCount++;

+					System.out.println(" found word "+ commonWord + " in title = "+title);

+				}

+			}

+			float coverage = (float)overlapCount/(float) (merchantWords.length);

+			if ((coverage>0.4 || (coverage>0.5f && merchantWords.length <4 )))

+*/				results.add(h);

+		}

+		return results;

+	}

+	

+	public List<HitBase> findProductByNameNoReview(String name, int count){

+		List<HitBase> foundFBPages = search.runSearch(name, count);

+		List<HitBase> results = new ArrayList<HitBase>();

+		int ct=0;

+		for(HitBase h: foundFBPages){

+			if (ct>=count) break; ct++; 

+			String title = h.getTitle().toLowerCase();

+			String[] merchantWords = name.toLowerCase().split(" ");

+			int overlapCount=0;

+			for(String commonWord:merchantWords){

+				if (title.indexOf(commonWord+" ")>-1 || title.indexOf(" "+commonWord)>-1){

+					overlapCount++;

+					System.out.println(" found word "+ commonWord + " in title = "+title);

+				}

+			}

+			float coverage = (float)overlapCount/(float) (merchantWords.length);

+			if ((coverage>0.4 || (coverage>0.5f && merchantWords.length <4 )))

+				results.add(h);

+		}

+		return results;

+	}

+

+	

+

+	public static void main(String[] args){

+		BingAPIProductSearchManager man = new BingAPIProductSearchManager ();

+		List<HitBase> res = man.findProductByName("chain saw", 5);

+		System.out.println(res);  	

+	}

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java
new file mode 100644
index 0000000..926a723
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java
@@ -0,0 +1,143 @@
+package opennlp.tools.apps.review_builder;

+

+import java.util.ArrayList;

+import java.util.Calendar;

+import java.util.List;

+import org.apache.commons.lang.StringUtils;

+

+import opennlp.tools.jsmlearning.ProfileReaderWriter;

+import opennlp.tools.similarity.apps.utils.PageFetcher;

+import com.restfb.Connection;

+import com.restfb.DefaultFacebookClient;

+import com.restfb.FacebookClient;

+import com.restfb.Parameter;

+import com.restfb.exception.FacebookException;

+import com.restfb.types.Event;

+import com.restfb.types.Page;

+

+

+public class FBOpenGraphSearchManager {

+

+	public List<String[]> profiles = null;

+	protected FacebookClient mFBClient;

+	protected PageFetcher pageFetcher = new PageFetcher();

+	protected static final int NUM_TRIES = 5;

+	protected static final long WAIT_BTW_TRIES=1000; //milliseconds between re-tries

+	

+		

+	public FBOpenGraphSearchManager(){

+		profiles = ProfileReaderWriter.readProfiles("C:\\nc\\features\\analytics\\dealanalyzer\\sweetjack-localcoupon-may12012tooct302012.csv");

+		

+	}

+	

+		

+	public void setFacebookClient(FacebookClient c){

+		this.mFBClient=c;

+	}

+	

+	public List<Event> getFBEventsByName(String event)

+	{

+	    List<Event> events = new ArrayList<Event>();

+	    

+	    for(int i=0; i < NUM_TRIES; i++)

+	    {

+    	    try

+    	    {

+        	    Connection<Event> publicSearch =

+        	            mFBClient.fetchConnection("search", Event.class,

+        	              Parameter.with("q", event), Parameter.with("type", "event"),Parameter.with("limit", 100));

+        	    System.out.println("Searching FB events for " + event);

+        	    events= publicSearch.getData();

+        	    break;

+    	    }

+    	    catch(FacebookException e)

+    	    {

+    	    	System.out.println("FBError "+e);

+    	        try

+                {

+                    Thread.sleep(WAIT_BTW_TRIES);

+                }

+                catch (InterruptedException e1)

+                {

+                    // TODO Auto-generated catch block

+                	System.out.println("Error "+e1);

+                }

+    	    }

+	    }

+	    return events;

+	}

+	

+	public Long getFBPageLikes(String merchant)

+	{

+        List<Page> groups = new ArrayList<Page>();

+        

+        for(int i=0; i < NUM_TRIES; i++)

+        {

+            try

+            {

+                Connection<Page> publicSearch =

+                        mFBClient.fetchConnection("search", Page.class,

+                          Parameter.with("q", merchant), Parameter.with("type", "page"),Parameter.with("limit", 100));

+                System.out.println("Searching FB Pages for " + merchant);

+                groups= publicSearch.getData();

+                break;

+            }

+            catch(FacebookException e)

+            {

+            	System.out.println("FBError "+e);

+                try

+                {

+                    Thread.sleep(WAIT_BTW_TRIES);

+                }

+                catch (InterruptedException e1)

+                {

+                    // TODO Auto-generated catch block

+                	System.out.println("Error "+e1);

+                }

+            }

+        }

+        

+        for (Page p: groups){

+        	if (p!=null && p.getLikes()!=null && p.getLikes()>0) 

+        		return p.getLikes();

+        }

+        

+        //stats fwb">235</span>

+        

+        for (Page p: groups){

+        	if (p.getId()==null)

+        		continue;

+        	String content = pageFetcher.fetchOrigHTML("http://www.facebook.com/"+p.getId());

+        

+        	String likes = StringUtils.substringBetween(content, "stats fwb\">", "<" );

+        	if (likes==null)

+        		continue;

+        	Integer nLikes =0;

+        	try {

+        	nLikes = Integer.parseInt(likes);

+        	} catch (Exception e){

+        		

+        	}

+        	if (nLikes>0){

+        		return (long)nLikes;

+        	}

+        	

+        }

+        

+        

+        return null;

+	}

+	

+

+    // 

+    

+    public static void main(String[] args){

+    	FBOpenGraphSearchManager man = new FBOpenGraphSearchManager ();

+    	man.setFacebookClient(new DefaultFacebookClient());

+       	

+    	

+    	long res = man.getFBPageLikes("chain saw");

+    	System.out.println(res);

+    	    	

+    }

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
new file mode 100644
index 0000000..8ddf502
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
@@ -0,0 +1,86 @@
+package opennlp.tools.apps.review_builder;

+

+import java.io.BufferedReader;

+import java.io.IOException;

+import java.io.InputStreamReader;

+import java.net.MalformedURLException;

+import java.net.URL;

+import java.net.URLConnection;

+import java.net.URLDecoder;

+import java.util.HashMap;

+import java.util.Map;

+

+import org.apache.commons.lang.StringUtils;

+import org.json.JSONArray;

+import org.json.JSONException;

+import org.json.JSONObject;

+

+public class MachineTranslationWrapper  {

+	private String translatorURL = "http://mymemory.translated.net/api/get?q=";

+	

+	public String translate(String sentence, String lang2lang){

+		if (sentence==null)

+			return null;

+		String request = translatorURL + sentence.replace(' ','+') + "&langpair="+lang2lang;//"en|es";

+		JSONArray arr=null, prodArr = null, searchURLviewArr = null;

+		try {

+			URL urlC = new URL(request);

+			URLConnection connection = urlC.openConnection();

+

+			String line;

+			String result = "";

+			BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));

+			int count = 0;

+			while ((line = reader.readLine()) != null)

+			{

+				result+=line;

+				count++;

+			}

+			JSONObject rootObject = new JSONObject(result);

+			JSONObject  findObject = rootObject.getJSONObject("responseData");

+			String transl = findObject.getString("translatedText");

+			try {

+				transl = URLDecoder.decode(transl, "UTF-8");

+			} catch (Exception e) {

+				

+			}

+			

+			return transl;

+			

+		} catch (MalformedURLException e) {

+			

+			e.printStackTrace();

+			return null;

+		} catch (JSONException e) {

+			e.printStackTrace();

+			return null;			

+		} catch (IOException e) {

+			e.printStackTrace();

+			return null;			

+		}	

+		

+	}

+	

+	public String rePhrase(String sentence){

+		System.out.println("orig = "+ sentence);

+		String transl = translate(sentence, "en|es");

+		System.out.println("tranls = "+transl);

+		String inverseTransl = translate(transl, "es|en");

+		if (!(inverseTransl.indexOf("NO QUERY SPECIFIED")>-1) && !(inverseTransl.indexOf("INVALID LANGUAGE")>-1) && !(inverseTransl.indexOf("MYMEMORY WARNING")>-1))

+			return inverseTransl;

+		else 

+			return sentence;

+	}

+	

+	

+	

+	public static void main(String[] args){

+		MachineTranslationWrapper rePhraser = new MachineTranslationWrapper();

+		

+		System.out.println(rePhraser.translate("I went to the nearest bookstore to buy a book written by my friend and his aunt", "en|ru"));

+		

+		System.out.println(rePhraser.rePhrase("I went to the nearest bookstore to buy a book written by my friend and his aunt"));

+

+	}

+		

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
new file mode 100644
index 0000000..73d8417
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
@@ -0,0 +1,210 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.apps.review_builder;

+

+import java.util.Arrays;

+import java.util.List;

+

+import opennlp.tools.similarity.apps.utils.Utils;

+

+import org.apache.commons.lang.StringUtils;

+

+public class MinedSentenceProcessor {

+  public static String acceptableMinedSentence(String sent) {

+    // if too many commas => seo text

+

+    String[] commas = StringUtils.split(sent, ',');

+    String[] spaces = StringUtils.split(sent, ' ');

+    if ((float) commas.length / (float) spaces.length > 0.7) {

+      System.out.println("Rejection: too many commas");

+      return null;

+    }

+    

+    String[] otherDelimiters = StringUtils.split(sent, '/');

+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {

+        System.out.println("Rejection: too many delimiters");

+        return null;

+    }

+    

+    otherDelimiters = StringUtils.split(sent, '.');

+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {

+        System.out.println("Rejection: too many delimiters");

+        return null;

+    }

+    otherDelimiters = StringUtils.split(sent, '!');

+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {

+        System.out.println("Rejection: too many delimiters");

+        return null;

+    }

+    otherDelimiters = StringUtils.split(sent, '=');

+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {

+        System.out.println("Rejection: too many delimiters");

+        return null;

+    }

+    

+    String[] pipes = StringUtils.split(sent, '|');

+    if (StringUtils.split(sent, '|').length > 2

+        || StringUtils.split(sent, '>').length > 2) {

+      System.out.println("Rejection: too many |s or >s ");

+      return null;

+    }

+    String sentTry = sent.toLowerCase();

+    // if too many long spaces

+    String sentSpaces = sentTry.replace("   ", "");

+    if (sentSpaces.length() - sentTry.length() > 10) // too many spaces -

+      // suspicious

+      return null;

+

+    if (sentTry.indexOf("click here") > -1 || sentTry.indexOf(" wikip") > -1

+        || sentTry.indexOf("copyright") > -1

+        || sentTry.indexOf("operating hours") > -1

+        || sentTry.indexOf("days per week") > -1

+        || sentTry.indexOf("click for") > -1 || sentTry.indexOf("photos") > -1

+        || sentTry.indexOf("find the latest") > -1

+        || sentTry.startsWith("subscribe")

+        || sentTry.indexOf("Terms of Service") > -1

+        || sentTry.indexOf("clicking here") > -1

+        || sentTry.indexOf("skip to") > -1 || sentTry.indexOf("sidebar") > -1

+        || sentTry.indexOf("Tags:") > -1 || sentTry.startsWith("Posted by")

+        || sentTry.indexOf("available online") > -1

+        || sentTry.indexOf("get online") > -1

+        || sentTry.indexOf("buy online") > -1

+        || sentTry.indexOf("not valid") > -1 || sentTry.indexOf("discount") > -1

+        || sentTry.indexOf("official site") > -1

+        || sentTry.indexOf("this video") > -1

+        || sentTry.indexOf("this book") > -1

+        || sentTry.indexOf("this product") > -1

+        || sentTry.indexOf("paperback") > -1 || sentTry.indexOf("hardcover") > -1

+        || sentTry.indexOf("audio cd") > -1

+        || sentTry.indexOf("related searches") > -1

+        || sentTry.indexOf("permission is granted") > -1

+        || sentTry.indexOf("[edit") > -1

+        || sentTry.indexOf("edit categories") > -1

+        || sentTry.indexOf("free license") > -1

+        || sentTry.indexOf("permission is granted") > -1

+        || sentTry.indexOf("under the terms") > -1

+        || sentTry.indexOf("rights reserved") > -1

+        || sentTry.indexOf("wikipedia") > -1 || sentTry.endsWith("the")

+        || sentTry.endsWith("the.") || sentTry.startsWith("below") 

+        || sentTry.indexOf("recipient of")>-1 || sentTry.indexOf("this message")>-1 

+        ||sentTry.indexOf( "mailing list")>-1 ||sentTry.indexOf( "purchase order")>-1

+        ||sentTry.indexOf( "mon-fri")>-1 ||sentTry.indexOf( "email us")>-1 ||sentTry.indexOf( "privacy pol")>-1 ||sentTry.indexOf( "back to top")>-1 

+        ||sentTry.indexOf( "click here")>-1 ||sentTry.indexOf( "for details")>-1 ||sentTry.indexOf( "assistance?")>-1 ||sentTry.indexOf( "chat live")>-1

+        ||sentTry.indexOf( "free shipping")>-1 ||sentTry.indexOf( "company info")>-1 ||sentTry.indexOf( "satisfaction g")>-1 ||sentTry.indexOf( "contact us")>-1

+        ||sentTry.startsWith( "fax") ||sentTry.startsWith( "write") || sentTry.startsWith( "email")||sentTry.indexOf( "conditions")>-1 ||sentTry.indexOf( "chat live")>-1

+        ||sentTry.startsWith( "we ") ||sentTry.indexOf( "the recipient")>-1 ||sentTry.indexOf( "day return")>-1 ||sentTry.indexOf( "days return")>-1

+        

+        ||sentTry.startsWith( "fax") ||sentTry.indexOf( "refund it")>-1 || sentTry.indexOf( "your money")>-1

+        ||sentTry.startsWith( "free") ||sentTry.indexOf( "purchase orders")>-1

+        ||sentTry.startsWith( "exchange it ") ||sentTry.indexOf( "return it")>-1 ||sentTry.indexOf( "credit card")>-1 

+        

+        ||sentTry.indexOf( "storeshop")>-1 || sentTry.startsWith( "find") || sentTry.startsWith( "shop") || sentTry.startsWith( "unlimited") 

+        ||sentTry.indexOf( "for a limited time")>-1 ||sentTry.indexOf( "prime members")>-1 ||sentTry.indexOf( "amazon members")>-1 ||sentTry.indexOf( "unlimited free")>-1 

+        ||sentTry.indexOf( "shipping")>-1 || sentTry.startsWith( "amazon")

+// not a script text

+        ||sentTry.indexOf( "document.body")>-1 ||sentTry.indexOf( " var ")>-1         ||sentTry.indexOf( "search suggestions")>-1 ||sentTry.startsWith( "Search") 

+        

+    		)

+      return null;

+    

+    //Millions of Amazon Prime members enjoy instant videos, free Kindle books and unlimited free two-day shipping.

+

+    // count symbols indicating wrong parts of page to mine for text

+    // if short and contains too many symbols indicating wrong area: reject

+    String sentWrongSym = sentTry.replace(">", "&&&").replace("�", "&&&")

+        .replace("|", "&&&").replace(":", "&&&").replace("/", "&&&")

+        .replace("-", "&&&").replace("%", "&&&");

+    if ((sentWrongSym.length() - sentTry.length()) >= 4

+        && sentTry.length() < 200) // twice ot more

+      return null;

+

+    sent = sent.replace('[', ' ').replace(']', ' ')

+        .replace("_should_find_orig_", "").replace(".   .", ". ")

+        .replace("amp;", " ").replace("1.", " ").replace("2.", " ")

+        .replace("3.", " ").replace("4.", " ").replace("2009", "2011")

+        .replace("2008", "2011").replace("2006", "2011")

+        .replace("2007", "2011").replace("VIDEO:", " ").replace("Video:", " ")

+        .replace("no comments", " ").replace("  ", " ").replace("  ", " ")

+        .replace("(more.)", "").replace("more.", "").replace("<more>", "")

+        .replace("[more]", "").replace(".,", ".").replace("&lt;", "")

+        .replace("p&gt;", "").replace("product description", "");

+

+    // TODO .replace("a.", ".");

+

+    int endIndex = sent.indexOf(" posted");

+    if (endIndex > 0)

+      sent = sent.substring(0, endIndex);

+

+    return sent;

+  }

+

+  public static String processSentence(String pageSentence) {

+    if (pageSentence == null)

+      return "";

+    pageSentence = Utils.fullStripHTML(pageSentence);

+    pageSentence = StringUtils.chomp(pageSentence, "..");

+    pageSentence = StringUtils.chomp(pageSentence, ". .");

+    pageSentence = StringUtils.chomp(pageSentence, " .");

+    pageSentence = StringUtils.chomp(pageSentence, ".");

+    pageSentence = StringUtils.chomp(pageSentence, "...");

+    pageSentence = StringUtils.chomp(pageSentence, " ....");

+    pageSentence = pageSentence.replace("::", ":").replace(".,", ". ")

+        .replace("(.)", "");

+

+    pageSentence = pageSentence.trim();

+    pageSentence = pageSentence.replaceAll("\\s+", " "); // make single

+    // spaces

+    // everywhere

+

+    String[] pipes = StringUtils.split(pageSentence, '|'); // removed

+    // shorter part

+    // of sentence

+    // at the end

+    // after pipe

+    if (pipes.length == 2

+        && ((float) pipes[0].length() / (float) pipes[1].length() > 3.0)) {

+      int pipePos = pageSentence.indexOf("|");

+      if (pipePos > -1)

+        pageSentence = pageSentence.substring(0, pipePos - 1).trim();

+

+    }

+

+    if (!StringUtils.contains(pageSentence, '.')

+        && !StringUtils.contains(pageSentence, '?')

+        && !StringUtils.contains(pageSentence, '!'))

+      pageSentence = pageSentence + ". ";

+

+    pageSentence = pageSentence.replace(" .", ".").replace("..", ".").trim();

+    if (!pageSentence.endsWith("."))

+      pageSentence += ". ";

+    return pageSentence;

+  }

+

+  

+  public static String normalizeForSentenceSplitting(String pageContent) {

+    pageContent.replace("Jan.", "January").replace("Feb.", "February")

+        .replace("Mar.", "March").replace("Apr.", "April")

+        .replace("Jun.", "June").replace("Jul.", "July")

+        .replace("Aug.", "August").replace("Sep.", "September")

+        .replace("Oct.", "October").replace("Nov.", "November")

+        .replace("Dec.", "December");

+

+    return pageContent;

+

+  }

+}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ParserConstants.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ParserConstants.java
new file mode 100644
index 0000000..9862ffb
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ParserConstants.java
@@ -0,0 +1,74 @@
+package opennlp.tools.apps.review_builder;
+
/**
 * Phrase and part-of-speech tag name constants (Penn Treebank tag set,
 * http://bulba.sdsu.edu/jeanette/thesis/PennTags.html) plus three custom
 * phrase types used by this package.
 *
 * Note: fields of an interface are implicitly {@code public static final},
 * so the redundant modifiers have been dropped.
 */
public interface ParserConstants {
	// added new POS types for infinitive phrase and participle phrase
	String TYPE_STP = "STP"; // infinitive phrase
	String TYPE_SGP = "SGP"; // present participle phrase
	String TYPE_SNP = "SNP"; // past participle phrase

	// below are the standard POS types,
	// http://bulba.sdsu.edu/jeanette/thesis/PennTags.html
	String TYPE_ADJP = "ADJP";
	String TYPE_ADVP = "ADVP";
	String TYPE_CC = "CC";
	String TYPE_CD = "CD";
	String TYPE_CONJP = "CONJP";
	String TYPE_DT = "DT";
	String TYPE_EX = "EX";
	String TYPE_FRAG = "FRAG";
	String TYPE_FW = "FW";
	String TYPE_IN = "IN";
	String TYPE_INTJ = "INTJ";
	String TYPE_JJ = "JJ";
	String TYPE_JJR = "JJR";
	String TYPE_JJS = "JJS";
	String TYPE_LS = "LS";
	String TYPE_LST = "LST";
	String TYPE_MD = "MD";
	String TYPE_NAC = "NAC";
	String TYPE_NN = "NN";
	String TYPE_NNS = "NNS";
	String TYPE_NNP = "NNP";
	String TYPE_NNPS = "NNPS";
	String TYPE_NP = "NP";
	String TYPE_NX = "NX";
	String TYPE_PDT = "PDT";
	String TYPE_POS = "POS";
	String TYPE_PP = "PP";
	String TYPE_PRN = "PRN";
	String TYPE_PRP = "PRP";
	String TYPE_PRP$ = "PRP$";
	String TYPE_PRT = "PRT";
	String TYPE_QP = "QP";
	String TYPE_RB = "RB";
	String TYPE_RBR = "RBR";
	String TYPE_RBS = "RBS";
	String TYPE_RP = "RP";
	String TYPE_RRC = "RRC";
	String TYPE_S = "S";
	String TYPE_SBAR = "SBAR";
	String TYPE_SBARQ = "SBARQ";
	String TYPE_SINV = "SINV";
	String TYPE_SQ = "SQ";
	String TYPE_SYM = "SYM";
	String TYPE_TO = "TO";
	String TYPE_TOP = "TOP";
	String TYPE_UCP = "UCP";
	String TYPE_UH = "UH";
	String TYPE_VB = "VB";
	String TYPE_VBD = "VBD";
	String TYPE_VBG = "VBG";
	String TYPE_VBN = "VBN";
	String TYPE_VBP = "VBP";
	String TYPE_VBZ = "VBZ";
	String TYPE_VP = "VP";
	String TYPE_WDT = "WDT";
	String TYPE_WHADJP = "WHADJP";
	String TYPE_WHADVP = "WHADVP";
	String TYPE_WHNP = "WHNP";
	String TYPE_WHPP = "WHPP";
	String TYPE_WP = "WP";
	String TYPE_WP$ = "WP$";
	String TYPE_WRB = "WRB";
	String TYPE_X = "X";
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
new file mode 100644
index 0000000..956640f
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
@@ -0,0 +1,166 @@
+package opennlp.tools.apps.review_builder;

+

+import java.util.ArrayList;

+import java.util.List;

+

+import opennlp.tools.jsmlearning.ProfileReaderWriter;

+import opennlp.tools.parse_thicket.Triple;

+

+public class ReviewBuilderRunner {

+

+	private List<Triple> input = new ArrayList<Triple>(); 

+

+	public ReviewBuilderRunner(){

+

+		/*	input.add( new Pair<String, Integer>("chief architect portable mobile tv", 204973051));

+

+	input.add( new Pair<String, Integer>("lg plasma tv", 215734562));

+	input.add( new Pair<String, Integer>("magnavox lcd hdtv", 215415652));

+	input.add( new Pair<String, Integer>("yamaha aventage home theater receiver", 215742271));

+	input.add( new Pair<String, Integer>("panasonic 24inch lcd tv", 215742233));

+	input.add( new Pair<String, Integer>("otterbox barnes and noble nook commuter case", 215572161));

+	input.add( new Pair<String, Integer>("sony kdl32ex340 led tv", 215743925));

+	input.add( new Pair<String, Integer>("alpine waterfall tabletop fountain lighting", 215135546));

+    input.add( new Pair<String, Integer>("ihome rechargeable speaker system", 215363231 ));

+	input.add( new Pair<String, Integer>("ion slide film scanner", 212088884));

+

+		 input.add( new Pair<String, Integer>("mens dr martens shoes black nappa", 210813142));

+		 input.add( new Pair<String, Integer>("calvin klein seamless thong panty", 201984853));

+		 input.add( new Pair<String, Integer>("mens clarks shoes wallabee beeswax leather", 210808477));

+		//? input.add( new Pair<String, Integer>("mens sperry topsider shoes", 210809238));

+		 input.add( new Pair<String, Integer>("mens giorgio brutini shoes italian calf", 210809508));

+

+		input.add( new Pair<String, Integer>("halo portable backup battery", 1640825398));

+input.add( new Pair<String, Integer>("kenwood pkgmp18 cd receiver  coaxial speakers",1642712915));

+input.add( new Pair<String, Integer>("element ultraslim hdtv",1643167865));

+input.add( new Pair<String, Integer>("westinghouse  dled hdtv black",1641930013));

+input.add( new Pair<String, Integer>("boss audio receiver speaker package system",1643532459));

+input.add( new Pair<String, Integer>("kenwood  cd receiver coaxial speakers bundle",1646566070));

+input.add( new Pair<String, Integer>("element electronics lcd tv black ",1637163018));

+input.add( new Pair<String, Integer>("stunt copter rechargeable battery pack",1636937811));

+input.add( new Pair<String, Integer>("element led ultraslim hdtv  soundbar",1637572596));

+input.add( new Pair<String, Integer>("boss  receiver speaker package system bundle",1646566067));

+input.add( new Pair<String, Integer>("coby  hd tv",1638746307));

+input.add( new Pair<String, Integer>("vizio  diag led smart hdtv",1660162001));

+input.add( new Pair<String, Integer>("sony dock for ipad ipod and iphone",1646826284));

+input.add( new Pair<String, Integer>("vizio  led  ultraslim hdtv",1642018249));

+input.add( new Pair<String, Integer>("lcd kula tv multimedia player",1640265845));

+

+input.add(new Pair<String, Integer>("liz and co alex tall leather boots",1630836375));

+input.add( new Pair<String, Integer>("total girl silvia sequin moccasin", 1630828314));

+input.add( new Pair<String, Integer>("new england patriots new era nfl sport sideline knit", 1588531904));

+input.add( new Pair<String, Integer>("betseyville sequin backpack", 1630825375));

+input.add( new Pair<String, Integer>("the north face womens osito jacket mojito", 1639791775));

+input.add( new Pair<String, Integer>("misty harbor raincoat trench removable liner", 903542613));

+input.add(new Pair<String, Integer>("ae womens camo jacket ", 1229070780));

+input.add(new Pair<String, Integer>("indianapolis colts sideline knit", 1588531896));

+input.add(new Pair<String, Integer>("b o c korah boot", 1622401738));

+input.add(new Pair<String, Integer>("adidas mens speed cut track suit", 920744865));

+input.add(new Pair<String, Integer>("liz and co lulu zipper boots", 1630836380));

+input.add(new Pair<String, Integer>("black navy  lightweight oxford shoes", 906123996));

+input.add(new Pair<String, Integer>("liz and co farley tall boots", 1639960280));

+input.add(new Pair<String, Integer>("call it spring karpin  pullon boots", 1629938981));

+input.add(new Pair<String, Integer>("ugg australia bailey bow boots", 1594029054));

+input.add(new Pair<String, Integer>("dream chasers  jacket", 1631247949));

+input.add(new Pair<String, Integer>("guess military  tiewaist coat", 1629993909));

+input.add(new Pair<String, Integer>("madden girl allstaar womens zip boots", 1581506993));

+input.add(new Pair<String, Integer>("michael womens shoes", 1590598743));

+input.add(new Pair<String, Integer>("sonoma life style suede midcalf boots women", 1617302927));

+

+		input.add(new Pair<String, Integer>("absolute pnf300 power noise filterground loop isolator with adjustable controls", 1521965454));

+		input.add(new Pair<String, Integer>("sennheiser ie8 stereo earbuds", 211969101));

+		input.add(new Pair<String, Integer>("sanus vlmf109 motorized full motion mount for tvs 37 60 up to 110 lbs", 214893385));

+		input.add(new Pair<String, Integer>("s2fmcy003 earset stereo earbud binaural open miniphone black", 214972916));

+		input.add(new Pair<String, Integer>("boconi bags and leather bryant safari bag carry on luggage brown", 1646568995));

+		input.add(new Pair<String, Integer>("diesel derik pant jyt mens pajama gray", 1645725530));

+		input.add(new Pair<String, Integer>("sole society gina sandal", 1633021283));

+		input.add(new Pair<String, Integer>("toms bimini stitchout slipon women", 1633012540));

+		input.add(new Pair<String, Integer>("the north face womens p r tka 100 microvelour glacier 14 zip tnf blackjk3 medium", 1618022193));

+		input.add(new Pair<String, Integer>("robert graham manuel dress shirt mens long sleeve button up blue", 1631119485));

+

+		input.add(new Pair<String, Integer>("b o c leesa", 1584193288));

+			input.add(new Pair<String, Integer>("blair stirrup pants", 1525621516));

+			input.add(new Pair<String, Integer>("donna karan shirtdress", 1463793963));

+			input.add(new Pair<String, Integer>("columbia sportswear terminal tackle shirt", 1661238030));

+			input.add(new Pair<String, Integer>("carters jersey pajamas", 1573999243));

+			input.add(new Pair<String, Integer>("vince camuto dena", 1626272001));

+			input.add(new Pair<String, Integer>("pistil hudson knit hats", 1660874149));

+			input.add(new Pair<String, Integer>("naturalizer trinity wide shaft womens zip", 1569191459));

+			input.add(new Pair<String, Integer>("bare traps chelby womens sandals", 1513387756));

+			input.add(new Pair<String, Integer>("overland storage hard drive 1 tb hotswap", 212107374));

+			input.add(new Pair<String, Integer>("humminbird indash depth finder", 1616650484));

+			input.add(new Pair<String, Integer>("grepsr800 gre dig scanner", 215723895));

+			input.add(new Pair<String, Integer>("humminbird kayak transducer", 215392426));

+			input.add(new Pair<String, Integer>("garmin nuvi suction cup mount ", 215728710));

+			input.add(new Pair<String, Integer>("crosley radio black", 215662289));

+

+		    input.add(new Triple<String, Integer, String >("avaya ip telephone", 1440488008, "lucent phone system"));

+			input.add(new Triple<String, Integer, String>("clarks trolley womens shoes", 1581854074, "clark womens shoes"));

+			input.add(new Triple<String, Integer, String>("mens evans shoes imperial deer", 210808400, "lb evans slippers"));

+			input.add(new Triple<String, Integer, String>("ugg classic bow shorty gloves", 1665094898, "leather gloves women"));

+			input.add(new Triple<String, Integer, String>("jumping beans man tee baby", 1667155332, "jumping beans clothing"));

+			input.add(new Triple<String, Integer, String>("asics mens shoes", 1630208773, "asics mens running shoes"));

+			input.add(new Triple<String, Integer, String>("oakley hoodie mens fleece", 1656661466, "hoodies for men"));

+			input.add(new Triple<String, Integer, String>("usb sound control digital voice recorder", 1654662662, "digital voice recorder with usb"));

+			input.add(new Triple<String, Integer, String>("motorola bluetooth headset", 215376254, "motorola oasis bluetooth headset"));

+			input.add(new Triple<String, Integer, String>("sony sound bar home theater system", 215450833, "sony sound bar"));

+			input.add(new Triple<String, Integer, String>("jvc full hd everio camcorder", 1664479999, "jvc everio camcorder"));

+		 */

+		

+		 input.add(new Triple<String, Integer, String>("dr martens beckett laceup boots", 1651452641, "doc martin shoes"));

+		 input.add(new Triple<String, Integer, String>("pioneer cd changer",204654672, "pioneer cd player"));

+		 input.add(new Triple<String, Integer, String>("tablet handler strap and desk mount", 1634326303, "tablet holder"));

+		 input.add(new Triple<String, Integer, String>("sockwell loden womens overthecalf socks", 1644572708, "compression stockings, support stockings"));

+		 input.add(new Triple<String, Integer, String>("nike eclipse womens shoes", 1657807048, "nike eclipse ii women s shoe"));

+		 input.add(new Triple<String, Integer, String>("cherokee workwear womens scrub pant black stall",211643295, "cherokee workwear scrubs"));

+		 input.add(new Triple<String, Integer, String>("columbia sportswear jacket ", 1667381935, "columbia omni heat"));

+		 input.add(new Triple<String, Integer, String>("adidas adipure jacket", 1040124787, "adidas track jacket"));

+		 input.add(new Triple<String, Integer, String>("clarks may orchid womens shoes", 1585805688, "clarks loafers"));

+		 input.add(new Triple<String, Integer, String>("levis pants empire blue", 1670283141, "skinny jeans for guys"));

+		 input.add(new Triple<String, Integer, String>("nike jordan black cat tee", 1653598764, "jordan black cat"));

+		 input.add(new Triple<String, Integer, String>("obermeyer womens kassandra down coat", 1670629180, "down winter coats"));

+/*

+		 input.add(new Triple<String, Integer, String>("paramax  surround sound", 835422569, "paramax im3"));

+		 input.add(new Triple<String, Integer, String>("mia quincy wedge", 1285886230, "mia quincy wedge"));

+		 input.add(new Triple<String, Integer, String>("able planet headphones", 1648522886, "able planet nc210g"));

+		 input.add(new Triple<String, Integer, String>("samsung replacement lamp", 695793593, "lamp code bp96"));

+		 input.add(new Triple<String, Integer, String>("paul green emerson boot castagno", 1313967918, "paul green emerson boot"));

+		 input.add(new Triple<String, Integer, String>("bandolino caresse boots", 1448643623, "bandolino caresse boots"));

+		 input.add(new Triple<String, Integer, String>("nine west modiley", 1365998968, "nine west modiley"));

+		 input.add(new Triple<String, Integer, String>("converse chuck taylor  bisay", 1555900934, "turquoise chuck taylors"));

+		 input.add(new Triple<String, Integer, String>("gentle souls bay leaf flats", 1436175162, "gentle souls bay leaf"));

+		 input.add(new Triple<String, Integer, String>("sauce hockey  back hat", 1644440355, "sauce hockey discount code"));

+		 input.add(new Triple<String, Integer, String>("aravon farren oxford shoes", 1644573438, "aravon wef07sh"));

+	*/	 input.add(new Triple<String, Integer, String>("kooba crosby hobo handbags", 1326503038, "kooba crosby"));

+		 input.add(new Triple<String, Integer, String>("bcbgmaxazria sheath dress", 1313949777, "bcbgmaxazria illusion bodice ruched sheath dress"));

+		 input.add(new Triple<String, Integer, String>("billabong boardshorts trunks", 1316823074, "la siesta boardshorts"));

+		 input.add(new Triple<String, Integer, String>("mootsies tootsies boot", 1503727310, "mootsies tootsies draker"));

+		 input.add(new Triple<String, Integer, String>("nine west bootie", 1503730060, "nine west drina"));

+		 input.add(new Triple<String, Integer, String>("playtex support cotton ", 1331026244, "playtex t723"));

+		 input.add(new Triple<String, Integer, String>("fossil morgan satchel taupe", 1355165745, "fossil morgan satchel"));

+		 input.add(new Triple<String, Integer, String>("katonah womens boots brown", 1420057844, "boc katonah boots"));

+		 input.add(new Triple<String, Integer, String>("boot cut jeans supernova", 1363356262, "levis 527 supernova"));

+		 input.add(new Triple<String, Integer, String>("steve madden buckie boot", 1313965918, "steve madden buckie boot"));

+		 input.add(new Triple<String, Integer, String>("charlies horse tshirt", 1428490587, "charlie s horse shirt"));

+		 input.add(new Triple<String, Integer, String>("igloo little playmate ice chest", 205421625, "igloo little playmate"));

+		 input.add(new Triple<String, Integer, String>("mark nason boot", 1313951044, "mark nason rudd"));

+

+

+

+	}

+

+	public static void main(String[] args){

+		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/relevanceEngine/src/test/resources");

+		ReviewBuilderRunner r = new ReviewBuilderRunner();

+		WebPageReviewExtractor extractor = new WebPageReviewExtractor("C:/workspace/relevanceEngine/src/test/resources");

+		for(Triple query_ID : r.input ){

+			String query = (String) query_ID.getFirst();

+			List<String> res = extractor.formReviewsForAProduct(query);

+

+			ProfileReaderWriter.writeReportListStr(res, "formedReviewSentences"+ query +".csv");

+		}

+

+

+

+	}

+}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
new file mode 100644
index 0000000..ebf42d7
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
@@ -0,0 +1,137 @@
+package opennlp.tools.apps.review_builder;

+

+import java.util.List;

+

/**
 * Plain data holder describing a single mined product review: identifiers, rating,
 * pros/cons text, source URL, title, body, a relevance score, and the sentence lists
 * derived from the original page text.
 */
public class ReviewObj {

	long bpid;                          // business/product group id
	long pid;                           // product id
	float rating;                       // star rating parsed from the page
	String pros;
	String cons;
	String url;                         // page the review was mined from
	String title;
	String review;                      // full review body
	String keywordsName;                // short keyword form of the product name
	float score;
	String[] origSentences;             // raw sentences extracted from the page
	String[] featurePhrases;            // product-feature bullet items

	List<String> originalizedSentences; // rewritten ("originalized") sentences
	List<String> sentimentPhrases;      // noun phrases that carry sentiment words

	/** Creates an empty review object; populate it through the setters. */
	public ReviewObj() {
	}

	/** Creates a review object pre-filled with the core review attributes. */
	public ReviewObj(long bpid, long pid, float rating, String pros, String cons,
			String url, String title, String review, float score) {
		this.bpid = bpid;
		this.pid = pid;
		this.rating = rating;
		this.pros = pros;
		this.cons = cons;
		this.url = url;
		this.title = title;
		this.review = review;
		this.score = score;
	}

	public List<String> getSentimentPhrases() {
		return sentimentPhrases;
	}

	public void setSentimentPhrases(List<String> sentimentPhrases) {
		this.sentimentPhrases = sentimentPhrases;
	}

	public String[] getOrigSentences() {
		return origSentences;
	}

	public void setOrigSentences(String[] sentences) {
		this.origSentences = sentences;
	}

	public List<String> getOriginalizedSentences() {
		return originalizedSentences;
	}

	public void setOriginalizedSentences(List<String> originalizedSentences) {
		this.originalizedSentences = originalizedSentences;
	}

	public String[] getFeaturePhrases() {
		return featurePhrases;
	}

	public void setFeaturePhrases(String[] featurePhrases) {
		this.featurePhrases = featurePhrases;
	}

	public long getBpid() {
		return bpid;
	}

	public void setBpid(long bpid) {
		this.bpid = bpid;
	}

	public long getPid() {
		return pid;
	}

	public void setPid(long pid) {
		this.pid = pid;
	}

	public float getRating() {
		return rating;
	}

	public void setRating(float rating) {
		this.rating = rating;
	}

	public String getPros() {
		return pros;
	}

	public void setPros(String pros) {
		this.pros = pros;
	}

	public String getCons() {
		return cons;
	}

	public void setCons(String cons) {
		this.cons = cons;
	}

	public String getUrl() {
		return url;
	}

	public void setUrl(String url) {
		this.url = url;
	}

	public String getTitle() {
		return title;
	}

	public void setTitle(String title) {
		this.title = title;
	}

	public String getReview() {
		return review;
	}

	public void setReview(String review) {
		this.review = review;
	}

	public float getScore() {
		return score;
	}

	public void setScore(float score) {
		this.score = score;
	}

	public String getKeywordsName() {
		return this.keywordsName;
	}

	public void setKeywordsName(String kw) {
		keywordsName = kw;
	}
}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
new file mode 100644
index 0000000..c4bebb1
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
@@ -0,0 +1,59 @@
+package opennlp.tools.apps.review_builder;

+

+import java.util.HashMap;

+import java.util.List;

+import java.util.Map;

+

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+

+public class SentenceBeingOriginalized {

+	private Map<String, String> sentKey_value= new HashMap<String, String>();

+	private String sentence;

+	private List<List<ParseTreeChunk>> groupedChunks;

+	

+	

+	

+	public Map<String, String> getSentKey_value() {

+		return sentKey_value;

+	}

+

+

+

+	public void setSentKey_value(Map<String, String> sentKey_value) {

+		this.sentKey_value = sentKey_value;

+	}

+

+

+

+	public String getSentence() {

+		return sentence;

+	}

+

+

+

+	public void setSentence(String sentence) {

+		this.sentence = sentence;

+	}

+

+

+

+	public List<List<ParseTreeChunk>> getGroupedChunks() {

+		return groupedChunks;

+	}

+

+

+

+	public void setGroupedChunks(List<List<ParseTreeChunk>> groupedChunks) {

+		this.groupedChunks = groupedChunks;

+	}

+

+

+

+	public SentenceBeingOriginalized(Map<String, String> sentKey_value,

+			String sentence, List<List<ParseTreeChunk>> groupedChunks) {

+		super();

+		this.sentKey_value = sentKey_value;

+		this.sentence = sentence;

+		this.groupedChunks = groupedChunks;

+	}

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
new file mode 100644
index 0000000..a9c94dc
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
@@ -0,0 +1,401 @@
+package opennlp.tools.apps.review_builder;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.HashSet;

+import java.util.List;

+

+import org.apache.commons.lang.StringUtils;

+

+import opennlp.tools.apps.relevanceVocabs.PhraseProcessor;

+import opennlp.tools.apps.relevanceVocabs.SentimentVocab;

+import opennlp.tools.apps.relevanceVocabs.SynonymListFilter;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+

+public class SentenceOriginalizer {

+	private String[] sents; 

+	private SentenceBeingOriginalized[] sentenceBeingOriginalized;

+	public List<String> formedPhrases = new ArrayList<String>();

+

+	private MachineTranslationWrapper rePhraser = new MachineTranslationWrapper();

+	private SentimentVocab sVocab = SentimentVocab.getInstance();

+	PhraseProcessor pProc = new PhraseProcessor();

+	SynonymListFilter filter = null;

+	private List<String> verbsShouldStayNoSubstition = Arrays.asList(new String[]{

+			"might", "can", "power", "bonk", "screw", "victimization", "victimize", "victimised", "victimized", "victimise",

+			"hump", "sluttish", "wanton"

+	});

+

+	public SentenceOriginalizer(String[] ss){

+		sentenceBeingOriginalized = new SentenceBeingOriginalized[ss.length];

+		for(int i= 0; i< ss.length; i++){

+			//sentenceBeingOriginalized[i] = new  SentenceBeingOriginalized()

+		}

+	}

+

+	public SentenceOriginalizer(String dir){

+		filter = new  SynonymListFilter(dir);

+	};

+

+	public String[] getSents() {

+		return sents;

+	}

+

+	public void setSents(String[] sents) {

+		this.sents = sents;

+	}

+

+	

+

+	private void substituteProsCons(){

+		for(int i = 0; i< sents.length; i++){

+			if (sents[i]==null)

+				continue;

+

+			sents[i] = sents[i].replace("...", " ").replace("..", " ");

+

+			if (sents[i].startsWith("Pros")){

+				sents[i]="";

+				sents[i+1] = "I liked that "+ sents[i+1];

+			}

+

+			if (sents[i].startsWith("Cons")){

+				sents[i]="";

+				sents[i+1] = "What I did not like was that "+ sents[i+1];

+			}

+		}

+	}

+

+	private void insertProductNameForRefs(String prodName){

+		prodName = prodName.toLowerCase();

+		prodName = StringUtils.trim(prodName);

+		

+		for(int i = 0; i< sents.length; i++){

+			if (sents[i]==null)

+				continue;

+			String snt = sents[i];

+			String line  = snt.replace(" it ", " "+prodName+" ");

+			if (line.equals(snt)){

+				line = snt.replace(" this ", " "+prodName+" ");

+			}

+

+			sents[i]=line;

+		}

+	}

+	

+	private void insertProductNameForRefsFullNameKeywords(String prodName, String keywordsName){

+		prodName = StringUtils.trim(prodName.toLowerCase());

+				

+		for(int i = 0; i< sents.length; i++){

+			double flag = Math.random();

+			String prodNameCurr = null;

+			if (flag>0.4)

+				prodNameCurr = prodName;

+				else

+					prodNameCurr = keywordsName;

+					

+			if (sents[i]==null)

+				continue;

+			String snt = sents[i];

+			String line  = snt.replace(" it ", " "+prodNameCurr+" ");

+			if (line.equals(snt)){

+				line = snt.replace(" this ", " "+prodNameCurr+" ");

+			}

+

+			sents[i]=line;

+		}

+	}

+

+	private void turnTenseToPast(){

+		for(int i = 0; i< sents.length; i++){

+			if (sents[i]==null)

+				continue;

+			sents[i] = sents[i].replace("to do ", "to d_o_ ");

+			sents[i]=sents[i].replace(" is ", " was ").replace(" done ", " was done ").replace(" are ", " were ")

+					.replace(" do ", " did ").replace(" yes, ", " true, ");

+			sents[i]=sents[i].replace("somebody ", "one ").replace("would like", "would want").replace("I am", "users are");

+			sents[i]=sents[i].replace("my wife", "my spouse").replace("I would definitely buy ", "I wouldn't hesitate to buy ")

+					.replace("I haven't tried ", "I did not actually have a chance to try ");

+			sents[i]=sents[i].replace("they arrived ", "they were shipped to my residence ").replace(" ive ", " I have ")

+					.replace("We have ", "I have already tried and written a review on ");

+			

+			sents[i] = sents[i].replace( "to d_o_ ", "to do ");

+	

+			if (sents[i].startsWith("We "))

+				sents[i] = sents[i].replace("We ", "I know they ");

+			if (sents[i].startsWith("You "))

+				sents[i] = sents[i].replace("You ","I believe one can ");

+			

+			if (sents[i].startsWith("Well "))

+				sents[i] = sents[i].replace("Well ","I would state that ");

+

+		}

+	}

+

+	private void turnCounterFactual(){

+		for(int i = 0; i< sents.length; i++){

+			if (sents[i]==null)

+				continue;

+			sents[i]=sents[i].replace("however ", "b1ut1 ").replace("but ", "however ")

+					.replace("b1ut1 ", "but ").replace("I say", "I repeat").

+					replace("same way", "same manner").replace(" you ", " somebody ").replace(" can ", " might ");

+

+		}

+	}

+

+	public void substituteSynonymVerbs(){

+		for(int i = 0; i< sents.length; i++){

+			String line = sents[i];

+			List<List<ParseTreeChunk>> ps = pProc.getPhrasesOfAllTypes(line);

+			if (ps==null || ps.size()<2)

+				continue;

+

+			List<ParseTreeChunk> vps = ps.get(1);

+

+			extractNounPhrasesWithSentiments(ps.get(0));

+

+			line = substituteSentimentSynonyms(line, ps);

+

+			if (vps==null)

+				continue;

+			boolean bVerbRule = false;

+			if (vps.size()==1)

+				line = rePhraser.rePhrase(line);

+			else {

+				if (vps.size()>1)

+

+					for (ParseTreeChunk v: vps){

+						String verbLemma = v.getLemmas().get(0);

+						String newVerb = filter.getSynonym(verbLemma);

+						if (newVerb!=null && newVerb.length()>3 && verbLemma.length()>3 // both old and new words should be above 3

+								&& !newVerb.endsWith("ness") // empirical rule

+								&& !verbsShouldStayNoSubstition.contains(verbLemma) &&

+								!verbsShouldStayNoSubstition.contains(newVerb)	){

+							line = line.replace(verbLemma+" ", newVerb+" "); 	

+							line = line.replace(" "+verbLemma, " "+newVerb); 

+							System.out.println("Synonym for verb substitution: "+verbLemma + "->"+newVerb);

+							bVerbRule = true;

+						}

+					}

+				if (!bVerbRule && vps.size()==2 && Math.random()>0.8) // no other means of originalization worked, so do inverse translation

+					line = rePhraser.rePhrase(line);

+			}

+			sents[i]=line;

+

+		}

+	}

+

+

+	private String substituteSentimentSynonyms(String line,

+			List<List<ParseTreeChunk>> ps) {

+		List<ParseTreeChunk> nounPhrases = ps.get(0);

+		if (nounPhrases.size()<1)

+			return line;

+

+		for(ParseTreeChunk ch: nounPhrases){

+			List<String> lemmas = ch.getLemmas();

+			for(String oldSentim:lemmas){

+				if ( sVocab.isSentimentWord(oldSentim.toLowerCase())) {

+					String newSentim = filter.getSynonym(oldSentim);

+					if (newSentim!=null && newSentim.length()>3 && !verbsShouldStayNoSubstition.contains(newSentim)

+							&& !verbsShouldStayNoSubstition.contains(oldSentim)){

+						line = line.replace(oldSentim+" ", newSentim+" "); 	

+						line = line.replace(" "+oldSentim, " "+newSentim);

+						System.out.println("Synonym for sentiment substitution: "+oldSentim + "->"+newSentim);

+					}

+				}

+			}

+		}

+

+		return line;

+	}

+

+	private void extractNounPhrasesWithSentiments(List<ParseTreeChunk> list) {

+		List<String> phrasesWithSentiments = new ArrayList<String>();

+		for(ParseTreeChunk ch: list){

+			List<String> lemmas = ch.getLemmas();

+			for(String l:lemmas){

+				if ( sVocab.isSentimentWord(l.toLowerCase())) {

+					phrasesWithSentiments.add(lemmas.toString());

+				}

+			}

+		}

+		formedPhrases.addAll(phrasesWithSentiments);

+	}

+

+	public String[] convert(String[] sents, String name, String keywordsName){

+		name = name.replace("Amazon.com:" , "").replace("Amazon.com" , "").replace("..." , " ")

+				.replace("Customer Reviews: ", "");

+

+		this.sents = sents;

+		try {

+			substituteProsCons();

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+		try {

+			//insertProductNameForRefs(name);

+			insertProductNameForRefsFullNameKeywords(name, keywordsName);

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+		try {

+			turnTenseToPast();

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+		try {

+			turnCounterFactual();

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+

+		try {

+			substituteSynonymVerbs();

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+		// remove dupes

+		this.formedPhrases = new ArrayList<String>(new HashSet<String>(this.formedPhrases));

+

+		return sents;

+

+	}

+

+	public static void main(String[] args){

+		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/productsearchfe/src/test/resources");

+		SentenceOriginalizer orig = new SentenceOriginalizer("src/test/resources");

+		String[] sents = new String[] {

+				"Leave the bulky stabilization rig at home and take smooth handheld videos from any angle thanks to Optical SteadyShot image stabilization with Active Mode."

+				//"Other then that, it works well, and the chain stops instantly when you let go of the trigger, or push the safety bar."	

+		};

+		String[] res = orig.convert(sents, "VIP Product", "vv propro");

+		System.out.println(Arrays.asList(res));

+	}

+

+}

+

+/*

+ * 1.	Some Amazon specific text keeps showing up so we might want to put a filter on recurring phrases such as:

+1.	Unlimited Free Two-Day Shipping

+2.	View Larger

+3.	What's in the box

+2.	Period/stop added to punctuation marks: 

+1.	!.

+2.	?.

+3.	:.

+4.	.". 

+5.	-.

+3.	Saw some HTML formatting occasionally, such as <em></em>

+4.	Redundancy with choice phrases appearing multiple times in a single review

+5.	Specific issue with words being added at the end of the letter "s," creating nonsensical words:

+1.	It mispronouncesulphur virtually every caller'sulphur name in waysulphur that..

+2.	In fact, it'southward a rare feature that I recollect southwardhould be commonplace in any southwardurround receiver.

+6.	Adding -iness to make nonsensical words: mightinessiness, powerinessiness

+

+ */

+

+

+

+/*

+ * After using a gasoline powered chain saw for many years had to stop using because of dust and fumes made my copd worse this electric saw is great has surprising amount of power without the gas fumes..

+Nice chainsaw, works great, well built.

+The instant-stop chain is very safe, but a bit abrupt when releasing the trigger.

+I wish there were a half-way release that turned off the motor but did not engage the instant stop break.

+Pros .

+inexpensive compared to gas chainsaws, lightweight, cuts with good power, will do most anything that a gas chainsaw will do. like the automatic chain oiler and easy tension adjustment.

+Cons .

+If you are cutting larger branches and trees, a gas is better.

+However this will work on 8-10" size very well.

+Bought this McCulloch electric chainsaw to replace an old Craftsman electric chain saw. (the Craftsman got ran over by a car).

+Compared to my old Craftsman electric chain saw, the McCulloch seems to be wonderful.

+The first test was to cut a 16" diameter oak branch, cut thru it like hot butter.

+The "no tools needed" chain tensioner seems to be a good design..

+Is a good saw, however it came with the handle that wraps abound the left side of the saw was broken.

+The box looked good, but the saw itself was damaged.

+However, because I had a lot of tree damage in my yard, and more storms coming, I made due with it.

+Other then take, it works well, and the chain stops instantly when you let go of the trigger, or push the safety bar.

+stump w/ this E-saw.

+It keeps doing a super job.

+In terms of a replacement chain, make sure to get the Oregon S-54 (S is style of cutter, 54 means 54 links).

+The MC literature suggests use of a S-55, but it is TOO Long and will soon wind up in the trash can.

+ALSO, the MC factory installed gasket for the lube oil, between the saw and chain bar is total trash.

+When changing out the chain, pull the bar off, pull out and throw away the MC factory gasket, clean the bar and apply a piece of electrical tape, using a knife to cut out a pathway for oil to the bar.

+Will lube perfectly now!

+This is the second electric McCilloch 16" chain saw that I have owned and it is even better and more powerful than the first.

+I still use a gas chain saw out in the woods on my property but I usually do just enough cutting with it to get the logs on a trailer so I can take them bach to my shed to cut them up and save the sawdust for on my garden and flower beds as mulch.

+This electric is lighter and more powerful than my gas saw and makes short work of even 14" well-seasoned oak and poppel logs with a minimum of effort.

+I highly recommend this sae for anyone who has an electric outlet close enough to their cutting station.

+Bought this McCulloch electric chainsaw to replace an old Craftsman electric chain saw. (the Craftsman got ran over by a car).

+Compared to my old Craftsman electric chain saw, the McCulloch seems to be wonderful.

+The first test was to cut a 16" diameter oak branch, cut thru it like hot butter.

+The "no tools needed" chain tensioner seems to be a good design (design seems to be similar to that used by other manufacturers).

+Assuming. this thing keeps cutting/running the same way in the long term, then we have a winner. (note. all the electric chain saws come with cheap looking chains with cutting blades spaced very widely apart along the chain.

+To be ready for the bigger cutting jobs I sprung for a new $18 Oregon s-54 16" chain.).

+Update .

+Having used both gas and electric chain saws for more years than I care to remember, this little beauty is far more than I'd hoped for.

+Yes, it requires a cord to function and, without a handy "Current Bush", serves no useful purpose, but for trimming trees or cutting up firewood in a yard it beats H*** out of fighting the frustration when a gas saw refuses to start or remain running.

+I have another 14" electric MuCulloch along with a 16" gas Homelite and consider this to be a combination of the best qualities of both the others, the convenience of the small electric and the greater cutting ability of the gas powered Homelite.

+This little beauty appears to have as much power as the gas saw without the hassle of mixing fuel and the ongoing maintenence associated with it and cuts far faster than it's small electric brother.

+If I was forced to have a single chainsaw, in my present position(Pleanty of fire wood handy, just in need of cutting to the proper dimensions), this baby would be may choice without any douts or reservations.

+Ordered the Mcculloch 16inch electric chain saw to do some serious pruning of trees around the house which had severe frost damage.

+Although an electric chain saw, it cut through trees up to eight inches like a hot knife through butter.

+Not once did i have problems in two days of cutting.

+The big pros I noticed while using is realtively lightweight for a chainsaw and you can hold in one hand to use.

+Once you release the power switch, the chainsaw chain immediately stops!.

+This is a good thing as it keeps body parts attached.

+One nifty thing about this chainsaw is the chain tightener is outstanding once you figure how it works.

+No tools, just move the knobs and tighten, couldn't be easier and definitely beats hunting down a wrench to tighten.

+Only con is being electric, you have to watch the power cord.

+Very easy to hit extension cord if not careful.

+But it wakes you up when you are tired from your yard work.

+Let a good buddy borrow it and he was also impressed with the ease of use.

+Outstanding for jobs around you house, two thumbs up!

+The McCulloch3516F chainsaw puts an end to my problem of gas engines that don't start when I really need them to.

+I have been cutting out maple branches this summer from trees with verticillium wilt . branches up to 8 inches are no problem at all.

+This saw has an impressive safety feature. a chain brake that stops the saw instantly as soon as the trigger is released or the safety guard is pushed forward.

+I mean instantly. there is a loud clunk as the brake engages and the chain stops dead.

+This takes some getting used to, as the brake engages if you wiggle your finger while running the chainsaw, causing the chain to start and stop.

+There is no concept of "revving" the chain.

+It also means there is no "idle" speed for the chain.

+It is on or off.

+And that is safe.

+You can also consider it a safety feature that the chain has fewer cutting teeth than my gas powered saw chains.

+I don't know the relative operating RPMs .

+if they are about the same, this saw seems to cut a little slower, and fewer teeth would do that.

+This makes the saw less aggressive and less likely to pull out of your control.

+I like that.

+As I say, the cutting ability is well in excess of the 8" branches I've been dealing with.

+The oil fill is conveniently located so that you don't have to tip the saw to fill it, although a small funnel is helpful.

+Overall, I am very happy with this chainsaw.

+The saw works very well, overall.

+I have some minor complaints:.

+1.

+The chain drive gear cover requires a Phillips screwdriver to get the cover off.

+This is just dumb !.

+There's no good reason why it shouldn't have a thumbscrew similar to, but smaller than the chain tensioner thumbscrew.

+As someone pointed out, the chain gear area regularly gets clogged with oily sawdust that needs to be cleaned out.

+I can't figure out a good excuse for this design mistake.

+2 .

+The "instant chain stop" feature woks well, but the remaining motor drivetrain makes a loud howling screech until the motor actually stops.

+Makes me think there might be something wrong with the drivetrain.

+The saw seems to work well, though.

+Time will tell.

+3 .

+The oil filler neck is titled to the side, not vertical to the saw when placed on level ground.

+This makes viewing the oil stream going in and the rising oil level unnecessarily difficult.

+This is another obvious design mistake.

+4 .

+This is my first chainsaw, but it seems the bar oil reservoir is ridiculously small !.

+I have to refill it every 10 minutes of use.

+After reading other reviews for this model I immediately threw out the stock chain without ever using it and replaced it with an Oregon model S52 chain (dual chains is model ST52).

+Note that it fits fine although it is advertized as a 14 inch chain and this saw is advertized to be 16 inches.

+Go figure..

+Also, after reading about the risk of burning up the motor due to using a too lightweight extension cord, I bought a "US Wire 65100 12/3 100-Foot SJTW Orange Heavy Duty Extension Cord".

+It's heavy, alright !

+ */

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java
new file mode 100644
index 0000000..467942d
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java
@@ -0,0 +1,21 @@
+package opennlp.tools.apps.review_builder;

+

+import java.util.ArrayList;

+import java.util.List;

+

+import opennlp.tools.similarity.apps.BingQueryRunner;

+import opennlp.tools.similarity.apps.HitBase;

+

+public class URLsWithReviewFinderByProductName {

+BingQueryRunner search = new BingQueryRunner();

+	

+	public List<String> findFacebookURLByNameAndZip(String name){

+		List<HitBase> foundFBPages = search.runSearch(name, 20);

+		List<String> results = new ArrayList<String>();

+		for(HitBase h: foundFBPages){

+			results.add(h.getUrl());

+		}

+		return results;

+	}

+	

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
new file mode 100644
index 0000000..f9fb43b
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
@@ -0,0 +1,444 @@
+package opennlp.tools.apps.review_builder;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.Collections;

+import java.util.List;

+

+import opennlp.tools.jsmlearning.ProfileReaderWriter;

+import opennlp.tools.parse_thicket.apps.WebPageExtractor;

+import opennlp.tools.similarity.apps.HitBase;

+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;

+import opennlp.tools.similarity.apps.utils.Utils;

+import opennlp.tools.textsimilarity.TextProcessor;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

+import org.apache.commons.lang.StringUtils;

+

+public class WebPageReviewExtractor extends WebPageExtractor {

+	

	// Product-page search manager (not referenced by the methods visible here).
	BingAPIProductSearchManager prodman = new BingAPIProductSearchManager();
	// Rewrites mined sentences so that generated reviews differ from their sources.
	SentenceOriginalizer orig = null;

	/**
	 * @param resourceDir directory with linguistic resources (synonym lists),
	 *                    passed through to the SentenceOriginalizer.
	 */
	public WebPageReviewExtractor(String resourceDir) {
		orig = new SentenceOriginalizer(resourceDir);
	}

+

+	public String[] removeDuplicates(String[] hits)

+	{

+		StringDistanceMeasurer meas = new StringDistanceMeasurer();

+

+		List<Integer> idsToRemove = new ArrayList<Integer>();

+		List<String> hitsDedup = new ArrayList<String>();

+		try

+		{

+			for (int i = 0; i < hits.length; i++)

+				for (int j = i + 1; j < hits.length; j++)

+				{

+					String title1 = hits[i];

+					String title2 = hits[j];

+					if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))

+						continue;

+					if (meas.measureStringDistance(title1, title2) > 0.7)

+					{

+						idsToRemove.add(j); // dupes found, later list member to

+											// be deleted

+					}

+				}

+			for (int i = 0; i < hits.length; i++)

+				if (!idsToRemove.contains(i))

+					hitsDedup.add(hits[i]);

+			if (hitsDedup.size() < hits.length)

+			{

+				System.out.println("Removed duplicates from relevant search results, including "

+					+ hits[idsToRemove.get(0)]);

+			}

+		}

+		catch (Exception e)

+		{

+			System.out.println("Problem removing duplicates from relevant images");

+		}

+

+		return hitsDedup.toArray(new String[0]);

+

+	}

+

+	public ReviewObj extractSentencesWithPotentialReviewPhrases(String url)

+	{

+		ReviewObj reviewObj = new ReviewObj();

+		int maxSentsFromPage= 20;

+		List<String[]> results = new ArrayList<String[]>();

+

+		String downloadedPage = pageFetcher.fetchPage(url, 20000);

+		if (downloadedPage == null || downloadedPage.length() < 100)

+		{

+			return null;

+		}

+

+		String pageOrigHTML = pageFetcher.fetchOrigHTML(url);

+

+		List<String> productFeaturesList = new ArrayList<String> ();

+		String[] productFeatures = StringUtils.substringsBetween(pageOrigHTML, "<li>", "</li>" );

+		if (productFeatures!=null){

+			for(String item: productFeatures ){

+				if (item.indexOf("class")>-1 || item.indexOf("www.")>-1 || item.indexOf("href")>-1)

+					continue;

+				item = item.replace("<span>","").replace("</span>","").replace("<b>","").replace("</b>","");

+				if (item.length()>80 && MinedSentenceProcessor.acceptableMinedSentence(item)==null){

+					System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+item);

+					continue;

+				}

+				productFeaturesList .add(item);

+			}

+		}

+		

+		productFeaturesList = cleanProductFeatures(productFeaturesList);

+		

+		String startArea = StringUtils.substringBetween(pageOrigHTML, "reviewHistoPop", "t of 5 stars");

+		String item =  StringUtils.substringBetween(startArea, "title=\"","ou" );

+		if (item==null){//title="4.0 out of 5 stars" ><span>4.0 out of 5 stars</span>

+			int index = pageOrigHTML.indexOf("of 5 stars\"");

+			startArea = StringUtils.substringBetween(pageOrigHTML, "of 5 stars\"", "of 5 stars");

+			item =  StringUtils.substringBetween(startArea, "<span>","ou" );

+		}

+

+		// if found, process

+		if (item!=null){

+			try {

+				float rating = Float.parseFloat(item);

+				reviewObj.setRating(rating);

+			} catch (NumberFormatException e) {

+				// TODO Auto-generated catch block

+				e.printStackTrace();

+			}

+		}

+		//productFeaturesList .add(item);

+

+		downloadedPage= downloadedPage.replace("     ", "&");

+		downloadedPage = downloadedPage.replaceAll("(?:&)+", "#");

+		String[] sents = downloadedPage.split("#");

+		List<TextChunk> sentsList = new ArrayList<TextChunk>();

+		for(String s: sents){

+			s = s.trim().replace("  ", ". ").replace("..", ".").replace(". . .", " ")

+					.replace(": ", ". ").replace("- ", ". ").

+					replace (". .",".").trim();

+			sentsList.add(new TextChunk(s, s.length()));

+		}

+

+		Collections.sort(sentsList, new TextChunkComparable());

+		String[] longestSents = new String[maxSentsFromPage];

+		int j=0;														// -1 removed

+		for(int i=sentsList.size()-1 -maxSentsFromPage; i< sentsList.size()&& j<longestSents.length; i++){

+			longestSents[j] = sentsList.get(i).text;

+			j++;

+		}

+

+		sents = cleanListOfSents(longestSents);

+		

+		sents = removeDuplicates(sents);

+		sents = verifyEnforceStartsUpperCase(sents);

+

+		reviewObj.setFeaturePhrases(productFeaturesList.toArray(new String[0]));

+		reviewObj.setOrigSentences(sents);

+

+		return reviewObj;

+	}

+

+	private String[] verifyEnforceStartsUpperCase(String[] sents) {

+		for(int i=0; i<sents.length; i++){

+			String s = sents[i];

+			s = StringUtils.trim(s);

+			String sFirstChar = s.substring(0, 1);

+			if (!sFirstChar.toUpperCase().equals(sFirstChar)){

+				s = sFirstChar.toUpperCase()+s.substring(1);

+			}

+			sents[i] = s;

+		}

+			return sents;

+	}

+

+	private List<String> cleanProductFeatures(List<String> productFeaturesList) {

+		List<String> results = new ArrayList<String>();

+		for(String feature: productFeaturesList){

+			if (feature.startsWith("Unlimited Free") || feature.startsWith("View Larger") || feature.startsWith("View Larger") || feature.indexOf("shipping")>0)

+				continue;

+			results.add(feature);

+		}

+		return results;

+	}

+

+	protected String[] cleanListOfSents(String[] longestSents)

+	{

+		float minFragmentLength = 40, minFragmentLengthSpace=4;

+

+		List<String> sentsClean = new ArrayList<String>();

+		for (String sentenceOrMultSent : longestSents)

+		{

+			if (MinedSentenceProcessor.acceptableMinedSentence(sentenceOrMultSent)==null){

+				System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+sentenceOrMultSent);

+				continue;

+			}

+			// aaa. hhh hhh.  kkk . kkk ll hhh. lll kkk n.

+			int numOfDots = sentenceOrMultSent.replace('.','&').split("&").length;

+			float avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;

+			if ( avgSentenceLengthInTextPortion<minFragmentLength)

+				continue;

+			// o oo o ooo o o o ooo oo ooo o o oo

+			numOfDots = sentenceOrMultSent.replace(' ','&').split("&").length;

+			avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;

+			if ( avgSentenceLengthInTextPortion<minFragmentLengthSpace)

+				continue;

+

+			List<String> furtherSplit = TextProcessor.splitToSentences(sentenceOrMultSent);

+			

+			// forced split by ',' somewhere in the middle of sentence

+			// disused - Feb 26 13

+			//furtherSplit = furtherMakeSentencesShorter(furtherSplit);

+			furtherSplit.remove(furtherSplit.size()-1);

+			for(String s : furtherSplit){

+				if (s.indexOf('|')>-1)

+					continue;

+				s = s.replace("<em>"," ").replace("</em>"," ");

+				s = Utils.convertToASCII(s);

+				sentsClean.add(s);

+			}

+		}

+

+		return (String[]) sentsClean.toArray(new String[0]);

+	}

+

+	private List<String> furtherMakeSentencesShorter(List<String> furtherSplit) {

+		int MIN_LENGTH_TO_SPLIT = 80;

+		List<String> results = new ArrayList<String>();

+		for(String sent: furtherSplit) {

+			sent = startWithCapitalSent(sent);

+			int len = sent.length(); 

+			if (len <MIN_LENGTH_TO_SPLIT)

+				results.add(sent);

+			else {

+				try {

+					int commaIndex = StringUtils.indexOf(sent, ',');

+					int lastCommaIndex = StringUtils.lastIndexOf(sent, ',');

+					int splitIndex = -1;

+					if (Math.abs(commaIndex- len/2) > Math.abs(lastCommaIndex- len/2))

+						splitIndex = commaIndex;

+					else

+						splitIndex = lastCommaIndex;

+					if (splitIndex<0)

+						results.add(sent);

+					else {

+						String sent1 = sent.substring(0, splitIndex)+". ";

+						String sent2 = startWithCapitalSent(sent.substring(splitIndex+1));

+						results.add(sent1); results.add(sent2);

+					}

+				} catch (Exception e) {

+					results.add(sent);

+					e.printStackTrace();

+				}

+

+			}

+		}

+		return results;

+	}

+

+	private String startWithCapitalSent(String sent) {

+		String firstChar = sent.substring(0,1);

+		String remainder = sent.substring(1);

+		

+		return firstChar.toUpperCase()+remainder;

+	}

+

+	public List<String> formReviewsForAProduct(String name /*long bpid, String keywordsName*/){

+		ReviewObj reviewObjTotal = null;

+		try {

+			List<HitBase> pagesForAProduct = prodman.findProductByName(name, 1);

+			reviewObjTotal = null; 

+

+			for(HitBase p: pagesForAProduct){

+				ReviewObj reviewObj = 

+						extractSentencesWithPotentialReviewPhrases(p.getUrl());

+				// init with first element

+				if (reviewObjTotal  == null)

+					reviewObjTotal = reviewObj;

+				if (reviewObj==null)

+					continue;

+				String[] afterOriginalization = orig.convert(reviewObj.getOrigSentences(), p.getTitle(), reviewObj.getKeywordsName());

+				reviewObj.setOriginalizedSentences(Arrays.asList(afterOriginalization));

+				reviewObj.setSentimentPhrases(orig.formedPhrases);

+

+				List<String> buf = reviewObjTotal.getSentimentPhrases();

+				if (orig.formedPhrases!=null && orig.formedPhrases.size()>0){

+					buf.addAll(orig.formedPhrases);

+					reviewObjTotal.setSentimentPhrases(buf);

+				}

+

+		/*		buf = reviewObjTotal.getOriginalizedSentences();

+				if (buf!=null && afterOriginalization!=null && afterOriginalization.length>0){

+					List<String> b1 = Arrays.asList(afterOriginalization);

+					List<String> b2 = new ArrayList<String>();

+					b2.addAll(buf);

+					b2.addAll(new ArrayList<String>(b1));

+					reviewObjTotal.setOriginalizedSentences(b2);

+				}

+*/

+			}

+			if (reviewObjTotal==null) return new ArrayList<String>();

+			

+			List<String> textReviews = buildManyReviewTexts(reviewObjTotal);

+

+			

+		/*	String textReview = buildText(reviewObjTotal);

+			try {

+				if (textReview!=null && textReview.length()>60)

+					ser.saveReviewsToDB(textReview, bpid, pagesForAProduct.get(0).getUrl(), pagesForAProduct.get(0).getTitle(),

+							reviewObjTotal.getSentimentPhrases().toString(), reviewObjTotal.getRating());

+			} catch (Exception e) {

+				System.out.println("Database write failed");

+			}

+			*/

+			

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		} 

+		return reviewObjTotal.getOriginalizedSentences();

+	}

+

+	private String buildText(ReviewObj reviewObj) {

+

+		String[] features = reviewObj.getFeaturePhrases();

+		List<String> sentences =reviewObj.getOriginalizedSentences();

+		StringBuffer buf = new StringBuffer();

+		int count = 0;

+		for(String sent:sentences){

+			if (sent!=null)

+				buf.append(sent+" ");

+			if (count%2==0 && count<features.length)

+				if (features[count]!=null){

+					buf.append(features[count]);

+					if (!(features[count].endsWith("!") ||features[count].endsWith("?")||features[count].endsWith("?") 

+							||features[count].endsWith(".\"") ))

+						buf.append(". ");

+				}

+

+			if (count%5==0)

+				buf.append("\n");

+			count++;

+		}

+		return buf.toString();

+	}

+	

+	private List<String> buildManyReviewTexts(ReviewObj reviewObj) {

+

+		String[] features = reviewObj.getFeaturePhrases();

+		List<String> sentences =reviewObj.getOriginalizedSentences();

+		

+		// first count how many sentences

+				int NUM_SENTS_IN_REVIEW = 7;

+				int count=0;

+				for(String sent:sentences){

+					if (sent!=null)

+						count++;

+				}

+		int nReviews = count/NUM_SENTS_IN_REVIEW;

+		if (nReviews<1)

+			nReviews=1;

+		StringBuffer[] bufs = new StringBuffer[nReviews];

+		for(int i=0; i<bufs.length; i++){

+			bufs[i] = new StringBuffer();

+		}

+				

+		count = 0;

+		int currentRevIndex = 0;

+		for(String sent:sentences){

+			if (sent!=null)

+				bufs[currentRevIndex].append(sent+" ");

+			if (count%2==0 && count<features.length)

+				if (features[count]!=null){

+					bufs[currentRevIndex].append(features[count]);

+					if (!(features[count].endsWith("!") ||features[count].endsWith("?")||features[count].endsWith("?") 

+							||features[count].endsWith(".\"") ))

+						bufs[currentRevIndex].append(". ");

+				}

+

+			try {

+				if (bufs[currentRevIndex].toString().split(".").length>4)

+					bufs[currentRevIndex].append("\n");

+			} catch (Exception e) {

+				e.printStackTrace();

+			}

+			

+			count++;

+			currentRevIndex++;

+			if (currentRevIndex>=nReviews)

+				currentRevIndex=0;	

+		}

+		

+		List<String> results = new ArrayList<String>();

+		for(StringBuffer b:bufs){

+			String sent = b.toString().replace("!.","!").replace("?.","?");

+			results.add(sent);

+		}

+		return results;

+	}

+

+	public static void main(String[] args){

+		String resourceDir = "C:/stanford-corenlp/src/test/resources/";

+		ParserChunker2MatcherProcessor proc = ParserChunker2MatcherProcessor.getInstance(resourceDir); 

+			

+		//ProductFinderInAWebPage init = new ProductFinderInAWebPage("C:/workspace/relevanceEngine/src/test/resources");

+

+		WebPageReviewExtractor extractor = new WebPageReviewExtractor(resourceDir);

+		String res1[] = extractor.verifyEnforceStartsUpperCase(new String[]{ "hhhh !", "Klyn mng hghj ."});

+				

+		List<String> res = extractor.formReviewsForAProduct(//"McCulloch 16-Inch 3.5 HP Electric Chain Saw");

+				//	"WORX Electric JawSaw with Extension Handle");

+				//	"Panasonic 2-Line Digital Cordless System", 215200345l);

+				//	"Sport Silver Dial Women", 215475290);

+				//"Rectangle Area Rug", 213885290);

+				//		"40VA Replacement Transformer", 213085391);

+				//		"PSYLLIUM POWDER Food", 213185391);

+				//		"Leighton Toilet Tank", 213285391);

+				//"Samsung Knack U310 Flip Phone", 214495493);

+				//"Panasonic Cordless Phone 2 handsets", 214870820);

+				//"Winegard TV Antenna Pre-Amplifier", 211924499);

+				//"Atlona AT-HD-V18 HDMI Distribution Amplifier", 215162612);

+				//"airport express base station", 211462827);

+				//"denon  Network Streaming A/V Home Theater receiver", 209565926);

+				//"sherwood receiver 400 watts stereo", 211286714);

+				//"multizone music distribution system", 205333526);

+				//"niles zr4", 215104912);

+				//"alpine waterproof marine cd receiver", 215167695);

+				//"sherwood channel receiver dolby", 215116818);

+				//"sherwood lcd tv widescreen hdtv", 215481917);

+				//"multiroom music distribution system", 205333526);

+				//		"fusion ms compact stereo", 215649463); 

+				//"pyle pro speaker", 213265125);

+				// "apple iphone 4g",  213265325);

+				//"sherwood high performance receiver", 215394729);

+				//"sony camera housing", 211960592);

+				//"sony xl2100", 1135329203);

+				//"sony 18 megapixel-digital-camera", 215743208);

+				//"sony m470 microcassette tape recorder", 205828052);

+				//"sony monitor terminal expansion board", 213244217);

+				//"sony cybershot digital-camera", 215743207);

+				//"sony interchangeable lens handycam camcorder", 215153503);

+				//"canon powershot digital camera", 214754207);

+				//"brother ink pageyield yellow", 204743189);

+				// ?? "garmin 2200 gps navigator", 215167480);

+				"halo portable backup battery");

+

+		ProfileReaderWriter.writeReportListStr(res, "formedReviewSentences4.csv");

+

+

+		/*		

+			res=	extractor. extractSentencesWithPotentialReviewPhrases(//"http://www.sitbetter.com/view/chair/ofm-500-l/ofm--high-back-leather-office-chair/");

+		//"http://www.amazon.com/OFM-High-Back-Leather-Integral-Headrest/dp/B002SIW1E0/ref=sr_1_1?ie=UTF8&qid=1353370254&sr=8-1&keywords=OFM-High-Back-Leather-Integral-Headrest");

+		//"http://www.amazon.com/Oregon-511AX-Chain-Grinder-Sharpener/dp/B0000AX0CY/ref=sr_1_4?s=industrial&ie=UTF8&qid=1353373435&sr=1-4&keywords=chain+saws");

+			//			"http://www.amazon.com/Bearing-UCP204-12-Housing-Mounted-Bearings/dp/B002BBIYWM/ref=sr_1_1?s=industrial&ie=UTF8&qid=1353373786&sr=1-1&keywords=pillow+block+bearing");

+			"http://www.amazon.com/ShelterLogic-20--Feet-Auto-Shelter/dp/B001OFNK8O/ref=sr_1_1?s=lawn-garden&ie=UTF8&qid=1353376677&sr=1-1&keywords=shelterlogic+62680+autoshelter+portable+garage+carport");			

+						System.out.println(res);

+		 */			

+

+	}

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
new file mode 100644
index 0000000..0b99fc2
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
@@ -0,0 +1,171 @@
+package opennlp.tools.apps.utils.email;

+

+import java.util.Properties;

+import java.util.regex.Matcher;

+import java.util.regex.Pattern;

+import javax.mail.*;

+import javax.mail.internet.*;

+import javax.activation.*;

+/**

+ * Responsible for sending e-mails through a Gmail SMTP server.

+ * It will be extended to handle arbitrary smtp servers.

+ * @author GaDo

+ *

+ */

+public class EmailSender {

+		private static final long serialVersionUID = 1L;

+		private static final String mailboxAddress="bgalitsky@hotmail.com";

+

+		public  boolean sendMail(String smtp, String user, String pass, InternetAddress from, InternetAddress[] to, InternetAddress[] cc, InternetAddress[] bcc, String subject, String body, String file) throws Exception

+		{

+			boolean correct=true;

+			try

+			{							

+				//Eliminate spaces from addresses

+				if(from!=null){		

+					from.setAddress(from.getAddress().replace(" ","").trim());		}

+					to = eliminateSpaces(to);

+					cc = eliminateSpaces(cc);

+					bcc = eliminateSpaces(bcc);

+					correct = validateAddress(from,to,cc,bcc);

+				

+				if(correct){

+					//Configuracio of the properties -> smtp

+					Properties props = new Properties();

+					props.put("mail.smtp.host", smtp);

+					props.put("mail.smtp.auth", "true");

+					props.put("mail.smtp.port", "587");

+					props.put("mail.smtp.starttls.enable", "true");

+					Authenticator auth = new SMTP_Authenticator	(user, pass);

+					Session session = Session.getInstance(props, auth);

+					//Session session = Session.getDefaultInstance(props);

+					//props.put("mail.smtp.user",user);

+					//props.put("mail.smtp.password",pass);

+												    

+				    //Composing the message

+				    MimeMessage message = new MimeMessage(session);

+				      message.setFrom(from);

+				    message.setRecipients(Message.RecipientType.TO,to);

+				    message.setRecipients(Message.RecipientType.CC,cc);

+				    message.setRecipients(Message.RecipientType.BCC,bcc);

+				    message.setSubject(subject);

+				    if(file==null)

+				    {

+				    	

+					    //message.setText(body);

+				    	message.setContent(body, "text/html");

+				    }

+				    else

+				    {

+					    Multipart multipart = new MimeMultipart();

+					    BodyPart messageBodyPart;

+					    messageBodyPart = new MimeBodyPart();

+					    messageBodyPart.setContent(body, "text/html");

+					    //messageBodyPart.setText(body);

+					    multipart.addBodyPart(messageBodyPart);

+					    messageBodyPart = new MimeBodyPart();

+					    DataSource source = new FileDataSource(file);

+					    messageBodyPart.setDataHandler(new DataHandler(source));

+					    messageBodyPart.setFileName(file);

+					    multipart.addBodyPart(messageBodyPart);

+		

+					    message.setContent(multipart);

+				    }

+		

+					Transport tr = session.getTransport("smtp");			

+					tr.connect(smtp, mailboxAddress, pass);

+					message.saveChanges();

+					tr.sendMessage(message, message.getAllRecipients());

+					tr.close();

+				}

+		    }

+			catch(Exception e)

+			{

+				e.printStackTrace();

+				correct=false;

+			}

+			return correct;

+		}

+

+		private  boolean validateAddress(InternetAddress from,

+				InternetAddress[] to, InternetAddress[] cc,

+				InternetAddress[] bcc) {

+			boolean correct = true;

+			try{

+				correct = from!=null && !from.getAddress().equals("") && to!=null && to.length>=1;

+				String regEx="[^\\s]+@[^\\s]+.[^\\s]+";

+				Pattern pc = Pattern.compile(regEx);

+				Matcher m = null ;

+

+				if(correct){

+					m = pc.matcher(from.getAddress());

+					correct = m.matches();

+				}

+				

+				if(correct){

+					int vault = to.length;

+					while(correct && vault<to.length){

+						correct = !to[vault].getAddress().equals("");

+						if(correct){

+					    	m = pc.matcher(to[vault].getAddress());

+					    	correct = m.matches();

+						}

+						vault++;

+					}

+				}

+				

+				if(correct && cc!=null){

+					int vault = cc.length;

+					while(correct && vault<cc.length){

+						correct = !cc[vault].getAddress().equals("");

+						if(correct){

+					    	m = pc.matcher(cc[vault].getAddress());

+					    	correct = m.matches();

+						}

+						vault++;

+					}

+				}

+				

+				if(correct && bcc!=null){

+					int vault = bcc.length;

+					while(correct && vault<bcc.length){

+						correct = !bcc[vault].getAddress().equals("");

+						if(correct){

+					    	m = pc.matcher(bcc[vault].getAddress());

+					    	correct = m.matches();

+						}

+						vault++;

+					}

+				}

+				

+			}catch(Exception e){

+				e.printStackTrace();

+				correct=false;

+			}

+			return correct;

+		}

+

+		private  InternetAddress[] eliminateSpaces(InternetAddress[] address) {

+			if(address!=null){

+				for(int i=0;i<address.length;i++){

+					address[i].setAddress(address[i].getAddress().replace(" ","").trim());

+				}

+			}

+			return address;

+		}		

+

+		

+		public static void main(String[] args){

+			EmailSender s = new EmailSender();

+			try {

+				s.sendMail("smtp.live.com", "bgalitsky@hotmail.com", "******", new InternetAddress("bgalitsky@hotmail.com"), new InternetAddress[]{new InternetAddress("bgalitsky@hotmail.com")}, new InternetAddress[]{}, new InternetAddress[]{}, 

+						"Generated content for you", "body", null);

+			} catch (AddressException e) {

+				// TODO Auto-generated catch block

+				e.printStackTrace();

+			} catch (Exception e) {

+				// TODO Auto-generated catch block

+				e.printStackTrace();

+			}

+		}

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java
new file mode 100644
index 0000000..a57601b
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java
@@ -0,0 +1,24 @@
+package opennlp.tools.apps.utils.email;

+import javax.mail.*;

+

+

+/**

+ * This contains the information required for SMTP authorization.

+ *

+ */

+

+public class SMTP_Authenticator extends javax.mail.Authenticator {

+	

+	private String username="bg7550@gmail.com";

+	private String password="pill0693";	

+	

+	public SMTP_Authenticator(String user, String pwd) {

+		username=user;

+		password=pwd;

+	}

+

+		

+	public PasswordAuthentication getPasswordAuthentication() {

+		return new PasswordAuthentication(username, password);

+		}

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/FeatureSpaceCoverageProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/FeatureSpaceCoverageProcessor.java
new file mode 100644
index 0000000..89d12e4
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/FeatureSpaceCoverageProcessor.java
@@ -0,0 +1,317 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.jsmlearning;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.HashMap;

+import java.util.List;

+import java.util.Map;

+

+import org.apache.commons.lang.StringUtils;

+

+public class FeatureSpaceCoverageProcessor {

+

+	public Map<String, Integer> paramMap = new HashMap<String, Integer>();

+	public String[] header; 

+	String[] attributes;

+

+	public FeatureSpaceCoverageProcessor (){

+		

+	}

+

+	public void initParamMap(String[] attributes, String[] header){

+		this.header = header;

+		this.attributes = attributes;

+		for(int m=0; m<header.length; m++){

+			paramMap.put(header[m], m);

+		}

+	}

+

+

+	// distance between array and array

+	public Float calcDistance(String[] seed, String[] candidate) throws Exception {

+		if (paramMap.isEmpty())

+			throw new Exception("paramMap.isEmpty()");

+

+		Float score = 0f;

+		int p1 = paramMap.get("First Level Category");	

+		int p2 = paramMap.get("Second Level Category");

+		if (seed[p1].equals(candidate[p1])) {

+			if (seed[p2].equals(candidate[p2]))

+				score = score+0.0000001f;

+			else

+				score = score+0.01f;			

+		} else return 100000f;

+

+		try {

+			int p3 = paramMap.get("Latitude");	

+			int p4 = paramMap.get("Longitude");

+			Double latDiff = Math.abs(Double.parseDouble(seed[p3]) - Double.parseDouble(candidate[p3]));

+			Double longDiff = Math.abs(Double.parseDouble(seed[p4]) - Double.parseDouble(candidate[p4]));

+			if (latDiff>1 || longDiff>1)

+				return 1000000f;

+			else 

+				score+= latDiff.floatValue()/100.0f + longDiff.floatValue()/100.0f;

+		} catch (Exception e) {

+			return 1000000f;

+		}

+

+

+		return score;

+	}

+

+	// distance between matrix and array

+	public Float calcDistance(String[][] seed, String[] candidate) throws Exception {

+		if (paramMap.isEmpty())

+			throw new Exception("paramMap.isEmpty()");

+

+		Float score = 0f, catScore = 10000f, currCatScore=10000000f;

+

+		int p1 = paramMap.get("First Level Category");	

+		int p2 = paramMap.get("Second Level Category");

+		for(int v=0; v<seed[0].length; v++){

+			if (seed[p1][v].equals(candidate[p1])) {

+				if (seed[p2][v].equals(candidate[p2]))

+					currCatScore = 0.0000001f;

+				else

+					currCatScore = 0.01f;			

+			} 

+			if ( catScore >  currCatScore) // if found closer, update

+				catScore =  currCatScore;

+		}

+		score = catScore;

+		if (score > 1000000f)

+			return 10000000f;

+

+		Float latLongScore = 100000f, currLatLongScore = 10000000f;

+		for(int v=0; v<seed[0].length; v++){

+			try {

+				int p3 = paramMap.get("Latitude");	

+				int p4 = paramMap.get("Longitude");

+				if (seed[p3][v].equals("") || seed[p4][v].equals("") 

+						|| candidate[p3].equals("") ||  candidate[p4].equals(""))

+					continue;

+				Double latDiff = Math.abs(Double.parseDouble(seed[p3][v]) - Double.parseDouble(candidate[p3]));

+				Double longDiff = Math.abs(Double.parseDouble(seed[p4][v]) - Double.parseDouble(candidate[p4]));

+				if (!(latDiff>1 || longDiff>1))

+					currLatLongScore = latDiff.floatValue()/100.0f + longDiff.floatValue()/100.0f;

+			} catch (Exception e) {

+				//return 1000000f;

+			}

+			if (latLongScore > currLatLongScore)

+				latLongScore = currLatLongScore;

+

+		}	

+		if (latLongScore> 10000)

+			return 10000f;

+		score+=latLongScore;

+		return score;

+	}

+

+	public Integer getIdForAttributeName(String key){

+		Integer res = paramMap.get(key);

+		try {

+			res.toString();

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+			System.out.println("wrong key"+key);

+		}

+		return res;

+

+	}

+

+	public String getAttribNameForId(Integer id){

+		return header[id];

+	}

+

+

+

+

+	public Map<String, String> computeIntersection(String[] line1,

+			String[] line2) {

+

+		Map<String, String> attr_value = new HashMap<String, String>();

+		for(String attr: attributes){

+			int attrIndex = getIdForAttributeName(attr);

+			String v1 = line1[attrIndex].toLowerCase().replace("\"", "").replace(",  ", ", ").replace(", ", ",");;

+			String v2 = line2[attrIndex].toLowerCase().replace("\"", "").replace(",  ", ", ").replace(", ", ",");;

+			String valArr1Str = StringUtils.substringBetween(v1, "{", "}");

+			String valArr2Str = StringUtils.substringBetween(v2, "{", "}");

+			if (valArr1Str==null || valArr2Str==null) { // we assume single value, not an array of values

+				if (v1.equals(v2)){

+					attr_value.put(attr, v1);

+				}

+			}

+			else {

+				valArr1Str = valArr1Str.replaceAll(", ", ",");

+				valArr2Str = valArr2Str.replaceAll(", ", ",");

+				String[] valArr1 = valArr1Str.split(",");

+				String[] valArr2 = valArr2Str.split(","); 

+				List<String> valList1 = new ArrayList<String>(Arrays.asList(valArr1));

+				List<String> valList2 = new ArrayList<String>(Arrays.asList(valArr2));

+				valList1.retainAll(valList2);

+				/* verification of coverage

+				valList1.retainAll(valList2);

+				

+				List<String> vl1 = new ArrayList<String>(Arrays.asList(valArr1));

+				valList1.retainAll(vl1); */

+				

+				if (!valList1.isEmpty()){

+					v1 = "{"+valList1.toString().replace("["," ").replace("]", " ").trim()+"}";

+					attr_value.put(attr, v1);

+				}

+

+			}		    		

+		}

+			return attr_value;

+	}

+

+

+		public boolean ruleCoversCase(Map<String, String> attr_value, String[] line){

+			boolean soFarCovers = true;		

+			for(String attr: attributes){

+				int attrIndex = getIdForAttributeName(attr);

+				String rule = attr_value.get(attr);

+				if (rule == null)

+					continue; // no constraint

+				rule = rule.toLowerCase().replace("\"", "").replace(",  ", ",").replace(", ", ",");

+				String vCase = line[attrIndex].toLowerCase().replace("\"", "").replace(",  ", ",").replace(", ", ",");

+				if (vCase==null){// rule for this attribute exists but case has no value

+					soFarCovers = false;

+					return false;

+				}

+				

+				String valArrCaseStr = StringUtils.substringBetween(vCase, "{", "}");

+				String valArrRuleStr = StringUtils.substringBetween(rule, "{", "}");

+				if (valArrCaseStr==null || valArrRuleStr==null) { // we assume single value, not an array of values

+					if (!vCase.equals(rule)){

+						soFarCovers = false;

+						return false;

+					}

+				}

+				else {

+					String[] valArrCase = valArrCaseStr.split(",");

+					String[] valArrRule = valArrRuleStr.split(","); 

+					List<String> valListCase = new ArrayList<String>(Arrays.asList(valArrCase));

+					List<String> valListRule = new ArrayList<String>(Arrays.asList(valArrRule));

+					

+					int ruleSize = valListRule.size();

+					//System.out.println(valListRule);

+					//System.out.println(valListCase);

+					

+					// rule members are subset of case

+					valListRule.retainAll(valListCase);

+					

+					//System.out.println(valListRule);

+					

+					if (ruleSize != valListRule.size()){

+						soFarCovers = false;

+						return false;

+					}

+					

+					

+					

+				}		    		

+			}

+			return  soFarCovers;

+		}

+		

+		public boolean ruleCoversRule(Map<String, String> attr_value, Map<String, String> line){

+			boolean soFarCovers = true;		

+			for(String attr: attributes){

+				int attrIndex = getIdForAttributeName(attr);

+				String rule = attr_value.get(attr);

+				if (rule == null)

+					continue; // no constraint

+				

+				String vRuleBeingCovered = line.get(attr);

+				if (vRuleBeingCovered==null){// rule for this attribute exists but RuleBeingCovered has no value

+					soFarCovers = false;

+					return false;

+				}

+				

+				String valArrRuleBeingCoveredStr = StringUtils.substringBetween(vRuleBeingCovered, "{", "}");

+				String valArrRuleStr = StringUtils.substringBetween(rule, "{", "}");

+				if (valArrRuleBeingCoveredStr==null || valArrRuleStr==null) { // we assume single value, not an array of values

+					if (!vRuleBeingCovered.equals(rule)){

+						soFarCovers = false;

+						return false;

+					}

+				}

+				else {

+					String[] valArrRuleBeingCovered = valArrRuleBeingCoveredStr.split(",");

+					String[] valArrRule = valArrRuleStr.split(","); 

+					List<String> valListRuleBeingCovered = new ArrayList<String>(Arrays.asList(valArrRuleBeingCovered));

+					List<String> valListRule = new ArrayList<String>(Arrays.asList(valArrRule));		

+					for(String r: valListRule){

+						if (!strListContainsMember(valListRuleBeingCovered, r)){

+							soFarCovers = false;

+							return false;

+						} 

+					}

+

+				}		    		

+			}

+			return  soFarCovers;

+		}

+

+		public Map<String, String> computeIntersection(

+				Map<String, String> rule1, Map<String, String> rule2) {

+			Map<String, String> attr_value = new HashMap<String, String>();

+			for(String attr: attributes){

+				int attrIndex = getIdForAttributeName(attr);

+				String v1 = rule1.get(attr);

+				String v2 = rule2.get(attr);

+				if (v1==null || v2==null)

+					continue;

+				String valArr1Str = StringUtils.substringBetween(v1, "{", "}");

+				String valArr2Str = StringUtils.substringBetween(v2, "{", "}");

+				if (valArr1Str==null || valArr2Str==null) { // we assume single value, not an array of values

+					if (v1.equals(v2)){

+						attr_value.put(attr, v1);

+					}

+				}

+				else {

+					valArr1Str = valArr1Str.replaceAll(", ", ",");

+					valArr2Str = valArr2Str.replaceAll(", ", ",");

+					String[] valArr1 = valArr1Str.split(",");

+					String[] valArr2 = valArr2Str.split(","); 

+					List<String> valList1 = new ArrayList<String>(Arrays.asList(valArr1));

+					List<String> valList2 = new ArrayList<String>(Arrays.asList(valArr2));

+					valList1.retainAll(valList2);

+					if (!valList1.isEmpty()){

+						v1 = "{"+valList1.toString().replace("["," ").replace("]", " ").trim()+"}";

+						attr_value.put(attr, v1);

+					}

+

+				}		    		

+			}

+				return attr_value;

+		}

+

+		private boolean strListContainsMember(List<String> valListCase, String r) {

+			boolean bContains = false;

+			for(String m: valListCase){

+				if (m.startsWith(r) || r.startsWith(m))

+					return true;

+				

+			}

+			return false;

+		}

+}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/IntersectionSetBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/IntersectionSetBuilder.java
new file mode 100644
index 0000000..e1d748e
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/IntersectionSetBuilder.java
@@ -0,0 +1,361 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.jsmlearning;

+

+import java.util.ArrayList;

+import java.util.HashMap;

+import java.util.List;

+import java.util.Map;

+

+/*

+ * 

+ * The rule is in the form

+The report also shows how many positive cases are covered by this rule (should be 0) and how many negative cases 

+are covered by this rule (should be above 1)

+

+The rule

+{plugin_number=3, service_type=all, mime_type_number=11, review_status=pass}	0	192

+

+should be read as 

+

+plugin_number=3 & service_type=all & mime_type_number=11 & review_status=pass

+

+For a single-attribute, its value should be the one from this rule. For a multi-value attribute, the set of values in the case

+should INCLUDE the set of values from the rule.

+

+The rule checking that a case belongs to the negative set is a disjunction of all rules in the result file.

+

+input: two data files, one is negative set and another is positive set.

+in the argument, just the negative file needs to be specified:

+".../negativeSet1.csv", 

+then the system assumes that the filename for negative is obtained by replacing 'negative' with 'positive'

+".../positiveSet1.csv", 

+

+The set of attribute in analysis is hard coded

+

+

+ */

+public class IntersectionSetBuilder{

+	private FeatureSpaceCoverageProcessor distProcessorPos, distProcessorNeg;

+	private float percentageOfAllowedSetCover = 0.001f;

+	//The set of attribute in analysis is hard coded

+	String[] fieldsToAggr = new String[]{

+			"reason_code",	"risk_rating", "service_type", 	"device_match_result", 	"device_result", 	"http_referer", 	"device_id_reason_code",

+			"review_status", "tcp_os_sig_ttl", "tcp_connection_type",

+			"mime_type_number", "plugin_number", "http_connection_type", "device_last_event", "http_connection_type"

+

+

+	};

+	public IntersectionSetBuilder() {};

+	

+	/*

+	 * Takes a file generated by public String ruleFormer(String dataFile)

+	 * and performs verification of coverage for positive and negative set, as well as dedupe of rules

+	 * The input for negative positive data set is the same as the above function.

+	 * The second argument is the rule file generated by the above.

+	 * Outputs the verified rule file.

+	 */

+

+	public void ruleVerifier(String dataFile, String ruleFile){

+

+

+		List<String[]> negativeSet = ProfileReaderWriter.readProfiles(dataFile); 

+		List<String[]> positiveSet = ProfileReaderWriter.readProfiles(dataFile.replace("negative", "positive")); 

+		distProcessorPos = new FeatureSpaceCoverageProcessor(); distProcessorNeg = new FeatureSpaceCoverageProcessor();

+		distProcessorNeg.initParamMap( 	fieldsToAggr, negativeSet.get(0));		

+		distProcessorPos.initParamMap(fieldsToAggr, positiveSet.get(0));		

+		negativeSet.remove(0); positiveSet.remove(0);

+		

+		List<String[]> ruleStrings = ProfileReaderWriter.readProfiles(ruleFile);

+		List<Map<String, String>> rules = new ArrayList<Map<String, String>>(), dedupedRules = new ArrayList<Map<String, String>>() ;

+		for(String[] l : ruleStrings){

+			Map<String, String> rule = new HashMap<String, String>();

+			String lstr = l[0].substring(1, l[0].length()-1);

+			String[] ruleStr= lstr.split(",");

+			for(String attr_valueStr: ruleStr){

+				String[] attr_value =  attr_valueStr.split("=");	

+				if (attr_value.length==2)

+					rule.put(attr_value[0].trim(), attr_value[1].trim());

+				else if (attr_value.length==1)

+					rule.put(attr_value[0].trim(),"");

+				else

+					System.err.println("Problem parsing rule file "+lstr);

+			}

+			rules.add(rule);

+		}

+		

+		

+		for(int i=0; i<rules.size(); i++){

+			boolean bCovered = false;

+		

+			for(int j=i+1; j<rules.size(); j++){

+				if (distProcessorNeg.ruleCoversRule(rules.get(j), rules.get(i))){

+					bCovered = true;

+				}

+			}

+			if (!bCovered)

+				dedupedRules.add(rules.get(i));

+		}

+		

+		rules = dedupedRules;

+

+		List<String[]> output = new ArrayList<String[]>();

+		output.add(new String[]{"rule", "# covers positive", "# covers negative"});

+		for(Map<String, String> rule: rules){

+			int countCoverNeg = 0, countCoverPos=0;

+			for(String[] line: positiveSet){

+				if (distProcessorPos.ruleCoversCase(rule, line)){

+					countCoverPos++;

+				}

+			}

+			for(String[] line: negativeSet){

+				if (distProcessorNeg.ruleCoversCase(rule, line)){

+					countCoverNeg++;

+				}

+

+			}

+			output.add(new String[]{rule.toString(), new Integer(countCoverPos).toString(), new Integer(countCoverNeg).toString()});	

+

+		}

+		ProfileReaderWriter.writeReport(output, ruleFile+"Verif1.csv");

+	}

+	

+	

+	/*

+	 * Takes one argument for negative training set file, assumes the positive filename is formed by replacing 'negative'->'positive'

+	 * Outputs the filename with generated rules

+	 * 

+	 */

+	public String ruleFormer(String dataFile){

+

+

+		List<String[]> negativeSet = ProfileReaderWriter.readProfiles(dataFile); 

+		List<String[]> positiveSet = ProfileReaderWriter.readProfiles(dataFile.replace("negative", "positive")); 

+		distProcessorPos = new FeatureSpaceCoverageProcessor(); distProcessorNeg = new FeatureSpaceCoverageProcessor();

+		distProcessorNeg.initParamMap( 	fieldsToAggr, negativeSet.get(0));		

+		distProcessorPos.initParamMap(fieldsToAggr, positiveSet.get(0));		

+		negativeSet.remove(0); positiveSet.remove(0);

+

+		List<Map<String, String>> intersections = formIntersectionAmongMembersOfTrainingSetAndVerifyThatDoesNotCoverOppositeTrainingS(negativeSet, positiveSet);

+		List<Map<String, String>> superIntersections = formIntersections(intersections, negativeSet, positiveSet);

+

+		List<String[]> output = new ArrayList<String[]>();

+		for(Map<String, String> rule: superIntersections){

+			int countCover = 0;

+			for(String[] line: positiveSet){

+				if (distProcessorPos.ruleCoversCase(rule, line)){

+					countCover++;

+				}

+			}

+			output.add(new String[]{rule.toString(), new Integer(countCover).toString()});	

+

+		}

+		String outputFile = "learnedRulesForNegativeSetJune23-1.csv";

+		ProfileReaderWriter.writeReport(output, outputFile);

+		return outputFile; 

+

+	}

+

	/**
	 * One round of pairwise generalization: intersects every pair of existing rules,
	 * keeps a new intersection only when it covers (almost) no positive cases and is
	 * not already generalized by an existing rule, then returns the union of the
	 * surviving new rules and the input rules.
	 *
	 * @param intersectionsIn current rule set (attribute -&gt; value maps)
	 * @param negativeSet     negative training cases (not used in this pass)
	 * @param positiveSet     positive training cases used as the rejection filter
	 * @return the input rules plus the surviving new intersections
	 */
	private List<Map<String, String>> formIntersections(List<Map<String, String>> intersectionsIn, List<String[]> negativeSet, List<String[]> positiveSet) {
		List<Map<String, String>> intersectionsNew = new ArrayList<Map<String, String>>();
		for(int i=0; i<intersectionsIn.size(); i++){
			for(int j=i+1; j<intersectionsIn.size(); j++){
				Map<String, String> intersection = distProcessorNeg.computeIntersection(intersectionsIn.get(i), intersectionsIn.get(j));
				if (intersection.isEmpty())
					continue;
				
				// Sentinel: a single covered positive case disqualifies the rule outright
				// (10000000 guarantees cover >= percentageOfAllowedSetCover below).
				int countCover = 0;
				for(String[] line: positiveSet){
					if (distProcessorPos.ruleCoversCase(intersection, line)){
						//countCover++;
						countCover = 10000000;
						break;
					}
				}
				float cover = (float)countCover/(float)positiveSet.size();
				if (!(cover<this.percentageOfAllowedSetCover))
					continue;

				List<Map<String, String>> rulesToBeRemoved = new ArrayList<Map<String, String>>();
				boolean nothingCoversThisRule = true;
				for(Map<String, String> intersChecker: intersectionsIn){ // more general rule covers more specific
					if (distProcessorNeg.ruleCoversRule(intersChecker, intersection)){
						nothingCoversThisRule = false;
						break;
					} // now check if this new rule defeats built rules
					if (distProcessorNeg.ruleCoversRule( intersection, intersChecker)){
						rulesToBeRemoved.add(intersChecker); 
					}
				}
				if(nothingCoversThisRule){
					intersectionsNew.add(intersection);
					// NOTE(review): rulesToBeRemoved holds members of intersectionsIn, and
					// intersectionsIn is re-added wholesale below, so this removal has no
					// lasting effect on the returned list — confirm whether the defeated
					// rules were meant to be dropped from the final result.
					intersectionsNew.removeAll(rulesToBeRemoved);
				}
			}
		}
		intersectionsNew.addAll(intersectionsIn);
		return intersectionsNew;
	}

+

	/**
	 * Builds the seed rule set by intersecting every pair of negative cases (outer
	 * loop capped at the first 1000 cases). A candidate intersection is kept only
	 * when it covers (almost) no positive cases and no previously kept rule already
	 * generalizes it; rules it defeats (more specific ones) are removed.
	 *
	 * @param negativeSet negative training cases, one String[] per case
	 * @param positiveSet positive training cases used as the rejection filter
	 * @return the surviving pairwise intersections
	 */
	private List<Map<String, String>> formIntersectionAmongMembersOfTrainingSetAndVerifyThatDoesNotCoverOppositeTrainingS(List<String[]> negativeSet, List<String[]> positiveSet){
		List<Map<String, String>> intersections = new ArrayList<Map<String, String>>();

		for(int i=0; i<negativeSet.size() && i<1000; i++){
			for(int j=i+1; j<negativeSet.size(); j++){
				Map<String, String> intersection = distProcessorNeg.computeIntersection(negativeSet.get(i), negativeSet.get(j));
				if (intersection.isEmpty())
					continue;
				
				/* temporary code that formed rule covers at least 2 cases
				int countCoverNeg=0;
				for(String[] line: negativeSet){
					if (distProcessorNeg.ruleCoversCase(intersection, line)){
						countCoverNeg++;
					}

				} 
				if (countCoverNeg<2){
					System.err.println("A rule formed but it does not cover its origin! "+intersection);
					distProcessorNeg.ruleCoversCase(intersection, negativeSet.get(i));
					distProcessorNeg.ruleCoversCase(intersection, negativeSet.get(j));
				} */
				
				// Sentinel: a single covered positive case disqualifies the rule outright
				// (10000000 guarantees cover >= percentageOfAllowedSetCover below).
				int countCover = 0;
				for(String[] line: positiveSet){
					if (distProcessorPos.ruleCoversCase(intersection, line)){
						//countCover++;
						countCover = 10000000;
						break;
					}
				}
				float cover = (float)countCover/(float)positiveSet.size();
				if (!(cover<this.percentageOfAllowedSetCover))
					continue;

				List<Map<String, String>> rulesToBeRemoved = new ArrayList<Map<String, String>>();
				boolean nothingCoversThisRule = true;
				for(Map<String, String> intersChecker: intersections){ // more general rule covers more specific
					if (distProcessorNeg.ruleCoversRule(intersChecker, intersection)){
						nothingCoversThisRule = false;
						break;
					} // now check if this new rule defeats built rules
					if (distProcessorNeg.ruleCoversRule( intersection, intersChecker)){
						rulesToBeRemoved.add(intersChecker); 
					}
				}
				if(nothingCoversThisRule){
					// Keep the new rule and drop the more specific rules it defeats.
					intersections.add(intersection);
					intersections.removeAll(rulesToBeRemoved);
				}
			}
		}
		return intersections;
	}

+

+	private List<Map<String, String>> filterIntersectionsByOppositeTrainingSet(List<Map<String, String>> intersections, List<String[]> positiveSet){

+		List<Map<String, String>> filteredIntersections = new ArrayList<Map<String, String>>();

+		for(Map<String, String> rule: intersections){

+			int countCover = 0;

+			for(String[] line: positiveSet){

+				if (!distProcessorPos.ruleCoversCase(rule, line))

+					countCover++;

+			}

+			if ((float)countCover/(float)positiveSet.size()<this.percentageOfAllowedSetCover)

+				filteredIntersections.add(rule);

+

+		}

+		return filteredIntersections;

+	}

+

    /**
     * Placeholder for applying the learned rule set to a single case.
     * Currently always returns true; the intended implementation (per the TODO)
     * is a singleton that loads the rule file and evaluates each rule on the sample.
     *
     * @param sample one case as an array of attribute values (currently ignored)
     * @return always true for now
     */
    public boolean applyRule(String[] sample){
    	return true;
    	// todo implement singleton which reads rule file and applies them
    	
    }

+

+	public static void main(String[] args){

+		IntersectionSetBuilder iBuilder = new IntersectionSetBuilder ();

+		

+		// builds the set of rules

+	    String resFile = iBuilder.ruleFormer("C:/workspace/relevanceEngine/src/test/resources/maps/anomaly/negativeSet1.csv");

+		// verifies and cleans the rules

+		iBuilder.ruleVerifier("C:/workspace/relevanceEngine/src/test/resources/maps/anomaly/negativeSet1.csv", 

+				"C:/workspace/relevanceEngine/learnedRulesForNegativeSetJune23-1.csv");

+

+	}

+

+}

+

+/*

+ * 

+ * datetime

+browser_language

+browser_string

+device_first_seen

+device_match_result

+http_os_signature

+http_os_sig_raw

+os

+device_id_reason_code

+true_ip

+proxy_ip

+http_os_sig_adv_mss

+http_os_sig_snd_mss

+http_os_sig_rcv_mss

+http_os_sig_ttl

+http_connection_type

+device_last_event

+flash_lang

+flash_os

+flash_version

+os_fonts_number

+plugin_adobe_acrobat

+plugin_flash

+plugin_silverlight

+plugin_windows_media_player

+profiling_datetime

+screen_res

+tcp_os_signature

+tcp_os_sig_raw

+time_zone

+time_zone_dst_offset

+profile_api_timedelta

+mime_type_number

+plugin_number

+plugin_quicktime

+plugin_java

+fuzzy_device_id_confidence

+fuzzy_device_match_result

+fuzzy_device_last_event

+fuzzy_device_first_seen

+true_ip_city

+true_ip_first_seen

+true_ip_geo

+true_ip_latitude

+true_ip_longitude

+account_email_first_seen

+shipping_address_first_seen

tcp_os_sig_ttl

+tcp_connection_type

+page_time_on

+policy_score

+reason_code

+review_status

+risk_rating

+ */

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/ProfileReaderWriter.java b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/ProfileReaderWriter.java
new file mode 100644
index 0000000..9081e1a
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/ProfileReaderWriter.java
@@ -0,0 +1,140 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.jsmlearning;

+

+import java.io.FileNotFoundException;

+import java.io.FileReader;

+import java.io.IOException;

+import java.io.PrintWriter;

+import java.util.ArrayList;

+import java.util.List;

+

+import au.com.bytecode.opencsv.CSVReader;

+import au.com.bytecode.opencsv.CSVWriter;

+

+public class ProfileReaderWriter {

+	public static List<String[]> readProfiles(String filename) {

+		CSVReader reader = null;

+		List<String[]> profiles = null;

+		try	{

+			reader = new CSVReader(new FileReader(filename), ',');

+			profiles = reader.readAll();

+		} catch (FileNotFoundException e) {

+			e.printStackTrace();

+		} catch (IOException ioe) {

+			ioe.printStackTrace();

+		}

+		return profiles;

+	}

+	

+	public static List<String[]> readProfiles(String filename, char delimiter) {

+		CSVReader reader = null;

+		List<String[]> profiles = null;

+		try	{

+			reader = new CSVReader(new FileReader(filename), delimiter);

+			profiles = reader.readAll();

+		} catch (FileNotFoundException e) {

+			e.printStackTrace();

+		} catch (IOException ioe) {

+			ioe.printStackTrace();

+		}

+		return profiles;

+	}

+

+	public static void writeReportArr( String[][] allLines, String reportName){

+		List<String[]> rep = new ArrayList<String[]>();

+		for(String[] line: allLines){

+			rep.add(line);

+		}

+		writeReport( rep, reportName);

+	}

+

+	public static void writeReport( List<String[]> allLines, String reportName){

+		CSVWriter writer = null;

+		try {	

+			writer = new CSVWriter(new PrintWriter(reportName));			

+		} catch (FileNotFoundException e) {

+			e.printStackTrace();

+		}		

+		writer.writeAll(allLines);

+

+		try {

+			writer.flush();

+			writer.close();

+		} catch (IOException e) {

+			e.printStackTrace();

+		}

+	}

+

+	public static void writeReport( List<String[]> allLines, String reportName, char delimiter){

+		CSVWriter writer = null;

+		try {	

+			writer = new CSVWriter(new PrintWriter(reportName), delimiter, delimiter, delimiter);			

+		} catch (FileNotFoundException e) {

+			e.printStackTrace();

+		}	

+

+		writer.writeAll(allLines);

+

+		try {

+			writer.flush();

+			writer.close();

+		} catch (IOException e) {

+			e.printStackTrace();

+		}

+	}

+	

+	public static void appendReport( List<String[]> allLines, String reportName, char delimiter){

+		List<String[]> previous;

+		try {

+			previous = readProfiles(reportName);

+			allLines.addAll(previous);

+		} catch (Exception e1) {

+			System.out.println("Creating file "+reportName);

+		}

+		

+		CSVWriter writer = null;

+		try {	

+			writer = new CSVWriter(new PrintWriter(reportName), delimiter, delimiter, delimiter);			

+		} catch (FileNotFoundException e) {

+			e.printStackTrace();

+		}	

+

+		writer.writeAll(allLines);

+

+		try {

+			writer.flush();

+			writer.close();

+		} catch (IOException e) {

+			e.printStackTrace();

+		}

+	}

+

	/**
	 * Placeholder — writing a plain list of strings to a report is not implemented.
	 *
	 * @param res    lines that would be written (currently ignored)
	 * @param string destination file name (currently ignored)
	 */
	public static void writeReportListStr(List<String> res, String string) {
		// TODO Auto-generated method stub

	}

+

+	public static void main(String[] args){

+		List<String[]> allLines = new ArrayList<String[]>();

+		allLines.add(new String[] {"aa " , "  bb", "ccc" });

+		ProfileReaderWriter.writeReport( allLines, "reportName.txt", ' ');

+

+	}

+

+

+}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/TreeKernelRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/TreeKernelRunner.java
new file mode 100644
index 0000000..88179b0
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/TreeKernelRunner.java
@@ -0,0 +1,131 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.jsmlearning;

+

+import java.io.BufferedReader;

+import java.io.File;

+import java.io.IOException;

+import java.io.InputStream;

+import java.io.InputStreamReader;

+

+public class TreeKernelRunner {

+	private void runEXE(String[] command, String runPath){

+		Runtime r = Runtime.getRuntime();

+		Process mStartProcess = null;

+		try {

+			mStartProcess = r.exec( command, null, new File(runPath));

+		} catch (IOException e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+

+		StreamLogger outputGobbler = new StreamLogger(mStartProcess.getInputStream());

+		outputGobbler.start();

+

+		try {

+			int returnCode = mStartProcess.waitFor();

+		} catch (InterruptedException e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+	}

+

+	public void runLearner(String dir, String learning_file, String  model_file)

+	{

+		dir = dir.replace('/', '\\');

+		

+		if (!dir.endsWith("\\"))

+				dir+="\\";

+		String[] runString = new String[]{dir+"svm_learn.exe","-t", "5", dir+learning_file,  dir+model_file};

+		runEXE(runString, dir);

+	}

+	

+	

+	//svm_classify example_file model_file predictions_file

+	public void runClassifier(String dir, String example_file, String  model_file, String predictions_file)

+	{

+		dir = dir.replace('/', '\\');

+		

+		if (!dir.endsWith("\\"))

+				dir+="\\";

+		String[] runString = new String[]{dir+"svm_classify.exe", dir+example_file,  dir+model_file, dir+predictions_file};

+		runEXE(runString, dir);

+	}

+

+	class StreamLogger extends Thread{

+

+		private InputStream mInputStream;

+

+		public StreamLogger(InputStream is) {

+			this.mInputStream = is;

+		}

+

+		public void run() {

+			try {

+				InputStreamReader isr = new InputStreamReader(mInputStream);

+				BufferedReader br = new BufferedReader(isr);

+				String line = null;

+				while ((line = br.readLine()) != null) {

+					System.out.println(line);

+				}

+			} catch (IOException ioe) {

+				ioe.printStackTrace();

+			}

+		}

+

+	}

+	

	/**
	 * Manual smoke test: trains a model from training.txt and then classifies
	 * arg0.test with it. Requires the SVM-light-TK executables (svm_learn.exe,
	 * svm_classify.exe) under the hard-coded Windows path below.
	 */
	public static void main(String[] args){
		TreeKernelRunner runner = new TreeKernelRunner();
		runner.runLearner("C:\\stanford-corenlp\\tree_kernel\\", "training.txt", "arg0.model1.txt");
		runner.runClassifier("C:\\stanford-corenlp\\tree_kernel\\", "arg0.test", "arg0.model1.txt", "arg0.output1.txt");
	}

+}

+

	/*
exec:

public Process exec(String command, String envp[], File dir)

   @param      command   a specified system command.
   @param      envp      array of strings, each element of which
                         has environment variable settings in format
                         <i>name</i>=<i>value</i>.
   @param      dir       the working directory of the subprocess, or
                         <tt>null</tt> if the subprocess should inherit
                         the working directory of the current process.

The SVM-light-TK distribution ships two executables: svm_learn.exe and svm_classify.exe.

1. svm_learn.exe takes the file with training examples, processes it, and builds a model
   file containing the learned rules.

Run examples:
svm_learn -t 5 learning_file model_file       - the simplest invocation, SubSetTreeKernel
                                                (gaps are allowed when traversing trees)
svm_learn -t 5 -D 0 learning_file model_file  - the other kernel variant, SubTreeKernel

A sample input file and the parameter descriptions are available on the author's page.

2. svm_classify.exe takes the file with test examples plus the model file built by
   svm_learn, and writes the classification results to predictions_file.

Run:     svm_classify example_file model_file predictions_file

The test file has the same format as the training examples (a sample is in the archive
on Moschitti's page). Each line may start with the true class label (1 or -1), in which
case precision and recall are computed automatically; otherwise put 0 there.
	 */
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java
index 736eb35..7f4f589 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java
@@ -16,6 +16,7 @@
  */

 package opennlp.tools.nl2code;

 

+import java.io.File;

 import java.util.ArrayList;

 import java.util.Arrays;

 import java.util.List;

@@ -28,11 +29,19 @@
 public class NL2Obj {

   ObjectControlOp prevOp;

 

-  public NL2Obj() {

+  public NL2Obj(String path) {

     prevOp = new ObjectControlOp();

     prevOp.setOperatorIf("");

     prevOp.setOperatorFor("");

+    parser = ParserChunker2MatcherProcessor.getInstance(path);

   }

+  

+  public NL2Obj() {

+	    prevOp = new ObjectControlOp();

+	    prevOp.setOperatorIf("");

+	    prevOp.setOperatorFor("");

+	    parser = ParserChunker2MatcherProcessor.getInstance();

+	  }

 

   public static String[] epistemicStatesList = new String[] {

     "select", "verify", "find", "start", "stop", "go", "check"

@@ -268,6 +277,9 @@
 

 

   public static void main(String[] args){

+	  

+	String cDir = new File(".").getAbsolutePath();

+	

     String[] text = new String[]{

         "Randomly select a pixel at an image.",

         "Find a convex area this pixel belongs, so that all pixels are less than 128",      //area->REGION

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
index 706e8f6..c2e54f5 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
@@ -25,120 +25,129 @@
 

 public class NL2ObjCreateAssign extends NL2Obj {

 

-  private boolean classBeingDefined = false;

-  public static String[] declarationStatesList = new String[] {

-    "create", "assign", "set", 

-  };

+	private boolean classBeingDefined = false;

+	public static String[] declarationStatesList = new String[] {

+		"create", "assign", "set", 

+	};

 

-  public static String[] dataTypesList = new String[] {

-    "text", "double", "array", 

-  };

+	public static String[] dataTypesList = new String[] {

+		"text", "double", "array", 

+	};

 

-  public static String[] arrayElementList = new String[] {

-    "first", "second", "third", "fourth" 

-  };

+	public static String[] arrayElementList = new String[] {

+		"first", "second", "third", "fourth" 

+	};

 

-  public static String[] arrayElementListInsdex = new String[] {

-    "0", "1", "2", "3" 

-  };

+	public static String[] arrayElementListInsdex = new String[] {

+		"0", "1", "2", "3" 

+	};

 

 

-  @Override

-  public ObjectPhraseListForSentence convertSentenceToControlObjectPhrase(String sentence){

-    String expression = null;

-    if (sentence.indexOf(":")>-1){

-      expression = sentence.split(":")[1];

-      sentence = sentence.split(":")[0]+".";

-    }

+

+	public NL2ObjCreateAssign() {

+		super();

+	}

+

+	public NL2ObjCreateAssign(String path) {

+		super(path);

+	}

+

+	@Override

+	public ObjectPhraseListForSentence convertSentenceToControlObjectPhrase(String sentence){

+		String expression = null;

+		if (sentence.indexOf(":")>-1){

+			expression = sentence.split(":")[1];

+			sentence = sentence.split(":")[0]+".";

+		}

 

 

-    List<ObjectPhrase> oPhrases = new  ArrayList<ObjectPhrase>();

-    parser = ParserChunker2MatcherProcessor.getInstance();

-    List<List<ParseTreeChunk>> lingPhrases = 

-      parser.formGroupedPhrasesFromChunksForSentence(sentence);

+		List<ObjectPhrase> oPhrases = new  ArrayList<ObjectPhrase>();

+		parser = ParserChunker2MatcherProcessor.getInstance();

+		List<List<ParseTreeChunk>> lingPhrases = 

+				parser.formGroupedPhrasesFromChunksForSentence(sentence);

 

-    ObjectControlOp op = extractControlPart(lingPhrases, prevOp);

-    prevOp = op;

+		ObjectControlOp op = extractControlPart(lingPhrases, prevOp);

+		prevOp = op;

 

-    //start with verb phrases

-    List<ParseTreeChunk> actionWithObject =  lingPhrases.get(1);

-    actionWithObject.addAll( lingPhrases.get(4));

+		//start with verb phrases

+		List<ParseTreeChunk> actionWithObject =  lingPhrases.get(1);

+		actionWithObject.addAll( lingPhrases.get(4));

 

-    System.out.println("      === "+actionWithObject);

+		System.out.println("      === "+actionWithObject);

 

-    for(ParseTreeChunk verbChunk: actionWithObject){

-      List<String> lems = verbChunk.getLemmas();

-      String declarativeAction = verbChunk.getLemmas().get(0).toLowerCase();

-      if (declarativeAction.equals("define")){

-        if (verbChunk.getLemmas().get(1).toLowerCase().equals("class") ||

-            verbChunk.getLemmas().get(2).toLowerCase().equals("class")){

-          // new class

-          String className = verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase();

-          className = className.substring(0, 1).toUpperCase()+className.substring(1, className.length());

-          op.setOperatorIf("class "+className + "{");

-          op.setOperatorFor("{");

-          classBeingDefined = true;

-          break;

-        }

-        String dataType = verbChunk.getLemmas().get(1).toLowerCase();

+		for(ParseTreeChunk verbChunk: actionWithObject){

+			List<String> lems = verbChunk.getLemmas();

+			String declarativeAction = verbChunk.getLemmas().get(0).toLowerCase();

+			if (declarativeAction.equals("define")){

+				if (verbChunk.getLemmas().get(1).toLowerCase().equals("class") ||

+						verbChunk.getLemmas().get(2).toLowerCase().equals("class")){

+					// new class

+					String className = verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase();

+					className = className.substring(0, 1).toUpperCase()+className.substring(1, className.length());

+					op.setOperatorIf("class "+className + "{");

+					op.setOperatorFor("{");

+					classBeingDefined = true;

+					break;

+				}

+				String dataType = verbChunk.getLemmas().get(1).toLowerCase();

 

-        if (classBeingDefined && Arrays.asList(dataTypesList).contains(dataType) && verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){

-          op.setOperatorFor(dataType + " "+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase());

-          classBeingDefined = true;

-          break;

-        }

-        if (Arrays.asList(dataTypesList).contains(dataType) && verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){

-          op.setOperatorFor(dataType + " "+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase());

-          classBeingDefined = true;

-          break;

-        }

-      } else if (declarativeAction.equals("create")){

+				if (classBeingDefined && Arrays.asList(dataTypesList).contains(dataType) && verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){

+					op.setOperatorFor(dataType + " "+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase());

+					classBeingDefined = true;

+					break;

+				}

+				if (Arrays.asList(dataTypesList).contains(dataType) && verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){

+					op.setOperatorFor(dataType + " "+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase());

+					classBeingDefined = true;

+					break;

+				}

+			} else if (declarativeAction.equals("create")){

 

-        // now substituting array

-        if (verbChunk.getLemmas().get(1).toLowerCase().equals("array")){

+				// now substituting array

+				if (verbChunk.getLemmas().get(1).toLowerCase().equals("array")){

 

-          if(lems.contains("class")){

-            int indClass = lems.indexOf("class");

-            int numElements = lems.indexOf("elements");

-            if (numElements<0)

-              numElements = lems.indexOf("objects");

-            if (numElements<0)

-              numElements = lems.indexOf("members");

-            String arraySize = lems.get(numElements-1);

-            op.setOperatorFor(lems.get(indClass+1)+"[] "+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase() 

-                +" = new "+lems.get(indClass+1)+"["+arraySize+"]");

-            classBeingDefined = false;

-            break;

-          }

-        }    

-      } else if (declarativeAction.equals("assign")){

-        int numElements = lems.indexOf("element");

-        if (numElements<0)

-          numElements = lems.indexOf("object");

-        if (numElements<0)

-          numElements = lems.indexOf("member");

-        if (Arrays.asList(arrayElementList).contains(lems.get(numElements-1))){

-          int arrIndex = Arrays.asList(arrayElementList).indexOf(lems.get(numElements-1));

-          String indexValue = arrayElementListInsdex[arrIndex]; 

+					if(lems.contains("class")){

+						int indClass = lems.indexOf("class");

+						int numElements = lems.indexOf("elements");

+						if (numElements<0)

+							numElements = lems.indexOf("objects");

+						if (numElements<0)

+							numElements = lems.indexOf("members");

+						String arraySize = lems.get(numElements-1);

+						op.setOperatorFor(lems.get(indClass+1)+"[] "+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase() 

+								+" = new "+lems.get(indClass+1)+"["+arraySize+"]");

+						classBeingDefined = false;

+						break;

+					}

+				}    

+			} else if (declarativeAction.equals("assign")){

+				int numElements = lems.indexOf("element");

+				if (numElements<0)

+					numElements = lems.indexOf("object");

+				if (numElements<0)

+					numElements = lems.indexOf("member");

+				if (Arrays.asList(arrayElementList).contains(lems.get(numElements-1))){

+					int arrIndex = Arrays.asList(arrayElementList).indexOf(lems.get(numElements-1));

+					String indexValue = arrayElementListInsdex[arrIndex]; 

 

-          String arrayName = lems.get(lems.size()-1);

-          if (expression!=null)

-            op.setOperatorFor(arrayName+"["+indexValue+"]."+ expression);

-          break;

-        } 

-      } else if (declarativeAction.equals("set")){

-        int indQuantifier = lems.indexOf("all");

-        if (indQuantifier>-1 && 

-            (lems.get(indQuantifier+1).equals("elements") || lems.get(indQuantifier+1).equals("members") )){

-          

-          String arrayName = lems.get(lems.size()-1);

-          if (expression!=null)

-            op.setOperatorFor("for(int i=0; i<"+ arrayName+".size(); i++) "+

-                arrayName+"[i]."+ expression);

-          break;

-        } 

-      }

-      /*    

+					String arrayName = lems.get(lems.size()-1);

+					if (expression!=null)

+						op.setOperatorFor(arrayName+"["+indexValue+"]."+ expression);

+					break;

+				} 

+			} else if (declarativeAction.equals("set")){

+				int indQuantifier = lems.indexOf("all");

+				if (indQuantifier>-1 && 

+						(lems.get(indQuantifier+1).equals("elements") || lems.get(indQuantifier+1).equals("members") )){

+

+					String arrayName = lems.get(lems.size()-1);

+					if (expression!=null)

+						op.setOperatorFor("for(int i=0; i<"+ arrayName+".size(); i++) "+

+								arrayName+"[i]."+ expression);

+					break;

+				} 

+			}

+			/*    

         else {

           List<String> paramValues = verbChunk.getLemmas(), paramPOSs = verbChunk.getPOSs();

 

@@ -205,40 +214,40 @@
         oPhrases.add(oPhrase);      

 

       } */

-    }

+		}

 

-    ObjectPhraseListForSentence oplfs =  new ObjectPhraseListForSentence( oPhrases, op);

-    oplfs.cleanMethodNamesIsAre();

-    oplfs.substituteNullObjectIntoEmptyArg();

-      

-    return oplfs;

-  }

+		ObjectPhraseListForSentence oplfs =  new ObjectPhraseListForSentence( oPhrases, op);

+		oplfs.cleanMethodNamesIsAre();

+		oplfs.substituteNullObjectIntoEmptyArg();

 

-  public static void main(String[] args){

+		return oplfs;

+	}

 

-    String[] text = new String[]{

-        "Define a class and name it Employee. ",

-        "Define text attribute and name it m_name. ",

-        "Define double attribute and name it m_salary.",

-        "Create array of objects of class Employee for 10 elements, name the object as workforce.",

-        "Assign the first element in array workforce: m_name=\"Boss\"",

-        "Assign the second element in array workforce: m_name=\"His wife\"",

-       //  "Comment: We just started our small business company and expect to hire 8 more people soon.",

-        "Set for all elements in array workforce: m_salary=0 ",

-        "Print the list of all m_name attributes for workforce."

+	public static void main(String[] args){

 

-    };

+		String[] text = new String[]{

+				"Define a class and name it Employee. ",

+				"Define text attribute and name it m_name. ",

+				"Define double attribute and name it m_salary.",

+				"Create array of objects of class Employee for 10 elements, name the object as workforce.",

+				"Assign the first element in array workforce: m_name=\"Boss\"",

+				"Assign the second element in array workforce: m_name=\"His wife\"",

+				//  "Comment: We just started our small business company and expect to hire 8 more people soon.",

+				"Set for all elements in array workforce: m_salary=0 ",

+				"Print the list of all m_name attributes for workforce."

 

-    NL2Obj compiler = new NL2ObjCreateAssign();

-    for(String sent:text){

-      ObjectPhraseListForSentence opls=null;

-      try {

-        opls = compiler.convertSentenceToControlObjectPhrase(sent);

-      } catch (Exception e) {

-        e.printStackTrace();

-      }

-      System.out.println(sent+"\n"+opls+"\n");

-    }

+		};

 

-  }

+		NL2Obj compiler = new NL2ObjCreateAssign();

+		for(String sent:text){

+			ObjectPhraseListForSentence opls=null;

+			try {

+				opls = compiler.convertSentenceToControlObjectPhrase(sent);

+			} catch (Exception e) {

+				e.printStackTrace();

+			}

+			System.out.println(sent+"\n"+opls+"\n");

+		}

+

+	}

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ArcType.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ArcType.java
new file mode 100644
index 0000000..2c75ad0
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ArcType.java
@@ -0,0 +1,54 @@
+package opennlp.tools.parse_thicket;

+

+public class ArcType{

+	private String type; // rst

+	private String subtype; // rst-explain

+	private Integer type_id;

+	private Integer subtype_id;

+	

+	public ArcType(String type, // rst

+	String subtype, // rst-explain

+	Integer type_id,

+	Integer subtype_id){

+		this.type = type; // rst

+		this.subtype = subtype; // rst-explain

+		this.type_id= type_id;

+		this.subtype_id = subtype_id;

+	}

+

+	public String getType() {

+		return type;

+	}

+

+	public void setType(String type) {

+		this.type = type;

+	}

+

+	public String getSubtype() {

+		return subtype;

+	}

+

+	public void setSubtype(String subtype) {

+		this.subtype = subtype;

+	}

+

+	public Integer getType_id() {

+		return type_id;

+	}

+

+	public void setType_id(Integer type_id) {

+		this.type_id = type_id;

+	}

+

+	public Integer getSubtype_id() {

+		return subtype_id;

+	}

+

+	public void setSubtype_id(Integer subtype_id) {

+		this.subtype_id = subtype_id;

+	}

+	

+	public String toString(){

+		return type+":"+subtype;

+	}

+}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/IGeneralizer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/IGeneralizer.java
new file mode 100644
index 0000000..03256c8
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/IGeneralizer.java
@@ -0,0 +1,12 @@
+package opennlp.tools.parse_thicket;

+

+import java.util.List;

+

+public interface IGeneralizer<T> {

+	/* All objects such as words, ParseTreeNodes, Phrases, Communicative actions etc. are subject to 

+	 * generalization, so should implement this interface

+	 * 

+	 * In this project Everything is subject to generalization, and returns a list of generic objects

+	 */

+   public List<T> generalize(Object o1, Object o2);

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/PTTree.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/PTTree.java
new file mode 100644
index 0000000..9a1dfd5
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/PTTree.java
@@ -0,0 +1,89 @@
+package opennlp.tools.parse_thicket;

+

+import java.io.PrintWriter;

+import java.util.ArrayList;

+import java.util.List;

+

+import edu.stanford.nlp.trees.LabeledScoredTreeNode;

+import edu.stanford.nlp.trees.SimpleTree;

+import edu.stanford.nlp.trees.Tree;

+import edu.stanford.nlp.trees.TreeFactory;

+

+

+

+public class PTTree extends SimpleTree {

+	

+	public PTTree(){

+		super();

+	}

+

+	public PTTree(Tree t){

+		super();

+	}

+	private static final long serialVersionUID = 1L;

+

+	@Override

+	public PTTree[] children() {

+		return children();

+	}

+

+	@Override

+	public TreeFactory treeFactory() {

+		// TODO Auto-generated method stub

+		return null;

+	}

+	

+	public void doNavigate(){

+		List<LabeledScoredTreeNode> phrases = new ArrayList<LabeledScoredTreeNode>();

+		navigate(0, false, false, false, true, true, phrases);

+	}

+	

+	private static void navigateChildren(PTTree[] trChildren, int indent, boolean parentLabelNull, boolean onlyLabelValue, List<LabeledScoredTreeNode> phrases) {

+	    boolean firstSibling = true;

+	    boolean leftSibIsPreTerm = true;  // counts as true at beginning

+	    for (PTTree currentTree : trChildren) {

+	      currentTree.navigate(indent, parentLabelNull, firstSibling, leftSibIsPreTerm, false, onlyLabelValue, phrases);

+	      leftSibIsPreTerm = currentTree.isPreTerminal();

+	      // CC is a special case for English, but leave it in so we can exactly match PTB3 tree formatting

+	      if (currentTree.value() != null && currentTree.value().startsWith("CC")) {

+	        leftSibIsPreTerm = false;

+	      }

+	      firstSibling = false;

+	    }

+	  }

+	

+	/**

+	   * navigate parse tree

+	   */

+	  private void navigate(int indent, boolean parentLabelNull, boolean firstSibling, boolean leftSiblingPreTerminal, boolean topLevel, boolean onlyLabelValue, List<LabeledScoredTreeNode> phrases) {

+	    // the condition for staying on the same line in Penn Treebank

+	    boolean suppressIndent = (parentLabelNull || (firstSibling && isPreTerminal()) || (leftSiblingPreTerminal && isPreTerminal() && (label() == null || !label().value().startsWith("CC"))));

+	    if (suppressIndent) {

+	      //pw.print(" ");

+	      // pw.flush();

+	    } else {

+	      if (!topLevel) {

+	        //pw.println();

+	      }

+	      for (int i = 0; i < indent; i++) {

+	        //pw.print("  ");

+	        // pw.flush();

+	      }

+	    }

+	    if (isLeaf() || isPreTerminal()) {

+	      String terminalString = toStringBuilder(new StringBuilder(), onlyLabelValue).toString();

+	      //pw.print(terminalString);

+	      //pw.flush();

+	      return;

+	    }

+	    //pw.print("(");

+	    String nodeString = onlyLabelValue ? value() : nodeString();

+	    //pw.print(nodeString);

+	    // pw.flush();

+	    boolean parentIsNull = label() == null || label().value() == null;

+	    navigateChildren(children(), indent + 1, parentIsNull, true, phrases);

+	    //pw.print(")");

+	    

+	  }

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/Pair.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/Pair.java
new file mode 100644
index 0000000..850e1ee
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/Pair.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.parse_thicket;
+
+import java.util.Comparator;
+
+/**
+ * Generic pair class for holding two objects. Often used as return object.
+ * 
+ * @author Albert-Jan de Vries
+ * 
+ * @param <T1>
+ * @param <T2>
+ */
+public class Pair<T1, T2> {
+  private T1 first;
+
+  private T2 second;
+
+  public Pair() {
+
+  }
+
+  public Pair(T1 first, T2 second) {
+    this.first = first;
+    this.second = second;
+  }
+
+  public T1 getFirst() {
+    return first;
+  }
+
+  public void setFirst(T1 first) {
+    this.first = first;
+  }
+
+  public T2 getSecond() {
+    return second;
+  }
+
+  public void setSecond(T2 second) {
+    this.second = second;
+  }
+  
+  public class PairComparable implements Comparator<Pair<T1, T2>> {
+    // @Override
+    public int compare(Pair o1, Pair o2) {
+      int b = -2;
+      if ( o1.second instanceof Float && o2.second instanceof Float){
+        
+        b =  (((Float)o1.second > (Float)o2.second) ? -1
+          : (((Float)o1.second == (Float)o2.second) ? 0 : 1));
+      }
+      return b;
+    }
+  }
+  public String toString(){
+	  return this.first.toString()+" "+this.second.toString();
+  }
+  
+}
+
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseCorefsBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseCorefsBuilder.java
new file mode 100644
index 0000000..10e9683
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseCorefsBuilder.java
@@ -0,0 +1,191 @@
+package opennlp.tools.parse_thicket;
+
+import java.io.*;
+import java.util.*;
+
+import opennlp.tools.parse_thicket.communicative_actions.CommunicativeActionsArcBuilder;
+
+import edu.stanford.nlp.dcoref.CorefChain;
+import edu.stanford.nlp.dcoref.CorefCoreAnnotations;
+import edu.stanford.nlp.dcoref.CorefChain.CorefMention;
+import edu.stanford.nlp.ling.*;
+import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
+import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
+import edu.stanford.nlp.pipeline.*;
+import edu.stanford.nlp.trees.*;
+import edu.stanford.nlp.util.*;
+
+public class ParseCorefsBuilder {
+	protected static ParseCorefsBuilder instance;
+	private Annotation annotation;
+	StanfordCoreNLP pipeline;
+	CommunicativeActionsArcBuilder caFinder = new CommunicativeActionsArcBuilder();
+	
+	  /**
+	   * singleton method of instantiating the processor
+	   * 
+	   * @return the instance
+	   */
+	  public synchronized static ParseCorefsBuilder getInstance() {
+	    if (instance == null)
+	      instance = new ParseCorefsBuilder();
+
+	    return instance;
+	  }
+	
+	ParseCorefsBuilder(){
+		Properties props = new Properties();
+		props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
+		pipeline = new StanfordCoreNLP(props);
+	}
+	
+	public ParseThicket buildParseThicket(String text){
+		List<Tree> ptTrees = new ArrayList<Tree>();
+		// all numbering from 1, not 0
+		List<WordWordInterSentenceRelationArc> arcs = new ArrayList<WordWordInterSentenceRelationArc>();
+		List<List<ParseTreeNode>> nodesThicket = new ArrayList<List<ParseTreeNode>>();
+		
+		annotation = new Annotation(text);
+		try {
+			pipeline.annotate(annotation);
+			List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
+			if (sentences != null && sentences.size() > 0) 
+			for(CoreMap sentence: sentences){
+				List<ParseTreeNode> nodes = new ArrayList<ParseTreeNode>();
+				
+				// traversing the words in the current sentence
+			    // a CoreLabel is a CoreMap with additional token-specific methods
+				Class<TokensAnnotation> tokenAnn = TokensAnnotation.class;
+				List<CoreLabel> coreLabelList = sentence.get(tokenAnn);
+				int count=1;
+			    for (CoreLabel token: coreLabelList ) {
+			      // this is the text of the token
+			      String lemma = token.get(TextAnnotation.class);
+			      // this is the POS tag of the token
+			      String pos = token.get(PartOfSpeechAnnotation.class);
+			      // this is the NER label of the token
+			      String ne = token.get(NamedEntityTagAnnotation.class);     
+			      nodes.add(new ParseTreeNode(lemma, pos, ne, count));
+			      count++;
+			    }	
+			    nodesThicket.add(nodes);
+			  Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
+			  ptTrees.add(tree);
+			}
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+	    
+	  
+	    // now coreferences
+	    Map<Integer, CorefChain> corefs = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
+	    List<CorefChain> chains = new ArrayList<CorefChain>(corefs.values());
+	    for(CorefChain c: chains){
+	      //System.out.println(c);
+	      List<CorefMention> mentions = c.getMentionsInTextualOrder();
+	      //System.out.println(mentions);
+	      if (mentions.size()>1)
+	      for(int i=0; i<mentions.size(); i++){
+	    	  for(int j=i+1; j<mentions.size(); j++){
+	    	  CorefMention mi = mentions.get(i), mj=mentions.get(j);
+	    	  
+	    	  
+	    	  int niSentence = mi.position.get(0);
+	    	  int niWord = mi.startIndex;
+	    	  int njSentence = mj.position.get(0);
+	    	  int njWord = mj.startIndex;
+	    	  
+	    	  ArcType arcType = new ArcType("coref-", mj.mentionType+"-"+mj.animacy, 0, 0);
+	    	  
+	    	  WordWordInterSentenceRelationArc arc = 
+	    			  new WordWordInterSentenceRelationArc(new Pair<Integer, Integer>(niSentence,niWord), 
+	    					  new Pair<Integer, Integer>(njSentence,njWord), mi.mentionSpan, mj.mentionSpan, 
+	    					  arcType);
+	    	  arcs.add(arc);
+	    	  
+	    	  /*
+	    	  System.out.println("animacy = "+m.animacy);
+	    	  System.out.println("mention span = "+m.mentionSpan);
+	    	  System.out.println(" id = "+m.mentionID);
+	    	  System.out.println(" position = "+m.position);
+	    	  System.out.println(" start index = "+m.startIndex);
+	    	  System.out.println(" end index = "+m.endIndex);   
+	    	  System.out.println(" mentionType = "+m.mentionType);   
+	    	  System.out.println(" number =  = "+m.number);  
+	    	  */
+	    	  }
+	      }
+	      
+	      
+	    }
+	    List<WordWordInterSentenceRelationArc> arcsCA = buildCAarcs(nodesThicket);
+	    
+	    ParseThicket result = new ParseThicket(ptTrees, arcs);
+	    result.setNodesThicket(nodesThicket);
+	    return result;
+	}
+
+  private List<WordWordInterSentenceRelationArc> buildCAarcs(
+			List<List<ParseTreeNode>> nodesThicket) {
+	  List<WordWordInterSentenceRelationArc> arcs = new ArrayList<WordWordInterSentenceRelationArc>();
+	  
+		for(int sentI=0; sentI<nodesThicket.size(); sentI++){
+			for(int sentJ=sentI+1; sentJ<nodesThicket.size(); sentJ++){
+				List<ParseTreeNode> sentenceI = nodesThicket.get(sentI), 
+						sentenceJ = nodesThicket.get(sentJ);
+				Pair<String, Integer[]> caI = caFinder.findCAInSentence(sentenceI);
+				Pair<String, Integer[]> caJ = caFinder.findCAInSentence(sentenceJ);
+				int indexCA1 = caFinder.findCAIndexInSentence(sentenceI);
+				int indexCA2 = caFinder.findCAIndexInSentence(sentenceJ);
+				if (caI==null || caJ==null)
+					continue;
+				Pair<String, Integer[]> caGen = caFinder.generalize(caI, caJ).get(0);
+				
+				ArcType arcType = new ArcType("ca", 
+						caGen.getFirst().toString()+printNumArray(caGen.getSecond()), 0, 0);
+				 WordWordInterSentenceRelationArc arc = 
+		    			  new WordWordInterSentenceRelationArc(new Pair<Integer, Integer>(sentI,indexCA1), 
+		    					  new Pair<Integer, Integer>(sentJ,indexCA2), caI.getFirst(), caJ.getFirst(), 
+		    					  arcType);
+		    	  arcs.add(arc);
+				
+			}
+					}
+		
+		return arcs;
+	}
+  
+    private String printNumArray(Integer[] arr){
+    	StringBuffer buf = new StringBuffer();
+    	for(Integer i: arr){
+    		buf.append(Integer.toString(i)+ " ");
+    	}
+    	return buf.toString();
+    }
+
+public static void main(String[] args) throws IOException {
+	  ParseCorefsBuilder builder = ParseCorefsBuilder.getInstance();
+	  ParseThicket  th = builder.buildParseThicket("Iran refuses to accept the UN proposal to end its dispute over its work on nuclear weapons."+
+    		  "UN nuclear watchdog passes a resolution condemning Iran for developing its second uranium enrichment site in secret. " +
+    		  "A recent IAEA report presented diagrams that suggested Iran was secretly working on nuclear weapons. " +
+    		  "Iran envoy says its nuclear development is for peaceful purpose, and the material evidence against it has been fabricated by the US. ");
+    //GraphFromPTreeBuilder gbuilder = new GraphFromPTreeBuilder();
+    //gbuilder.buildGraphFromPT(th);
+	 
+  }
+
+}
+
+/*
+ * [<sent=1-word=1..Iran> ===> <sent=3-word=9..Iran>, <sent=1-word=1..Iran> ===> <sent=4-word=1..Iran>, <sent=1-word=1..Iran> ===> <sent=4-word=4..its>, <sent=1-word=1..Iran> ===> <sent=4-word=17..it>, <sent=3-word=9..Iran> ===> <sent=4-word=1..Iran>, <sent=3-word=9..Iran> ===> <sent=4-word=4..its>, <sent=3-word=9..Iran> ===> <sent=4-word=17..it>, <sent=4-word=1..Iran> ===> <sent=4-word=4..its>, <sent=4-word=1..Iran> ===> <sent=4-word=17..it>, <sent=4-word=4..its> ===> <sent=4-word=17..it>, <sent=1-word=6..UN> ===> <sent=2-word=1..UN>, <sent=1-word=5..the UN proposal> ===> <sent=1-word=10..its>, <sent=1-word=5..the UN proposal> ===> <sent=1-word=13..its>, <sent=1-word=10..its> ===> <sent=1-word=13..its>, <sent=1-word=16..nuclear weapons> ===> <sent=3-word=14..nuclear weapons>, <sent=2-word=1..UN nuclear watchdog> ===> <sent=2-word=11..its>]
+
+[[[1]Iran:NNP>LOCATION, [2]refuses:VBZ>O, [3]to:TO>O, [4]accept:VB>O, [5]the:DT>O, [6]UN:NNP>ORGANIZATION, [7]proposal:NN>O, [8]to:TO>O, [9]end:VB>O, [10]its:PRP$>O, [11]dispute:NN>O, [12]over:IN>O, [13]its:PRP$>O, [14]work:NN>O, [15]on:IN>O, [16]nuclear:JJ>O, [17]weapons:NNS>O, [18].:.>O], 
+
+[[1]UN:NNP>ORGANIZATION, [2]nuclear:JJ>O, [3]watchdog:NN>O, [4]passes:VBZ>O, [5]a:DT>O, [6]resolution:NN>O, [7]condemning:VBG>O, [8]Iran:NNP>LOCATION, [9]for:IN>O, [10]developing:VBG>O, [11]its:PRP$>O, [12]second:JJ>ORDINAL, [13]uranium:NN>O, [14]enrichment:NN>O, [15]site:NN>O, [16]in:IN>O, [17]secret:NN>O, [18].:.>O], 
+
+[[1]A:DT>O, [2]recent:JJ>O, [3]IAEA:NNP>ORGANIZATION, [4]report:NN>O, [5]presented:VBD>O, [6]diagrams:NNS>O, [7]that:WDT>O, [8]suggested:VBD>O, [9]Iran:NNP>LOCATION, [10]was:VBD>O, [11]secretly:RB>O, [12]working:VBG>O, [13]on:IN>O, [14]nuclear:JJ>O, [15]weapons:NNS>O, [16].:.>O], 
+
+[[1]Iran:NNP>LOCATION, [2]envoy:NN>O, [3]says:VBZ>O, [4]its:PRP$>O, [5]nuclear:JJ>O, [6]development:NN>O, [7]is:VBZ>O, [8]for:IN>O, [9]peaceful:JJ>O, [10]purpose:NN>O, [11],:,>O, [12]and:CC>O, [13]the:DT>O, [14]material:NN>O, [15]evidence:NN>O, [16]against:IN>O, [17]it:PRP>O, [18]has:VBZ>O, [19]been:VBN>O, [20]fabricated:VBN>O, [21]by:IN>O, [22]the:DT>O, [23]US:NNP>LOCATION, [24].:.>O]]
+*/
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseThicket.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseThicket.java
new file mode 100644
index 0000000..e584d1e
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseThicket.java
@@ -0,0 +1,59 @@
+package opennlp.tools.parse_thicket;

+

+import java.util.List;

+

+import edu.stanford.nlp.trees.Tree;

+

+public class ParseThicket {

+	// parse trees 

+	private List<Tree> sentenceTrees;

+	// there should be an arc for each sentence

+	private List<WordWordInterSentenceRelationArc> arcs;

+	// lists of nodes for each sentence

+	// then list for all sentences

+	private List<List<ParseTreeNode>> sentenceNodes;

+	

+	public List<Tree> getSentences() {

+		return sentenceTrees;

+	}

+

+	public void setSentences(List<Tree> sentences) {

+		this.sentenceTrees = sentences;

+	}

+

+	public List<WordWordInterSentenceRelationArc> getArcs() {

+		return arcs;

+	}

+

+	public void setArcs(List<WordWordInterSentenceRelationArc> arcs) {

+		this.arcs = arcs;

+	}

+

+	public List<List<ParseTreeNode>> getNodesThicket() {

+		return sentenceNodes;

+	}

+

+	public void setNodesThicket(List<List<ParseTreeNode>> nodesThicket) {

+		this.sentenceNodes = nodesThicket;

+	}

+

+	public ParseThicket(String paragraph){

+		ParseCorefsBuilder builder = ParseCorefsBuilder.getInstance();

+		ParseThicket res = builder.buildParseThicket(paragraph);

+		this.sentenceTrees= res.sentenceTrees;

+		this.arcs = res.arcs;		

+	}

+

+	public ParseThicket(List<Tree> ptTrees,

+			List<WordWordInterSentenceRelationArc> barcs) {

+		this.sentenceTrees= ptTrees;

+		this.arcs = barcs;				

+	}

+	

+	public String toString(){

+		return this.sentenceTrees+"\n"+this.arcs;

+	}

+	

+	

+	

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseTreeNode.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseTreeNode.java
new file mode 100644
index 0000000..528eb4d
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseTreeNode.java
@@ -0,0 +1,153 @@
+package opennlp.tools.parse_thicket;

+

+import java.util.ArrayList;

+import java.util.List;

+

+public class ParseTreeNode implements IGeneralizer<ParseTreeNode>{

+	String word;

+    // this is the POS tag of the token

+    String pos; 

+    // this is the NER label of the token

+    String ne; 

+    Integer id;

+    //PhraseType 

+    String phraseType;

+    

+    public enum PhraseType {NP("NP"), VP("VP"), PRP("PRP");

+    	private PhraseType(final String text) {

+        this.text = text;

+    	}

+        private final String text;

+    

+    }

+    

+	public ParseTreeNode(String word, String pos, String ne, Integer id) {

+		super();

+		this.word = word;

+		this.pos = pos;

+		this.ne = ne;

+		this.id = id;

+	}

+	

+	public ParseTreeNode(String word, String pos) {

+		super();

+		this.word = word;

+		this.pos = pos;

+		this.ne = ne;

+		this.id = id;

+	}

+	

+	public String getPhraseType() {

+		return phraseType;

+	}

+	public void setPhraseType(String pt) {

+		this.phraseType=pt;

+	}

+	public String getWord() {

+		return word;

+	}

+	public void setWord(String word) {

+		this.word = word;

+	}

+	public String getPos() {

+		return pos;

+	}

+	public void setPos(String pos) {

+		this.pos = pos;

+	}

+	public String getNe() {

+		return ne;

+	}

+	public void setNe(String ne) {

+		this.ne = ne;

+	}

+	public Integer getId() {

+		return id;

+	}

+	public void setId(Integer id) {

+		this.id = id;

+	} 

+    

+	public String toString(){

+		StringBuffer buf = new StringBuffer();

+		if (id!=null)

+			buf.append("<"+id+">");

+		if(phraseType!=null)

+			buf.append(phraseType);

+		if(word!=null)

+			buf.append("'"+word+"'");

+		if (pos!=null)

+			buf.append(":"+pos);

+		return buf.toString();

+	}

+

+	@Override

+	public List<ParseTreeNode> generalize(Object o1, Object o2) {

+		List<ParseTreeNode> result = new ArrayList<ParseTreeNode>();

+		

+		ParseTreeNode w1 = (ParseTreeNode) o1;

+		ParseTreeNode w2 = (ParseTreeNode) o2;

+		String posGen =  generalizePOS(w1.pos, w2.pos);

+		if (posGen ==null)

+			return result;

+		ParseTreeNode newNode = new ParseTreeNode(generalizeWord(w1.word, w2.word),

+				posGen, "O", -1);

+		result.add(newNode);

+		return result;

+	}

+	

+	public String generalizeWord(String lemma1, String lemma2){

+		if (lemma1.equals(lemma2))

+			return lemma1;

+		if (lemma1.equals("*"))

+			return "*";

+		if (lemma2.equals("*"))

+			return "*";

+		//TODO

+		return "*";

+		

+	}

+	

+	public String generalizePOS(String pos1, String pos2) {

+	    if ((pos1.startsWith("NN") && pos2.equals("NP") || pos2.startsWith("NN")

+	        && pos1.equals("NP"))) {

+	      return "NN";

+	    }

+	    if ((pos1.startsWith("NN") && pos2.equals("VBG") || pos2.startsWith("VBG")

+	        && pos1.equals("NN"))) {

+	      return "NN";

+	    }

+

+	    if ((pos1.startsWith("NN") && pos2.equals("ADJP") || pos2.startsWith("NN")

+	        && pos1.equals("ADJP"))) {

+	      return "NN";

+	    }

+	    if ((pos1.equals("IN") && pos2.equals("TO") || pos1.equals("TO")

+	        && pos2.equals("IN"))) {

+	      return "IN";

+	    }

+	    // VBx vs VBx = VB (does not matter which form for verb)

+	    if (pos1.startsWith("VB") && pos2.startsWith("VB")) {

+	      return "VB";

+	    }

+

+	    // ABx vs ABy always gives AB

+	    if (pos1.equalsIgnoreCase(pos2)) {

+	      return pos1;

+	    }

+	    if (pos1.length() > 2) {

+	      pos1 = pos1.substring(0, 2);

+	    }

+

+	    if (pos2.length() > 2) {

+	      pos2 = pos2.substring(0, 2);

+	    }

+	    if (pos1.equalsIgnoreCase(pos2)) {

+	      return pos1 + "*";

+	    }

+	    return null;

+	  }

+

+	

+};

+

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/Triple.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/Triple.java
new file mode 100644
index 0000000..f4a8176
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/Triple.java
@@ -0,0 +1,49 @@
+package opennlp.tools.parse_thicket;

+

+import java.util.Comparator;

+

+

+public class Triple<T1, T2, T3> {

+		  private T1 first;

+

+		  private T2 second;

+		  

+		  private T3 third;

+

+		  public Triple() {

+

+		  }

+

+		  public T1 getFirst() {

+		    return first;

+		  }

+

+		  public void setFirst(T1 first) {

+		    this.first = first;

+		  }

+

+		  public T2 getSecond() {

+		    return second;

+		  }

+

+		  public void setSecond(T2 second) {

+		    this.second = second;

+		  }

+

+		public Triple(T1 first, T2 second, T3 third) {

+			super();

+			this.first = first;

+			this.second = second;

+			this.third = third;

+		}

+

+		public T3 getThird() {

+			return third;

+		}

+

+		public void setThird(T3 third) {

+			this.third = third;

+		}

+		  

+		  

+		}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/WordWordInterSentenceRelationArc.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/WordWordInterSentenceRelationArc.java
new file mode 100644
index 0000000..db7905d
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/WordWordInterSentenceRelationArc.java
@@ -0,0 +1,68 @@
+package opennlp.tools.parse_thicket;

+

+public class WordWordInterSentenceRelationArc {

+	

+	

+		Pair<Integer, Integer> codeFrom;

+		Pair<Integer, Integer> codeTo;

+		String lemmaFrom;

+		String lemmaTo;

+		ArcType arcType;

+		

+		public Pair<Integer, Integer> getCodeFrom() {

+			return codeFrom;

+		}

+

+		public void setCodeFrom(Pair<Integer, Integer> codeFrom) {

+			this.codeFrom = codeFrom;

+		}

+

+		public Pair<Integer, Integer> getCodeTo() {

+			return codeTo;

+		}

+

+		public void setCodeTo(Pair<Integer, Integer> codeTo) {

+			this.codeTo = codeTo;

+		}

+

+		public String getLemmaFrom() {

+			return lemmaFrom;

+		}

+

+		public void setLemmaFrom(String lemmaFrom) {

+			this.lemmaFrom = lemmaFrom;

+		}

+

+		public String getLemmaTo() {

+			return lemmaTo;

+		}

+

+		public void setLemmaTo(String lemmaTo) {

+			this.lemmaTo = lemmaTo;

+		}

+

+		public ArcType getArcType() {

+			return arcType;

+		}

+

+		public void setArcType(ArcType arcType) {

+			this.arcType = arcType;

+		}

+

+		public WordWordInterSentenceRelationArc(

+				Pair<Integer, Integer> codeFrom, Pair<Integer, Integer> codeTo,

+				String lemmaFrom, String lemmaTo, ArcType arcType) {

+			super();

+			this.codeFrom = codeFrom;

+			this.codeTo = codeTo;

+			this.lemmaFrom = lemmaFrom;

+			this.lemmaTo = lemmaTo;

+			this.arcType = arcType;

+		}

+	

+		public String toString(){

+			return "<sent="+codeFrom.getFirst()+"-word="+codeFrom.getSecond()+".."+lemmaFrom+"> ===> "+

+					"<sent="+codeTo.getFirst()+"-word="+codeTo.getSecond()+".."+lemmaTo+">";

+		}

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/BingQueryRunnerMultipageSearchResults.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/BingQueryRunnerMultipageSearchResults.java
new file mode 100644
index 0000000..09e371a
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/BingQueryRunnerMultipageSearchResults.java
@@ -0,0 +1,72 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.parse_thicket.apps;

+

+import java.io.BufferedReader;

+import java.io.InputStreamReader;

+import java.net.URL;

+import java.net.URLConnection;

+import java.net.URLEncoder;

+import java.util.ArrayList;

+import java.util.List;

+import java.util.logging.Logger;

+

+import net.billylieurance.azuresearch.AzureSearchResultSet;

+import net.billylieurance.azuresearch.AzureSearchWebQuery;

+import net.billylieurance.azuresearch.AzureSearchWebResult;

+

+import opennlp.tools.similarity.apps.BingQueryRunner;

+import opennlp.tools.similarity.apps.HitBase;

+

+import org.apache.commons.lang.StringUtils;

+import org.json.JSONArray;

+import org.json.JSONObject;

+

+

+public class BingQueryRunnerMultipageSearchResults extends BingQueryRunner {

+	

+	private static String BING_KEY = "e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=";

+	private static final Logger LOG = Logger

+		      .getLogger("opennlp.tools.similarity.apps.BingQueryRunnerMultipageSearchResults");

+	private AzureSearchWebQuery aq = new AzureSearchWebQuery();

+

+	public List<HitBase> runSearch(String query, int nRes, boolean bHighRank) {

+		aq.setAppid(BING_KEY);

+		aq.setQuery(query);		  		

+		aq.doQuery();

+		if (!bHighRank)

+			aq.setPage(5);

+		aq.setPerPage(nRes);

+		

+		List<HitBase> results = new ArrayList<HitBase> ();

+		AzureSearchResultSet<AzureSearchWebResult> ars = aq.getQueryResult();

+		

+		for (AzureSearchWebResult anr : ars){

+		    HitBase h = new HitBase();

+		    h.setAbstractText(anr.getDescription());

+		    h.setTitle(anr.getTitle());

+		    h.setUrl(anr.getUrl());

+		    results.add(h);

+		}

+		return results;

+	}

+	

+	

+

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MinedSentenceProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MinedSentenceProcessor.java
new file mode 100644
index 0000000..4f0512b
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MinedSentenceProcessor.java
@@ -0,0 +1,210 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.parse_thicket.apps;

+

+import java.util.Arrays;

+import java.util.List;

+

+import opennlp.tools.similarity.apps.utils.Utils;

+

+import org.apache.commons.lang.StringUtils;

+

+public class MinedSentenceProcessor {

+  public static String acceptableMinedSentence(String sent) {

+    // if too many commas => seo text

+

+    String[] commas = StringUtils.split(sent, ',');

+    String[] spaces = StringUtils.split(sent, ' ');

+    if ((float) commas.length / (float) spaces.length > 0.7) {

+      System.out.println("Rejection: too many commas");

+      return null;

+    }

+    

+    String[] otherDelimiters = StringUtils.split(sent, '/');

+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {

+        System.out.println("Rejection: too many delimiters");

+        return null;

+    }

+    

+    otherDelimiters = StringUtils.split(sent, '.');

+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {

+        System.out.println("Rejection: too many delimiters");

+        return null;

+    }

+    otherDelimiters = StringUtils.split(sent, '!');

+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {

+        System.out.println("Rejection: too many delimiters");

+        return null;

+    }

+    otherDelimiters = StringUtils.split(sent, '=');

+    if ((float) otherDelimiters.length / (float) spaces.length > 0.7) {

+        System.out.println("Rejection: too many delimiters");

+        return null;

+    }

+    

+    String[] pipes = StringUtils.split(sent, '|');

+    if (StringUtils.split(sent, '|').length > 2

+        || StringUtils.split(sent, '>').length > 2) {

+      System.out.println("Rejection: too many |s or >s ");

+      return null;

+    }

+    String sentTry = sent.toLowerCase();

+    // if too many long spaces

+    String sentSpaces = sentTry.replace("   ", "");

+    if (sentSpaces.length() - sentTry.length() > 10) // too many spaces -

+      // suspicious

+      return null;

+

+    if (sentTry.indexOf("click here") > -1 || sentTry.indexOf(" wikip") > -1

+        || sentTry.indexOf("copyright") > -1

+        || sentTry.indexOf("operating hours") > -1

+        || sentTry.indexOf("days per week") > -1

+        || sentTry.indexOf("click for") > -1 || sentTry.indexOf("photos") > -1

+        || sentTry.indexOf("find the latest") > -1

+        || sentTry.startsWith("subscribe")

+        || sentTry.indexOf("Terms of Service") > -1

+        || sentTry.indexOf("clicking here") > -1

+        || sentTry.indexOf("skip to") > -1 || sentTry.indexOf("sidebar") > -1

+        || sentTry.indexOf("Tags:") > -1 || sentTry.startsWith("Posted by")

+        || sentTry.indexOf("available online") > -1

+        || sentTry.indexOf("get online") > -1

+        || sentTry.indexOf("buy online") > -1

+        || sentTry.indexOf("not valid") > -1 || sentTry.indexOf("discount") > -1

+        || sentTry.indexOf("official site") > -1

+        || sentTry.indexOf("this video") > -1

+        || sentTry.indexOf("this book") > -1

+        || sentTry.indexOf("this product") > -1

+        || sentTry.indexOf("paperback") > -1 || sentTry.indexOf("hardcover") > -1

+        || sentTry.indexOf("audio cd") > -1

+        || sentTry.indexOf("related searches") > -1

+        || sentTry.indexOf("permission is granted") > -1

+        || sentTry.indexOf("[edit") > -1

+        || sentTry.indexOf("edit categories") > -1

+        || sentTry.indexOf("free license") > -1

+        || sentTry.indexOf("permission is granted") > -1

+        || sentTry.indexOf("under the terms") > -1

+        || sentTry.indexOf("rights reserved") > -1

+        || sentTry.indexOf("wikipedia") > -1 || sentTry.endsWith("the")

+        || sentTry.endsWith("the.") || sentTry.startsWith("below") 

+        || sentTry.indexOf("recipient of")>-1 || sentTry.indexOf("this message")>-1 

+        ||sentTry.indexOf( "mailing list")>-1 ||sentTry.indexOf( "purchase order")>-1

+        ||sentTry.indexOf( "mon-fri")>-1 ||sentTry.indexOf( "email us")>-1 ||sentTry.indexOf( "privacy pol")>-1 ||sentTry.indexOf( "back to top")>-1 

+        ||sentTry.indexOf( "click here")>-1 ||sentTry.indexOf( "for details")>-1 ||sentTry.indexOf( "assistance?")>-1 ||sentTry.indexOf( "chat live")>-1

+        ||sentTry.indexOf( "free shipping")>-1 ||sentTry.indexOf( "company info")>-1 ||sentTry.indexOf( "satisfaction g")>-1 ||sentTry.indexOf( "contact us")>-1

+        ||sentTry.startsWith( "fax") ||sentTry.startsWith( "write") || sentTry.startsWith( "email")||sentTry.indexOf( "conditions")>-1 ||sentTry.indexOf( "chat live")>-1

+        ||sentTry.startsWith( "we ") ||sentTry.indexOf( "the recipient")>-1 ||sentTry.indexOf( "day return")>-1 ||sentTry.indexOf( "days return")>-1

+        

+        ||sentTry.startsWith( "fax") ||sentTry.indexOf( "refund it")>-1 || sentTry.indexOf( "your money")>-1

+        ||sentTry.startsWith( "free") ||sentTry.indexOf( "purchase orders")>-1

+        ||sentTry.startsWith( "exchange it ") ||sentTry.indexOf( "return it")>-1 ||sentTry.indexOf( "credit card")>-1 

+        

+        ||sentTry.indexOf( "storeshop")>-1 || sentTry.startsWith( "find") || sentTry.startsWith( "shop") || sentTry.startsWith( "unlimited") 

+        ||sentTry.indexOf( "for a limited time")>-1 ||sentTry.indexOf( "prime members")>-1 ||sentTry.indexOf( "amazon members")>-1 ||sentTry.indexOf( "unlimited free")>-1 

+        ||sentTry.indexOf( "shipping")>-1 || sentTry.startsWith( "amazon")

+// not a script text

+        ||sentTry.indexOf( "document.body")>-1 ||sentTry.indexOf( " var ")>-1         ||sentTry.indexOf( "search suggestions")>-1 ||sentTry.startsWith( "Search") 

+        

+    		)

+      return null;

+    

+    //Millions of Amazon Prime members enjoy instant videos, free Kindle books and unlimited free two-day shipping.

+

+    // count symbols indicating wrong parts of page to mine for text

+    // if short and contains too many symbols indicating wrong area: reject

+    String sentWrongSym = sentTry.replace(">", "&&&").replace("�", "&&&")

+        .replace("|", "&&&").replace(":", "&&&").replace("/", "&&&")

+        .replace("-", "&&&").replace("%", "&&&");

+    if ((sentWrongSym.length() - sentTry.length()) >= 4

+        && sentTry.length() < 200) // twice ot more

+      return null;

+

+    sent = sent.replace('[', ' ').replace(']', ' ')

+        .replace("_should_find_orig_", "").replace(".   .", ". ")

+        .replace("amp;", " ").replace("1.", " ").replace("2.", " ")

+        .replace("3.", " ").replace("4.", " ").replace("2009", "2011")

+        .replace("2008", "2011").replace("2006", "2011")

+        .replace("2007", "2011").replace("VIDEO:", " ").replace("Video:", " ")

+        .replace("no comments", " ").replace("  ", " ").replace("  ", " ")

+        .replace("(more.)", "").replace("more.", "").replace("<more>", "")

+        .replace("[more]", "").replace(".,", ".").replace("&lt;", "")

+        .replace("p&gt;", "").replace("product description", "");

+

+    // TODO .replace("a.", ".");

+

+    int endIndex = sent.indexOf(" posted");

+    if (endIndex > 0)

+      sent = sent.substring(0, endIndex);

+

+    return sent;

+  }

+

+  public static String processSentence(String pageSentence) {

+    if (pageSentence == null)

+      return "";

+    pageSentence = Utils.fullStripHTML(pageSentence);

+    pageSentence = StringUtils.chomp(pageSentence, "..");

+    pageSentence = StringUtils.chomp(pageSentence, ". .");

+    pageSentence = StringUtils.chomp(pageSentence, " .");

+    pageSentence = StringUtils.chomp(pageSentence, ".");

+    pageSentence = StringUtils.chomp(pageSentence, "...");

+    pageSentence = StringUtils.chomp(pageSentence, " ....");

+    pageSentence = pageSentence.replace("::", ":").replace(".,", ". ")

+        .replace("(.)", "");

+

+    pageSentence = pageSentence.trim();

+    pageSentence = pageSentence.replaceAll("\\s+", " "); // make single

+    // spaces

+    // everywhere

+

+    String[] pipes = StringUtils.split(pageSentence, '|'); // removed

+    // shorter part

+    // of sentence

+    // at the end

+    // after pipe

+    if (pipes.length == 2

+        && ((float) pipes[0].length() / (float) pipes[1].length() > 3.0)) {

+      int pipePos = pageSentence.indexOf("|");

+      if (pipePos > -1)

+        pageSentence = pageSentence.substring(0, pipePos - 1).trim();

+

+    }

+

+    if (!StringUtils.contains(pageSentence, '.')

+        && !StringUtils.contains(pageSentence, '?')

+        && !StringUtils.contains(pageSentence, '!'))

+      pageSentence = pageSentence + ". ";

+

+    pageSentence = pageSentence.replace(" .", ".").replace("..", ".").trim();

+    if (!pageSentence.endsWith("."))

+      pageSentence += ". ";

+    return pageSentence;

+  }

+

+  

+  public static String normalizeForSentenceSplitting(String pageContent) {

+    pageContent.replace("Jan.", "January").replace("Feb.", "February")

+        .replace("Mar.", "March").replace("Apr.", "April")

+        .replace("Jun.", "June").replace("Jul.", "July")

+        .replace("Aug.", "August").replace("Sep.", "September")

+        .replace("Oct.", "October").replace("Nov.", "November")

+        .replace("Dec.", "December");

+

+    return pageContent;

+

+  }

+}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MostFrequentWordsFromPageGetter.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MostFrequentWordsFromPageGetter.java
new file mode 100644
index 0000000..b106ac9
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MostFrequentWordsFromPageGetter.java
@@ -0,0 +1,70 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.parse_thicket.apps;

+

+import java.util.ArrayList;

+import java.util.HashMap;

+import java.util.List;

+import java.util.Map;

+import java.util.Map.Entry;

+import java.util.Scanner;

+import java.util.TreeMap;

+

+import org.apache.commons.lang.StringUtils;

+

+import opennlp.tools.similarity.apps.utils.ValueSortMap;

+

+public class MostFrequentWordsFromPageGetter {

+	

+	public List<String> getMostFrequentWordsInText(String input)

+	{

+		int maxRes = 4;

+		Scanner in = new Scanner(input);

+        in.useDelimiter("\\s+");

+        Map<String, Integer> words = 

+                new HashMap<String, Integer>();

+        

+        while (in.hasNext()) {

+            String word = in.next();

+            if (!StringUtils.isAlpha(word) || word.length()<4 )

+            	continue;

+            

+            if (!words.containsKey(word)) {

+                words.put(word, 1);

+            } else {

+                words.put(word, words.get(word) + 1);

+            }

+        }

+        

+        words = ValueSortMap.sortMapByValue(words, false);

+        List<String> results = new ArrayList<String>(words.keySet());

+		

+		if (results.size() > maxRes )

+			results = results.subList(0, maxRes); // get maxRes elements

+       

+        return results;

+    }

+	public List<String> getMostFrequentWordsInTextArr(String[] longestSents) {

+		StringBuffer buffer = new StringBuffer();

+		for(String s: longestSents){

+			buffer.append(s);

+		}

+		

+		return getMostFrequentWordsInText(buffer.toString());

+	}

+	

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
new file mode 100644
index 0000000..ce4b600
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
@@ -0,0 +1,184 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.parse_thicket.apps;

+

+import java.util.ArrayList;

+import java.util.Collections;

+import java.util.List;

+import java.util.logging.Logger;

+

+

+import opennlp.tools.jsmlearning.ProfileReaderWriter;

+import opennlp.tools.parse_thicket.matching.Matcher;

+import opennlp.tools.similarity.apps.BingQueryRunner;

+import opennlp.tools.similarity.apps.HitBase;

+import opennlp.tools.similarity.apps.HitBaseComparable;

+import opennlp.tools.similarity.apps.WebSearchEngineResultsScraper;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;

+import opennlp.tools.textsimilarity.SentencePairMatchResult;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

+public class MultiSentenceSearchResultsProcessor  {

+	private static Logger LOG = Logger

+			.getLogger("opennlp.tools.similarity.apps.SearchResultsProcessor");

+

+	private WebSearchEngineResultsScraper scraper = new WebSearchEngineResultsScraper();

+	private Matcher matcher = new Matcher();

+	private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

+	private BingQueryRunner bingSearcher = new BingQueryRunner();

+	private SnippetToParagraph snp = new SnippetToParagraph();

+

+	protected static final int NUM_OF_SEARCH_RESULTS = 10;

+

+	/*

+	 * Takes a search engine API (or scraped) search results and calculates the parse tree similarity

+	 * between the question and each snippet. Ranks those snippets with higher

+	 * similarity score up

+	 */

+

+

+	protected List<HitBase> calculateMatchScoreResortHits(List<HitBase> hits,

+			String searchQuery) {

+

+		List<HitBase> newHitList = new ArrayList<HitBase>();

+		int count = 0;

+		for (HitBase hit : hits) {

+			if (count>10)

+				break;

+			count++;

+			String[] pageSentsAndSnippet = formTextForReRankingFromHit(hit);

+					

+			Double score = 0.0;

+			try {

+				List<List<ParseTreeChunk>> match = null;

+				if (pageSentsAndSnippet!=null && pageSentsAndSnippet[0].length()>50){

+					match = matcher.assessRelevanceCache(pageSentsAndSnippet[0] ,

+							searchQuery);

+					score = parseTreeChunkListScorer.getParseTreeChunkListScore(match);

+					hit.setSource(match.toString());

+				}

+				if (score < 2){ // attempt to match with snippet, if not much luck with original text

+					match = matcher.assessRelevanceCache(pageSentsAndSnippet[0] ,

+							searchQuery);

+					score = parseTreeChunkListScorer.getParseTreeChunkListScore(match);

+				}

+				LOG.info(score + " | " +pageSentsAndSnippet[1]);

+			} catch (Exception e) {

+				LOG.severe("Problem processing snapshot " + pageSentsAndSnippet[1]);

+				e.printStackTrace();

+			}

+			hit.setGenerWithQueryScore(score);

+			newHitList.add(hit);

+		}

+		

+		System.out.println("\n\n ============= old ORDER ================= ");

+		for (HitBase hit : newHitList) {

+			System.out.println(hit.getOriginalSentences().toString() + " => "+hit.getGenerWithQueryScore());

+			System.out.println("match = "+hit.getSource());

+		}

+		Collections.sort(newHitList, new HitBaseComparable());

+

+		System.out.println("\n\n ============= NEW ORDER ================= ");

+		for (HitBase hit : newHitList) {

+			System.out.println(hit.getOriginalSentences().toString() + " => "+hit.getGenerWithQueryScore());

+			System.out.println("match = "+hit.getSource());

+		}

+

+		return newHitList;

+	}

+

+	protected String[] formTextForReRankingFromHit(HitBase hit) {

+		HitBase hitWithFullSents = snp.formTextFromOriginalPageGivenSnippet(hit);

+		String textFromOriginalPage = "";

+		try {

+			List<String> sents = hitWithFullSents.getOriginalSentences();

+			for(String s: sents){

+				textFromOriginalPage+=s+" ";

+			}

+

+			if (textFromOriginalPage.startsWith(".")){

+				textFromOriginalPage = textFromOriginalPage.substring(2);

+			}

+			textFromOriginalPage = textFromOriginalPage.replace(" . .", ". ").replace(". . ", ". ").

+					replace("..", ". ").trim();

+		} catch (Exception e1) {

+			e1.printStackTrace();

+			LOG.info("Problem processing snapshot "+hit.getAbstractText());

+		}

+		hit.setPageContent(textFromOriginalPage);

+		String snapshot = hit.getAbstractText().replace("<b>...</b>", ". ").replace("<span class='best-phrase'>", " ").replace("<span>", " ").replace("<span>", " ")

+				.replace("<b>", "").replace("</b>", "");

+		snapshot = snapshot.replace("</B>", "").replace("<B>", "")

+				.replace("<br>", "").replace("</br>", "").replace("...", ". ")

+				.replace("|", " ").replace(">", " ").replace(". .", ". ");

+		snapshot += " . " + hit.getTitle();

+		

+		return new String[] { textFromOriginalPage, snapshot };

+	}

+

+	public void close() {

+		// TODO

+		// matcher.close();

+	}

+

+	public List<HitBase> runSearch(String query) {

+

+

+		List<HitBase> hits = scraper.runSearch(query);

+		hits = calculateMatchScoreResortHits(hits, query);

+		return hits;

+	}

+

+

+	public List<HitBase> runSearchViaAPI(String query) {

+		List<String[]> reportData = new ArrayList<String[]>(); 

+		reportData.add(new String[]{query});

+		List<HitBase> hits = null;

+		try {

+			List<HitBase> resultList = bingSearcher.runSearch(query, NUM_OF_SEARCH_RESULTS);

+			reportData.add(convertListHitBaseIntoStringAr(resultList));

+			

+			// now we apply our own relevance filter

+			hits = calculateMatchScoreResortHits(resultList, query);

+			reportData.add(convertListHitBaseIntoStringAr(resultList));

+		} catch (Exception e) {

+			e.printStackTrace();

+			LOG.info("No search results for query '" + query);

+			return null;

+		}

+		ProfileReaderWriter.writeReport(reportData, "resultsForQuery_"+query.replace(' ', '_')+".csv");

+		return hits;

+	}

+	

+	private String[] convertListHitBaseIntoStringAr(List<HitBase> list){

+		List<String> results = new  ArrayList<String>(); 

+		for(HitBase h: list ){

+			results.add(h.getTitle()+ " | "+h.getAbstractText());

+		}

+		return results.toArray(new String[0]);

+	}

+

+	public static void main(String[] args){

+		String query = " I see no meaningful distinction between complacency or complicity in the military's latest failure to uphold their own " +

+				"standards of conduct. Nor do I see a distinction between the service member who orchestrated this offense and the chain of " +

+				"command that was either oblivious to or tolerant of criminal behavior";

+

+		new MultiSentenceSearchResultsProcessor().runSearchViaAPI(query);

+	}

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/SnippetToParagraph.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/SnippetToParagraph.java
new file mode 100644
index 0000000..dd7eaf7
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/SnippetToParagraph.java
@@ -0,0 +1,382 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.parse_thicket.apps;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.Collections;

+import java.util.Comparator;

+import java.util.List;

+import java.util.logging.Logger;

+

+import org.apache.commons.lang.StringUtils;

+

+

+import opennlp.tools.similarity.apps.ContentGeneratorSupport;

+import opennlp.tools.similarity.apps.Fragment;

+import opennlp.tools.similarity.apps.GeneratedSentenceProcessor;

+import opennlp.tools.similarity.apps.HitBase;

+import opennlp.tools.similarity.apps.RelatedSentenceFinder;

+import opennlp.tools.similarity.apps.utils.PageFetcher;

+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;

+import opennlp.tools.similarity.apps.utils.Utils;

+import opennlp.tools.textsimilarity.TextProcessor;

+

+

+public class SnippetToParagraph extends ContentGeneratorSupport /*RelatedSentenceFinder */{

+	private PageFetcher pFetcher = new PageFetcher();

+	private static Logger LOG = Logger

+			.getLogger("com.become.parse_thicket.apps.SnippetToParagraph");

+

+	public HitBase formTextFromOriginalPageGivenSnippetDirect(HitBase item) {

+

+		// put orig sentence in structure

+		List<String> origs = new ArrayList<String>();

+

+		item.setOriginalSentences(origs);

+		String title = item.getTitle().replace("<b>", " ").replace("</b>", " ")

+				.replace("  ", " ").replace("  ", " ");

+		// generation results for this sentence

+		List<Fragment> result = new ArrayList<Fragment>();

+		// form plain text from snippet

+		String snapshot = item.getAbstractText().replace("<b>", " ")

+				.replace("</b>", " ").replace("  ", " ").replace("  ", " ");

+

+		String snapshotMarked = snapshot.replace("...",

+				" _should_find_orig_ . _should_find_orig_");

+		List<String> fragments = TextProcessor.splitToSentences(snapshotMarked);

+		List<String> allFragms = new ArrayList<String>();

+		allFragms.addAll(fragments);

+

+		List<String> sents = new ArrayList<String>();

+		String downloadedPage;

+		try {

+			if (snapshotMarked.length() != snapshot.length()) {

+				downloadedPage = pFetcher.fetchPage(item.getUrl());

+				if (downloadedPage != null && downloadedPage.length() > 100) {

+					item.setPageContent(downloadedPage);

+					String pageContent = Utils.fullStripHTML(item.getPageContent());

+					pageContent = GeneratedSentenceProcessor

+							.normalizeForSentenceSplitting(pageContent);

+					pageContent = pageContent.trim().replaceAll("  [A-Z]", ". $0")// .replace("  ",

+							// ". ")

+							.replace("..", ".").replace(". . .", " ").trim(); // sometimes

+					// html breaks

+					// are converted

+					// into ' ' (two

+					// spaces), so

+					// we need to

+					// put '.'

+					sents = TextProcessor.splitToSentences(pageContent);

+

+				}

+			}

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			// e.printStackTrace();

+			System.err

+			.println("Problem downloading  the page and splitting into sentences");

+			return item;

+		}

+

+		for (String fragment : allFragms) {

+			String followSent = null;

+			if (fragment.length() < 50)

+				continue;

+			String pageSentence = "";

+			// try to find original sentence from webpage

+			if (fragment.indexOf("_should_find_orig_") > -1 && sents != null

+					&& sents.size() > 0)

+				try {

+					String[] mainAndFollowSent = getFullOriginalSentenceFromWebpageBySnippetFragment(

+							fragment.replace("_should_find_orig_", ""), (String[])sents.toArray(new String[]{}));

+					pageSentence = mainAndFollowSent[0];

+					followSent = mainAndFollowSent[1];

+

+				} catch (Exception e) {

+

+					// TODO Auto-generated catch block

+					e.printStackTrace();

+				}

+			else

+				// or get original snippet

+				pageSentence = fragment;

+			if (pageSentence != null)

+				pageSentence = pageSentence.replace("_should_find_orig_", "");

+			String pageSentenceProc = GeneratedSentenceProcessor

+					.acceptableMinedSentence(pageSentence);

+			if (pageSentenceProc != null) {

+				pageSentenceProc = GeneratedSentenceProcessor

+						.processSentence(pageSentenceProc);

+				if (followSent != null) {

+					pageSentenceProc += " "

+							+ GeneratedSentenceProcessor.processSentence(followSent);

+				}

+

+				pageSentenceProc = Utils.convertToASCII(pageSentenceProc);

+				Fragment f = new Fragment(pageSentenceProc, 1);

+				f.setSourceURL(item.getUrl());

+				f.fragment = fragment;

+				result.add(f);

+				System.out.println("Accepted sentence: " + pageSentenceProc

+						+ "| with title= " + title);

+				System.out.println("For fragment = " + fragment);

+			} else

+				System.out

+				.println("Rejected sentence due to wrong area at webpage: "

+						+ pageSentence);

+		} 

+

+

+		item.setFragments(result);

+		return item;

+	}

+

+	public HitBase formTextFromOriginalPageGivenSnippet(HitBase item) {

+

+		String[] sents = extractSentencesFromPage(item.getUrl());

+

+		String title = item.getTitle().replace("<b>", " ").replace("</b>", " ")

+				.replace("  ", " ").replace("  ", " ");

+		// generation results for this sentence

+		List<String> result = new ArrayList<String>();

+		// form plain text from snippet

+		String snapshot = item.getAbstractText().replace("<b>", " ")

+				.replace("</b>", " ").replace("  ", " ").replace("  ", " ").replace("\"", "");

+

+		String snapshotMarked = snapshot.replace(" ...", ".");

+		List<String> fragments = TextProcessor.splitToSentences(snapshotMarked);

+		if (fragments.size()<3 && StringUtils.countMatches(snapshotMarked, ".")>1){

+			snapshotMarked = snapshotMarked.replace("..", "&").replace(".", "&");

+			String[] fragmSents = snapshotMarked.split("&");

+			fragments = Arrays.asList(fragmSents);

+		}

+

+		for (String f : fragments) {

+			String followSent = null;

+			if (f.length() < 50)

+				continue;

+			String pageSentence = "";

+			// try to find original sentence from webpage

+

+			try {

+				String[] mainAndFollowSent = getFullOriginalSentenceFromWebpageBySnippetFragment(

+						f, sents);

+				pageSentence = mainAndFollowSent[0];

+				followSent = mainAndFollowSent[1];

+				if (pageSentence!=null)

+					result.add(pageSentence);

+				else {

+					result.add(f);

+					LOG.info("Could not find the original sentence \n"+f +"\n in the page " );

+				}

+				//if (followSent !=null)

+				//	result.add(followSent);

+			} catch (Exception e) {

+

+				e.printStackTrace();

+			}

+		}

+		item.setOriginalSentences(result);

+		return item;

+	}

+

+	public  List<String> cleanListOfSents(List<String> sents) {

+		List<String> sentsClean = new ArrayList<String>();

+		for (String s : sents) {

+			if (s == null || s.trim().length() < 30 || s.length() < 20)

+				continue;

+			sentsClean.add(s);

+		}

+		return sentsClean;

+	}

+

+

+

+	private String[] removeDuplicates(String[] hits)

+	{

+		StringDistanceMeasurer meas = new StringDistanceMeasurer();

+

+		List<Integer> idsToRemove = new ArrayList<Integer>();

+		List<String> hitsDedup = new ArrayList<String>();

+		try

+		{

+			for (int i = 0; i < hits.length; i++)

+				for (int j = i + 1; j < hits.length; j++)

+				{

+					String title1 = hits[i];

+					String title2 = hits[j];

+					if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))

+						continue;

+					if (meas.measureStringDistance(title1, title2) > 0.7)

+					{

+						idsToRemove.add(j); // dupes found, later list member to

+						// be deleted

+					}

+				}

+			for (int i = 0; i < hits.length; i++)

+				if (!idsToRemove.contains(i))

+					hitsDedup.add(hits[i]);

+			if (hitsDedup.size() < hits.length)

+			{

+				System.out.println("Removed duplicates from relevant search results, including "

+						+ hits[idsToRemove.get(0)]);

+			}

+		}

+		catch (Exception e)

+		{

+			System.out.println("Problem removing duplicates from relevant images");

+		}

+

+		return hitsDedup.toArray(new String[0]);

+

+	}

+

+	public String[] extractSentencesFromPage(String url)

+	{

+

+		int maxSentsFromPage= 100;

+		List<String[]> results = new ArrayList<String[]>();

+

+		String downloadedPage = pFetcher.fetchPage(url, 20000);

+		if (downloadedPage == null || downloadedPage.length() < 100)

+		{

+			return null;

+		}

+

+		String pageOrigHTML = pFetcher.fetchOrigHTML(url);

+

+		downloadedPage= downloadedPage.replace("     ", "&");

+		downloadedPage = downloadedPage.replaceAll("(?:&)+", "#");

+		String[] sents = downloadedPage.split("#");

+		List<TextChunk> sentsList = new ArrayList<TextChunk>();

+		for(String s: sents){

+			s = s.trim().replace("  ", ". ").replace("..", ".").replace(". . .", " ")

+					.replace(": ", ". ").replace("- ", ". ").

+					replace (". .",".").trim();

+			sentsList.add(new TextChunk(s, s.length()));

+		}

+

+		Collections.sort(sentsList, new TextChunkComparable());

+		String[] longestSents = new String[maxSentsFromPage];

+		int j=0;

+		int initIndex = sentsList.size()-1 -maxSentsFromPage;

+		if (initIndex<0)

+			initIndex = 0;

+		for(int i=initIndex; i< sentsList.size() && j<maxSentsFromPage ; i++){

+			longestSents[j] = sentsList.get(i).text;

+			j++;

+		}

+

+		sents = cleanSplitListOfSents(longestSents);

+

+		//sents = removeDuplicates(sents);

+		//sents = verifyEnforceStartsUpperCase(sents);

+

+		return sents;

+	}

+	

+	protected String[] cleanSplitListOfSents(String[] longestSents){

+	float minFragmentLength = 40, minFragmentLengthSpace=4;

+

+	List<String> sentsClean = new ArrayList<String>();

+	for (String sentenceOrMultSent : longestSents)

+	{

+		if (sentenceOrMultSent==null || sentenceOrMultSent.length()<20)

+			continue;

+		if (GeneratedSentenceProcessor.acceptableMinedSentence(sentenceOrMultSent)==null){

+			System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+sentenceOrMultSent);

+			continue;

+		}

+		// aaa. hhh hhh.  kkk . kkk ll hhh. lll kkk n.

+		int numOfDots = sentenceOrMultSent.replace('.','&').split("&").length;

+		float avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;

+		if ( avgSentenceLengthInTextPortion<minFragmentLength)

+			continue;

+		// o oo o ooo o o o ooo oo ooo o o oo

+		numOfDots = sentenceOrMultSent.replace(' ','&').split("&").length;

+		avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;

+		if ( avgSentenceLengthInTextPortion<minFragmentLengthSpace)

+			continue;

+

+		List<String> furtherSplit = TextProcessor.splitToSentences(sentenceOrMultSent);

+		

+		// forced split by ',' somewhere in the middle of sentence

+		// disused - Feb 26 13

+		//furtherSplit = furtherMakeSentencesShorter(furtherSplit);

+		furtherSplit.remove(furtherSplit.size()-1);

+		for(String s : furtherSplit){

+			if (s.indexOf('|')>-1)

+				continue;

+			s = s.replace("<em>"," ").replace("</em>"," ");

+			s = Utils.convertToASCII(s);

+			sentsClean.add(s);

+		}

+	}

+

+	return (String[]) sentsClean.toArray(new String[0]);

+}

+	private String[] verifyEnforceStartsUpperCase(String[] sents) {

+		for(int i=0; i<sents.length; i++){

+			String s = sents[i];

+			s = StringUtils.trim(s);

+			String sFirstChar = s.substring(0, 1);

+			if (!sFirstChar.toUpperCase().equals(sFirstChar)){

+				s = sFirstChar.toUpperCase()+s.substring(1);

+			}

+			sents[i] = s;

+		}

+		return sents;

+	}

+

+	private List<String> cleanProductFeatures(List<String> productFeaturesList) {

+		List<String> results = new ArrayList<String>();

+		for(String feature: productFeaturesList){

+			if (feature.startsWith("Unlimited Free") || feature.startsWith("View Larger") || feature.startsWith("View Larger") || feature.indexOf("shipping")>0)

+				continue;

+			results.add(feature);

+		}

+		return results;

+	}

+	public class TextChunk {

+		public TextChunk(String s, int length) {

+			this.text = s;

+			this.len = length;

+		}

+		public String text;

+		public int len;

+	}

+

+	public class TextChunkComparable implements Comparator<TextChunk>

+	{

+		public int compare(TextChunk ch1, TextChunk ch2)

+		{

+			if (ch1.len>ch2.len)

+				return 1;

+			else if (ch1.len<ch2.len)

+				return  -1;

+			else return 0;

+

+		}

+	}

+	

+	public static void main(String[] args){

+		

+	}

+

+}

+

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageContentSentenceExtractor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageContentSentenceExtractor.java
new file mode 100644
index 0000000..038fcfc
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageContentSentenceExtractor.java
@@ -0,0 +1,147 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.parse_thicket.apps;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.Collections;

+import java.util.List;

+

+import opennlp.tools.similarity.apps.GeneratedSentenceProcessor;

+import opennlp.tools.similarity.apps.HitBase;

+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;

+import opennlp.tools.similarity.apps.utils.Utils;

+import opennlp.tools.textsimilarity.TextProcessor;

+

+import org.apache.commons.lang.StringUtils;

+

+public class WebPageContentSentenceExtractor extends WebPageExtractor {

+	

+	

+	

+

+	public List<String> extractSentencesWithPotentialReviewPhrases(String url)

+	{

+		int maxSentsFromPage = 30;

+		String downloadedPage = pageFetcher.fetchPage(url, 20000);

+		if (downloadedPage == null || downloadedPage.length() < 100)

+		{

+			return null;

+		}

+		downloadedPage= downloadedPage.replace("     ", "&");

+		downloadedPage = downloadedPage.replaceAll("(?:&)+", "#");

+		String[] sents = downloadedPage.split("#");

+		List<TextChunk> sentsList = new ArrayList<TextChunk>();

+		for(String s: sents){

+			s = s.trim().replace("  ", ". ").replace("..", ".").replace(". . .", " ")

+					.replace(": ", ". ").replace("- ", ". ").

+					replace (". .",".").trim();

+			sentsList.add(new TextChunk(s, s.length()));

+		}

+		

+		Collections.sort(sentsList, new TextChunkComparable());

+		

+		String[] longestSents = new String[maxSentsFromPage];

+		int j=0;														// -1 removed

+		for(int i=sentsList.size()-1 -maxSentsFromPage; i< sentsList.size()-1; i++){

+			longestSents[j] = sentsList.get(i).text;

+			j++;

+		}

+

+		sents = cleanListOfSents(longestSents);

+	/*	

+		for(int i = 0; i< sents.length; i++){

+			sents[i] = sents[i].trim().replace("  ", ". ").replace("..", ".").replace(". . .", " ")

+					.replace(": ", ". ").replace("- ", ". ").

+					replace (". .",".").trim();

+		}

+		sents = cleanListOfSents(sents);

+	*/	sents = verifyEnforceStartsUpperCase(sents);

+

+		return Arrays.asList(sents);

+	}

+

+	private String[] verifyEnforceStartsUpperCase(String[] sents) {

+		for(int i=0; i<sents.length; i++){

+			String s = sents[i];

+			s = StringUtils.trim(s);

+			String sFirstChar = s.substring(0, 1);

+			if (!sFirstChar.toUpperCase().equals(sFirstChar)){

+				s = sFirstChar.toUpperCase()+s.substring(1);

+			}

+			sents[i] = s;

+		}

+			return sents;

+	}

+

+	private List<String> cleanProductFeatures(List<String> productFeaturesList) {

+		List<String> results = new ArrayList<String>();

+		for(String feature: productFeaturesList){

+			if (feature.startsWith("Unlimited Free") || feature.startsWith("View Larger") || feature.startsWith("View Larger") || feature.indexOf("shipping")>0)

+				continue;

+			results.add(feature);

+		}

+		return results;

+	}

+

+	// extracts paragraphs from web page

+	protected String[] cleanListOfSents(String[] longestSents)

+	{

+		float minFragmentLength = 40, minFragmentLengthSpace=4;

+

+		List<String> sentsClean = new ArrayList<String>();

+		for (String sentenceOrMultSent : longestSents)

+		{

+			if (GeneratedSentenceProcessor.acceptableMinedSentence(sentenceOrMultSent)==null){

+				System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+sentenceOrMultSent);

+				continue;

+			}

+			// aaa. hhh hhh.  kkk . kkk ll hhh. lll kkk n.

+			int numOfDots = sentenceOrMultSent.replace('.','&').split("&").length;

+			float avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;

+			if ( avgSentenceLengthInTextPortion<minFragmentLength)

+				continue;

+			// o oo o ooo o o o ooo oo ooo o o oo

+			numOfDots = sentenceOrMultSent.replace(' ','&').split("&").length;

+			avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;

+			if ( avgSentenceLengthInTextPortion<minFragmentLengthSpace)

+				continue;

+

+			sentsClean.add(sentenceOrMultSent);

+		}

+

+		return (String[]) sentsClean.toArray(new String[0]);

+	}

+

+	

+

+	private String startWithCapitalSent(String sent) {

+		String firstChar = sent.substring(0,1);

+		String remainder = sent.substring(1);

+		

+		return firstChar.toUpperCase()+remainder;

+	}

+

+	public HitBase formTextFromOriginalPageGivenSnippet(HitBase hit) {

+		List<String> results = extractSentencesWithPotentialReviewPhrases(hit.getUrl());

+		hit.setOriginalSentences(results);

+		return hit;

+	}

+

+	

+	

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java
new file mode 100644
index 0000000..b91f5cb
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java
@@ -0,0 +1,158 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.parse_thicket.apps;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.Collections;

+import java.util.Comparator;

+import java.util.List;

+

+import org.apache.commons.lang.StringUtils;

+

+import opennlp.tools.similarity.apps.GeneratedSentenceProcessor;

+import opennlp.tools.similarity.apps.utils.PageFetcher;

+import opennlp.tools.textsimilarity.TextProcessor;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

+public class WebPageExtractor

+{

+	protected PageFetcher pageFetcher = new PageFetcher();

+	

+	protected ParserChunker2MatcherProcessor nlProc;

+	protected MostFrequentWordsFromPageGetter mostFrequentWordsFromPageGetter = new MostFrequentWordsFromPageGetter();

+

+	protected static int sentThresholdLength = 70;

+

+	public List<String[]> extractSentencesWithPotentialProductKeywords(String url)

+	{

+		int maxSentsFromPage= 20;

+		List<String[]> results = new ArrayList<String[]>();

+

+		String downloadedPage = pageFetcher.fetchPage(url, 20000);

+		if (downloadedPage == null || downloadedPage.length() < 100)

+		{

+			return null;

+		}

+

+		String pageOrigHTML = pageFetcher.fetchOrigHTML(url);

+		String pageTitle = StringUtils.substringBetween(pageOrigHTML, "<title>", "</title>" );

+		pageTitle = pageTitle.replace("  ", ". ").replace("..", ".").replace(". . .", " ")

+				.replace(": ", ". ").replace("- ", ". ").replace(" |", ". ").

+				replace (". .",".").trim();

+		List<String> pageTitles = new ArrayList<String>();

+		pageTitles.addAll(TextProcessor.splitToSentences(pageTitle));

+		pageTitles.addAll(Arrays.asList(pageTitle.split(".")));

+

+		String[] headerSections = pageOrigHTML.split("<h2");

+		if (headerSections.length<2)

+			headerSections = pageOrigHTML.split("<h3");

+		for(String section: headerSections){

+

+			String header = StringUtils.substringBetween(section, ">", "<");

+			if (header!=null && header.length()>20)

+				pageTitles.add(header);

+		}

+

+

+		downloadedPage= downloadedPage.replace("     ", "&");

+		downloadedPage = downloadedPage.replaceAll("(?:&)+", "#");

+		String[] sents = downloadedPage.split("#");

+		List<TextChunk> sentsList = new ArrayList<TextChunk>();

+		for(String s: sents){

+			s = s.trim().replace("  ", ". ").replace("..", ".").replace(". . .", " ")

+					.replace(": ", ". ").replace("- ", ". ").

+					replace (". .",".").trim();

+			sentsList.add(new TextChunk(s, s.length()));

+		}

+

+		Collections.sort(sentsList, new TextChunkComparable());

+

+

+		String[] longestSents = new String[maxSentsFromPage];

+		int j=0;

+		for(int i=sentsList.size() -maxSentsFromPage; i< sentsList.size(); i++){

+			longestSents[j] = sentsList.get(i).text;

+			j++;

+		}

+

+		sents = cleanListOfSents(longestSents);

+

+		List<String>  mosFrequentWordsListFromPage = mostFrequentWordsFromPageGetter. getMostFrequentWordsInTextArr(sents);

+		// mostFrequentWordsFromPageGetter. getMostFrequentWordsInText(downloadedPage);

+

+		results.add(pageTitles.toArray(new String[0]));

+		results.add(mosFrequentWordsListFromPage.toArray(new String[0]));

+		results.add(sents);

+

+		return results;

+	}

+

+	protected String[] cleanListOfSents(String[] longestSents)

+	{

+		List<String> sentsClean = new ArrayList<String>();

+		for (String sentenceOrMultSent : longestSents)

+		{

+			List<String> furtherSplit = TextProcessor.splitToSentences(sentenceOrMultSent);

+			for(String s : furtherSplit){

+				if (s.replace('.','&').split("&").length>3)

+					continue;

+				if (s.indexOf('|')>-1)

+					continue;

+				if (s == null || s.trim().length() < sentThresholdLength || s.length() < sentThresholdLength + 10)

+					continue;

+				if (GeneratedSentenceProcessor.acceptableMinedSentence(s)==null){

+					System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+s);

+					continue;

+				}

+				sentsClean.add(s);

+			}

+		}

+		return (String[]) sentsClean.toArray(new String[0]);

+	}

+

+	public class TextChunk {

+		public TextChunk(String s, int length) {

+			this.text = s;

+			this.len = length;

+		}

+		public String text;

+		public int len;

+	}

+

+	public class TextChunkComparable implements Comparator<TextChunk>

+	{

+		public int compare(TextChunk ch1, TextChunk ch2)

+		{

+			if (ch1.len>ch2.len)

+				return 1;

+			else if (ch1.len<ch2.len)

+				return  -1;

+			else return 0;

+

+		}

+	}

+	

+	public static void main(String[] args){

+		WebPageExtractor extractor = new WebPageExtractor();

+		List<String[]> res = 

+				extractor.extractSentencesWithPotentialProductKeywords("http://www.sitbetter.com/view/chair/ofm-500-l/ofm--high-back-leather-office-chair/");

+		System.out.println(res.get(1));

+		

+	}

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilder.java
new file mode 100644
index 0000000..aea85b5
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilder.java
@@ -0,0 +1,163 @@
+package opennlp.tools.parse_thicket.communicative_actions;

+

+import java.util.ArrayList;

+import java.util.List;

+

+import opennlp.tools.parse_thicket.IGeneralizer;

+import opennlp.tools.parse_thicket.Pair;

+import opennlp.tools.parse_thicket.ParseTreeNode;

+

+

+public class CommunicativeActionsArcBuilder implements IGeneralizer<Pair<String, Integer[]>>{

+

+	private List<Pair<String, Integer[]>> commActionsAttr = new ArrayList<Pair<String, Integer[]>>();

+	public CommunicativeActionsArcBuilder(){

+

+		commActionsAttr.add(new Pair<String, Integer[]>("agree", new Integer[]{	1,	-1,	-1,	1,	-1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("accept", new Integer[]{	1,	-1,	-1,	1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("explain", new Integer[]{	0,	-1,	1,	1,	-1}));

+

+		commActionsAttr.add(new Pair<String, Integer[]>("suggest", new Integer[]{	1,	0,	1,	-1,	-1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("claim", new Integer[]{	1,	0,	1,	-1,	-1}));

+

+		// bring-attention

+		commActionsAttr.add(new Pair<String, Integer[]>("bring_attention", new Integer[]{	1,	1,	1,	1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("remind", new Integer[]{	-1,	0,	1,	1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("allow", new Integer[]{	1,	-1,	-1,	-1,	-1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("try", new Integer[]{	1,	0,	-1,	-1,	-1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("request", new Integer[]{	0,	1,	-1,	1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("understand", new Integer[]{	0,	-1,	-1,	1,	-1}));

+

+		commActionsAttr.add(new Pair<String, Integer[]>("inform", new Integer[]{	0,	0,	1,	1,	-1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("notify", new Integer[]{	0,	0,	1,	1,	-1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("report", new Integer[]{	0,	0,	1,	1,	-1}));

+

+

+		commActionsAttr.add(new Pair<String, Integer[]>("confirm", new Integer[]{	0,	-1,	1,	1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("ask", new Integer[]{	0,	1,	-1,	-1,	-1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("check", new Integer[]{	-1,	1,	-1,	-1,	1}));

+

+		commActionsAttr.add(new Pair<String, Integer[]>("ignore", new Integer[]{	-1,	-1,	-1,	-1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("wait", new Integer[]{	-1,	-1,	-1,	-1,	1}));

+

+		commActionsAttr.add(new Pair<String, Integer[]>("convince", new Integer[]{	0,	1,	1,	1, -1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("disagree", new Integer[]{	-1,	-1,	-1,	1,	-1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("appeal", new Integer[]{	-1,	1,	1,	1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("deny", new Integer[]{	-1,	-1,	-1,	1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("threaten", new Integer[]{	-1,	1, -1,	1,	1}));

+

+		commActionsAttr.add(new Pair<String, Integer[]>("concern", new Integer[]{	1,	-1, -1,	1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("afraid", new Integer[]{	1,	-1, -1,	1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("worri", new Integer[]{	1,	-1, -1,	1,	1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("scare", new Integer[]{	1,	-1, -1,	1,	1}));

+

+		commActionsAttr.add(new Pair<String, Integer[]>("want", new Integer[]{	1,	0,	-1,	-1,	-1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("know", new Integer[]{	0,	-1,	-1,	1,	-1}));

+		commActionsAttr.add(new Pair<String, Integer[]>("believe", new Integer[]{	0,	-1,	-1,	1,	-1}));

+	}

+

+	public Pair<String, Integer[]> findCAInSentence(List<ParseTreeNode> sentence){

+		for(ParseTreeNode node: sentence){

+			for(Pair<String, Integer[]> ca: commActionsAttr){

+				String lemma = (String)ca.getFirst();

+				// canonical form lemma is a sub-string of an actual form in parseTreeNode

+				if (node.getWord().toLowerCase().startsWith(lemma))

+					return ca;

+			}

+		}

+		return null;

+	}

+

+	public int findCAIndexInSentence(List<ParseTreeNode> sentence){

+		for(int index = 1; index< sentence.size(); index++){

+			ParseTreeNode node = sentence.get(index);

+			for(Pair<String, Integer[]> ca: commActionsAttr){

+				String lemma = (String)ca.getFirst();

+				String[] lemmas = lemma.split("_");

+				if (lemmas==null || lemmas.length<2){

+					if (node.getWord().toLowerCase().startsWith(lemma))

+						return index;

+				} else { //multiword matching 

+					for(int indexM= index+1; indexM<sentence.size(); indexM++);//

+				}

+				

+			}

+		}

+		return -1;

+	}

+

+

+	public List<Pair<String, Integer[]>> generalize(Object o1, Object o2) {

+		List<Pair<String, Integer[]>> results = new ArrayList<Pair<String, Integer[]>>();

+

+

+		String ca1 = null, ca2=null;

+

+		if (o1 instanceof String){

+			ca1 = (String)o1;

+			ca2 = (String)o2;

+		} else {			

+			ca1 = ((Pair<String, Integer[]>)o1).getFirst();

+			ca2 = ((Pair<String, Integer[]>)o2).getFirst();

+		}

+

+

+		// find entry for ca1

+		Pair<String, Integer[]> caP1=null, caP2=null;

+		for(Pair<String, Integer[]> ca: commActionsAttr){

+			String lemma = (String)ca.getFirst();

+			if (lemma.equals(ca1)){

+				caP1=ca;

+				break;

+			}					

+		}

+

+		// find entry for ca2

+		for(Pair<String, Integer[]> ca: commActionsAttr){

+			String lemma = (String)ca.getFirst();

+			if (lemma.equals(ca2)){

+				caP2=ca;

+				break;

+			}					

+		}

+

+		if (ca1.equals(ca2)){

+			results.add(caP1);

+		} else {

+			// generalization of int arrays also implements IGeneralizer

+			// we take Integer[] which is a first element of as resultant list

+			Integer[] res = new CommunicativeActionsAttribute().

+					generalize(caP1.getSecond(), caP2.getSecond()).get(0);

+			results.add(new Pair<String, Integer[]>("", res ));

+		}

+

+		return results;

+	}

+

+

+

+

+	/*Pair<String, Integer[]>[] commActionsAttrAr = new Pair<String, Integer[]>[] {

+			new Pair<String, Integer[]>("agree", new Integer[]{	1,	-1,	-1,	1,	-1}),

+			new Pair<String, Integer[]>("accept", new Integer[]{	1,	-1,	-1,	1,	1}),

+			new Pair<String, Integer[]>("explain", new Integer[]{	0,	-1,	1,	1,	-1}),

+			new Pair<String, Integer[]>("suggest", new Integer[]{	1,	0,	1,	-1,	-1}),

+			new Pair<String, Integer[]>("bring attention", new Integer[]{	1,	1,	1,	1,	1}),

+			new Pair<String, Integer[]>("remind", new Integer[]{	-1,	0,	1,	1,	1}),

+		    new Pair<String, Integer[]>("allow", new Integer[]{	1,	-1,	-1,	-1,	-1}),

+			new Pair<String, Integer[]>("try", new Integer[]{	1,	0,	-1,	-1,	-1}),

+			new Pair<String, Integer[]>("request", new Integer[]{	0,	1,	-1,	1,	1}),

+			new Pair<String, Integer[]>("understand", new Integer[]{	0,	-1,	-1,	1,	-1}),

+			new Pair<String, Integer[]>("inform", new Integer[]{	0,	0,	1,	1,	-1}),

+			new Pair<String, Integer[]>("confirm", new Integer[]{	0,	-1,	1,	1,	1}),

+			new Pair<String, Integer[]>("ask", new Integer[]{	0,	1,	-1,	-1,	-1}),

+			new Pair<String, Integer[]>("check", new Integer[]{	-1,	1,	-1,	-1,	1}),

+			new Pair<String, Integer[]>("ignore", new Integer[]{	-1,	-1,	-1,	-1,	1}),

+			new Pair<String, Integer[]>("convince", new Integer[]{	0,	1,	1,	1, -1}),

+			new Pair<String, Integer[]>("disagree", new Integer[]{	-1,	-1,	-1,	1,	-1}),

+			new Pair<String, Integer[]>("appeal", new Integer[]{	-1,	1,	1,	1,	1}),

+			new Pair<String, Integer[]>("deny", new Integer[]{	-1,	-1,	-1,	1,	1}),

+			new Pair<String, Integer[]>("threaten", new Integer[]{	-1,	1, -1,	1,	1}),	

+	} */

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsAttribute.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsAttribute.java
new file mode 100644
index 0000000..24bda54
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsAttribute.java
@@ -0,0 +1,29 @@
+package opennlp.tools.parse_thicket.communicative_actions;

+

+import java.util.ArrayList;

+import java.util.List;

+

+import opennlp.tools.parse_thicket.IGeneralizer;

+

+

+public class CommunicativeActionsAttribute implements IGeneralizer<Integer[]>{

+

+	public List<Integer[]> generalize(Object intArr1ob, Object intArr2ob) {

+		Integer[] arr1 = (Integer[])intArr1ob, arr2 = (Integer[])intArr2ob;

+		Integer[] result = new Integer[arr2.length];

+		for(int i=0; i< arr2.length; i++ ){

+			if (arr1[i].equals(arr2[i]))

+				result[i] = arr1[i];

+			else if ((arr1[i]<0 && arr2[i]>0) || (arr1[i]>0 && arr2[i]<0)){

+				result[i]=0;

+			} else if (arr1[i]==0)

+				result[i]=arr2[i];

+			else if (arr2[i]==0)

+				result[i]=arr1[i];

+		}

+		List<Integer[]> results = new ArrayList<Integer[]>();

+		results.add(result);

+		return results;

+	}

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceExtendedForestSearchResultsProcessorSetFormer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceExtendedForestSearchResultsProcessorSetFormer.java
new file mode 100644
index 0000000..eb67724
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceExtendedForestSearchResultsProcessorSetFormer.java
@@ -0,0 +1,155 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.parse_thicket.kernel_interface;

+

+import java.io.File;

+import java.util.ArrayList;

+import java.util.Collections;

+import java.util.List;

+import java.util.logging.Logger;

+

+import edu.stanford.nlp.trees.Tree;

+import edu.stanford.nlp.util.StringUtils;

+

+

+import opennlp.tools.jsmlearning.ProfileReaderWriter;

+import opennlp.tools.parse_thicket.ParseThicket;

+import opennlp.tools.parse_thicket.apps.MultiSentenceSearchResultsProcessor;

+import opennlp.tools.parse_thicket.apps.SnippetToParagraph;

+import opennlp.tools.parse_thicket.apps.WebPageContentSentenceExtractor;

+import opennlp.tools.parse_thicket.matching.Matcher;

+import opennlp.tools.similarity.apps.BingQueryRunner;

+import opennlp.tools.similarity.apps.HitBase;

+import opennlp.tools.similarity.apps.HitBaseComparable;

+import opennlp.tools.similarity.apps.WebSearchEngineResultsScraper;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;

+import opennlp.tools.textsimilarity.SentencePairMatchResult;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

public class MultiSentenceExtendedForestSearchResultsProcessorSetFormer  extends MultiSentenceKernelBasedSearchResultsProcessor{
	private static Logger LOG = Logger
			.getLogger("opennlp.tools.similarity.apps.MultiSentenceKernelBasedExtendedForestSearchResultsProcessor");
	// Extends parse trees with arcs from linked trees (part of the kernel workflow).
	protected TreeExtenderByAnotherLinkedTree treeExtender = new TreeExtenderByAnotherLinkedTree();
	
	// NOTE(review): not referenced in this class; presumably used by the
	// inherited kernel workflow — confirm before removing.
	private TreeKernelRunner tkRunner = new TreeKernelRunner();
	
	// File names of the tree-kernel learner's inputs/outputs, resolved against 'path'.
	protected static final String modelFileName = "model.txt";

	private static final String trainingFileName = "training.txt";

	protected static final String unknownToBeClassified = "unknown.txt";

	private static final String classifierOutput = "classifier_output.txt";
	
	// Directory of the tree-kernel installation; all data files are written under it.
	private String path;
	public void setKernelPath (String path){
		this.path=path;
	}
	
	// Mines full sentences from the original pages behind search-result snippets.
	WebPageContentSentenceExtractor extractor = new WebPageContentSentenceExtractor();
	
	/**
	 * Builds tree-kernel training samples from the original documents behind
	 * the given hits, streaming each sample line to the training file under
	 * {@code path}.
	 *
	 * @param hits search results to mine
	 * @param query the query that produced the hits (currently unused here)
	 * @param isPositive label assigned to all samples produced by this call
	 * @return the processed hits, in input order
	 */
	private List<HitBase> formTreeForestDataSet(
			List<HitBase> hits, String query, boolean isPositive) {
		List<HitBase> newHitList = new ArrayList<HitBase>(), newHitListReRanked = new ArrayList<HitBase>();
		// form the training set from original documents. Since search results are
		// ranked, we set the first half as the positive set and the second half as
		// the negative set; after re-classification and re-ranking, a search
		// result might end up in a different set.
		List<String[]> treeBankBuffer = new ArrayList<String[]>();
		int count = 0;
		for (HitBase hit : hits) {
			count++;
			// if orig content has been already set in HIT object, ok; otherwise set it
			String searchResultText = hit.getPageContent();
			if (searchResultText ==null){
				try {
					HitBase hitWithFullSents = extractor.formTextFromOriginalPageGivenSnippet(hit);
					for(String paragraph: hitWithFullSents.getOriginalSentences()){
						List<String[]> res = formTreeKernelStructure(paragraph, count, hits,  isPositive);
						// samples are streamed straight to the training file, one line per tree
						for(String[] rl : res){
							StringUtils.printToFile(new File(path+trainingFileName), rl[0]+" \n", true);
						}
						//treeBankBuffer.addAll(res);
					}
				} catch (Exception e) {
					e.printStackTrace();
				}
				
			}			
			newHitList.add(hit);
			
			
		}	
		// write the list of samples to a file
		// NOTE(review): treeBankBuffer is always empty here (the addAll above is
		// commented out), so this append writes nothing — verify this is intended.
		ProfileReaderWriter.appendReport(treeBankBuffer, path+trainingFileName, ' ');
		return newHitList;

	}
	
	/**
	 * Converts one paragraph into tree-kernel samples: one SVM tree-kernel
	 * line (" 1 "/" -1 " label, |BT| ... |ET| markers) per parsed sentence.
	 *
	 * @param searchResultText paragraph to parse
	 * @param count rank of the owning hit (unused in this override)
	 * @param hits full hit list (unused in this override)
	 * @param isPositive label for all samples from this paragraph
	 * @return sample lines; empty when parsing fails
	 */
	protected List<String[]> formTreeKernelStructure(String searchResultText, int count, List<HitBase> hits, boolean isPositive) {
		List<String[]> treeBankBuffer = new ArrayList<String[]> ();
		try {
			// get the parses from original documents, and form the training dataset
			ParseThicket pt = matcher.buildParseThicketFromTextWithRST(searchResultText);
			List<Tree> forest = pt.getSentences();
			// label comes from the isPositive flag, not the rank split used elsewhere
			String posOrNeg = null;
			if (isPositive)
				posOrNeg=" 1 ";
			else 
				posOrNeg=" -1 ";
			// form the list of training samples
			for(Tree t: forest){
				treeBankBuffer.add(new String[] {posOrNeg+" |BT| "+t.toString()+ " |ET|"});
			}
		} catch (Exception e) {
			// best effort: a failed parse just yields no samples for this paragraph
			e.printStackTrace();
		}
		return treeBankBuffer;
	}
	
	/**
	 * Runs a web search for the query and forms the training dataset from the
	 * results; always returns null (the dataset is written as a side effect).
	 */
	public List<HitBase> runSearchViaAPI(String query, Boolean isPositive) {
		
		try {
			List<HitBase> hits = bingSearcher.runSearch(query, 20, true);
			formTreeForestDataSet(hits, query, isPositive);

		} catch (Exception e) {
			e.printStackTrace();
			LOG.info("No search results for query '" + query);
			return null;
		}


		return null;
	}
	/**
	 * CLI entry point: args[0] is the query, args[1] starting with "neg"
	 * marks the samples as negative; defaults are used otherwise.
	 */
	public static void main(String[] args){
		String query = "digital camera for my mother as a gift";
		Boolean isPositive = true;
		if (args!=null && args.length>0){
			query = args[0];
			if (args.length>1 && args[1]!=null && args[1].startsWith("neg"))
				isPositive = false;
		}
		
		MultiSentenceExtendedForestSearchResultsProcessorSetFormer proc = new MultiSentenceExtendedForestSearchResultsProcessorSetFormer();
		proc.setKernelPath("C:\\stanford-corenlp\\tree_kernel_big\\");
		proc.runSearchViaAPI(query, isPositive);
	}

}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedExtendedForestSearchResultsProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedExtendedForestSearchResultsProcessor.java
new file mode 100644
index 0000000..1b2790f
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedExtendedForestSearchResultsProcessor.java
@@ -0,0 +1,92 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.parse_thicket.kernel_interface;

+

+import java.util.ArrayList;

+import java.util.Collections;

+import java.util.List;

+import java.util.logging.Logger;

+

+import edu.stanford.nlp.trees.Tree;

+

+

+import opennlp.tools.jsmlearning.ProfileReaderWriter;

+import opennlp.tools.parse_thicket.ParseThicket;

+import opennlp.tools.parse_thicket.apps.MultiSentenceSearchResultsProcessor;

+import opennlp.tools.parse_thicket.apps.SnippetToParagraph;

+import opennlp.tools.parse_thicket.matching.Matcher;

+import opennlp.tools.similarity.apps.BingQueryRunner;

+import opennlp.tools.similarity.apps.HitBase;

+import opennlp.tools.similarity.apps.HitBaseComparable;

+import opennlp.tools.similarity.apps.WebSearchEngineResultsScraper;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;

+import opennlp.tools.textsimilarity.SentencePairMatchResult;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

+public class MultiSentenceKernelBasedExtendedForestSearchResultsProcessor  extends MultiSentenceKernelBasedSearchResultsProcessor{

+	private static Logger LOG = Logger

+			.getLogger("opennlp.tools.similarity.apps.MultiSentenceKernelBasedExtendedForestSearchResultsProcessor");

+	protected TreeExtenderByAnotherLinkedTree treeExtender = new TreeExtenderByAnotherLinkedTree();

+	

+	

+	

+

+	protected List<String[]> formTreeKernelStructure(String searchResultText, int count, List<HitBase> hits) {

+		List<String[]> treeBankBuffer = new ArrayList<String[]> ();

+		try {

+			// get the parses from original documents, and form the training dataset

+			ParseThicket pt = matcher.buildParseThicketFromTextWithRST(searchResultText);

+			List<String> extendedTreesDump = treeExtender.buildForestForCorefArcs(pt);

+			// if from the first half or ranked docs, then positive, otherwise negative

+			String posOrNeg = null;

+			if (count<hits.size()/2)

+				posOrNeg=" 1 ";

+			else 

+				posOrNeg=" -1 ";

+			// form the list of training samples

+			for(String t: extendedTreesDump){

+				treeBankBuffer.add(new String[] {posOrNeg+" |BT| "+t+ " |ET|"});

+			}

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+		return treeBankBuffer;

+	}

+

+	public static void main(String[] args){

+		String query = null;

+		

+		/*" I see no meaningful distinction between complacency or complicity in the military's latest failure to uphold their own " +

+				"standards of conduct. Nor do I see a distinction between the service member who orchestrated this offense and the chain of " +

+				"command that was either oblivious to or tolerant of criminal behavior";

+		

+		query = "I am now living abroad and have health insurance from Russia. How can I avoid penalty for not having health insurance in US";

+		

+		query = "ECUADOR'S PRESIDENT RAFAEL CORREA SAYS U.S. VP JOE BIDEN WANTS HIM TO REFUSE WHISTLEBLOWER EDWARD SNOWDEN'S BID FOR ASYLUM";

+		query = "how to pay tax on foreign income from real estate";

+		*/

+		if (args!=null && args.length>0)

+			query = args[0];

+		

+		MultiSentenceKernelBasedExtendedForestSearchResultsProcessor proc = new MultiSentenceKernelBasedExtendedForestSearchResultsProcessor();

+		proc.setKernelPath("C:\\stanford-corenlp\\tree_kernel\\");

+		proc.runSearchViaAPI(query);

+	}

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedSearchResultsProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedSearchResultsProcessor.java
new file mode 100644
index 0000000..df6189d
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedSearchResultsProcessor.java
@@ -0,0 +1,203 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.parse_thicket.kernel_interface;

+

+import java.util.ArrayList;

+import java.util.Collections;

+import java.util.List;

+import java.util.logging.Logger;

+

+import edu.stanford.nlp.trees.Tree;

+

+

+import opennlp.tools.jsmlearning.ProfileReaderWriter;

+import opennlp.tools.parse_thicket.ParseThicket;

+import opennlp.tools.parse_thicket.apps.BingQueryRunnerMultipageSearchResults;

+import opennlp.tools.parse_thicket.apps.MultiSentenceSearchResultsProcessor;

+import opennlp.tools.parse_thicket.apps.SnippetToParagraph;

+import opennlp.tools.parse_thicket.matching.Matcher;

+import opennlp.tools.similarity.apps.BingQueryRunner;

+import opennlp.tools.similarity.apps.HitBase;

+import opennlp.tools.similarity.apps.HitBaseComparable;

+import opennlp.tools.similarity.apps.WebSearchEngineResultsScraper;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;

+import opennlp.tools.textsimilarity.SentencePairMatchResult;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

+public class MultiSentenceKernelBasedSearchResultsProcessor  extends MultiSentenceSearchResultsProcessor{

+	private static Logger LOG = Logger

+			.getLogger("opennlp.tools.similarity.apps.MultiSentenceKernelBasedSearchResultsProcessor");

+

+	private WebSearchEngineResultsScraper scraper = new WebSearchEngineResultsScraper();

+	protected Matcher matcher = new Matcher();

+	private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

+	protected BingQueryRunnerMultipageSearchResults bingSearcher = new BingQueryRunnerMultipageSearchResults();

+	private SnippetToParagraph snp = new SnippetToParagraph();

+	private TreeKernelRunner tkRunner = new TreeKernelRunner();

+

+	private String path;

+	public void setKernelPath (String path){

+		this.path=path;

+	}

+	protected static final String modelFileName = "model.txt";

+

+	private static final String trainingFileName = "training.txt";

+

+	protected static final String unknownToBeClassified = "unknown.txt";

+

+	private static final String classifierOutput = "classifier_output.txt";

+

+

+	public List<HitBase> runSearchViaAPI(String query) {

+		List<HitBase> hits = null;

+		try {

+			List<HitBase> resultList = bingSearcher.runSearch(query);

+			// now we apply our own relevance filter

+			//hits = calculateMatchScoreResortHits(resultList, query);

+			

+			hits = resultList;

+			//once we applied our re-ranking, we set highly ranked as positive set, low-rated as negative set

+			//and classify all these search results again

+			//training set is formed from original documents for the search results, 

+			// and snippets of these search results are classified

+			hits = filterOutIrrelevantHitsByTreeKernelLearning(hits, query);

+

+		} catch (Exception e) {

+			e.printStackTrace();

+			LOG.info("No search results for query '" + query);

+			return null;

+		}

+

+

+		return hits;

+	}

+

+	private List<HitBase> filterOutIrrelevantHitsByTreeKernelLearning(

+			List<HitBase> hits, String query) {

+		List<HitBase> newHitList = new ArrayList<HitBase>(), newHitListReRanked = new ArrayList<HitBase>();

+		// form the training set from original documets. Since search results are ranked, we set the first half as positive set,

+		//and the second half as negative set.

+		// after re-classification, being re-ranked, the search results might end up in a different set

+		List<String[]> treeBankBuffer = new ArrayList<String[]>();

+		int count = 0;

+		for (HitBase hit : hits) {

+			count++;

+			// if orig content has been already set in HIT object, ok; otherwise set it

+			String searchResultText = hit.getPageContent();

+			if (searchResultText ==null){

+				String[] pageSentsAndSnippet = formTextForReRankingFromHit(hit);

+				searchResultText = pageSentsAndSnippet[0];

+				hit.setPageContent(searchResultText);

+			}			

+			newHitList.add(hit);

+			treeBankBuffer.addAll(formTreeKernelStructure(searchResultText, count, hits));

+			

+		}	

+		// write the lits of samples to a file

+		ProfileReaderWriter.writeReport(treeBankBuffer, path+trainingFileName, ' ');

+		// build the model

+		tkRunner.runLearner(path, trainingFileName, modelFileName);

+

+		// now we preparing the same answers to be classifies in/out

+		treeBankBuffer = new ArrayList<String[]>();

+		for (HitBase hit : newHitList) {			

+			// not original docs now but instead a snippet

+			String searchResultTextAbstr = hit.getAbstractText();

+			String snippet = searchResultTextAbstr.replace("<b>...</b>", ". ").replace("<span class='best-phrase'>", " ").replace("<span>", " ").replace("<span>", " ")

+					.replace("<b>", "").replace("</b>", "");

+			snippet = snippet.replace("</B>", "").replace("<B>", "")

+					.replace("<br>", "").replace("</br>", "").replace("...", ". ")

+					.replace("|", " ").replace(">", " ").replace(". .", ". ");

+			snippet =  hit.getTitle() + " " + snippet;

+			

+			ParseThicket pt = matcher.buildParseThicketFromTextWithRST(snippet);

+					//hit.getPageContent());

+			List<Tree> forest = pt.getSentences();

+			// we consider the snippet as a single sentence to be classified

+			if (forest.size()>0){

+				treeBankBuffer.add(new String[] {"0 |BT| "+forest.get(0).toString()+ " |ET|"});

+				newHitListReRanked .add(hit);

+			}

+

+		}	

+		// form a file from the snippets to be classified

+		ProfileReaderWriter.writeReport(treeBankBuffer, path+unknownToBeClassified, ' ');

+		tkRunner.runClassifier(path, unknownToBeClassified, modelFileName, classifierOutput);

+		// read classification results

+		List<String[]> classifResults = ProfileReaderWriter.readProfiles(path+classifierOutput, ' ');

+		// iterate through classification results and set them as scores for hits

+		newHitList = new ArrayList<HitBase>();

+		for(int i=0; i<newHitListReRanked.size() && i<classifResults.size() ; i++){

+			String scoreClassif = classifResults.get(i)[0];

+			float val = Float.parseFloat(scoreClassif);

+			HitBase hit = newHitListReRanked.get(i);

+			hit.setGenerWithQueryScore((double) val);

+			newHitList.add(hit);

+		}

+		

+		// sort by SVM classification results

+		Collections.sort(newHitList, new HitBaseComparable());

+		System.out.println("\n\n ============= NEW ORDER ================= ");

+		for (HitBase hit : newHitList) {

+			System.out.println(hit.getOriginalSentences().toString() + " => "+hit.getGenerWithQueryScore());

+			System.out.println("page content = "+hit.getPageContent());

+			System.out.println("title = "+hit.getAbstractText());

+			System.out.println("snippet = "+hit.getAbstractText());

+			System.out.println("match = "+hit.getSource());

+		}

+		

+		return newHitList;

+

+	}

+

+	protected List<String[]> formTreeKernelStructure(String searchResultText, int count, List<HitBase> hits) {

+		List<String[]> treeBankBuffer = new ArrayList<String[]> ();

+		try {

+			// get the parses from original documents, and form the training dataset

+			ParseThicket pt = matcher.buildParseThicketFromTextWithRST(searchResultText);

+			List<Tree> forest = pt.getSentences();

+			// if from the first half or ranked docs, then positive, otherwise negative

+			String posOrNeg = null;

+			if (count<hits.size()/2)

+				posOrNeg=" 1 ";

+			else 

+				posOrNeg=" -1 ";

+			// form the list of training samples

+			for(Tree t: forest){

+				treeBankBuffer.add(new String[] {posOrNeg+" |BT| "+t.toString()+ " |ET|"});

+			}

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+		return treeBankBuffer;

+	}

+

+	public static void main(String[] args){

+		String query = " I see no meaningful distinction between complacency or complicity in the military's latest failure to uphold their own " +

+				"standards of conduct. Nor do I see a distinction between the service member who orchestrated this offense and the chain of " +

+				"command that was either oblivious to or tolerant of criminal behavior";

+		

+		query = "I am now living abroad and have health insurance from Russia. How can I avoid penalty for not having health insurance in US";

+		

+		MultiSentenceKernelBasedSearchResultsProcessor proc = new MultiSentenceKernelBasedSearchResultsProcessor();

+		proc.setKernelPath("C:\\stanford-corenlp\\tree_kernel\\");

+		proc.runSearchViaAPI(query);

+	}

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/PT2ExtendedTreeForestBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/PT2ExtendedTreeForestBuilder.java
new file mode 100644
index 0000000..9c1c44a
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/PT2ExtendedTreeForestBuilder.java
@@ -0,0 +1,80 @@
+package opennlp.tools.parse_thicket.kernel_interface;

+

+import java.util.ArrayList;

+import java.util.List;

+

+import edu.stanford.nlp.trees.Tree;

+

+import opennlp.tools.jsmlearning.ProfileReaderWriter;

+import opennlp.tools.parse_thicket.ParseThicket;

+import opennlp.tools.parse_thicket.matching.Matcher;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+

+public class PT2ExtendedTreeForestBuilder {

+	private Matcher matcher = new Matcher();	

+	private TreeKernelRunner tkRunner = new TreeKernelRunner();

+	private static final String modelFileName = "model.txt",

+			trainingFileName = "training.txt";

+	

+	private List<String[]> formTrainingSetFromText(String para,  boolean positive){

+		String prefix = null;

+		if (positive)

+			prefix=" 1 ";

+		else

+			prefix=" -1 ";

+			

+		ParseThicket pt = matcher.buildParseThicketFromTextWithRST(para);

+		List<Tree> forest = pt.getSentences();

+		List<String[]> treeBankBuffer = new ArrayList<String[]>();

+		for(Tree t: forest){

+			treeBankBuffer.add(new String[] {prefix+"|BT| "+t.toString()+ " |ET|"});

+		}

+		return treeBankBuffer;

+	}

+	

+	public void formPosNegTrainingSet(String pos, String neg, String path){

+		List<String[]> list = formTrainingSetFromText(pos,  true), 

+				negList= formTrainingSetFromText(neg, false);

+		list.addAll(negList);

+		ProfileReaderWriter.writeReport(list, path+trainingFileName, ' ');

+		tkRunner.runLearner(path, trainingFileName, modelFileName);

+	}

+	

+	public void classifySentences(String sentences, String path){

+		ParseThicket pt = matcher.buildParseThicketFromTextWithRST(sentences);

+		List<Tree> forest = pt.getSentences();

+		List<String[]> treeBankBuffer = new ArrayList<String[]>();

+		for(Tree t: forest){

+			treeBankBuffer.add(new String[] {" 0 |BT| "+t.toString()+ " |ET|"});

+		}

+		

+		ProfileReaderWriter.writeReport(treeBankBuffer, path+"unknown.txt", ' ');

+		tkRunner.runClassifier(path, "unknown.txt", modelFileName, "classifier_output.txt");

+		

+		

+	}

+	

+	

+	public static void main(String[] args){

+		

+		PT2ExtendedTreeForestBuilder builder = new PT2ExtendedTreeForestBuilder();

+		

+			

+		String posSents = "Iran refuses to accept the UN proposal to end its dispute over its work on nuclear weapons."+

+				"UN nuclear watchdog passes a resolution condemning Iran for developing its second uranium enrichment site in secret. " +

+				"A recent IAEA report presented diagrams that suggested Iran was secretly working on nuclear weapons. " +

+				"Iran envoy says its nuclear development is for peaceful purpose, and the material evidence against it has been fabricated by the US. ";

+

+		String negSents = "Iran refuses the UN offer to end a conflict over its nuclear weapons."+

+						"UN passes a resolution prohibiting Iran from developing its uranium enrichment site. " +

+						"A recent UN report presented charts saying Iran was working on nuclear weapons. " +

+				"Iran envoy to UN states its nuclear development is for peaceful purpose, and the evidence against its claim is fabricated by the US. ";

+		builder.formPosNegTrainingSet(posSents, negSents, "C:\\stanford-corenlp\\tree_kernel\\");

+		

+		

+		builder.classifySentences("Iran refuses Iraq's offer to end its conflict with UN. Iran passes a resolution prohibiting UN from doing second" +

+				" uranium enrichment site. Envoy to US says its nuclear development is for peaceful purposes. Material evidence againt US has been fabricated by UN.", 

+				

+				"C:\\stanford-corenlp\\tree_kernel\\");

+	}

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/SnippetToParagraphFull.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/SnippetToParagraphFull.java
new file mode 100644
index 0000000..4cf3b34
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/SnippetToParagraphFull.java
@@ -0,0 +1,83 @@
+package opennlp.tools.parse_thicket.kernel_interface;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.Collections;

+import java.util.Comparator;

+import java.util.List;

+import java.util.logging.Logger;

+

+import org.apache.commons.lang.StringUtils;

+

+

+import opennlp.tools.parse_thicket.apps.MinedSentenceProcessor;

+import opennlp.tools.parse_thicket.apps.SnippetToParagraph;

+import opennlp.tools.similarity.apps.Fragment;

+import opennlp.tools.similarity.apps.GeneratedSentenceProcessor;

+import opennlp.tools.similarity.apps.HitBase;

+import opennlp.tools.similarity.apps.RelatedSentenceFinder;

+import opennlp.tools.similarity.apps.utils.PageFetcher;

+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;

+import opennlp.tools.similarity.apps.utils.Utils;

+import opennlp.tools.textsimilarity.TextProcessor;

+

+

+public class SnippetToParagraphFull extends SnippetToParagraph {

+	private PageFetcher pFetcher = new PageFetcher();

+	private static Logger LOG = Logger

+			.getLogger("com.become.parse_thicket.apps.SnippetToParagraphFull");

+

+	

+

+	public HitBase formTextFromOriginalPageGivenSnippet(HitBase item) {

+

+		String[] sents = extractSentencesFromPage(item.getUrl());

+

+		String title = item.getTitle().replace("<b>", " ").replace("</b>", " ")

+				.replace("  ", " ").replace("  ", " ");

+		// generation results for this sentence

+		List<String> result = new ArrayList<String>();

+		// form plain text from snippet

+		String snapshot = item.getAbstractText().replace("<b>", " ")

+				.replace("</b>", " ").replace("  ", " ").replace("  ", " ").replace("\"", "");

+

+		String snapshotMarked = snapshot.replace(" ...", ".");

+		List<String> fragments = TextProcessor.splitToSentences(snapshotMarked);

+		if (fragments.size()<3 && StringUtils.countMatches(snapshotMarked, ".")>1){

+			snapshotMarked = snapshotMarked.replace("..", "&").replace(".", "&");

+			String[] fragmSents = snapshotMarked.split("&");

+			fragments = Arrays.asList(fragmSents);

+		}

+

+		for (String f : fragments) {

+			String followSent = null;

+			if (f.length() < 50)

+				continue;

+			String pageSentence = "";

+			// try to find original sentence from webpage

+

+			try {

+				String[] mainAndFollowSent = getFullOriginalSentenceFromWebpageBySnippetFragment(

+						f, sents);

+				pageSentence = mainAndFollowSent[0];

+				followSent = mainAndFollowSent[1];

+				if (pageSentence!=null)

+					result.add(pageSentence);

+				else {

+					result.add(f);

+					LOG.info("Could not find the original sentence \n"+f +"\n in the page " );

+				}

+				//if (followSent !=null)

+				//	result.add(followSent);

+			} catch (Exception e) {

+

+				e.printStackTrace();

+			}

+		}

+		item.setOriginalSentences(result);

+		return item;

+	}

+

+	

+}

+

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeExtenderByAnotherLinkedTree.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeExtenderByAnotherLinkedTree.java
new file mode 100644
index 0000000..47e474f
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeExtenderByAnotherLinkedTree.java
@@ -0,0 +1,292 @@
+package opennlp.tools.parse_thicket.kernel_interface;

+

+import java.util.ArrayList;

+import java.util.List;

+

+import opennlp.tools.jsmlearning.ProfileReaderWriter;

+import opennlp.tools.parse_thicket.ParseThicket;

+import opennlp.tools.parse_thicket.ParseTreeNode;

+import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;

+import opennlp.tools.parse_thicket.matching.Matcher;

+import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder;

+import edu.stanford.nlp.trees.Tree;

+

/**
 * Extends sentence parse trees across sentence boundaries using coreference
 * arcs of a parse thicket: for every coref arc, the subtree around the source
 * mention is spliced into the target sentence's tree at the position of the
 * target mention, producing an "extended tree" serialization in penn-style
 * bracket notation.
 *
 * NOTE(review): the mutually recursive traversal below depends on exact
 * statement order and on Tree.toString() bracket formatting; code is left
 * unchanged, comments only.
 */
public class TreeExtenderByAnotherLinkedTree extends  PT2ThicketPhraseBuilder {

	/**
	 * For each coreference arc in the thicket, builds one extended-tree string:
	 * the subtree following the source word is inserted into the target
	 * sentence's tree right after the target word.
	 *
	 * @param pt parse thicket with sentences and inter-sentence arcs
	 * @return bracketed extended-tree strings, one per usable coref arc
	 */
	public List<String> buildForestForCorefArcs(ParseThicket pt){
		List<String> results = new ArrayList<String>();
		for(WordWordInterSentenceRelationArc arc: pt.getArcs()){
			// only coreference arcs are used for tree extension
			if (!arc.getArcType().getType().startsWith("coref"))
				continue;
			// arc endpoints: 1-based sentence indices plus the mention lemmas
			int fromSent = arc.getCodeFrom().getFirst();
			int toSent = arc.getCodeTo().getFirst();
			String wordFrom = arc.getLemmaFrom();
			String wordTo = arc.getLemmaTo();

			// subtrees that follow the source mention in its own sentence
			List<Tree> trees = getASubtreeWithRootAsNodeForWord1(pt.getSentences().get(fromSent-1), pt.getSentences().get(fromSent-1), new String[]{ wordFrom});
			if (trees==null || trees.size()<1)
				continue;
			System.out.println(trees);
			StringBuilder sb = new StringBuilder(10000);	
			// splice the first found subtree into the target sentence's tree
			toStringBuilderExtenderByAnotherLinkedTree1(sb, pt.getSentences().get(toSent-1), trees.get(0), new String[]{wordTo});
			System.out.println(sb.toString());
			results.add(sb.toString());
		}
		/*
		List<String[]> treeBankBuffer = new ArrayList<String[]>();
		for(String t: results){
			treeBankBuffer.add(new String[] {" 0 |BT|"+t.toString()+ "|ET|"});
		}
		ProfileReaderWriter.writeReport(treeBankBuffer, "C:\\stanford-corenlp\\tree_kernel\\unknownForest.txt", ' ');
		*/
		return results;
	}

	/**
	 * Serializes tree {@code t} into {@code sb} in bracket notation, inserting
	 * {@code treeToInsert} as an extra child of the first node one of whose
	 * children ends (textually) with the last coref word.
	 *
	 * @param sb           output buffer (appended to in place)
	 * @param t            tree being serialized
	 * @param treeToInsert subtree to splice in (null once inserted)
	 * @param corefWords   target words; only the last element is matched
	 * @return the same {@code sb}, for chaining
	 */
	public StringBuilder toStringBuilderExtenderByAnotherLinkedTree1(StringBuilder sb, Tree t, Tree treeToInsert, String[] corefWords) {
		if (t.isLeaf()) {
			if (t.label() != null) {
				sb.append(t.label().value());
			}
			return sb;
		} else {
			sb.append('(');
			if (t.label() != null) {
				if (t.value() != null) {
					sb.append(t.label().value());
				}
			}
			boolean bInsertNow=false;
			Tree[] kids = t.children();
			if (kids != null) {
				// first pass: decide whether one of the children ends with the
				// target word (string match on the bracket serialization)
				for (Tree kid : kids) {
					if (corefWords!=null){
						String word = corefWords[corefWords.length-1];
						String phraseStr = kid.toString();
						phraseStr=phraseStr.replace(")", "");
						if (phraseStr.endsWith(word)){
							bInsertNow=true;
						}
					}
				}
				if (bInsertNow){ 
					// insertion point found: serialize the children as-is,
					// then append the spliced subtree after them
					for (Tree kid : kids) {
						sb.append(' ');
						toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, null, null);
					}
					sb.append(' ');
					toStringBuilderExtenderByAnotherLinkedTree1(sb, treeToInsert, null, null);
					// leftover debug anchor; has no effect
					int z=0; z++;

				} else {
					// keep searching for the insertion point further down
					for (Tree kid : kids) {
						sb.append(' ');
						toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, treeToInsert, corefWords);
					}

				}
			}

			return sb.append(')');
		}
	}

	/**
	 * Finds the children that FOLLOW the child ending with the last coref word,
	 * searching depth-first. Returns null when no such position exists.
	 *
	 * @param tree           the whole sentence tree (passed through, unused in matching)
	 * @param currentSubTree subtree currently being examined
	 * @param corefWords     mention words; only the last element is matched
	 */
	public List<Tree> getASubtreeWithRootAsNodeForWord1(Tree tree, Tree currentSubTree, String[] corefWords){
		if (currentSubTree.isLeaf()){
			return null;
		}
		List<Tree> result = null;
		Tree[] kids = currentSubTree.children();
		if (kids != null) {
			boolean bInsert=false;
			String word = corefWords[corefWords.length-1];

			// collect all siblings to the right of the child that ends with 'word'
			for (Tree kid : kids) {
				if (bInsert){
					result.add(kid);
				} else {

					String phraseStr = kid.toString();
					phraseStr=phraseStr.replace(")", "");
					if (phraseStr.endsWith(word)){
						bInsert=true;
						result = new ArrayList<Tree>();
					}
				}
			}
			if (bInsert){
				return result;
			}

			// if not a selected node, proceed with iteration
			for (Tree kid : kids) {
				List<Tree> ts = getASubtreeWithRootAsNodeForWord1(tree, kid, corefWords);
				if (ts!=null)
					return ts;
			}

		}
		return null;
	}


	/**
	 * Older heuristic variant kept for reference: walks the tree looking for a
	 * node whose serialization is fully "covered" by the coref words (i.e.
	 * nothing lowercase remains after removing them). Returns child arrays of
	 * matched nodes via recursion; currently never returns a non-null match
	 * at the top level because bInsertNow is computed but not acted upon —
	 * NOTE(review): likely superseded by getASubtreeWithRootAsNodeForWord1.
	 */
	public Tree[] getASubtreeWithRootAsNodeForWord(Tree tree, Tree currentSubTree, String[] corefWords){
		if (currentSubTree.isLeaf()){
			return null;
		}


		boolean bInsertNow=false;
		/*List<ParseTreeNode> bigTreeNodes = parsePhrase(currentSubTree.label().value());	
		for(ParseTreeNode smallNode: bigTreeNodes ){
			if (bigTreeNodes.get(0).getWord().equals("") )
				continue;
			String word = bigTreeNodes.get(0).getWord();
			for(String cWord: corefWords){

				if (word.equalsIgnoreCase(cWord))
					bInsertNow=true;
			}
		} */

		String nodePhraseStr = currentSubTree.toString();
		System.out.println(nodePhraseStr);
		// remove the coref words from the serialization...
		for(String w: corefWords)
			nodePhraseStr = nodePhraseStr.replace(w, "");
		// all words are covered
		if (nodePhraseStr.toUpperCase().equals(nodePhraseStr))
			bInsertNow=true;

		//if(bInsertNow)
		//	return currentSubTree;

		Tree[] kids = currentSubTree.children();
		if (kids != null) {
			/*for (Tree kid : kids) {
				List<ParseTreeNode> bigTreeNodes = parsePhrase(kid.label().value());	
				if (bigTreeNodes!=null && bigTreeNodes.size()>0 && bigTreeNodes.get(0)!=null &&
						bigTreeNodes.get(0).getWord().equalsIgnoreCase(corefWords[0])){
					bInsertNow=true;
					return kids;
				}

			}*/


			for (Tree kid : kids) {
				Tree[] t = getASubtreeWithRootAsNodeForWord(tree, kid, corefWords);
				if (t!=null)
					return t;
			}

		}
		return null;
	}


	/**
	 * Variant of the splicing serializer that locates the insertion point by
	 * matching parsed phrase words instead of raw string suffixes: splices
	 * {@code treeToInsert}'s grandchild when the first word of the current node
	 * equals a word of the insert tree's first leaf phrase.
	 */
	public StringBuilder toStringBuilderExtenderByAnotherLinkedTree(StringBuilder sb, Tree t, Tree treeToInsert) {
		if (t.isLeaf()) {
			if (t.label() != null) {
				sb.append(t.label().value());
			}
			return sb;
		} else {
			sb.append('(');
			if (t.label() != null) {
				if (t.value() != null) {
					sb.append(t.label().value());
				}
			}

			boolean bInsertNow=false;
			// we try to match trees to find out if we are at the insertion position
			if (treeToInsert!=null){
				List<ParseTreeNode> bigTreeNodes = parsePhrase(t.label().value());	
				// assumes treeToInsert has depth >= 3 — TODO confirm for all arcs
				List<ParseTreeNode> smallTreeNodes = parsePhrase(treeToInsert.getChild(0).getChild(0).getChild(0).label().value());	

				System.out.println(t + " \n "+ treeToInsert+ "\n");

				if (smallTreeNodes.size()>0 && bigTreeNodes.size()>0)
					for(ParseTreeNode smallNode: smallTreeNodes ){
						if (!bigTreeNodes.get(0).getWord().equals("") 
								&& bigTreeNodes.get(0).getWord().equalsIgnoreCase(smallNode.getWord()))
							bInsertNow=true;
					}
			}

			if (bInsertNow){ 
				// serialize children, then append the spliced subtree
				Tree[] kids = t.children();
				if (kids != null) {
					for (Tree kid : kids) {
						sb.append(' ');
						toStringBuilderExtenderByAnotherLinkedTree(sb, kid, null);
					}
					sb.append(' ');
					toStringBuilderExtenderByAnotherLinkedTree(sb, treeToInsert.getChild(0).getChild(1), null);
					// leftover debug anchor; has no effect
					int z=0; z++;
				}
			} else {
				Tree[] kids = t.children();
				if (kids != null) {
					for (Tree kid : kids) {
						sb.append(' ');
						toStringBuilderExtenderByAnotherLinkedTree(sb, kid, treeToInsert);
					}

				}
			}
			return sb.append(')');
		}
	}

	/** Plain bracket-notation serializer with no splicing (kept for reference). */
	private StringBuilder toStringBuilder(StringBuilder sb, Tree t) {
		if (t.isLeaf()) {
			if (t.label() != null) {
				sb.append(t.label().value());
			}
			return sb;
		} else {
			sb.append('(');
			if (t.label() != null) {
				if (t.value() != null) {
					sb.append(t.label().value());
				}
			}
			Tree[] kids = t.children();
			if (kids != null) {
				for (Tree kid : kids) {
					sb.append(' ');
					toStringBuilder(sb, kid);
				}
			}
			return sb.append(')');
		}
	}

	/** Demo: builds the coref-extended forest for a four-sentence text. */
	public static void main(String[] args){
		Matcher matcher = new Matcher();
		TreeExtenderByAnotherLinkedTree extender = new TreeExtenderByAnotherLinkedTree();
		
		ParseThicket pt = matcher.buildParseThicketFromTextWithRST(//"I went to the forest to look for a tree. I found out that it was thick and green");
				"Iran refuses to accept the UN proposal to end its dispute over its work on nuclear weapons."+
				"UN nuclear watchdog passes a resolution condemning Iran for developing its second uranium enrichment site in secret. " +
				"A recent IAEA report presented diagrams that suggested Iran was secretly working on nuclear weapons. " +
				"Iran envoy says its nuclear development is for peaceful purpose, and the material evidence against it has been fabricated by the US. ");

		List<String> results = extender.buildForestForCorefArcs(pt);
		System.out.println(results);
		// everything below System.exit(0) is unreachable scratch code
		System.exit(0);

		List<Tree> forest = pt.getSentences();
		
		List<Tree> trees = extender.getASubtreeWithRootAsNodeForWord1(forest.get(1), forest.get(1), new String[]{"it"});
		System.out.println(trees);
		StringBuilder sb = new StringBuilder(10000);	
		extender.toStringBuilderExtenderByAnotherLinkedTree1(sb, forest.get(0), trees.get(0), new String[]{"the", "forest"});
		System.out.println(sb.toString());


		//
		//extender.toStringBuilderExtenderByAnotherLinkedTree(sb, forest.get(0), forest.get(1));
		//System.out.println(sb.toString());
	}
}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
new file mode 100644
index 0000000..f00904f
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
@@ -0,0 +1,115 @@
+package opennlp.tools.parse_thicket.kernel_interface;

+

+import java.io.BufferedReader;

+import java.io.File;

+import java.io.IOException;

+import java.io.InputStream;

+import java.io.InputStreamReader;

+

/**
 * Thin wrapper around the SVM-Light-TK command-line tools (svm_learn.exe /
 * svm_classify.exe): launches them as child processes with the kernel
 * directory as the working directory and echoes their output.
 *
 * NOTE(review): paths are normalized to Windows separators, so this runner
 * targets the Windows build of the tools.
 */
public class TreeKernelRunner {
	/**
	 * Launches an external executable, echoes its stdout and stderr to this
	 * process's stdout, and blocks until the child exits.
	 *
	 * @param command command line; element 0 is the executable path
	 * @param runPath working directory for the child process
	 */
	public void runEXE(String[] command, String runPath){
		Runtime r = Runtime.getRuntime();
		Process mStartProcess = null;
		try {
			mStartProcess = r.exec( command, null, new File(runPath));
		} catch (IOException e) {
			// fix: previously execution fell through with a null Process and
			// crashed with an NPE, masking the real launch failure
			e.printStackTrace();
			return;
		}

		StreamLogger outputGobbler = new StreamLogger(mStartProcess.getInputStream());
		outputGobbler.start();
		// fix: drain stderr too, so the child cannot block on a full pipe
		StreamLogger errorGobbler = new StreamLogger(mStartProcess.getErrorStream());
		errorGobbler.start();

		try {
			mStartProcess.waitFor();
		} catch (InterruptedException e) {
			// restore the interrupt flag for callers higher up the stack
			Thread.currentThread().interrupt();
			e.printStackTrace();
		}
	}

	/**
	 * Trains the tree-kernel model: svm_learn.exe -t 5 learning_file model_file.
	 * All file names are resolved relative to {@code dir}.
	 */
	public void runLearner(String dir, String learning_file, String  model_file)
	{
		dir = dir.replace('/', '\\');
		if (!dir.endsWith("\\"))
				dir+="\\";
		String[] runString = new String[]{dir+"svm_learn.exe","-t", "5", dir+learning_file,  dir+model_file};
		runEXE(runString, dir);
	}

	/**
	 * Classifies examples against a trained model:
	 * svm_classify example_file model_file predictions_file.
	 */
	public void runClassifier(String dir, String example_file, String  model_file, String predictions_file)
	{
		dir = dir.replace('/', '\\');
		if (!dir.endsWith("\\"))
				dir+="\\";
		String[] runString = new String[]{dir+"svm_classify.exe", dir+example_file,  dir+model_file, dir+predictions_file};
		runEXE(runString, dir);
	}

	/** Pumps one child-process stream to System.out on a background thread. */
	class StreamLogger extends Thread{

		private InputStream mInputStream;

		public StreamLogger(InputStream is) {
			this.mInputStream = is;
		}

		public void run() {
			try {
				InputStreamReader isr = new InputStreamReader(mInputStream);
				BufferedReader br = new BufferedReader(isr);
				String line = null;
				while ((line = br.readLine()) != null) {
					System.out.println(line);
				}
			} catch (IOException ioe) {
				ioe.printStackTrace();
			}
		}

	}

	/** Smoke test against a hard-coded Windows kernel directory. */
	public static void main(String[] args){
		TreeKernelRunner runner = new TreeKernelRunner();
		runner.runLearner("C:\\stanford-corenlp\\tree_kernel\\", "training.txt", "arg0.model1.txt");
		runner.runClassifier("C:\\stanford-corenlp\\tree_kernel\\", "arg0.test", "arg0.model1.txt", "arg0.output1.txt");
	}
}

+

	/*
exec:

public Process exec(String command, String envp[], File dir)

   @param      command   a specified system command.
   @param      envp      array of strings, each element of which
                         has environment variable settings in format
                         <i>name</i>=<i>value</i>.
   @param      dir       the working directory of the subprocess, or
                         <tt>null</tt> if the subprocess should inherit
                         the working directory of the current process.

The SVM-Light-TK distribution ships two executables: svm_learn.exe and svm_classify.exe.

1. svm_learn.exe takes a file with training examples, processes it, and builds a model file with the learned rules.

Sample invocations:
svm_learn -t 5 learning_file model_file - the simplest variant, SubSetTreeKernel (gaps are allowed when traversing trees)

svm_learn -t 5 -D 0 learning_file model_file - an alternative kernel, SubTreeKernel

A sample input file and the parameter descriptions are available on the author's page.

2. svm_classify.exe takes a file with test examples plus the model file built by svm_learn, and writes the classification results to predictions_file.

Invocation:     svm_classify example_file model_file predictions_file

The example file has the same format as the training input; a sample is in the archive on Moschitti's page.
You may state the expected class of each example up front (1 or -1 at the start of the line), in which case precision and recall are computed automatically; otherwise put 0 there.
	 */
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/GeneralizationListReducer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/GeneralizationListReducer.java
new file mode 100644
index 0000000..ef0569a
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/GeneralizationListReducer.java
@@ -0,0 +1,148 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.parse_thicket.matching;

+

+import java.util.ArrayList;

+import java.util.HashSet;

+import java.util.List;

+

+public class GeneralizationListReducer {

+  public List<ParseTreePath> applyFilteringBySubsumption_OLD(

+      List<ParseTreePath> result) {

+    List<ParseTreePath> resultDupl = new ArrayList<ParseTreePath>();

+    resultDupl.addAll(new HashSet<ParseTreePath>(result));

+    result = resultDupl;

+    if (result.size() < 2)

+      return result; // nothing to reduce

+    List<ParseTreePath> resultReduced = new ArrayList<ParseTreePath>();

+    int size = result.size();

+    for (int i = 0; i < size; i++) {

+      Boolean bSubChunk = false;

+      for (int j = 0; j < size; j++) {

+        if (i == j) {

+          continue;

+        }

+        if (result.get(j).isASubChunk(result.get(i))) {

+          bSubChunk = true;

+        }

+      }

+      if (!bSubChunk)

+        resultReduced.add(result.get(i));

+    }

+

+    if (resultReduced.size() < 1) {

+      System.err.println("Wrong subsumption reduction");

+    }

+

+    if (resultReduced.size() > 1) {

+      int z = 0;

+      z++;

+    }

+    return resultReduced;

+

+  }

+

+  public List<ParseTreePath> applyFilteringBySubsumptionOLD(

+      List<ParseTreePath> result) {

+    List<ParseTreePath> resultDupl = null;

+    if (result.size() < 2)

+      return result; // nothing to reduce

+    List<ParseTreePath> resultReduced = new ArrayList<ParseTreePath>();

+    int size = result.size();

+    resultDupl = new ArrayList<ParseTreePath>(result);

+    for (int s = 0; s < size; s++) {

+      for (int i = 0; i < resultDupl.size(); i++) {

+        Boolean bStop = false;

+        for (int j = 0; j < resultDupl.size(); j++) {

+          if (i == j) {

+            continue;

+          }

+          if (result.get(j).isASubChunk(result.get(i))

+              && !result.get(i).isASubChunk(result.get(j))) {

+            resultDupl.remove(i);

+            bStop = true;

+            break;

+          }

+        }

+        if (bStop) {

+          break;

+        }

+      }

+    }

+    resultReduced = resultDupl;

+    if (resultReduced.size() < 1) {

+      System.err.println("Wrong subsumption reduction");

+    }

+

+    if (resultReduced.size() > 1) {

+      int z = 0;

+      z++;

+    }

+    return resultReduced;

+

+  }

+

+  public List<ParseTreePath> applyFilteringBySubsumption(

+      List<ParseTreePath> result) {

+    List<Integer> resultDuplIndex = new ArrayList<Integer>();

+    List<ParseTreePath> resultReduced = new ArrayList<ParseTreePath>();

+

+    if (result.size() < 2) {

+      return result; // nothing to reduce

+    }

+    // remove empty

+    for (ParseTreePath ch : result) {

+      if (ch.getLemmas().size() > 0) {

+        resultReduced.add(ch);

+      }

+    }

+    result = resultReduced;

+

+    for (int i = 0; i < result.size(); i++) {

+      for (int j = i + 1; j < result.size(); j++) {

+        if (i == j) {

+          continue;

+        }

+        if (result.get(j).isASubChunk(result.get(i))) {

+          resultDuplIndex.add(i);

+        } else if (result.get(i).isASubChunk(result.get(j))) {

+          resultDuplIndex.add(j);

+        }

+      }

+

+    }

+    resultReduced = new ArrayList<ParseTreePath>();

+    for (int i = 0; i < result.size(); i++) {

+      if (!resultDuplIndex.contains(i)) {

+        resultReduced.add(result.get(i));

+      }

+    }

+

+    if (resultReduced.size() < 1) {

+      System.err.println("Wrong subsumption reduction");

+      resultReduced = result;

+    }

+

+    return resultReduced;

+

+  }

+

+  // testing sub-chunk functionality and

+  // elimination more general according to subsumption relation

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaFormManager.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaFormManager.java
new file mode 100644
index 0000000..cb6f3e9
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaFormManager.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.parse_thicket.matching;
+
+import java.util.List;
+
+import opennlp.tools.stemmer.PorterStemmer;
+
+public class LemmaFormManager {
+
+  public String matchLemmas(PorterStemmer ps, String lemma1, String lemma2,
+      String POS) {
+    if (POS == null) {
+      return null;
+    }
+    lemma1 = lemma1.toLowerCase();
+    lemma2 = lemma2.toLowerCase();
+    // numbers have to be exact
+    if (POS.equals("CD")) {
+      if (lemma1.equals(lemma2)) {
+        return lemma1;
+      } else {
+        return null;
+      }
+    }
+
+    // 'must' occurrence of word - if not equal then 'fail'
+    if (lemma1.endsWith("_xyz") || lemma2.endsWith("_xyz")) {
+      lemma1 = lemma1.replace("_xyz", "");
+      lemma2 = lemma2.replace("_xyz", "");
+      if (lemma1.equals(lemma2)) {
+        return lemma1;
+      } else { // trying to check if nouns and different plural/single form
+        if (POS.equals("NN") || POS.equals("NP")) {
+          if ((lemma1.equals(lemma2 + "s") || lemma2.equals(lemma1 + "s"))
+              || lemma1.endsWith(lemma2) || lemma2.endsWith(lemma1)
+              || lemma1.startsWith(lemma2) || lemma2.startsWith(lemma1))
+            return lemma1;
+        }
+        return "fail";
+      }
+    }
+
+    if (lemma1.equals(lemma2)) {
+      return lemma1;
+    }
+
+    if (POS.equals("NN") || POS.equals("NP")) {
+      if ((lemma1.equals(lemma2 + "s") || lemma2.equals(lemma1 + "s"))
+          || lemma1.endsWith(lemma2) || lemma2.endsWith(lemma1)
+          || lemma1.startsWith(lemma2) || lemma2.startsWith(lemma1)) {
+        return lemma1;
+      }
+    }
+    try {
+      if (ps != null) {
+        if (ps.stem(lemma1).toString()
+            .equalsIgnoreCase(ps.stem(lemma2).toString())) {
+          return lemma1;
+        }
+      }
+    } catch (Exception e) {
+      System.err.println("Problem processing " + lemma1 + " " + lemma2);
+      return null;
+    }
+
+    return null;
+  }
+
+  public boolean acceptableLemmaAndPOS(String sim, String lemmaMatch) {
+    if (sim == null) {
+      return false;
+    }
+
+    if (lemmaMatch != null && !lemmaMatch.equals("fail")) {
+      return false;
+    }
+    // even if lemmaMatch==null
+    return true;
+    // if (sim!=null && (lemmaMatch!=null && !lemmaMatch.equals("fail"))){
+
+  }
+
+  // all lemmas ending with # in ch1 and/or ch2 SHOULD occur in chunkToAdd
+  public boolean mustOccurVerifier(ParseTreePath ch1, ParseTreePath ch2,
+      ParseTreePath chunkToAdd) {
+    List<String> lemmasWithMustOccur = ch1.getLemmas();
+    lemmasWithMustOccur.addAll(ch2.getLemmas());
+    List<String> res = chunkToAdd.getLemmas();
+    for (String lem : lemmasWithMustOccur) {
+      if (lem.endsWith("_xyz")) {
+        String pureLem = lem.replace("_xyz", "");
+        if (!res.contains(pureLem)) { // should occur but does not
+          return false;
+        }// failed the test
+      }
+    }
+    return true;
+  }
+
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/Matcher.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/Matcher.java
new file mode 100644
index 0000000..0830276
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/Matcher.java
@@ -0,0 +1,142 @@
+package opennlp.tools.parse_thicket.matching;

+

+import java.util.ArrayList;

+import java.util.HashMap;

+import java.util.List;

+import java.util.Map;

+

+

+import opennlp.tools.parse_thicket.IGeneralizer;

+import opennlp.tools.parse_thicket.ParseCorefsBuilder;

+import opennlp.tools.parse_thicket.ParseThicket;

+import opennlp.tools.parse_thicket.ParseTreeNode;

+import opennlp.tools.textsimilarity.LemmaPair;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+import opennlp.tools.textsimilarity.ParseTreeMatcherDeterministic;

+import opennlp.tools.textsimilarity.SentencePairMatchResult;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

+public class Matcher implements IGeneralizer<List<List<ParseTreeNode>>>{

+	ParseTreeMatcherDeterministic md = new ParseTreeMatcherDeterministic();

+	ParseCorefsBuilder ptBuilder = ParseCorefsBuilder.getInstance();

+	PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder();

+	Map<String, ParseThicket> parseThicketHash = new HashMap<String, ParseThicket>();

+	/**	   * The key function of similarity component which takes two portions of text

+	 * and does similarity assessment by finding the set of all maximum common

+	 * subtrees of the set of parse trees for each portion of text

+	 * 

+	 * @param input

+	 *          text 1

+	 * @param input

+	 *          text 2

+	 * @return the matching results structure, which includes the similarity score

+	 */

+	

+	public Matcher(){

+		

+	}

+	

+	public List<List<ParseTreeChunk>> assessRelevance(String para1, String para2) {

+		// first build PTs for each text

+		ParseThicket pt1 = ptBuilder.buildParseThicket(para1);

+		ParseThicket pt2 = ptBuilder.buildParseThicket(para2);

+		// then build phrases and rst arcs

+		List<List<ParseTreeNode>> phrs1 = phraseBuilder.buildPT2ptPhrases(pt1);

+		List<List<ParseTreeNode>> phrs2 = phraseBuilder.buildPT2ptPhrases(pt2);

+		// group phrases by type

+		List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(phrs1), 

+				sent2GrpLst = formGroupedPhrasesFromChunksForPara(phrs2);

+

+		

+		List<List<ParseTreeChunk>> res = md

+				.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst);

+		return res;

+

+	}

+	

+	public List<List<ParseTreeChunk>> assessRelevanceCache(String para1, String para2) {

+		// first build PTs for each text

+		

+		ParseThicket pt1 = parseThicketHash.get(para1);

+		if (pt1==null){

+			 pt1=	ptBuilder.buildParseThicket(para1);

+			 parseThicketHash.put(para1, pt1);

+		}

+		

+		ParseThicket pt2 = parseThicketHash.get(para2);

+		if (pt2==null){

+			 pt2=	ptBuilder.buildParseThicket(para2);

+			 parseThicketHash.put(para2, pt2);

+		}

+		

+		// then build phrases and rst arcs

+		List<List<ParseTreeNode>> phrs1 = phraseBuilder.buildPT2ptPhrases(pt1);

+		List<List<ParseTreeNode>> phrs2 = phraseBuilder.buildPT2ptPhrases(pt2);

+		// group phrases by type

+		List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(phrs1), 

+				sent2GrpLst = formGroupedPhrasesFromChunksForPara(phrs2);

+

+		

+		List<List<ParseTreeChunk>> res = md

+				.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst);

+		return res;

+

+	}

+	

+	public List<List<ParseTreeChunk>> generalize(List<List<ParseTreeNode>> phrs1,

+			List<List<ParseTreeNode>> phrs2) {

+		// group phrases by type

+				List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(phrs1), 

+						sent2GrpLst = formGroupedPhrasesFromChunksForPara(phrs2);

+

+				

+				List<List<ParseTreeChunk>> res = md

+						.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst);

+				return res;

+	}

+	private List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(

+			List<List<ParseTreeNode>> phrs) {

+		List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();

+		List<ParseTreeChunk> nps = new ArrayList<ParseTreeChunk>(), vps = new ArrayList<ParseTreeChunk>(), 

+				pps = new ArrayList<ParseTreeChunk>();

+		for(List<ParseTreeNode> ps:phrs){

+			ParseTreeChunk ch = convertNodeListIntoChunk(ps);

+			String ptype = ps.get(0).getPhraseType();

+			if (ptype.equals("NP")){

+				nps.add(ch);

+			} else if (ptype.equals("VP")){

+				vps.add(ch);

+			} else if (ptype.equals("PP")){

+				pps.add(ch);

+			}

+		}

+		results.add(nps); results.add(vps); results.add(pps);

+		return results;

+	}

+

+	private ParseTreeChunk convertNodeListIntoChunk(List<ParseTreeNode> ps) {

+		List<String> lemmas = new ArrayList<String>(),  poss = new ArrayList<String>();

+		for(ParseTreeNode n: ps){

+			lemmas.add(n.getWord());

+			poss.add(n.getPos());

+		}

+		ParseTreeChunk ch = new ParseTreeChunk(lemmas, poss, 0, 0);

+		ch.setMainPOS(ps.get(0).getPhraseType());

+		return ch;

+	}

+	

+	// this function is the main entry point into the PT builder if rst arcs are required

+	public ParseThicket buildParseThicketFromTextWithRST(String para){

+		ParseThicket pt = ptBuilder.buildParseThicket(para);

+		phraseBuilder.buildPT2ptPhrases(pt);

+		return pt;	

+	}

+

+

+	@Override

+	public List<List<List<ParseTreeNode>>> generalize(Object o1, Object o2) {

+		// TODO Auto-generated method stub

+		return null;

+	}

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilder.java
new file mode 100644
index 0000000..7612f26
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilder.java
@@ -0,0 +1,421 @@
+package opennlp.tools.parse_thicket.matching;

+

+import java.util.ArrayList;

+import java.util.HashMap;

+import java.util.List;

+import java.util.Map;

+

+import opennlp.tools.parse_thicket.ParseThicket;

+import opennlp.tools.parse_thicket.ParseTreeNode;

+import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;

+import opennlp.tools.parse_thicket.rhetoric_structure.RhetoricStructureArcsBuilder;

+

+import org.jgrapht.Graph;

+import org.jgrapht.graph.DefaultEdge;

+import org.jgrapht.graph.SimpleGraph;

+

+

+import edu.stanford.nlp.trees.Tree;

+

+public class PT2ThicketPhraseBuilder {

+	

+	RhetoricStructureArcsBuilder rstBuilder = new RhetoricStructureArcsBuilder();

+	

+	/*

+	 * Building phrases takes a Parse Thicket and forms phrases for each sentence individually

+	 * Then based on built phrases and obtained arcs, it builds arcs for RST

+	 * Finally, based on all formed arcs, it extends phrases with thicket phrases

+	 */

+

+	public List<List<ParseTreeNode>> buildPT2ptPhrases(ParseThicket pt ) {

+		List<List<ParseTreeNode>> phrasesAllSent = new ArrayList<List<ParseTreeNode>> ();

+		Map<Integer, List<List<ParseTreeNode>>> sentNumPhrases = new HashMap<Integer, List<List<ParseTreeNode>>>();

+		// build regular phrases

+		for(int nSent=0; nSent<pt.getSentences().size(); nSent++){

+			

+			

+			List<ParseTreeNode> sentence = pt.getNodesThicket().get(nSent);

+			Tree ptree = pt.getSentences().get(nSent);

+			//ptree.pennPrint();

+			List<List<ParseTreeNode>> phrases = buildPT2ptPhrasesForASentence(ptree, sentence);

+			System.out.println(phrases);

+			phrasesAllSent.addAll(phrases);

+			sentNumPhrases.put(nSent, phrases);

+

+		}

+		

+		// discover and add RST arcs

+		List<WordWordInterSentenceRelationArc> arcsRST =

+				rstBuilder.buildRSTArcsFromMarkersAndCorefs(pt.getArcs(), sentNumPhrases, pt);

+		

+		List<WordWordInterSentenceRelationArc> arcs = pt.getArcs();

+		arcs.addAll(arcsRST);

+		pt.setArcs(arcs);

+		

+		

+		List<List<ParseTreeNode>> expandedPhrases = expandTowardsThicketPhrases(phrasesAllSent, pt.getArcs(), sentNumPhrases, pt);

+		return expandedPhrases;

+	}

+

+/* Take all phrases, all arcs and merge phrases into Thicket phrases.

+ * Then add the set of generalized (Thicket) phrases to the input set of phrases

+ * phrasesAllSent - list of lists of phrases for each sentence

+ * sentNumPhrase - map , gives for each sentence id, the above list

+ * arcs - arcs formed so far

+ * pt - the built Parse Thicket

+ */

+	private List<List<ParseTreeNode>> expandTowardsThicketPhrases(

+			List<List<ParseTreeNode>> phrasesAllSent,

+			List<WordWordInterSentenceRelationArc> arcs,

+			Map<Integer, List<List<ParseTreeNode>>> sentNumPhrases, 

+			ParseThicket pt ) {

+		List<List<ParseTreeNode>> thicketPhrasesAllSent = new ArrayList<List<ParseTreeNode>>();

+		

+		

+			for(int nSent=0; nSent<pt.getSentences().size(); nSent++){

+				for(int mSent=nSent+1; mSent<pt.getSentences().size(); mSent++){

+					// for given arc, find phrases connected by this arc and add to the list of phrases

+					for(WordWordInterSentenceRelationArc arc: arcs){

+						List<List<ParseTreeNode>> phrasesFrom = sentNumPhrases.get(nSent);

+						List<List<ParseTreeNode>> phrasesTo = sentNumPhrases.get(mSent);

+						int fromIndex = arc.getCodeFrom().getFirst();

+						int toIndex = arc.getCodeTo().getFirst();

+						if (nSent==fromIndex && mSent==toIndex){

+							int sentPosFrom = arc.getCodeFrom().getSecond();

+							int sentPosTo = arc.getCodeTo().getSecond();

+							// for the given arc arc, find phrases which are connected by it

+							List<ParseTreeNode> lFromFound = null, lToFound = null;

+							for(List<ParseTreeNode> lFrom: phrasesFrom){

+								if (lToFound!=null)

+									break;

+								for(ParseTreeNode lFromP: lFrom){

+									if (lFromP.getId()!=null &&  lFromP.getId()==sentPosFrom){

+											lFromFound = lFrom;

+											break;

+										}

+								}

+							}

+							for(List<ParseTreeNode> lTo: phrasesTo){

+								if (lToFound!=null)

+									break;

+								for(ParseTreeNode lToP: lTo)

+									if (lToP.getId()!=null && lToP.getId()==sentPosTo){

+										lToFound = lTo;

+										break;

+									}

+							}

+							// obtain a thicket phrase and add it to the list

+							if (lFromFound!=null && lToFound!=null){

+								

+								if (identicalSubPhrase(lFromFound, lToFound))

+									continue;

+								List<ParseTreeNode> appended = append(lFromFound, lToFound);

+								if (thicketPhrasesAllSent.contains(appended))

+									continue;

+								System.out.println("rel: "+arc);

+								System.out.println("From "+lFromFound);

+								System.out.println("TO "+lToFound);

+								thicketPhrasesAllSent.add(append(lFromFound, lToFound));	

+								//break;

+							}

+						}

+						

+					}

+				}

+			}

+			phrasesAllSent.addAll(thicketPhrasesAllSent);

+			return phrasesAllSent;

+	}

+

+/* check that one phrase is subphrase of another by lemma (ignoring other node properties)

+ * returns true if not found different word

+ */

+	

+	private boolean identicalSubPhrase(List<ParseTreeNode> lFromFound,

+			List<ParseTreeNode> lToFound) {

+		for(int pos=0; pos<lFromFound.size()&& pos<lToFound.size(); pos++){

+			if (!lFromFound.get(pos).getWord().equals(lToFound.get(pos).getWord()))

+				return false;

+		}

+		return true;

+	}

+

+	private List<ParseTreeNode> append(List<ParseTreeNode> lFromFound,

+			List<ParseTreeNode> lToFound) {

+		List<ParseTreeNode> appendList = new ArrayList<ParseTreeNode>();

+		appendList.addAll(lFromFound);

+		appendList.addAll(lToFound);

+		return appendList;

+	}

+

+

+	public List<List<ParseTreeNode>> buildPT2ptPhrasesForASentence(Tree tree, List<ParseTreeNode> sentence ) {

+		List<List<ParseTreeNode>> phrases;

+

+		phrases = new ArrayList<List<ParseTreeNode>>();		

+		navigateR(tree, sentence, phrases);

+

+		return phrases;

+	}

+

+

+	

+

+/*

+ * 

+[[<1>NP'Iran':NNP], [<2>VP'refuses':VBZ, <3>VP'to':TO, <4>VP'accept':VB, <5>VP'the':DT, <6>VP'UN':NNP, 

+<7>VP'proposal':NN, <8>VP'to':TO, <9>VP'end':VB, <10>VP'its':PRP$, <11>VP'dispute':NN, <12>VP'over':IN, <13>VP'its':PRP$,

+ <14>VP'work':NN, <15>VP'on':IN, <16>VP'nuclear':JJ, <17>VP'weapons':NNS], [<3>VP'to':TO, <4>VP'accept':VB, <5>VP'the':DT,

+  <6>VP'UN':NNP, <7>VP'proposal':NN, <8>VP'to':TO, <9>VP'end':VB, <10>VP'its':PRP$, <11>VP'dispute':NN, <12>VP'over':IN, 

+  <13>VP'its':PRP$, <14>VP'work':NN, <15>VP'on':IN, <16>VP'nuclear':JJ, <17>VP'weapons':NNS], [<4>VP'accept':VB, 

+  <5>VP'the':DT, <6>VP'UN':NNP, <7>VP'proposal':NN, <8>VP'to':TO, <9>VP'end':VB, <10>VP'its':PRP$, <11>VP'dispute':NN, 

+  <12>VP'over':IN, <13>VP'its':PRP$, <14>VP'work':NN, <15>VP'on':IN, <16>VP'nuclear':JJ, <17>VP'weapons':NNS], 

+  [<5>NP'the':DT, <6>NP'UN':NNP, <7>NP'proposal':NN], [<8>VP'to':TO, <9>VP'end':VB, <10>VP'its':PRP$, <11>VP'dispute':NN, 

+  <12>VP'over':IN, <13>VP'its':PRP$, <14>VP'work':NN, <15>VP'on':IN, <16>VP'nuclear':JJ, <17>VP'weapons':NNS], 

+  [<9>VP'end':VB, <10>VP'its':PRP$, <11>VP'dispute':NN, <12>VP'over':IN, <13>VP'its':PRP$, <14>VP'work':NN, <15>VP'on':IN,

+   <16>VP'nuclear':JJ, <17>VP'weapons':NNS], [<10>NP'its':PRP$, <11>NP'dispute':NN], [<12>PP'over':IN, <13>PP'its':PRP$, 

+   <14>PP'work':NN, <15>PP'on':IN, <16>PP'nuclear':JJ, <17>PP'weapons':NNS], [<13>NP'its':PRP$, <14>NP'work':NN, 

+   <15>NP'on':IN, <16>NP'nuclear':JJ, <17>NP'weapons':NNS], [<13>NP'its':PRP$, <14>NP'work':NN],

+ [<15>PP'on':IN, <16>PP'nuclear':JJ, <17>PP'weapons':NNS], [<16>NP'nuclear':JJ, <17>NP'weapons':NNS]]

+ *  

+ * 

+ */

+	private void navigateR(Tree t, List<ParseTreeNode> sentence,

+			List<List<ParseTreeNode>> phrases) {

+		if (!t.isPreTerminal()) {

+			if (t.label() != null) {

+				if (t.value() != null) {

+					// if ROOT or S, returns empty

+					List<ParseTreeNode> nodes = parsePhrase(t.label().value(), t.toString());

+					nodes = assignIndexToNodes(nodes, sentence);

+					if (!nodes.isEmpty())

+						phrases.add(nodes);

+					if (nodes.size()>0 && nodes.get(0).getId()==null){

+							System.err.println("Failed alignment:"+nodes);

+					}

+				}

+			}

+			Tree[] kids = t.children();

+			if (kids != null) {

+				for (Tree kid : kids) {

+					navigateR(kid,sentence,  phrases);

+				}

+			}

+			return ;

+		}

+	}

+	

+	

+	/* alignment of phrases extracted from tree against the sentence as a list of lemma-pos */

+	

+	private List<ParseTreeNode> assignIndexToNodes(List<ParseTreeNode> node,

+			List<ParseTreeNode> sentence) {

+		if (sentence==null || sentence.size()<1)

+			return node;

+		

+		List<ParseTreeNode> results = new ArrayList<ParseTreeNode>();

+		

+		for(int i= 0; i<node.size(); i++){

+			String thisLemma = node.get(i).getWord();			

+			String thisPOS = node.get(i).getPos();

+			String nextLemma = null, nextPOS = null;

+			

+			if (i+1<node.size()){

+				nextLemma = node.get(i+1).getWord();

+				nextPOS = node.get(i+1).getPos();

+			}

+			Boolean matchOccurred = false;

+			int j = 0;

+			for(j= 0; j<sentence.size(); j++){

+				if (!(sentence.get(j).getWord().equals(thisLemma) && (sentence.get(j).getPos().equals(thisPOS))))

+					continue;

+				if (i+1<node.size() && j+1 < sentence.size() && nextLemma!=null 

+						&& ! (sentence.get(j+1).getWord().equals(nextLemma)

+					  && sentence.get(j+1).getPos().equals(nextPOS)))

+					continue;

+				matchOccurred = true;

+				break;

+			}

+			

+			ParseTreeNode n = node.get(i);

+			if (matchOccurred){

+				n.setId(sentence.get(j).getId());

+				n.setNe(sentence.get(j).getNe());

+			}

+			results.add(n);

+		}

+		

+		try {

+			if (results!=null && results.size()>1 && results.get(0)!=null && results.get(0).getId()!=null &&

+					results.get(1) !=null && results.get(1).getId()!=null &&  results.get(1).getId()>0){

+				ParseTreeNode p = results.get(0);

+				p.setId(results.get(1).getId()-1);

+				results.set(0, p);

+			}

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+		return results;

+	}

+

+

+	/*

+	 * [[NP'':], ['(NNP':Iran)], [VP'':], ['(VBZ':refuses)], [VP'':], ['(TO':to)], [VP'':], ['(VB':accept)], [NP'':], 

+	 * ['(DT':the)], ['(NNP':UN)], ['(NN':proposal)], [VP'':], ['(TO':to)], [VP'':], ['(VB':end)], [NP'':], 

+	 * ['(PRP$':its)], ['(NN':dispute)], [PP'':], ['(IN':over)], [NP'':], [NP'':],

+	 *  ['(PRP$':its)], ['(NN':work)], [PP'':], ['(IN':on)], [NP'':], ['(JJ':nuclear)], ['(NNS':weapons)], ['(.':.)]]

+	 * 

+	 * [[NP'':], ['(NNP':Iran)],

+ [VP'':], ['(VBZ':refuses)], 

+ [VP'':], ['(TO':to)], 

+ [VP'':], ['(VB':accept)], 

+    [NP'':], ['(DT':the)], ['(NNP':UN)], ['(NN':proposal)], 

+    [VP'':], ['(TO':to)], [VP'':], ['(VB':end)], 

+    [NP'':], ['(PRP$':its)], ['(NN':dispute)], 

+        [PP'':], ['(IN':over)], 

+            [NP'':], [NP'':], ['(PRP$':its)], ['(NN':work)], 

+              [PP'':], ['(IN':on)], 

+                [NP'':], ['(JJ':nuclear)], ['(NNS':weapons)], 

+['(.':.)]]

+	 */

+	private void navigateR1(Tree t, List<ParseTreeNode> sentence, int l,

+			List<List<ParseTreeNode>> phrases) {

+		if (t.isPreTerminal()) {

+			if (t.label() != null) {

+				List<ParseTreeNode> node = parsePhrase(t.toString());	

+				if (!node.isEmpty())

+					phrases.add(node);

+			}

+			return;

+		} else {

+			if (t.label() != null) {

+				if (t.value() != null) {

+					List<ParseTreeNode> node = parsePhrase(t.label().value());		 

+					if (!node.isEmpty())

+						phrases.add(node);

+				}

+			}

+			Tree[] kids = t.children();

+			if (kids != null) {

+				for (Tree kid : kids) {

+					navigateR1(kid,sentence,  l, phrases);

+				}

+			}

+			return ;

+		}

+	}

+

+

+	protected List<ParseTreeNode> parsePhrase(String value) {

+		List<ParseTreeNode> nlist = new ArrayList<ParseTreeNode>(); 

+		if (value==null)

+			return nlist;

+		if (value.equals("ROOT")|| value.equals("S")) 

+			return nlist;

+		

+		String[] pos_value = value.split(" ");

+		ParseTreeNode node = null;

+		if (value.endsWith("P")){

+			node = new ParseTreeNode("", ""); 

+		    node.setPhraseType(value);

+		} else 

+		if (pos_value != null && pos_value.length==2){

+			node = new ParseTreeNode(pos_value[0], pos_value[1]);

+		} else {

+			node = new ParseTreeNode(value, "");

+		}

+			

+		nlist.add(node);

+		return nlist;

+	}

+	

+	private ParseTreeNode parsePhraseNode(String value) {

+		

+		if (value.equals("ROOT")|| value.equals("S")) 

+			return null;

+		

+		String[] pos_value = value.split(" ");

+		ParseTreeNode node = null;

+		if (value.endsWith("P")){

+			node = new ParseTreeNode("", ""); 

+		    node.setPhraseType(value);

+		} else 

+		if (pos_value != null && pos_value.length==2){

+			node = new ParseTreeNode(pos_value[0], pos_value[1]);

+		} else {

+			node = new ParseTreeNode(value, "");

+		}			

+		

+		return node;

+	}

+	

+	public List<ParseTreeNode> parsePhrase(String value, String fullDump) {

+		

+		List<ParseTreeNode> nlist = new ArrayList<ParseTreeNode>(); 

+		if (value.equals("S")|| value.equals("ROOT"))

+				return nlist;

+		

+		String flattened = fullDump.replace("(ROOT","").replace("(NP","").replace("(VP","").replace("(PP","")

+				.replace("(ADVP","").replace("(UCP","").replace("(ADJP","").replace("(SBAR","").

+				replace("(PRT", "").replace("(WHNP","").

+				 replace("))))",")").replace(")))",")").replace("))",")")

+				.replace("   ", " ").replace("  ", " ").replace("(S","")

+				.replace(") (","#").replace(")  (", "#");

+		String[] flattenedArr =  flattened.split("#");

+		for(String term: flattenedArr){

+			term = term.replace('(', ' ').replace(')',' ').trim();

+			if (term!=null && term.split(" ")!=null && term.split(" ").length==2){

+				ParseTreeNode node = new ParseTreeNode(term.split(" ")[1],term.split(" ")[0] );

+				node.setPhraseType(value);

+				nlist.add(node);

+			}

+		}

+		return nlist;

+	}

+	

+/* recursion example */

+	

+	private StringBuilder toStringBuilder(StringBuilder sb, Tree t) {

+		if (t.isLeaf()) {

+			if (t.label() != null) {

+				sb.append(t.label().value());

+			}

+			return sb;

+		} else {

+			sb.append('(');

+			if (t.label() != null) {

+				if (t.value() != null) {

+					sb.append(t.label().value());

+				}

+			}

+			Tree[] kids = t.children();

+			if (kids != null) {

+				for (Tree kid : kids) {

+					sb.append(' ');

+					toStringBuilder(sb, kid);

+				}

+			}

+			return sb.append(')');

+		}

+	}

+	

+	public static void main(String[] args){

+		PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder();

+		String line = "(NP (NNP Iran)) (VP (VBZ refuses) (S (VP (TO to) (VP (VB accept) (S (NP (DT the) " +

+				"(NNP UN) (NN proposal)) (VP (TO to) (VP (VB end) (NP (PRP$ its) (NN dispute))))))))";

+		

+		List<ParseTreeNode> res = phraseBuilder. parsePhrase("NP", line);

+		System.out.println(res);

+		

+

+		line = "(VP (VBP am) (NP (NP (DT a) (NNP US) (NN citizen)) (UCP (VP (VBG living) (ADVP (RB abroad))) (, ,) (CC and) (ADJP (JJ concerned) (PP (IN about) (NP (NP (DT the) (NN health) (NN reform) (NN regulation)) (PP (IN of) (NP (CD 2014)))))))))";

+		res = phraseBuilder. parsePhrase("VP", line);

+		System.out.println(res);

+				

+		line = "(VP (TO to) (VP (VB wait) (SBAR (IN till) (S (NP (PRP I)) (VP (VBP am) (ADJP (JJ sick) (S (VP (TO to) (VP (VB buy) (NP (NN health) (NN insurance)))))))))))";

+		res = phraseBuilder. parsePhrase("VP", line);

+		System.out.println(res);

+	}

+  

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreeChunkListScorer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreeChunkListScorer.java
new file mode 100644
index 0000000..21e7f52
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreeChunkListScorer.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.parse_thicket.matching;
+
+import java.util.List;
+
+public class ParseTreeChunkListScorer {
+  // find the single expression with the highest score
+  public double getParseTreeChunkListScore(
+      List<List<ParseTreePath>> matchResult) {
+    double currScore = 0.0;
+    for (List<ParseTreePath> chunksGivenPhraseType : matchResult)
+      for (ParseTreePath chunk : chunksGivenPhraseType) {
+        Double score = getScore(chunk);
+        // System.out.println(chunk+ " => score >>> "+score);
+        if (score > currScore) {
+          currScore = score;
+        }
+      }
+    return currScore;
+  }
+
+  // get max score per phrase type and then sum up
+  public double getParseTreeChunkListScoreAggregPhraseType(
+      List<List<ParseTreePath>> matchResult) {
+    double currScoreTotal = 0.0;
+    for (List<ParseTreePath> chunksGivenPhraseType : matchResult) {
+      double currScorePT = 0.0;
+      for (ParseTreePath chunk : chunksGivenPhraseType) {
+        Double score = getScore(chunk);
+        // System.out.println(chunk+ " => score >>> "+score);
+        if (score > currScorePT) {
+          currScorePT = score;
+        }
+      }
+      // if substantial for given phrase type
+      if (currScorePT > 0.5) {
+        currScoreTotal += currScorePT;
+      }
+    }
+    return currScoreTotal;
+  }
+
+  // score is meaningful only for chunks which are results of generalization
+
+  public double getScore(ParseTreePath chunk) {
+    double score = 0.0;
+    int i = 0;
+    for (String l : chunk.getLemmas()) {
+      String pos = chunk.getPOSs().get(i);
+      if (l.equals("*")) {
+        if (pos.startsWith("CD")) { // number vs number gives high score
+                                    // although different numbers
+          score += 0.7;
+        } else if (pos.endsWith("_high")) { // if query modification adds 'high'
+          score += 1.0;
+        } else {
+          score += 0.1;
+        }
+      } else {
+
+        if (pos.startsWith("NN") || pos.startsWith("NP")
+            || pos.startsWith("CD") || pos.startsWith("RB")) {
+          score += 1.0;
+        } else if (pos.startsWith("VB") || pos.startsWith("JJ")) {
+          if (l.equals("get")) { // 'common' verbs are not that important
+            score += 0.3;
+          } else {
+            score += 0.5;
+          }
+        } else {
+          score += 0.3;
+        }
+      }
+      i++;
+
+    }
+    return score;
+  }
+
+}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePath.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePath.java
new file mode 100644
index 0000000..d0bf61f
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePath.java
@@ -0,0 +1,422 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.parse_thicket.matching;

+

+import java.util.ArrayList;

+import java.util.List;

+

+import opennlp.tools.textsimilarity.LemmaPair;

+

+public class ParseTreePath {

+  private String mainPOS;

+

+  private List<String> lemmas;

+

+  private List<String> POSs;

+  //order number of a word in a sentence

+  private List<Integer> wordUniqueCodes;

+

+  private int startPos;

+

+  private int endPos;

+

+  private int size;

+

+  private ParseTreePathMatcher parseTreeMatcher;

+

+  private LemmaFormManager lemmaFormManager;

+

+  private GeneralizationListReducer generalizationListReducer;

+

+  public ParseTreePath() {

+  }

+

+  public ParseTreePath(List<String> lemmas, List<String> POSs, int startPos,

+      int endPos) {

+    this.lemmas = lemmas;

+    this.POSs = POSs;

+    this.startPos = startPos;

+    this.endPos = endPos;

+

+  }

+

+  // constructor which takes lemmas and POS as lists so that phrases can be

+  // conveniently specified.

+  // usage: stand-alone runs

+  public ParseTreePath(String mPOS, String[] lemmas, String[] POSss) {

+    this.mainPOS = mPOS;

+    this.lemmas = new ArrayList<String>();

+    for (String l : lemmas) {

+      this.lemmas.add(l);

+    }

+    if (mPOS.equals("SENTENCE")){

+    	for(int i=0; i<lemmas.length; i++){

+    		wordUniqueCodes.add(this.lemmas.get(i).hashCode());

+    	}

+    }

+    

+    this.POSs = new ArrayList<String>();

+    for (String p : POSss) {

+      this.POSs.add(p);

+    }

+  }

+

+  // constructor which takes lemmas and POS as lists so that phrases can be

+  // conveniently specified.

+  // usage: stand-alone runs

+  public ParseTreePath(String mPOS, List<String> lemmas, List<String> POSss) {

+    this.mainPOS = mPOS;

+    this.lemmas = lemmas;

+    this.POSs = POSss;

+

+  }

+

+  // Before:

+  // [0(S-At home we like to eat great pizza deals), 0(PP-At home), 0(IN-At),

+  // 3(NP-home), 3(NN-home), 8(NP-we),

+  // 8(PRP-we), 11(VP-like to eat great pizza deals), 11(VBP-like), 16(S-to eat

+  // great pizza deals), 16(VP-to eat great

+  // pizza deals),

+  // 16(TO-to), 19(VP-eat great pizza deals), 19(VB-eat), 23(NP-great pizza

+  // deals), 23(JJ-great), 29(NN-pizza),

+  // 35(NNS-deals)]

+

+  // After:

+  // [S [IN-At NP-home NP-we VBP-like ], PP [IN-At NP-home ], IN [IN-At ], NP

+  // [NP-home ], NN [NP-home ], NP [NP-we ],

+  // PRP [NP-we ], VP [VBP-like TO-to VB-eat JJ-great ], VBP [VBP-like ], S

+  // [TO-to VB-eat JJ-great NN-pizza ], VP

+  // [TO-to VB-eat JJ-great NN-pizza ], TO [TO-to ], VP [VB-eat JJ-great

+  // NN-pizza NNS-deals ],

+  // VB [VB-eat ], NP [JJ-great NN-pizza NNS-deals ], JJ [JJ-great ], NN

+  // [NN-pizza ], NNS [NNS-deals ]]

+

+  public List<ParseTreePath> buildChunks(List<LemmaPair> parseResults) {

+    List<ParseTreePath> chunksResults = new ArrayList<ParseTreePath>();

+    for (LemmaPair chunk : parseResults) {

+      String[] lemmasAr = chunk.getLemma().split(" ");

+      List<String> poss = new ArrayList<String>(), lems = new ArrayList<String>();

+      for (String lem : lemmasAr) {

+        lems.add(lem);

+        // now looking for POSs for individual word

+        for (LemmaPair chunkCur : parseResults) {

+          if (chunkCur.getLemma().equals(lem)

+              &&

+              // check that this is a proper word in proper position

+              chunkCur.getEndPos() <= chunk.getEndPos()

+              && chunkCur.getStartPos() >= chunk.getStartPos()) {

+            poss.add(chunkCur.getPOS());

+            break;

+          }

+        }

+      }

+      if (lems.size() != poss.size()) {

+        System.err.println("lems.size()!= poss.size()");

+      }

+      if (lems.size() < 2) { // single word phrase, nothing to match

+        continue;

+      }

+      ParseTreePath ch = new ParseTreePath(lems, poss, chunk.getStartPos(),

+          chunk.getEndPos());

+      ch.setMainPOS(chunk.getPOS());

+      chunksResults.add(ch);

+    }

+    return chunksResults;

+  }

+

+  public List<List<ParseTreePath>> matchTwoSentencesGivenPairLists(

+      List<LemmaPair> sent1Pairs, List<LemmaPair> sent2Pairs) {

+

+    List<ParseTreePath> chunk1List = buildChunks(sent1Pairs);

+    List<ParseTreePath> chunk2List = buildChunks(sent2Pairs);

+

+    List<List<ParseTreePath>> sent1GrpLst = groupChunksAsParses(chunk1List);

+    List<List<ParseTreePath>> sent2GrpLst = groupChunksAsParses(chunk2List);

+

+    System.out.println("=== Grouped chunks 1 " + sent1GrpLst);

+    System.out.println("=== Grouped chunks 2 " + sent2GrpLst);

+

+    return matchTwoSentencesGroupedChunks(sent1GrpLst, sent2GrpLst);

+  }

+

+  // groups noun phrases, verb phrases, propos phrases etc. for separate match

+

+  public List<List<ParseTreePath>> groupChunksAsParses(

+      List<ParseTreePath> parseResults) {

+    List<ParseTreePath> np = new ArrayList<ParseTreePath>(), vp = new ArrayList<ParseTreePath>(), prp = new ArrayList<ParseTreePath>(), sbarp = new ArrayList<ParseTreePath>(), pp = new ArrayList<ParseTreePath>(), adjp = new ArrayList<ParseTreePath>(), whadvp = new ArrayList<ParseTreePath>(), restOfPhrasesTypes = new ArrayList<ParseTreePath>();

+    List<List<ParseTreePath>> results = new ArrayList<List<ParseTreePath>>();

+    for (ParseTreePath ch : parseResults) {

+      String mainPos = ch.getMainPOS().toLowerCase();

+

+      if (mainPos.equals("s")) {

+        continue;

+      }

+      if (mainPos.equals("np")) {

+        np.add(ch);

+      } else if (mainPos.equals("vp")) {

+        vp.add(ch);

+      } else if (mainPos.equals("prp")) {

+        prp.add(ch);

+      } else if (mainPos.equals("pp")) {

+        pp.add(ch);

+      } else if (mainPos.equals("adjp")) {

+        adjp.add(ch);

+      } else if (mainPos.equals("whadvp")) {

+        whadvp.add(ch);

+      } else if (mainPos.equals("sbar")) {

+        sbarp.add(ch);

+      } else {

+        restOfPhrasesTypes.add(ch);

+      }

+

+    }

+    results.add(np);

+    results.add(vp);

+    results.add(prp);

+    results.add(pp);

+    results.add(adjp);

+    results.add(whadvp);

+    results.add(restOfPhrasesTypes);

+

+    return results;

+

+  }

+

+  // main function to generalize two expressions grouped by phrase types

+  // returns a list of generalizations for each phrase type with filtered

+  // sub-expressions

+  public List<List<ParseTreePath>> matchTwoSentencesGroupedChunks(

+      List<List<ParseTreePath>> sent1, List<List<ParseTreePath>> sent2) {

+    List<List<ParseTreePath>> results = new ArrayList<List<ParseTreePath>>();

+    // first irerate through component

+    for (int comp = 0; comp < 2 && // just np & vp

+        comp < sent1.size() && comp < sent2.size(); comp++) {

+      List<ParseTreePath> resultComps = new ArrayList<ParseTreePath>();

+      // then iterate through each phrase in each component

+      for (ParseTreePath ch1 : sent1.get(comp)) {

+        for (ParseTreePath ch2 : sent2.get(comp)) { // simpler version

+          ParseTreePath chunkToAdd = parseTreeMatcher

+              .generalizeTwoGroupedPhrasesRandomSelectHighestScoreWithTransforms(

+                  ch1, ch2);

+

+          if (!lemmaFormManager.mustOccurVerifier(ch1, ch2, chunkToAdd)) {

+            continue; // if the words which have to stay do not stay, proceed to

+                      // other elements

+          }

+          Boolean alreadyThere = false;

+          for (ParseTreePath chunk : resultComps) {

+            if (chunk.equalsTo(chunkToAdd)) {

+              alreadyThere = true;

+              break;

+            }

+

+            if (parseTreeMatcher

+                .generalizeTwoGroupedPhrasesRandomSelectHighestScore(chunk,

+                    chunkToAdd).equalsTo(chunkToAdd)) {

+              alreadyThere = true;

+              break;

+            }

+          }

+

+          if (!alreadyThere) {

+            resultComps.add(chunkToAdd);

+          }

+

+          List<ParseTreePath> resultCompsReduced = generalizationListReducer

+              .applyFilteringBySubsumption(resultComps);

+          // if (resultCompsReduced.size() != resultComps.size())

+          // System.out.println("reduction of gen list occurred");

+        }

+      }

+      results.add(resultComps);

+    }

+

+    return results;

+  }

+

+  public Boolean equals(ParseTreePath ch) {

+    List<String> lems = ch.getLemmas();

+    List<String> poss = ch.POSs;

+

+    if (this.lemmas.size() <= lems.size())

+      return false; // sub-chunk should be shorter than chunk

+

+    for (int i = 0; i < lems.size() && i < this.lemmas.size(); i++) {

+      if (!(this.lemmas.get(i).equals(lems.get(i)) && this.POSs.get(i).equals(

+          poss.get(i))))

+        return false;

+    }

+    return true;

+  }

+

+  // 'this' is super - chunk of ch, ch is sub-chunk of 'this'

+  public Boolean isASubChunk(ParseTreePath ch) {

+    List<String> lems = ch.getLemmas();

+    List<String> poss = ch.POSs;

+

+    if (this.lemmas.size() < lems.size())

+      return false; // sub-chunk should be shorter than chunk

+

+    for (int i = 0; i < lems.size() && i < this.lemmas.size(); i++) {

+      if (!(this.lemmas.get(i).equals(lems.get(i)) && this.POSs.get(i).equals(

+          poss.get(i))))

+        return false;

+    }

+    return true;

+  }

+

+  public Boolean equalsTo(ParseTreePath ch) {

+    List<String> lems = ch.getLemmas();

+    List<String> poss = ch.POSs;

+    if (this.lemmas.size() != lems.size() || this.POSs.size() != poss.size())

+      return false;

+

+    for (int i = 0; i < lems.size(); i++) {

+      if (!(this.lemmas.get(i).equals(lems.get(i)) && this.POSs.get(i).equals(

+          poss.get(i))))

+        return false;

+    }

+

+    return true;

+  }

+

+  public String toString() {

+    String buf = " [";

+    if (mainPOS != null)

+      buf = mainPOS + " [";

+    for (int i = 0; i < lemmas.size() && i < POSs.size() // && i<=3

+    ; i++) {

+      buf += POSs.get(i) + "-" + lemmas.get(i) + " ";

+    }

+    return buf + "]";

+  }

+

+  public int compareTo(ParseTreePath o) {

+    if (this.size > o.size)

+      return -1;

+    else

+      return 1;

+

+  }

+

+  public String listToString(List<List<ParseTreePath>> chunks) {

+    StringBuffer buf = new StringBuffer();

+    if (chunks.get(0).size() > 0) {

+      buf.append(" np " + chunks.get(0).toString());

+    }

+    if (chunks.get(1).size() > 0) {

+      buf.append(" vp " + chunks.get(1).toString());

+    }

+    if (chunks.size() < 3) {

+      return buf.toString();

+    }

+    if (chunks.get(2).size() > 0) {

+      buf.append(" prp " + chunks.get(2).toString());

+    }

+    if (chunks.get(3).size() > 0) {

+      buf.append(" pp " + chunks.get(3).toString());

+    }

+    if (chunks.get(4).size() > 0) {

+      buf.append(" adjp " + chunks.get(4).toString());

+    }

+    if (chunks.get(5).size() > 0) {

+      buf.append(" whadvp " + chunks.get(5).toString());

+    }

+    /*

+     * if (mainPos.equals("np")) np.add(ch); else if (mainPos.equals( "vp"))

+     * vp.add(ch); else if (mainPos.equals( "prp")) prp.add(ch); else if

+     * (mainPos.equals( "pp")) pp.add(ch); else if (mainPos.equals( "adjp"))

+     * adjp.add(ch); else if (mainPos.equals( "whadvp")) whadvp.add(ch);

+     */

+    return buf.toString();

+  }

+

+  public List<List<ParseTreePath>> obtainParseTreeChunkListByParsingList(

+      String toParse) {

+    List<List<ParseTreePath>> results = new ArrayList<List<ParseTreePath>>();

+    // if (toParse.endsWith("]]]")){

+    // toParse = toParse.replace("[[","").replace("]]","");

+    // }

+    toParse = toParse.replace(" ]], [ [", "&");

+    String[] phraseTypeFragments = toParse.trim().split("&");

+    for (String toParseFragm : phraseTypeFragments) {

+      toParseFragm = toParseFragm.replace("],  [", "#");

+

+      List<ParseTreePath> resultsPhraseType = new ArrayList<ParseTreePath>();

+      String[] indivChunks = toParseFragm.trim().split("#");

+      for (String expr : indivChunks) {

+        List<String> lems = new ArrayList<String>(), poss = new ArrayList<String>();

+        expr = expr.replace("[", "").replace(" ]", "");

+        String[] pairs = expr.trim().split(" ");

+        for (String word : pairs) {

+          word = word.replace("]]", "").replace("]", "");

+          String[] pos_lem = word.split("-");

+          lems.add(pos_lem[1].trim());

+          poss.add(pos_lem[0].trim());

+        }

+        ParseTreePath ch = new ParseTreePath();

+        ch.setLemmas(lems);

+        ch.setPOSs(poss);

+        resultsPhraseType.add(ch);

+      }

+      results.add(resultsPhraseType);

+    }

+    System.out.println(results);

+    return results;

+

+    // 2.1 | Vietnam <b>embassy</b> <b>in</b> <b>Israel</b>: information on how

+    // to get your <b>visa</b> at Vietnam

+    // <b>embassy</b> <b>in</b> <b>Israel</b>. <b>...</b> <b>Spain</b>.

+    // Scotland. Sweden. Slovakia. Switzerland. T

+    // [Top of Page] <b>...</b>

+    // [[ [NN-* IN-in NP-israel ], [NP-* IN-in NP-israel ], [NP-* IN-* TO-* NN-*

+    // ], [NN-visa IN-* NN-* IN-in ]], [

+    // [VB-get NN-visa IN-* NN-* IN-in .-* ], [VBD-* IN-* NN-* NN-* .-* ], [VB-*

+    // NP-* ]]]

+

+  }

+

+  public void setMainPOS(String mainPOS) {

+    this.mainPOS = mainPOS;

+  }

+

+  public String getMainPOS() {

+    return mainPOS;

+  }

+

+  public List<String> getLemmas() {

+    return lemmas;

+  }

+

+  public void setLemmas(List<String> lemmas) {

+    this.lemmas = lemmas;

+  }

+

+  public List<String> getPOSs() {

+    return POSs;

+  }

+

+  public void setPOSs(List<String> pOSs) {

+    POSs = pOSs;

+  }

+

+  public ParseTreePathMatcher getParseTreeMatcher() {

+    return parseTreeMatcher;

+  }

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathComparable.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathComparable.java
new file mode 100644
index 0000000..539c61e
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathComparable.java
@@ -0,0 +1,32 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.parse_thicket.matching;

+

+import java.util.Comparator;

+

+public class ParseTreePathComparable implements Comparator<ParseTreePath> {

+  public int compare(ParseTreePath ch1, ParseTreePath ch2) {

+    for (int i = 0; i < ch1.getLemmas().size() && i < ch2.getLemmas().size(); i++) {

+      if (!(ch1.getLemmas().get(i).equals(ch2.getLemmas().get(i)) && ch1

+          .getPOSs().get(i).equals(ch2.getPOSs().get(i))))

+        return -1;

+    }

+    return 0;

+

+  }

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathMatcher.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathMatcher.java
new file mode 100644
index 0000000..7323a8e
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathMatcher.java
@@ -0,0 +1,254 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.parse_thicket.matching;

+

+import java.util.ArrayList;

+import java.util.Collections;

+import java.util.List;

+

+import opennlp.tools.textsimilarity.POSManager;

+

+public class ParseTreePathMatcher {

+

+  private static final int NUMBER_OF_ITERATIONS = 2;

+

+  private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

+  private POSManager posManager = new POSManager();

+  private LemmaFormManager lemmaFormManager = new LemmaFormManager();

+

+  public ParseTreePathMatcher() {

+

+  }

+

+  public ParseTreePath generalizeTwoGroupedPhrasesOLD(ParseTreePath chunk1,

+      ParseTreePath chunk2) {

+    List<String> pos1 = chunk1.getPOSs();

+    List<String> pos2 = chunk1.getPOSs();

+

+    List<String> commonPOS = new ArrayList<String>(), commonLemmas = new ArrayList<String>();

+    int k1 = 0, k2 = 0;

+    Boolean incrFirst = true;

+    while (k1 < pos1.size() && k2 < pos2.size()) {

+      // first check if the same POS

+      String sim = posManager.similarPOS(pos1.get(k1), pos2.get(k2));

+      if (sim != null) {

+        commonPOS.add(pos1.get(k1));

+        if (chunk1.getLemmas().size() > k1 && chunk2.getLemmas().size() > k2

+            && chunk1.getLemmas().get(k1).equals(chunk2.getLemmas().get(k2))) {

+          commonLemmas.add(chunk1.getLemmas().get(k1));

+        } else {

+          commonLemmas.add("*");

+        }

+        k1++;

+        k2++;

+      } else if (incrFirst) {

+        k1++;

+      } else {

+        k2++;

+      }

+      incrFirst = !incrFirst;

+    }

+

+    ParseTreePath res = new ParseTreePath(commonLemmas, commonPOS, 0, 0);

+    // if (parseTreeChunkListScorer.getScore(res)> 0.6)

+    // System.out.println(chunk1 + "  + \n"+ chunk2 + " = \n" + res);

+    return res;

+  }

+

+  // A for B => B have A

+  // transforms expr { A B C prep X Y }

+  // into {A B {X Y} C}

+  // should only be applied to a noun phrase

+  public ParseTreePath prepositionalNNSTransform(ParseTreePath ch) {

+    List<String> transfPOS = new ArrayList<String>(), transfLemmas = new ArrayList<String>();

+    if (!ch.getPOSs().contains("IN"))

+      return ch;

+    int indexIN = ch.getPOSs().lastIndexOf("IN");

+

+    if (indexIN < 2)// preposition is a first word - should not be in a noun

+                    // phrase

+      return ch;

+    String Word_IN = ch.getLemmas().get(indexIN);

+    if (!(Word_IN.equals("to") || Word_IN.equals("on") || Word_IN.equals("in")

+        || Word_IN.equals("of") || Word_IN.equals("with")

+        || Word_IN.equals("by") || Word_IN.equals("from")))

+      return ch;

+

+    List<String> toShiftAfterPartPOS = ch.getPOSs().subList(indexIN + 1,

+        ch.getPOSs().size());

+    List<String> toShiftAfterPartLemmas = ch.getLemmas().subList(indexIN + 1,

+        ch.getLemmas().size());

+

+    if (indexIN - 1 > 0)

+      transfPOS.addAll(ch.getPOSs().subList(0, indexIN - 1));

+    transfPOS.addAll(toShiftAfterPartPOS);

+    transfPOS.add(ch.getPOSs().get(indexIN - 1));

+

+    if (indexIN - 1 > 0)

+      transfLemmas.addAll(ch.getLemmas().subList(0, indexIN - 1));

+    transfLemmas.addAll(toShiftAfterPartLemmas);

+    transfLemmas.add(ch.getLemmas().get(indexIN - 1));

+

+    return new ParseTreePath(transfLemmas, transfPOS, 0, 0);

+  }

+

+  public ParseTreePath generalizeTwoGroupedPhrasesRandomSelectHighestScoreWithTransforms(

+      ParseTreePath chunk1, ParseTreePath chunk2) {

+    ParseTreePath chRes1 = generalizeTwoGroupedPhrasesRandomSelectHighestScore(

+        chunk1, chunk2);

+    ParseTreePath chRes2 = generalizeTwoGroupedPhrasesRandomSelectHighestScore(

+        prepositionalNNSTransform(chunk1), chunk2);

+    ParseTreePath chRes3 = generalizeTwoGroupedPhrasesRandomSelectHighestScore(

+        prepositionalNNSTransform(chunk2), chunk1);

+

+    ParseTreePath chRes = null;

+    if (parseTreeChunkListScorer.getScore(chRes1) > parseTreeChunkListScorer

+        .getScore(chRes2))

+      if (parseTreeChunkListScorer.getScore(chRes1) > parseTreeChunkListScorer

+          .getScore(chRes3))

+        chRes = chRes1;

+      else

+        chRes = chRes3;

+    else if (parseTreeChunkListScorer.getScore(chRes2) > parseTreeChunkListScorer

+        .getScore(chRes3))

+      chRes = chRes2;

+    else

+      chRes = chRes3;

+

+    return chRes;

+  }

+

+  public ParseTreePath generalizeTwoGroupedPhrasesRandomSelectHighestScore(

+      ParseTreePath chunk1, ParseTreePath chunk2) {

+    List<String> pos1 = chunk1.getPOSs();

+    List<String> pos2 = chunk2.getPOSs();

+    // Map <ParseTreeChunk, Double> scoredResults = new HashMap <ParseTreeChunk,

+    // Double> ();

+    int timesRepetitiveRun = NUMBER_OF_ITERATIONS;

+

+    Double globalScore = -1.0;

+    ParseTreePath result = null;

+

+    for (int timesRun = 0; timesRun < timesRepetitiveRun; timesRun++) {

+      List<String> commonPOS = new ArrayList<String>(), commonLemmas = new ArrayList<String>();

+      int k1 = 0, k2 = 0;

+      Double score = 0.0;

+      while (k1 < pos1.size() && k2 < pos2.size()) {

+        // first check if the same POS

+        String sim = posManager.similarPOS(pos1.get(k1), pos2.get(k2));

+        String lemmaMatch = lemmaFormManager.matchLemmas(null, chunk1

+            .getLemmas().get(k1), chunk2.getLemmas().get(k2), sim);

+        // if (LemmaFormManager.acceptableLemmaAndPOS(sim, lemmaMatch)){

+        if ((sim != null)

+            && (lemmaMatch == null || (lemmaMatch != null && !lemmaMatch

+                .equals("fail")))) {

+          // if (sim!=null){ // && (lemmaMatch!=null &&

+          // !lemmaMatch.equals("fail"))){

+          commonPOS.add(pos1.get(k1));

+          if (chunk1.getLemmas().size() > k1 && chunk2.getLemmas().size() > k2

+              && lemmaMatch != null) {

+            commonLemmas.add(lemmaMatch);

+

+          } else {

+            commonLemmas.add("*");

+

+          }

+          k1++;

+          k2++;

+        } else if (Math.random() > 0.5) {

+          k1++;

+        } else {

+          k2++;

+        }

+

+      }

+      ParseTreePath currResult = new ParseTreePath(commonLemmas, commonPOS,

+          0, 0);

+      score = parseTreeChunkListScorer.getScore(currResult);

+      if (score > globalScore) {

+        // System.out.println(chunk1 + "  + \n"+ chunk2 + " = \n" +

+        // result+" score = "+ score +"\n\n");

+        result = currResult;

+        globalScore = score;

+      }

+    }

+

+    for (int timesRun = 0; timesRun < timesRepetitiveRun; timesRun++) {

+      List<String> commonPOS = new ArrayList<String>(), commonLemmas = new ArrayList<String>();

+      int k1 = pos1.size() - 1, k2 = pos2.size() - 1;

+      Double score = 0.0;

+      while (k1 >= 0 && k2 >= 0) {

+        // first check if the same POS

+        String sim = posManager.similarPOS(pos1.get(k1), pos2.get(k2));

+        String lemmaMatch = lemmaFormManager.matchLemmas(null, chunk1

+            .getLemmas().get(k1), chunk2.getLemmas().get(k2), sim);

+        // if (acceptableLemmaAndPOS(sim, lemmaMatch)){

+        if ((sim != null)

+            && (lemmaMatch == null || (lemmaMatch != null && !lemmaMatch

+                .equals("fail")))) {

+          commonPOS.add(pos1.get(k1));

+          if (chunk1.getLemmas().size() > k1 && chunk2.getLemmas().size() > k2

+              && lemmaMatch != null) {

+            commonLemmas.add(lemmaMatch);

+          } else {

+            commonLemmas.add("*");

+

+          }

+          k1--;

+          k2--;

+        } else if (Math.random() > 0.5) {

+          k1--;

+        } else {

+          k2--;

+        }

+

+      }

+      Collections.reverse(commonLemmas);

+      Collections.reverse(commonPOS);

+

+      ParseTreePath currResult = new ParseTreePath(commonLemmas, commonPOS,

+          0, 0);

+      score = parseTreeChunkListScorer.getScore(currResult);

+      if (score > globalScore) {

+        // System.out.println(chunk1 + "  + \n"+ chunk2 + " = \n" +

+        // currResult+" score = "+ score +"\n\n");

+        result = currResult;

+        globalScore = score;

+      }

+    }

+

+    // // System.out.println(chunk1 + "  + \n"+ chunk2 + " = \n" + result

+    // +" score = " +

+    // // parseTreeChunkListScorer.getScore(result)+"\n\n");

+    return result;

+  }

+

+  public Boolean acceptableLemmaAndPOS(String sim, String lemmaMatch) {

+    if (sim == null) {

+      return false;

+    }

+

+    if (lemmaMatch != null && !lemmaMatch.equals("fail")) {

+      return false;

+    }

+    // even if lemmaMatch==null

+    return true;

+    // if (sim!=null && (lemmaMatch!=null && !lemmaMatch.equals("fail"))){

+

+  }

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathMatcherDeterministic.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathMatcherDeterministic.java
new file mode 100644
index 0000000..fc32380
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathMatcherDeterministic.java
@@ -0,0 +1,280 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.parse_thicket.matching;

+

+import java.util.ArrayList;

+import java.util.List;

+

+import opennlp.tools.stemmer.PorterStemmer;

+import opennlp.tools.textsimilarity.POSManager;

+

+

+public class ParseTreePathMatcherDeterministic {

+

+  private GeneralizationListReducer generalizationListReducer = new GeneralizationListReducer();

+

+  private LemmaFormManager lemmaFormManager = new LemmaFormManager();

+

+  private POSManager posManager = new POSManager();

+

+  /**

+   * key matching function which takes two phrases, aligns them and finds a set

+   * of maximal common sub-phrases

+   * 

+   * @param chunk1

+   * @param chunk2

+   * @return

+   */

+

+  public List<ParseTreePath> generalizeTwoGroupedPhrasesDeterministic(

+      ParseTreePath chunk1, ParseTreePath chunk2) {

+    List<String> pos1 = chunk1.getPOSs();

+    List<String> pos2 = chunk2.getPOSs();

+    List<String> lem1 = chunk1.getLemmas();

+    List<String> lem2 = chunk2.getLemmas();

+

+    List<String> lem1stem = new ArrayList<String>();

+    List<String> lem2stem = new ArrayList<String>();

+

+    PorterStemmer ps = new PorterStemmer();

+    for (String word : lem1) {

+      try {

+        lem1stem.add(ps.stem(word.toLowerCase()).toString());

+      } catch (Exception e) {

+        // e.printStackTrace();

+

+        if (word.length() > 2)

+          System.err.println("Unable to stem: " + word);

+      }

+    }

+    try {

+      for (String word : lem2) {

+        lem2stem.add(ps.stem(word.toLowerCase()).toString());

+      }

+    } catch (Exception e) {

+      System.err.println("problem processing word " + lem2.toString());

+    }

+

+    List<String> overlap = new ArrayList(lem1stem);

+    overlap.retainAll(lem2stem);

+

+    if (overlap == null || overlap.size() < 1)

+      return null;

+

+    List<Integer> occur1 = new ArrayList<Integer>(), occur2 = new ArrayList<Integer>();

+    for (String word : overlap) {

+      Integer i1 = lem1stem.indexOf(word);

+      Integer i2 = lem2stem.indexOf(word);

+      occur1.add(i1);

+      occur2.add(i2);

+    }

+

+    // now we search for plausible sublists of overlaps

+    // if at some position the correspondence is inverse (one of the two positions

+    // decreases instead of increases)

+    // then we terminate current alignment accum and start a new one

+    List<List<int[]>> overlapsPlaus = new ArrayList<List<int[]>>();

+    // starts from 1, not 0

+    List<int[]> accum = new ArrayList<int[]>();

+    accum.add(new int[] { occur1.get(0), occur2.get(0) });

+    for (int i = 1; i < occur1.size(); i++) {

+

+      if (occur1.get(i) > occur1.get(i - 1)

+          && occur2.get(i) > occur2.get(i - 1))

+        accum.add(new int[] { occur1.get(i), occur2.get(i) });

+      else {

+        overlapsPlaus.add(accum);

+        accum = new ArrayList<int[]>();

+        accum.add(new int[] { occur1.get(i), occur2.get(i) });

+      }

+    }

+    if (accum.size() > 0) {

+      overlapsPlaus.add(accum);

+    }

+

+    List<ParseTreePath> results = new ArrayList<ParseTreePath>();

+    for (List<int[]> occur : overlapsPlaus) {

+      List<Integer> occr1 = new ArrayList<Integer>(), occr2 = new ArrayList<Integer>();

+      for (int[] column : occur) {

+        occr1.add(column[0]);

+        occr2.add(column[1]);

+      }

+

+      int ov1 = 0, ov2 = 0; // iterators over common words;

+      List<String> commonPOS = new ArrayList<String>(), commonLemmas = new ArrayList<String>();

+      // we start two words before first word

+      int k1 = occr1.get(ov1) - 2, k2 = occr2.get(ov2) - 2;

+      // if (k1<0) k1=0; if (k2<0) k2=0;

+      Boolean bReachedCommonWord = false;

+      while (k1 < 0 || k2 < 0) {

+        k1++;

+        k2++;

+      }

+      int k1max = pos1.size() - 1, k2max = pos2.size() - 1;

+      while (k1 <= k1max && k2 <= k2max) {

+        // first check if the same POS

+        String sim = posManager.similarPOS(pos1.get(k1), pos2.get(k2));

+        String lemmaMatch = lemmaFormManager.matchLemmas(ps, lem1.get(k1),

+            lem2.get(k2), sim);

+        if ((sim != null)

+            && (lemmaMatch == null || (lemmaMatch != null && !lemmaMatch

+                .equals("fail")))) {

+          commonPOS.add(pos1.get(k1));

+          if (lemmaMatch != null) {

+            commonLemmas.add(lemmaMatch);

+            // System.out.println("Added "+lemmaMatch);

+            if (k1 == occr1.get(ov1) && k2 == occr2.get(ov2))

+              bReachedCommonWord = true; // now we can have different increment

+                                         // operations

+            else {

+              if (occr1.size() > ov1 + 1 && occr2.size() > ov2 + 1

+                  && k1 == occr1.get(ov1 + 1) && k2 == occr2.get(ov2 + 1)) {

+                ov1++;

+                ov2++;

+                bReachedCommonWord = true;

+              }

+              // else

+              // System.err.println("Next match reached '"+lemmaMatch+

+              // "' | k1 - k2: "+k1 + " "+k2 +

+              // "| occur index ov1-ov2 "+

+              // ov1+" "+ov2+

+              // "| identified positions of match: occr1.get(ov1) - occr2.get(ov1) "

+              // +

+              // occr1.get(ov1) + " "+ occr2.get(ov1));

+            }

+          } else {

+            commonLemmas.add("*");

+          } // the same parts of speech, proceed to the next word in both

+            // expressions

+          k1++;

+          k2++;

+

+        } else if (!bReachedCommonWord) {

+          k1++;

+          k2++;

+        } // still searching

+        else {

+          // different parts of speech, jump to the next identified common word

+          ov1++;

+          ov2++;

+          if (ov1 > occr1.size() - 1 || ov2 > occr2.size() - 1)

+            break;

+          // now trying to find

+          int kk1 = occr1.get(ov1) - 2, // new positions of iterators

+          kk2 = occr2.get(ov2) - 2;

+          int countMove = 0;

+          while ((kk1 < k1 + 1 || kk2 < k2 + 1) && countMove < 2) { // if it is

+                                                                    // behind

+                                                                    // current

+                                                                    // position,

+                                                                    // synchronously

+                                                                    // move

+                                                                    // towards

+                                                                    // right

+            kk1++;

+            kk2++;

+            countMove++;

+          }

+          k1 = kk1;

+          k2 = kk2;

+

+          if (k1 > k1max)

+            k1 = k1max;

+          if (k2 > k2max)

+            k2 = k2max;

+          bReachedCommonWord = false;

+        }

+      }

+      ParseTreePath currResult = new ParseTreePath(commonLemmas, commonPOS,

+          0, 0);

+      results.add(currResult);

+    }

+

+    return results;

+  }

+

+  /**

+   * main function to generalize two expressions grouped by phrase types returns

+   * a list of generalizations for each phrase type with filtered

+   * sub-expressions

+   * 

+   * @param sent1

+   * @param sent2

+   * @return List<List<ParseTreeChunk>> list of list of POS-words pairs for each

+   *         resultant matched / overlapped phrase

+   */

+  public List<List<ParseTreePath>> matchTwoSentencesGroupedChunksDeterministic(

+      List<List<ParseTreePath>> sent1, List<List<ParseTreePath>> sent2) {

+    List<List<ParseTreePath>> results = new ArrayList<List<ParseTreePath>>();

+    // first iterate through component

+    for (int comp = 0; comp < 2 && // just np & vp

+        comp < sent1.size() && comp < sent2.size(); comp++) {

+      List<ParseTreePath> resultComps = new ArrayList<ParseTreePath>();

+      // then iterate through each phrase in each component

+      for (ParseTreePath ch1 : sent1.get(comp)) {

+        for (ParseTreePath ch2 : sent2.get(comp)) { // simpler version

+          List<ParseTreePath> chunkToAdd = generalizeTwoGroupedPhrasesDeterministic(

+              ch1, ch2);

+

+          if (chunkToAdd == null)

+            chunkToAdd = new ArrayList<ParseTreePath>();

+          // System.out.println("ch1 = "+

+          // ch1.toString()+" | ch2="+ch2.toString()

+          // +"\n result = "+chunkToAdd.toString() + "\n");

+          /*

+           * List<ParseTreeChunk> chunkToAdd1 =

+           * ParseTreeMatcherDeterministic.generalizeTwoGroupedPhrasesDeterministic

+           * ( ParseTreeMatcher.prepositionalNNSTransform(ch1), ch2); if

+           * (chunkToAdd1!=null) chunkToAdd.addAll(chunkToAdd1);

+           * List<ParseTreeChunk> chunkToAdd2 =

+           * ParseTreeMatcherDeterministic.generalizeTwoGroupedPhrasesDeterministic

+           * ( ParseTreeMatcher.prepositionalNNSTransform(ch2), ch1); if

+           * (chunkToAdd2!=null) chunkToAdd.addAll(chunkToAdd2);

+           */

+

+          // For generalized match not with orig sentences but with templates

+          // if (!LemmaFormManager.mustOccurVerifier(ch1, ch2, chunkToAdd))

+          // continue; // if the words which have to stay do not stay, proceed

+          // to other elements

+          Boolean alreadyThere = false;

+          for (ParseTreePath chunk : resultComps) {

+            if (chunkToAdd.contains(chunk)) {

+              alreadyThere = true;

+              break;

+            }

+

+            // }

+          }

+

+          if (!alreadyThere && chunkToAdd != null && chunkToAdd.size() > 0) {

+            resultComps.addAll(chunkToAdd);

+          }

+

+        }

+      }

+      List<ParseTreePath> resultCompsRed = generalizationListReducer

+          .applyFilteringBySubsumption(resultComps);

+

+      resultComps = resultCompsRed;

+      results.add(resultComps);

+    }

+

+    return results;

+  }

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java
new file mode 100644
index 0000000..fb97716
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java
@@ -0,0 +1,121 @@
+package opennlp.tools.parse_thicket.parse_thicket2graph;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.Collection;

+import java.util.List;

+import java.util.Set;

+

+import opennlp.tools.parse_thicket.ParseCorefsBuilder;

+import opennlp.tools.parse_thicket.ParseThicket;

+import opennlp.tools.parse_thicket.ParseTreeNode;

+import opennlp.tools.parse_thicket.matching.Matcher;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+

+import org.jgrapht.Graph;

+import org.jgrapht.alg.BronKerboschCliqueFinder;

+import org.jgrapht.graph.DefaultEdge;

+import org.jgrapht.graph.SimpleGraph;

+

+

+public class EdgeProductBuilder {

+	private Matcher matcher = new Matcher();

+	private ParseCorefsBuilder ptBuilder = ParseCorefsBuilder.getInstance();

+	private GraphFromPTreeBuilder graphBuilder = new GraphFromPTreeBuilder();

+	

+	

+	public Graph<ParseGraphNode[], DefaultEdge>  

+		buildEdgeProduct(Graph<ParseGraphNode, DefaultEdge> g1, Graph<ParseGraphNode, DefaultEdge> g2 ){

+			Graph<ParseGraphNode[], DefaultEdge> gp = 

+				new SimpleGraph<ParseGraphNode[], DefaultEdge>(DefaultEdge.class);

+		

+		Set<DefaultEdge> edges1 = g1.edgeSet();

+		Set<DefaultEdge> edges2 = g2.edgeSet();

+		// build nodes of product graph

+		for(DefaultEdge e1:edges1){

+			for(DefaultEdge e2:edges2){

+				ParseGraphNode sourceE1s = g1.getEdgeSource(e1), sourceE1t = g1.getEdgeTarget(e1);

+				ParseGraphNode sourceE2s = g2.getEdgeSource(e2), sourceE2t = g2.getEdgeTarget(e2);

+				

+				if (isNotEmpty(matcher.generalize(sourceE1s.getPtNodes(), sourceE2s.getPtNodes())) && 

+						isNotEmpty(matcher.generalize(sourceE1t.getPtNodes(), sourceE2t.getPtNodes()))

+					)

+					gp.addVertex(new ParseGraphNode[] {sourceE1s, sourceE1t, sourceE2s, sourceE2t } );

+			}

+		}

+		

+		Set<ParseGraphNode[]> productVerticesSet = gp.vertexSet();

+		List<ParseGraphNode[]> productVerticesList = new ArrayList<ParseGraphNode[]>(productVerticesSet);

+		for(int i=0; i<productVerticesList.size(); i++){

+			for(int j=i+1; j<productVerticesList.size(); j++){

+				ParseGraphNode[] prodVertexI = productVerticesList.get(i);

+				ParseGraphNode[] prodVertexJ = productVerticesList.get(j);

+				if (bothAjacentOrNeitherAdjacent(prodVertexI, prodVertexJ)){

+					gp.addEdge(prodVertexI, prodVertexJ);

+				}

+			}

+		}

+		

+		

+		return gp;

+		

+	}

+	/*

+	 * Finding the maximal clique is the slowest part

+	 */

+	

+	public Collection<Set<ParseGraphNode[]>> getMaximalCommonSubgraphs(Graph<ParseGraphNode[], DefaultEdge>  g){

+		BronKerboschCliqueFinder<ParseGraphNode[], DefaultEdge> finder =

+	            new BronKerboschCliqueFinder<ParseGraphNode[], DefaultEdge>(g);

+

+	        Collection<Set<ParseGraphNode[]>> cliques = finder.getBiggestMaximalCliques();

+	        return cliques;

+	}

+

+

+	private boolean bothAjacentOrNeitherAdjacent(ParseGraphNode[] prodVertexI,

+			ParseGraphNode[] prodVertexJ) {

+		List<ParseGraphNode> prodVertexIlist = 

+				new ArrayList<ParseGraphNode>(Arrays.asList(prodVertexI));

+		List<ParseGraphNode> prodVertexJlist = 

+				new ArrayList<ParseGraphNode>(Arrays.asList(prodVertexJ));

+		prodVertexIlist.retainAll(prodVertexJlist);

+		return (prodVertexIlist.size()==2 || prodVertexIlist.size()==4);

+	}

+

+

+	private boolean isNotEmpty(List<List<ParseTreeChunk>> generalize) {

+		if (generalize!=null && generalize.get(0)!=null && generalize.get(0).size()>0)

+			return true;

+		else

+			return false;

+	}

+	

+	public Collection<Set<ParseGraphNode[]>>  assessRelevanceViaMaximalCommonSubgraphs(String para1, String para2) {

+		// first build PTs for each text

+		ParseThicket pt1 = ptBuilder.buildParseThicket(para1);

+		ParseThicket pt2 = ptBuilder.buildParseThicket(para2);

+		// then build phrases and rst arcs

+		Graph<ParseGraphNode, DefaultEdge> g1 = graphBuilder.buildGraphFromPT(pt1);

+		Graph<ParseGraphNode, DefaultEdge> g2 = graphBuilder.buildGraphFromPT(pt2);

+		

+		Graph<ParseGraphNode[], DefaultEdge> gp =  buildEdgeProduct(g1, g2);

+		Collection<Set<ParseGraphNode[]>> col = getMaximalCommonSubgraphs(gp);

+		return col;

+		}

+	

+	public static void main(String[] args){

+		 EdgeProductBuilder b = new  EdgeProductBuilder();

+		 Collection<Set<ParseGraphNode[]>> col = b.assessRelevanceViaMaximalCommonSubgraphs("Iran refuses to accept the UN proposal to end its dispute over its work on nuclear weapons."+

+				"UN nuclear watchdog passes a resolution condemning Iran for developing its second uranium enrichment site in secret. " +

+				"A recent IAEA report presented diagrams that suggested Iran was secretly working on nuclear weapons. " +

+				"Iran envoy says its nuclear development is for peaceful purpose, and the material evidence against it has been fabricated by the US. "

+

+				, "Iran refuses the UN offer to end a conflict over its nuclear weapons."+

+						"UN passes a resolution prohibiting Iran from developing its uranium enrichment site. " +

+						"A recent UN report presented charts saying Iran was working on nuclear weapons. " +

+				"Iran envoy to UN states its nuclear development is for peaceful purpose, and the evidence against its claim is fabricated by the US. ");

+		System.out.print(col);

+	}

+}

+				

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java
new file mode 100644
index 0000000..bad6403
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java
@@ -0,0 +1,131 @@
+package opennlp.tools.parse_thicket.parse_thicket2graph;

+

+import java.io.PrintWriter;

+import java.util.List;

+

+import opennlp.tools.parse_thicket.PTTree;

+import opennlp.tools.parse_thicket.ParseThicket;

+import opennlp.tools.parse_thicket.ParseTreeNode;

+import org.jgrapht.Graph;

+import org.jgrapht.graph.DefaultDirectedWeightedGraph;

+import org.jgrapht.graph.DefaultEdge;

+import org.jgrapht.graph.SimpleGraph;

+

+

+import edu.stanford.nlp.trees.LabeledScoredTreeNode;

+import edu.stanford.nlp.trees.Tree;

+

+public class GraphFromPTreeBuilder {

+	

+	

+	public Graph<ParseGraphNode, DefaultEdge> buildGraphFromPT(ParseThicket pt){

+		PrintWriter out = new PrintWriter(System.out);

+

+		

+		List<Tree> ts = pt.getSentences();

+		ts.get(0).pennPrint(out);

+		Graph<ParseGraphNode, DefaultEdge> gfragment = buildGGraphFromTree(ts.get(0));

+		

+		//ParseTreeVisualizer applet = new ParseTreeVisualizer();

+		//applet.showGraph(gfragment);

+		

+		return gfragment;

+		

+	}

+	

+	

+	private Graph<ParseGraphNode, DefaultEdge> buildGGraphFromTree(Tree tree) {

+		Graph<ParseGraphNode, DefaultEdge> g =

+				new SimpleGraph<ParseGraphNode, DefaultEdge>(DefaultEdge.class);

+		ParseGraphNode root = new ParseGraphNode(tree,"S 0");

+		g.addVertex(root);

+		navigate(tree, g, 0, root);

+	        

+		return g;

+	}

+

+

+

+	private void navigate(Tree tree, Graph<ParseGraphNode, DefaultEdge> g, int l, ParseGraphNode currParent) {

+		//String currParent = tree.label().value()+" $"+Integer.toString(l);

+		//g.addVertex(currParent);

+		if (tree.getChildrenAsList().size()==1)

+			navigate(tree.getChildrenAsList().get(0), g, l+1, currParent);

+		else

+			if (tree.getChildrenAsList().size()==0)

+				return;

+		

+		for(Tree child: tree.getChildrenAsList()){

+			String currChild = null;

+			ParseGraphNode currChildNode = null;

+			try {

+				if (child.isLeaf()) 

+					continue;

+				if (child.label().value().startsWith("S"))

+					navigate(child.getChildrenAsList().get(0), g, l+1, currParent);

+				

+				if (!child.isPhrasal() || child.isPreTerminal())

+					currChild = child.toString()+" #"+Integer.toString(l);

+				else 

+					currChild = child.label().value()+" #"+Integer.toString(l);

+				currChildNode = new ParseGraphNode(child, currChild);

+				g.addVertex(currChildNode);

+				g.addEdge(currParent, currChildNode);

+			} catch (Exception e) {

+				// TODO Auto-generated catch block

+				e.printStackTrace();

+			}

+			navigate(child, g, l+1, currChildNode);

+		}

+	}

+

+

+	/*

+	private static void navigateChildren(PTTree[] trChildren, int indent, boolean parentLabelNull, boolean onlyLabelValue, List<LabeledScoredTreeNode> phrases) {

+	    boolean firstSibling = true;

+	    boolean leftSibIsPreTerm = true;  // counts as true at beginning

+	    for (PTTree currentTree : trChildren) {

+	      currentTree.navigate(indent, parentLabelNull, firstSibling, leftSibIsPreTerm, false, onlyLabelValue, phrases);

+	      leftSibIsPreTerm = currentTree.isPreTerminal();

+	      // CC is a special case for English, but leave it in so we can exactly match PTB3 tree formatting

+	      if (currentTree.value() != null && currentTree.value().startsWith("CC")) {

+	        leftSibIsPreTerm = false;

+	      }

+	      firstSibling = false;

+	    }

+	  }

+	

+	

+	  private void navigate(int indent, boolean parentLabelNull, boolean firstSibling, boolean leftSiblingPreTerminal, boolean topLevel, boolean onlyLabelValue, List<LabeledScoredTreeNode> phrases) {

+	    // the condition for staying on the same line in Penn Treebank

+	    boolean suppressIndent = (parentLabelNull || (firstSibling && isPreTerminal()) || (leftSiblingPreTerminal && isPreTerminal() && (label() == null || !label().value().startsWith("CC"))));

+	    if (suppressIndent) {

+	      //pw.print(" ");

+	      // pw.flush();

+	    } else {

+	      if (!topLevel) {

+	        //pw.println();

+	      }

+	      for (int i = 0; i < indent; i++) {

+	        //pw.print("  ");

+	        // pw.flush();

+	      }

+	    }

+	    if (isLeaf() || isPreTerminal()) {

+	      String terminalString = toStringBuilder(new StringBuilder(), onlyLabelValue).toString();

+	      //pw.print(terminalString);

+	      //pw.flush();

+	      return;

+	    }

+	    //pw.print("(");

+	    String nodeString = onlyLabelValue ? value() : nodeString();

+	    //pw.print(nodeString);

+	    // pw.flush();

+	    boolean parentIsNull = label() == null || label().value() == null;

+	    navigateChildren(children(), indent + 1, parentIsNull, true, phrases);

+	    //pw.print(")");

+	    

+	  }

+	  */

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java
new file mode 100644
index 0000000..9620499
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java
@@ -0,0 +1,51 @@
+package opennlp.tools.parse_thicket.parse_thicket2graph;

+

+import java.util.List;

+

+import opennlp.tools.parse_thicket.ParseTreeNode;

+import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder;

+

+

+import edu.stanford.nlp.trees.Tree;

+

+public class ParseGraphNode {

+	 PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder();

+	 

+	private Tree tree;

+	private String label;

+	private List<List<ParseTreeNode>> ptNodes;

+	

+	

+	

+	public List<List<ParseTreeNode>> getPtNodes() {

+		return ptNodes;

+	}

+

+	public ParseGraphNode(Tree tree, String label) {

+		super();

+		this.tree = tree;

+		this.label = label;

+		ptNodes =  phraseBuilder.buildPT2ptPhrasesForASentence(tree, null);

+	}

+

+	public Tree getTree() {

+		return tree;

+	}

+

+	public void setTree(Tree tree) {

+		this.tree = tree;

+	}

+

+	public String getLabel() {

+		return label;

+	}

+

+	public void setLabel(String label) {

+		this.label = label;

+	}

+

+	public String toString(){

+		return label;

+	}

+}

+	

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java
new file mode 100644
index 0000000..d34d974
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java
@@ -0,0 +1,194 @@
+/* ==========================================

+ * JGraphT : a free Java graph-theory library

+ * ==========================================

+ *

+ * Project Info:  http://jgrapht.sourceforge.net/

+ * Project Creator:  Barak Naveh (http://sourceforge.net/users/barak_naveh)

+ *

+ * (C) Copyright 2003-2008, by Barak Naveh and Contributors.

+ *

+ * This library is free software; you can redistribute it and/or modify it

+ * under the terms of the GNU Lesser General Public License as published by

+ * the Free Software Foundation; either version 2.1 of the License, or

+ * (at your option) any later version.

+ *

+ * This library is distributed in the hope that it will be useful, but

+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY

+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public

+ * License for more details.

+ *

+ * You should have received a copy of the GNU Lesser General Public License

+ * along with this library; if not, write to the Free Software Foundation,

+ * Inc.,

+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

+ */

+/* ----------------------

+ * JGraphAdapterDemo.java

+ * ----------------------

+ * (C) Copyright 2003-2008, by Barak Naveh and Contributors.

+ *

+ * Original Author:  Barak Naveh

+ * Contributor(s):   -

+ *

+ * $Id: JGraphAdapterDemo.java 725 2010-11-26 01:24:28Z perfecthash $

+ *

+ * Changes

+ * -------

+ * 03-Aug-2003 : Initial revision (BN);

+ * 07-Nov-2003 : Adaptation to JGraph 3.0 (BN);

+ *

+ */

+package opennlp.tools.parse_thicket.parse_thicket2graph;

+

+import java.awt.*;

+import java.awt.geom.*;

+import java.util.HashMap;

+import java.util.Map;

+import java.util.Set;

+

+import javax.swing.*;

+

+

+import org.jgraph.*;

+import org.jgraph.graph.*;

+

+import org.jgrapht.*;

+import org.jgrapht.ext.*;

+import org.jgrapht.graph.*;

+

+

+import org.jgrapht.graph.DefaultEdge;

+

+public class ParseTreeVisualizer

+extends JApplet

+{

+	//~ Static fields/initializers ---------------------------------------------

+

+	private static final long serialVersionUID = 3256346823498765434L;

+	private static final Color DEFAULT_BG_COLOR = Color.decode("#FAFBFF");

+	private static final Dimension DEFAULT_SIZE = new Dimension(1200, 800);

+

+	//~ Instance fields --------------------------------------------------------

+

+	//

+	private JGraphModelAdapter<String, DefaultEdge> jgAdapter;

+

+	public void  showGraph(Graph g){

+		ParseTreeVisualizer applet = new ParseTreeVisualizer();

+		applet.importGraph(g);

+

+		JFrame frame = new JFrame();

+		frame.getContentPane().add(applet);

+		frame.setTitle("Showing parse thicket");

+		frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

+		frame.pack();

+		frame.setVisible(true);

+	}

+

+	// TODO cast to ParseGraphNode

+	private void importGraph(Graph g) {

+		// create a visualization using JGraph, via an adapter

+		jgAdapter = new JGraphModelAdapter<String, DefaultEdge>(g);

+

+		JGraph jgraph = new JGraph(jgAdapter);

+

+		adjustDisplaySettings(jgraph);

+		getContentPane().add(jgraph);

+		resize(DEFAULT_SIZE);

+

+		Set<String> vertexSet = ( Set<String>)g.vertexSet();

+		int count=0;

+		Map<Integer, Integer> level_count = new HashMap<Integer, Integer> ();

+

+		for(String vertexStr: vertexSet){

+			Integer key = 0;

+			try {

+				if (vertexStr.indexOf('#')>-1)

+					key = Integer.parseInt(vertexStr.split("#")[1]);

+			} catch (Exception e) {

+				// TODO Auto-generated catch block

+				e.printStackTrace();

+			}

+			Integer howManyAlready = 0;

+

+			if (key>0){

+				 howManyAlready = level_count.get(key);

+				if (howManyAlready==null){

+					howManyAlready=0;

+					level_count.put(key, 1);

+				} else {

+					level_count.put(key, howManyAlready+1);

+				}

+			}

+			positionVertexAt(vertexStr, count+howManyAlready*50, count);

+			count+=20;

+		}

+

+

+	}

+

+	/**

+	 * An alternative starting point for this demo, to also allow running this

+	 * applet as an application.

+	 *

+	 * @param args ignored.

+	 */

+	public static void main(String [] args)

+	{

+		ParseTreeVisualizer applet = new ParseTreeVisualizer();

+		applet.init();

+

+		JFrame frame = new JFrame();

+		frame.getContentPane().add(applet);

+		frame.setTitle("JGraphT Adapter to JGraph Demo");

+		frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

+		frame.pack();

+		frame.setVisible(true);

+	}

+

+

+

+	private void adjustDisplaySettings(JGraph jg)

+	{

+		jg.setPreferredSize(DEFAULT_SIZE);

+

+		Color c = DEFAULT_BG_COLOR;

+		String colorStr = null;

+

+		try {

+			colorStr = getParameter("bgcolor");

+		} catch (Exception e) {

+		}

+

+		if (colorStr != null) {

+			c = Color.decode(colorStr);

+		}

+

+		jg.setBackground(c);

+	}

+

+	@SuppressWarnings("unchecked") // FIXME hb 28-nov-05: See FIXME below

+	private void positionVertexAt(Object vertex, int x, int y)

+	{

+		DefaultGraphCell cell = jgAdapter.getVertexCell(vertex);

+		AttributeMap attr = cell.getAttributes();

+		Rectangle2D bounds = GraphConstants.getBounds(attr);

+

+		Rectangle2D newBounds =

+				new Rectangle2D.Double(

+						x,

+						y,

+						bounds.getWidth(),

+						bounds.getHeight());

+

+		GraphConstants.setBounds(attr, newBounds);

+

+		// TODO: Clean up generics once JGraph goes generic

+		AttributeMap cellAttr = new AttributeMap();

+		cellAttr.put(cell, attr);

+		jgAdapter.edit(cellAttr, null, null, null);

+	}

+

+}

+

+// End JGraphAdapterDemo.java

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhraseConcept.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhraseConcept.java
new file mode 100644
index 0000000..ecba4b5
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhraseConcept.java
@@ -0,0 +1,45 @@
+package opennlp.tools.parse_thicket.pattern_structure;
+
+import java.util.*;
+import java.io.*;
+
+import opennlp.tools.textsimilarity.ParseTreeChunk;
+
+public class PhraseConcept {
+	int position;
+	//Set<Integer> intent;
+	List<List<ParseTreeChunk>> intent;
+	Set<Integer> parents;
+	public PhraseConcept() {
+		position = -1;
+		intent = new ArrayList<List<ParseTreeChunk>>();
+		parents = new HashSet<Integer>();
+	}
+	public void setPosition( int newPosition ){
+	       position = newPosition;
+	}
+	public void setIntent( List<List<ParseTreeChunk>> newIntent ){
+	       intent.clear();
+	       intent.addAll(newIntent);
+	}
+	public void setParents( Set<Integer> newParents ){
+	       //parents = newParents;
+		parents.clear();
+		parents.addAll(newParents);
+	}
+	public void printConcept() {
+		System.out.println("Concept position:" + position);
+		System.out.println("Concept intent:" + intent);
+		System.out.println("Concept parents:" + parents);
+	}
+	 public static void main(String []args) {
+		 PhraseConcept c = new PhraseConcept();
+		 c.printConcept();
+		 c.setPosition(10);
+		 c.printConcept();
+		 //List<List<ParseTreeChunk>> test = new List<List<ParseTreeChunk>>();
+		 //c.setIntent(test);
+		 c.printConcept();
+
+	 }
+}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructure.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructure.java
new file mode 100644
index 0000000..23fd5a3
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructure.java
@@ -0,0 +1,166 @@
+package opennlp.tools.parse_thicket.pattern_structure;
+
+import java.util.*;
+import java.io.*;
+
+import opennlp.tools.parse_thicket.ParseCorefsBuilder;
+import opennlp.tools.parse_thicket.ParseThicket;
+import opennlp.tools.parse_thicket.ParseTreeNode;
+import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder;
+import opennlp.tools.textsimilarity.ParseTreeChunk;
+import opennlp.tools.textsimilarity.ParseTreeMatcherDeterministic;
+
+
+/**
+ * Pattern structure (FCA-style concept lattice) whose concept descriptions
+ * are groups of parse-tree chunks rather than attribute sets.  The meet
+ * (intersection) of two descriptions is computed by deterministic syntactic
+ * generalization.
+ * NOTE(review): the insertion routine looks like an adaptation of the
+ * incremental AddIntent lattice-construction algorithm -- confirm against
+ * the van der Merwe/Obiedkov/Kourie paper.
+ */
+public class PhrasePatternStructure {
+	// number of objects (e.g. sentences) the lattice is built over
+	int objectCount;
+	// number of attributes; unused by the phrase-based intents below
+	int attributeCount;
+	// all concepts created so far; a concept's 'position' indexes this list
+	ArrayList<PhraseConcept> conceptList;
+	// computes the meet (generalization) of two grouped-chunk descriptions
+	ParseTreeMatcherDeterministic md; 
+	/** Creates a lattice seeded with a single bottom concept at position 0. */
+	public PhrasePatternStructure(int objectCounts, int attributeCounts) {
+		objectCount = objectCounts;
+		attributeCount = attributeCounts;
+		conceptList = new ArrayList<PhraseConcept>();
+		PhraseConcept bottom = new PhraseConcept();
+		md = new ParseTreeMatcherDeterministic();
+		/*Set<Integer> b_intent = new HashSet<Integer>();
+		for (int index = 0; index < attributeCount; ++index) {
+			b_intent.add(index);
+		}
+		bottom.setIntent(b_intent);*/
+		bottom.setPosition(0);
+		conceptList.add(bottom);
+	}
+	/**
+	 * Climbs the parent links from Generator to the most general concept
+	 * whose intent still contains the given intent; returns its position.
+	 */
+	public int GetMaximalConcept(List<List<ParseTreeChunk>> intent, int Generator) {
+		boolean parentIsMaximal = true;
+		while(parentIsMaximal) {
+			parentIsMaximal = false;
+			for (int parent : conceptList.get(Generator).parents) {
+				if (conceptList.get(parent).intent.containsAll(intent)) {
+					Generator = parent;
+					parentIsMaximal = true;
+					break;
+				}
+			}
+		}
+		return Generator;
+	}
+	/**
+	 * Inserts an intent into the lattice below the given generator concept,
+	 * recursively creating intermediate concepts for intersections, and
+	 * returns the position of the concept that ends up holding the intent.
+	 */
+	public int AddIntent(List<List<ParseTreeChunk>> intent, int generator) {
+		System.out.println("debug");
+		System.out.println("called for " + intent);
+		//printLattice();
+		// canonical generator: most general concept already containing intent
+		int generator_tmp = GetMaximalConcept(intent, generator);
+		generator = generator_tmp;
+		if (conceptList.get(generator).intent.equals(intent)) {
+			System.out.println("at generator:" + conceptList.get(generator).intent);
+			System.out.println("to add:" + intent);
+
+			System.out.println("already generated");
+			return generator;
+		}
+		Set<Integer> generatorParents = conceptList.get(generator).parents;
+		Set<Integer> newParents = new HashSet<Integer>();
+		for (int candidate : generatorParents) {
+			// candidate not subsumed by the new intent: recurse on the meet
+			// of the two descriptions so the lattice stays closed
+			if (!intent.containsAll(conceptList.get(candidate).intent)) {
+			//if (!conceptList.get(candidate).intent.containsAll(intent)) {
+				//Set<Integer> intersection = new HashSet<Integer>(conceptList.get(candidate).intent);
+				//List<List<ParseTreeChunk>> intersection = new ArrayList<List<ParseTreeChunk>>(conceptList.get(candidate).intent);
+				//intersection.retainAll(intent);
+				List<List<ParseTreeChunk>> intersection = md
+				.matchTwoSentencesGroupedChunksDeterministic(intent, conceptList.get(candidate).intent);
+				System.out.println("recursive call (inclusion)");
+				candidate = AddIntent(intersection, candidate);
+			}
+			// keep newParents an antichain: drop candidates subsumed by an
+			// existing parent, and drop existing parents the candidate subsumes
+			boolean addParents = true;
+			System.out.println("now iterating over parents");
+			Iterator<Integer> iterator = newParents.iterator();
+			while (iterator.hasNext()) {
+				Integer parent = iterator.next();
+				if (conceptList.get(parent).intent.containsAll(conceptList.get(candidate).intent)) {
+					addParents = false;
+					break;
+				}
+				else {
+					if (conceptList.get(candidate).intent.containsAll(conceptList.get(parent).intent)) {
+						iterator.remove();
+					}
+				}
+			}
+			/*for (int parent : newParents) {
+				System.out.println("parent = " + parent);
+				System.out.println("candidate intent:"+conceptList.get(candidate).intent);
+				System.out.println("parent intent:"+conceptList.get(parent).intent);
+				
+				if (conceptList.get(parent).intent.containsAll(conceptList.get(candidate).intent)) {
+					addParents = false;
+					break;
+				}
+				else {
+					if (conceptList.get(candidate).intent.containsAll(conceptList.get(parent).intent)) {
+						newParents.remove(parent);
+					}
+				}
+			}*/
+			if (addParents) {
+				newParents.add(candidate);
+			}
+		}
+		System.out.println("size of lattice: " + conceptList.size());
+		// create the concept for the new intent and rewire parent links
+		PhraseConcept newConcept = new PhraseConcept();
+		newConcept.setIntent(intent);
+		newConcept.setPosition(conceptList.size());
+		conceptList.add(newConcept);
+		conceptList.get(generator).parents.add(newConcept.position);
+		for (int newParent: newParents) {
+			if (conceptList.get(generator).parents.contains(newParent)) {
+				conceptList.get(generator).parents.remove(newParent);
+			}
+			conceptList.get(newConcept.position).parents.add(newParent);
+		}
+		return newConcept.position;
+	}
+	/** Prints object/attribute bounds and current concept count to stdout. */
+	public void printLatticeStats() {
+		System.out.println("Lattice stats");
+		System.out.println("max_object_index = " + objectCount);
+		System.out.println("max_attribute_index = " + attributeCount);
+		System.out.println("Current concept count = " + conceptList.size());
+	}
+	/** Prints every concept of the lattice, in position order. */
+	public void printLattice() {
+		for (int i = 0; i < conceptList.size(); ++i) {
+			printConceptByPosition(i);
+		}
+	}
+	/** Prints the single concept stored at the given position. */
+	public void printConceptByPosition(int index) {
+		System.out.println("Concept at position " + index);
+		conceptList.get(index).printConcept();
+	}
+	/**
+	 * Converts a paragraph's phrases into a grouped-chunk description:
+	 * index 0 = NP chunks, 1 = VP chunks, 2 = PP chunks (other phrase
+	 * types are dropped).
+	 */
+	public List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(
+			List<List<ParseTreeNode>> phrs) {
+		List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();
+		List<ParseTreeChunk> nps = new ArrayList<ParseTreeChunk>(), vps = new ArrayList<ParseTreeChunk>(), 
+				pps = new ArrayList<ParseTreeChunk>();
+		for(List<ParseTreeNode> ps:phrs){
+			ParseTreeChunk ch = convertNodeListIntoChunk(ps);
+			String ptype = ps.get(0).getPhraseType();
+			if (ptype.equals("NP")){
+				nps.add(ch);
+			} else if (ptype.equals("VP")){
+				vps.add(ch);
+			} else if (ptype.equals("PP")){
+				pps.add(ch);
+			}
+		}
+		results.add(nps); results.add(vps); results.add(pps);
+		return results;
+	}
+	/** Flattens a phrase (node list) into a chunk of its lemmas and POS tags. */
+	private ParseTreeChunk convertNodeListIntoChunk(List<ParseTreeNode> ps) {
+		List<String> lemmas = new ArrayList<String>(),  poss = new ArrayList<String>();
+		for(ParseTreeNode n: ps){
+			lemmas.add(n.getWord());
+			poss.add(n.getPos());
+		}
+		ParseTreeChunk ch = new ParseTreeChunk(lemmas, poss, 0, 0);
+		ch.setMainPOS(ps.get(0).getPhraseType());
+		return ch;
+	}
+	
+}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java
new file mode 100644
index 0000000..3a36e80
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java
@@ -0,0 +1,162 @@
+package opennlp.tools.parse_thicket.rhetoric_structure;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.HashMap;

+import java.util.List;

+import java.util.Map;

+

+import opennlp.tools.parse_thicket.ArcType;

+import opennlp.tools.parse_thicket.Pair;

+import opennlp.tools.parse_thicket.ParseThicket;

+import opennlp.tools.parse_thicket.ParseTreeNode;

+import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;

+

+import org.jgrapht.Graph;

+import org.jgrapht.graph.DefaultEdge;

+import org.jgrapht.graph.SimpleGraph;

+

+

+import edu.stanford.nlp.trees.Tree;

+

+/**
+ * Induces rhetoric-structure (RST) arcs between sentences of a parse
+ * thicket, guided by in-sentence RST markers and existing coreference arcs.
+ */
+public class RhetoricStructureArcsBuilder {
+	private RhetoricStructureMarker markerBuilderForSentence = new RhetoricStructureMarker();
+
+	/**
+	 * Extracts RST markers (relation name + boundary span) for every sentence
+	 * of the thicket.
+	 *
+	 * @return map from sentence index to the markers found in that sentence
+	 */
+	private Map<Integer, List<Pair<String, Integer[]>>> buildMarkers(ParseThicket pt) {
+		Map<Integer, List<Pair<String, Integer[]>>> sentNumMarkers =
+				new HashMap<Integer, List<Pair<String, Integer[]>>>();
+		int count = 0;
+		for (List<ParseTreeNode> sent : pt.getNodesThicket()) {
+			sentNumMarkers.put(count,
+					markerBuilderForSentence.extractRSTrelationInSentenceGetBoundarySpan(sent));
+			count++;
+		}
+		return sentNumMarkers;
+	}
+
+	/*
+	 * Induced RST algorithm
+	 *
+	 * Input: obtained RST markers (numbers of words which split a sentence
+	 * into potential RST relation arguments) + the current Parse Thicket
+	 * with arcs for coreferences.
+	 *
+	 * We search for parts of sentences on the opposite side of RST markers:
+	 *
+	 * $sentPosFrom$  marker
+	 *  | == == == [ ] == == == |
+	 *     \				\
+	 *       \				  \
+	 *       coref          RST arc being formed
+	 *           \ 				\
+	 *             \			 \
+	 *     | == == == == == [  ] == == ==|
+	 *
+	 *       Mark yelled at his dog, but it disobeyed
+	 *        |							\
+	 *       coref                 RST arc for CONTRAST being formed
+	 *        | 							\
+	 *       He was upset, however he did not show it
+	 *       $sentPosTo$
+	 */
+	/**
+	 * Builds inter-sentence RST arcs: for each pair of sentences linked by a
+	 * "coref*" arc, forms at most one RST arc whose phrase endpoints lie on
+	 * the opposite side of an RST marker in each sentence.
+	 *
+	 * @param arcs existing inter-sentence arcs; only "coref*" arcs are used
+	 * @param sentNumPhrasesMap phrases of each sentence, keyed by sentence index
+	 * @param pt the parse thicket being processed
+	 * @return the newly built RST arcs
+	 */
+	public List<WordWordInterSentenceRelationArc> buildRSTArcsFromMarkersAndCorefs(
+			List<WordWordInterSentenceRelationArc> arcs,
+			Map<Integer, List<List<ParseTreeNode>>> sentNumPhrasesMap,
+			ParseThicket pt) {
+		List<WordWordInterSentenceRelationArc> arcsRST = new ArrayList<WordWordInterSentenceRelationArc>();
+
+		Map<Integer, List<Pair<String, Integer[]>>> rstMarkersMap = buildMarkers(pt);
+
+		for (int nSentFrom = 0; nSentFrom < pt.getSentences().size(); nSentFrom++) {
+			for (int nSentTo = nSentFrom + 1; nSentTo < pt.getSentences().size(); nSentTo++) {
+				// for a given arc, find phrases connected by this arc
+				List<List<ParseTreeNode>> phrasesFrom = sentNumPhrasesMap.get(nSentFrom);
+				List<List<ParseTreeNode>> phrasesTo = sentNumPhrasesMap.get(nSentTo);
+				List<Pair<String, Integer[]>> markersFrom = rstMarkersMap.get(nSentFrom);
+				List<Pair<String, Integer[]>> markersTo = rstMarkersMap.get(nSentTo);
+				for (WordWordInterSentenceRelationArc arc : arcs) {
+					// arc should be coref and link exactly these two sentences
+					if (nSentFrom != arc.getCodeFrom().getFirst()
+							|| nSentTo != arc.getCodeTo().getFirst()
+							|| !arc.getArcType().getType().startsWith("coref"))
+						continue;
+					int sentPosFrom = arc.getCodeFrom().getSecond();
+					int sentPosTo = arc.getCodeTo().getSecond();
+					// not more than a single RST link for a pair of sentences
+					boolean bFound = false;
+					for (List<ParseTreeNode> vpFrom : phrasesFrom) {
+						if (bFound)
+							break;
+						for (List<ParseTreeNode> vpTo : phrasesTo) {
+							for (Pair<String, Integer[]> mFrom : markersFrom) {
+								for (Pair<String, Integer[]> mTo : markersTo) {
+									// the phrases should be on the opposite side of
+									// the RST marker from the coref endpoint
+									if (isSequence(new Integer[] { sentPosFrom, vpFrom.get(0).getId(), mFrom.getSecond()[0] })
+											&& isSequence(new Integer[] { sentPosTo, vpTo.get(0).getId(), mTo.getSecond()[0] })) {
+										ArcType arcType = new ArcType("rst", mFrom.getFirst(), 0, 0);
+
+										WordWordInterSentenceRelationArc arcRST =
+												new WordWordInterSentenceRelationArc(
+														new Pair<Integer, Integer>(nSentFrom, mFrom.getSecond()[1]),
+														new Pair<Integer, Integer>(nSentTo, mTo.getSecond()[1]), "", "", arcType);
+										arcsRST.add(arcRST);
+										bFound = true;
+										break;
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+
+		// FIX: return the arcs built above; the original returned the input
+		// 'arcs' list, silently discarding every RST arc just created.
+		return arcsRST;
+	}
+
+	/**
+	 * Checks whether the given word positions occur in a monotone order
+	 * (entirely non-decreasing or entirely non-increasing).  Returns false
+	 * for null/short input or when any position is null or 0.
+	 */
+	// TODO make more sensitive algo
+	private static boolean isSequence(Integer[] integers) {
+		//TODO better construction of array
+		if (integers == null || integers.length < 3)
+			return false;
+		for (Integer i : integers)
+			// explicit null check replaces the original catch(Exception)
+			if (i == null || i == 0)
+				return false;
+
+		boolean decreasesSomewhere = false;
+		boolean increasesSomewhere = false;
+		for (int i = 1; i < integers.length; i++) {
+			if (integers[i - 1] > integers[i])
+				decreasesSomewhere = true;
+			else if (integers[i - 1] < integers[i])
+				increasesSomewhere = true;
+		}
+		// monotone unless the sequence both rises and falls
+		return !(decreasesSomewhere && increasesSomewhere);
+	}
+
+	public static void main(String[] args) {
+
+	}
+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java
new file mode 100644
index 0000000..060d32f
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java
@@ -0,0 +1,129 @@
+package opennlp.tools.parse_thicket.rhetoric_structure;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.List;

+

+import opennlp.tools.parse_thicket.IGeneralizer;

+import opennlp.tools.parse_thicket.Pair;

+import opennlp.tools.parse_thicket.ParseTreeNode;

+

+

+/**
+ * Detects rhetoric-structure (RST) markers in a sentence by matching a
+ * fixed table of word/POS templates against the sentence's parse nodes.
+ * Template entries use "*" as a wildcard for word or POS.
+ */
+public class RhetoricStructureMarker implements IGeneralizer<Integer[]>  {
+	//private static String rstRelations[] = {"antithesis", "concession", "contrast", "elaboration"};
+	// table of (relation name, word/POS template) pairs, filled in the constructor
+	List<Pair<String, ParseTreeNode[]>> rstMarkers = new ArrayList<Pair<String, ParseTreeNode[]>>();
+
+	/** Populates the hard-coded marker templates. */
+	public  RhetoricStructureMarker(){
+
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>("contrast", new ParseTreeNode[]{new ParseTreeNode(",",","),  new ParseTreeNode("than",",")  }));
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "antithesis", new ParseTreeNode[]{new ParseTreeNode("although",","),  new ParseTreeNode("*","*")  }));
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode(",",","),  new ParseTreeNode("however","*")  }));
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode("however","*"), new ParseTreeNode(",",","),
+					new ParseTreeNode("*","prp"),   }));
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "elaboration", new ParseTreeNode[]{new ParseTreeNode(",",","),  new ParseTreeNode("*","NN")  }));
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "elaboration", new ParseTreeNode[]{new ParseTreeNode("as","*"),  new ParseTreeNode("a","*")  }));
+	
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>("explanation", new ParseTreeNode[]{new ParseTreeNode(",",","),  new ParseTreeNode("because",",")  }));
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "example", new ParseTreeNode[]{new ParseTreeNode("for","IN"),  new ParseTreeNode("example","NN")  }));
+		// NOTE(review): "ye" below looks like a typo for "yet" -- confirm
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode(",",","),  new ParseTreeNode("ye","*")  }));
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode("yet","*"), new ParseTreeNode(",",","),
+					new ParseTreeNode("*","prp"),   }));
+		
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "contrast", new ParseTreeNode[]{new ParseTreeNode("yet","*"), new ParseTreeNode("i","*"),
+				  }));
+		
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "explanation", new ParseTreeNode[]{new ParseTreeNode(",",","),  new ParseTreeNode("where","*")  }));
+		//as long as
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "temp_sequence", new ParseTreeNode[]{/*new ParseTreeNode("as","*"),*/ new ParseTreeNode("*","RB"), 
+				new ParseTreeNode("as","IN"),}));
+		rstMarkers.add(new Pair<String, ParseTreeNode[]>( "temp_sequence", new ParseTreeNode[]{/*new ParseTreeNode("as","*"),*/ new ParseTreeNode("*","VB*"), 
+				new ParseTreeNode("until","IN"),}));
+
+	}
+
+	/* For a sentence, we obtain a list of markers with the CA word and position in the sentence
+	 * Output span is an integer array with start/end occurrence of an RST marker in a sentence
+	 * */
+	// Only the FIRST occurrence of each template is reported (generalize()
+	// returns at most one span).
+	public List<Pair<String, Integer[]>> extractRSTrelationInSentenceGetBoundarySpan(List<ParseTreeNode> sentence){
+		List<Pair<String, Integer[]>> results = new ArrayList<Pair<String, Integer[]>> ();
+		
+		for(Pair<String, ParseTreeNode[]> template: rstMarkers){
+			List<Integer[]> spanList = generalize(sentence,template.getSecond() );
+			if (!spanList.isEmpty())
+				results.add(new Pair<String, Integer[]>(template.getFirst(), spanList.get(0)));
+		}
+		return results;
+	}
+
+	/* Rule application in the form of generalization
+	 * Generalizing a sentence with a rule (a template), we obtain the occurrence of rhetoric marker
+	 *
+	 * o1 - sentence
+	 * o2 - rule/template, specifying lemmas and/or POS, including punctuation
+	 * @see opennlp.tools.parse_thicket.IGeneralizer#generalize(java.lang.Object, java.lang.Object)
+	 * returns the span Integer[] 
+	 */
+	@Override
+	public List<Integer[]> generalize(Object o1, Object o2) {
+		List<Integer[]> result = new ArrayList<Integer[]>();
+
+		List<ParseTreeNode> sentence = (List<ParseTreeNode> )o1;
+		ParseTreeNode[] template = (ParseTreeNode[]) o2;
+
+		// NOTE(review): bBeingMatched is assigned but never read after the
+		// inner loop -- dead state; the success test is templateIterator only.
+		boolean bBeingMatched = false;
+		// try to anchor the template at every sentence position
+		for(int wordIndexInSentence=0; wordIndexInSentence<sentence.size(); wordIndexInSentence++){
+			ParseTreeNode word = sentence.get(wordIndexInSentence);
+			int wordIndexInSentenceEnd = wordIndexInSentence; //init iterators for internal loop
+			int templateIterator=0;
+			while (wordIndexInSentenceEnd<sentence.size() && templateIterator< template.length){
+				ParseTreeNode tword = template[templateIterator];
+				ParseTreeNode currWord=sentence.get(wordIndexInSentenceEnd);
+				List<ParseTreeNode> gRes = tword.generalize(tword, currWord);
+				// a degenerate ("*","*") generalization counts as a mismatch
+				if (gRes.isEmpty()|| gRes.get(0)==null || ( gRes.get(0).getWord().equals("*") 
+						&& gRes.get(0).getPos().equals("*") )){
+					bBeingMatched = false;
+					break;
+				} else {
+					bBeingMatched = true;
+				}
+				wordIndexInSentenceEnd++;
+				templateIterator++;
+			}
+			// template iteration is done
+			// the only condition for successful match is IF we are at the end of template
+			if (templateIterator == template.length){
+				result.add(new Integer[]{wordIndexInSentence, wordIndexInSentenceEnd-1});
+				return result;
+			}
+
+			// no match for current sentence word: proceed to the next
+		}
+		return result; 
+	}
+	
+	/** Renders a marker list as "[relation:start end  | ...]" for debugging. */
+	public String markerToString(List<Pair<String, Integer[]>> res){
+		StringBuffer buf = new StringBuffer();
+		buf.append("[");
+		for(Pair<String, Integer[]> marker: res){
+			buf.append(marker.getFirst()+":");
+			for(int a: marker.getSecond()){
+				buf.append(a+" ");
+			}
+			buf.append (" | ");
+		}
+		buf.append("]");
+		return buf.toString();
+	}
+
+	// Smoke test on a hand-built sentence with "than" and "however" markers.
+	public static void main(String[] args){
+		ParseTreeNode[] sent = 	
+		new ParseTreeNode[]{new ParseTreeNode("he","prn"), new ParseTreeNode("was","vbz"), new ParseTreeNode("more","jj"), 
+				new ParseTreeNode(",",","),  new ParseTreeNode("than",","), new ParseTreeNode("little","jj"), new ParseTreeNode("boy","nn"),
+				new ParseTreeNode(",",","), new ParseTreeNode("however","*"), new ParseTreeNode(",",","),
+				new ParseTreeNode("he","prp"), new ParseTreeNode("was","vbz"), new ParseTreeNode("adult","jj")
+		};
+		
+		List<Pair<String, Integer[]>> res = new RhetoricStructureMarker().extractRSTrelationInSentenceGetBoundarySpan(Arrays.asList(sent));
+		System.out.println( new RhetoricStructureMarker().markerToString(res));
+	} 
+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
index 9e793b3..c9b1f76 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
@@ -17,28 +17,90 @@
 

 package opennlp.tools.similarity.apps;

 

-import java.io.BufferedReader;

-import java.io.InputStreamReader;

-import java.net.URL;

-import java.net.URLConnection;

-import java.net.URLEncoder;

 import java.util.ArrayList;

 import java.util.List;

 import java.util.logging.Logger;

 

-import org.apache.commons.lang.StringUtils;

-import org.json.JSONArray;

-import org.json.JSONObject;

+import net.billylieurance.azuresearch.AzureSearchImageQuery;

+import net.billylieurance.azuresearch.AzureSearchImageResult;

+import net.billylieurance.azuresearch.AzureSearchResultSet;

+import net.billylieurance.azuresearch.AzureSearchWebQuery;

+import net.billylieurance.azuresearch.AzureSearchWebResult;

 

 public class BingQueryRunner {

-  protected static final String APP_ID = "e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=";

-    //"DD4E2A5DF8B7E5801ED443E47DC600D5F3E62713";

-  // TODO user needs to have own APP_ID from Bing API

+	

+	protected static String BING_KEY = "e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=";

+	private static final Logger LOG = Logger

+		      .getLogger("opennlp.tools.similarity.apps.BingQueryRunner");

+	protected AzureSearchWebQuery aq = new AzureSearchWebQuery();

+	private AzureSearchImageQuery iq = new AzureSearchImageQuery();

+	

+	// Overrides the Azure/Bing API key.
+	// NOTE(review): assigns a *static* field from an instance method, so the
+	// key is shared by every BingQueryRunner instance -- confirm intended.
+	public void setKey(String key){
+		BING_KEY = key;
+	}

+	

+	// Sets the search market/locale (e.g. "es-MX") on the underlying web query.
+	public void setLang(String language){
+		aq.setMarket(language);
+	}

+  

+	/**
+	 * Runs a Bing web search via the Azure search client and converts the
+	 * results (description/title/url) into HitBase objects.
+	 *
+	 * SECURITY/NOTE(review): on failure this falls back to a hard-coded API
+	 * key checked into source; keys should not live in the repository.
+	 * Exceptions are printed and swallowed, so callers may receive an empty
+	 * list on error rather than an exception.
+	 *
+	 * @param query search expression
+	 * @param nRes  requested number of results per page
+	 */
+	public List<HitBase> runSearch(String query, int nRes) {
+		aq.setAppid(BING_KEY);
+		aq.setQuery(query);		
+		aq.setPerPage(nRes);
+		try {
+			aq.doQuery();
+		} catch (Exception e) { // most likely exception is due to limit on bing key
+			// retry once with the fallback key before giving up
+			aq.setAppid("pjtCgujmf9TtfjCVBdcQ2rBUQwGLmtLtgCG4Ex7kekw");
+			try {
+				aq.doQuery();
+			} catch (Exception e1) {
+				// TODO Auto-generated catch block
+				e1.printStackTrace();
+			}
+			e.printStackTrace();
+		}
+		
+		//org.xml.sax.SAXParseException
+		
+		List<HitBase> results = new ArrayList<HitBase> ();
+		AzureSearchResultSet<AzureSearchWebResult> ars = aq.getQueryResult();
+		
+		// map each Azure result to the project's HitBase representation
+		for (AzureSearchWebResult anr : ars){
+		    HitBase h = new HitBase();
+		    h.setAbstractText(anr.getDescription());
+		    h.setTitle(anr.getTitle());
+		    h.setUrl(anr.getUrl());
+		    results.add(h);
+		}
+		return results;
+	}

+	

+	

+	/**
+	 * Runs a Bing image search for the query and returns the raw Azure
+	 * result set. Unlike runSearch, errors are NOT caught here -- doQuery
+	 * failures propagate to the caller.
+	 */
+	public AzureSearchResultSet<AzureSearchImageResult> runImageSearch(String query) {
+		iq.setAppid(BING_KEY);
+		iq.setQuery(query);		
+		iq.doQuery();
+		
+		AzureSearchResultSet<AzureSearchImageResult> ars = iq.getQueryResult();
+
+		return ars;
+	}

+	// Approximates the number of pages indexed for a site via a "site:" query.
+	// NOTE(review): this counts only the results actually returned for one
+	// request (per-page limited by the API), not the true total -- verify.
+	public int getTotalPagesAtASite(String site)
+	{
+		return runSearch("site:"+site, 1000000).size();
+	}

+	

+

+	/** Convenience overload: web search with a default of 100 results. */
+	public List<HitBase> runSearch(String query) {
+		return runSearch(query, 100);
+	}	

+	

+	

+	

 

   private float snapshotSimilarityThreshold = 0.4f;

 

-  private static final Logger LOG = Logger

-      .getLogger("opennlp.tools.similarity.apps.BingQueryRunner");

+  

 

   public void setSnapshotSimilarityThreshold(float thr) {

     snapshotSimilarityThreshold = thr;

@@ -53,8 +115,7 @@
   }

 

   /*

-   * 

-   */

+ 

 

   private String constructBingUrl(String query, String domainWeb, String lang,

       int numbOfHits) throws Exception {

@@ -73,9 +134,8 @@
     return yahooRequest;

   }

 

-  /*

-     *  

-     */

+ 

+    

   public ArrayList<String> search(String query, String domainWeb, String lang,

       int numbOfHits) throws Exception {

     URL url = new URL(constructBingUrl(query, domainWeb, lang, numbOfHits));

@@ -145,6 +205,7 @@
     hits = HitBase.removeDuplicates(hits);

     return hits;

   }

+  */

 

   // TODO comment back when dependencies resolved (CopyrightViolations)

   /*

@@ -185,10 +246,16 @@
 

   public static void main(String[] args) {

     BingQueryRunner self = new BingQueryRunner();

+    

+    AzureSearchResultSet<AzureSearchImageResult> res = self.runImageSearch("albert einstein");

+    System.out.println(res);

     try {

+    	self.setLang("es-MX");

+    	self.setKey(

+    			"e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=");

       List<HitBase> resp = self

-          .runSearch("Rates rise at weekly Treasury auction");

-      // "British Actress Lynn Redgrave dies at 67");

+          .runSearch(//"art scene");

+        		  "biomecanica las palancas");

       System.out.print(resp.get(0));

     } catch (Exception e) {

       // TODO Auto-generated catch block

@@ -196,6 +263,12 @@
     }

 

     /*

+     * 

+     * de-DE

+     * es-MX

+     * es-SP

+     */

+    /*

      * String[] submittedNews = new String[]{

      * "Asian airports had already increased security following the Christmas Day attack, but South Korea and Pakistan are thinking about additional measures."

      * ,

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
new file mode 100644
index 0000000..4bff64f
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
@@ -0,0 +1,467 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.similarity.apps;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.List;

+import java.util.logging.Logger;

+

+import opennlp.tools.parse_thicket.Triple;

+import opennlp.tools.similarity.apps.utils.PageFetcher;

+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;

+import opennlp.tools.similarity.apps.utils.Utils;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;

+import opennlp.tools.textsimilarity.SentencePairMatchResult;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

+/*

+ * This class does content generation by using web mining and syntactic generalization to get sentences from the web, convert and combine 

+ * them in the form 

+ * expected to be readable by humans and not distinguishable from genuine content by search engines

+ * 

+ */

+

+public class ContentGenerator /*extends RelatedSentenceFinder*/ {

+	private static Logger LOG = Logger

+			.getLogger("opennlp.tools.similarity.apps.ContentGenerator");

+	PageFetcher pFetcher = new PageFetcher();

+	ParserChunker2MatcherProcessor sm = ParserChunker2MatcherProcessor

+			.getInstance();

+	protected ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

+	protected ParseTreeChunk parseTreeChunk = new ParseTreeChunk();

+	protected static StringDistanceMeasurer stringDistanceMeasurer = new StringDistanceMeasurer();

+	protected BingQueryRunner yrunner = new BingQueryRunner();

+	protected ContentGeneratorSupport support = new ContentGeneratorSupport();

+	protected int MAX_STEPS = 1;

+	protected int MAX_SEARCH_RESULTS = 1;

+	protected float RELEVANCE_THRESHOLD = 1.1f;

+

+	//private static final int MAX_FRAGMENT_SENTS = 10;

+

+	/**
+	 * @param ms     maximum number of web-mining steps (verb expansions)
+	 * @param msr    maximum number of search results used per step
+	 * @param thresh relevance threshold for accepting mined sentences
+	 * @param key    Bing/Azure API key passed to the search runner
+	 */
+	public ContentGenerator(int ms, int msr, float thresh, String key) {
+		this.MAX_STEPS = ms;
+		this.MAX_SEARCH_RESULTS = msr;
+		this.RELEVANCE_THRESHOLD=thresh;
+		yrunner.setKey(key);
+	}

+

+	/** Uses the field defaults (MAX_STEPS=1, MAX_SEARCH_RESULTS=1, threshold 1.1). */
+	public ContentGenerator() {
+		// TODO Auto-generated constructor stub
+	}

+	/** Propagates the search market/locale to the underlying Bing runner. */
+	public void setLang(String lang) {
+		yrunner.setLang(lang);
+
+	}

+

+

+	/**

+	 * Main content generation function which takes a seed as a person, rock

+	 * group, or other entity name and produce a list of text fragments by web

+	 * mining for <br>

+	 * 

+	 * @param String

+	 *          entity name

+	 * @return List<HitBase> of text fragment structures which contain approved

+	 *         (in terms of relevance) mined sentences, as well as original search

+	 *         results objects such as doc titles, abstracts, and urls.

+	 */

+

+	/**
+	 * Main content generation function which takes a seed as a person, rock
+	 * group, or other entity name and produces a list of text fragments by
+	 * web mining.
+	 *
+	 * @param sentence entity name to write about
+	 * @return list of hit structures containing approved (relevance-wise)
+	 *         mined sentences plus the original search-result metadata
+	 *         (titles, abstracts, urls)
+	 */
+	public List<HitBase> generateContentAbout(String sentence) throws Exception {
+		List<HitBase> opinionSentencesToAdd = new ArrayList<HitBase>();
+		System.out.println(" \n=== Entity to write about = " + sentence);
+
+		int stepCount=0;
+		for (String verbAddition : StoryDiscourseNavigator.frequentPerformingVerbs) {
+			List<HitBase> searchResult = yrunner.runSearch(sentence + " "
+					+ verbAddition, MAX_SEARCH_RESULTS); //100);
+			// FIX: guard against a null result BEFORE dereferencing it; the
+			// original called size()/subList() first, which would NPE and
+			// made the later null check dead code.
+			if (searchResult != null) {
+				if (MAX_SEARCH_RESULTS < searchResult.size())
+					searchResult = searchResult.subList(0, MAX_SEARCH_RESULTS);
+				//TODO for shorter run
+				for (HitBase item : searchResult) { // got some text from .html
+					if (item.getAbstractText() != null
+							&& !(item.getUrl().indexOf(".pdf") > 0)) { // exclude pdf
+						opinionSentencesToAdd
+						.add(buildParagraphOfGeneratedText(item, sentence, null));
+					}
+				}
+			}
+			stepCount++;
+			if (stepCount>MAX_STEPS)
+				break;
+		}
+
+		opinionSentencesToAdd = ContentGeneratorSupport.removeDuplicatesFromResultantHits(opinionSentencesToAdd);
+		return opinionSentencesToAdd;
+	}

+

+	/**

+	 * Takes a sentence and extracts noun phrases and entity names to from search

+	 * queries for finding relevant sentences on the web, which are then subject

+	 * to relevance assessment by Similarity. Search queries should not be too

+	 * general (irrelevant search results) or too specific (too few search

+	 * results)

+	 * 

+	 * @param String

+	 *          input sentence to form queries

+	 * @return List<String> of search expressions

+	 */

+	public static List<String> buildSearchEngineQueryFromSentence(String sentence) {

+		ParseTreeChunk matcher = new ParseTreeChunk();

+		ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor

+				.getInstance();

+		List<List<ParseTreeChunk>> sent1GrpLst = null;

+

+		List<ParseTreeChunk> nPhrases = pos

+				.formGroupedPhrasesFromChunksForSentence(sentence).get(0);

+		List<String> queryArrayStr = new ArrayList<String>();

+		for (ParseTreeChunk ch : nPhrases) {

+			String query = "";

+			int size = ch.getLemmas().size();

+

+			for (int i = 0; i < size; i++) {

+				if (ch.getPOSs().get(i).startsWith("N")

+						|| ch.getPOSs().get(i).startsWith("J")) {

+					query += ch.getLemmas().get(i) + " ";

+				}

+			}

+			query = query.trim();

+			int len = query.split(" ").length;

+			if (len < 2 || len > 5)

+				continue;

+			if (len < 4) { // every word should start with capital

+				String[] qs = query.split(" ");

+				boolean bAccept = true;

+				for (String w : qs) {

+					if (w.toLowerCase().equals(w)) // idf only two words then

+						// has to be person name,

+						// title or geo location

+						bAccept = false;

+				}

+				if (!bAccept)

+					continue;

+			}

+

+			query = query.trim().replace(" ", " +");

+			query = " +" + query;

+

+			queryArrayStr.add(query);

+

+		}

+		if (queryArrayStr.size() < 1) { // release constraints on NP down to 2

+			// keywords

+			for (ParseTreeChunk ch : nPhrases) {

+				String query = "";

+				int size = ch.getLemmas().size();

+

+				for (int i = 0; i < size; i++) {

+					if (ch.getPOSs().get(i).startsWith("N")

+							|| ch.getPOSs().get(i).startsWith("J")) {

+						query += ch.getLemmas().get(i) + " ";

+					}

+				}

+				query = query.trim();

+				int len = query.split(" ").length;

+				if (len < 2)

+					continue;

+

+				query = query.trim().replace(" ", " +");

+				query = " +" + query;

+

+				queryArrayStr.add(query);

+

+			}

+		}

+

+		queryArrayStr = ContentGeneratorSupport.removeDuplicatesFromQueries(queryArrayStr);

+		queryArrayStr.add(sentence);

+

+		return queryArrayStr;

+

+	}

+

+	private Triple<List<String>, String, String[]> formCandidateFragmentsForPage(HitBase item, String originalSentence, List<String> sentsAll){

+		if (sentsAll == null)

+			sentsAll = new ArrayList<String>();

+		// put orig sentence in structure

+		List<String> origs = new ArrayList<String>();

+		origs.add(originalSentence);

+		item.setOriginalSentences(origs);

+		String title = item.getTitle().replace("<b>", " ").replace("</b>", " ")

+				.replace("  ", " ").replace("  ", " ");

+		// generation results for this sentence

+		List<Fragment> result = new ArrayList<Fragment>();

+		// form plain text from snippet

+		String snapshot = item.getAbstractText().replace("<b>", " ")

+				.replace("</b>", " ").replace("  ", " ").replace("  ", " ");

+

+

+		// fix a template expression which can be substituted by original if

+		// relevant

+		String snapshotMarked = snapshot.replace("...",

+				" _should_find_orig_ . _should_find_orig_");

+		String[] fragments = sm.splitSentences(snapshotMarked);

+		List<String> allFragms = new ArrayList<String>();

+		allFragms.addAll(Arrays.asList(fragments));

+

+		String[] sents = null;

+		String downloadedPage = null;

+		try {

+			if (snapshotMarked.length() != snapshot.length()) {

+				downloadedPage = pFetcher.fetchPage(item.getUrl());

+				if (downloadedPage != null && downloadedPage.length() > 100) {

+					item.setPageContent(downloadedPage);

+					String pageContent = Utils.fullStripHTML(item.getPageContent());

+					pageContent = GeneratedSentenceProcessor

+							.normalizeForSentenceSplitting(pageContent);

+					pageContent = ContentGeneratorSupport.cleanSpacesInCleanedHTMLpage(pageContent);

+			

+					sents = sm.splitSentences(pageContent);

+

+					sents = ContentGeneratorSupport.cleanListOfSents(sents);

+				}

+			}

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			// e.printStackTrace();

+			System.err

+			.println("Problem downloading  the page and splitting into sentences");

+			return new Triple(allFragms, downloadedPage, sents);

+		}

+		return new Triple(allFragms, downloadedPage, sents);

+	}

+

+	private String[] formCandidateSentences(String fragment, Triple<List<String>, String, String[]> fragmentExtractionResults){

+		String[] mainAndFollowSent = null;

+

+		List<String> allFragms = (List<String>)fragmentExtractionResults.getFirst();

+		String downloadedPage = (String)fragmentExtractionResults.getSecond();

+		String[] sents = (String[])fragmentExtractionResults.getThird();

+

+		String followSent = null;

+		if (fragment.length() < 50)

+			return null;

+		String pageSentence = "";

+		// try to find original sentence from webpage

+		if (fragment.indexOf("_should_find_orig_") > -1 && sents != null

+				&& sents.length > 0){

+			try { 

+				// first try sorted sentences from page by length approach

+				String[] sentsSortedByLength = support.extractSentencesFromPage(downloadedPage);

+

+

+				try {

+					mainAndFollowSent = ContentGeneratorSupport.getFullOriginalSentenceFromWebpageBySnippetFragment(

+							fragment.replace("_should_find_orig_", ""), sentsSortedByLength);

+				} catch (Exception e) {

+					// TODO Auto-generated catch block

+					e.printStackTrace();

+				}

+				// if the above gives null than try to match all sentences from snippet fragment

+				if (mainAndFollowSent==null || mainAndFollowSent[0]==null){

+					mainAndFollowSent = ContentGeneratorSupport.getFullOriginalSentenceFromWebpageBySnippetFragment(

+							fragment.replace("_should_find_orig_", ""), sents);

+				}

+

+

+			} catch (Exception e) {

+

+				// TODO Auto-generated catch block

+				e.printStackTrace();

+			}

+		}

+		else

+			// or get original snippet

+			pageSentence = fragment;

+		if (pageSentence != null)

+			pageSentence.replace("_should_find_orig_", "");

+

+		return mainAndFollowSent;

+

+	}	

+

	/**
	 * Scores a candidate mined sentence against the seed sentence and, when it
	 * passes the syntactic/string-distance thresholds, forms a Fragment for the
	 * generated paragraph.
	 *
	 * @param candidateSentences [0] = main page sentence, [1..] = follow-up sentences
	 * @param item search hit the sentence was mined from (source URL, title)
	 * @param fragment snippet fragment the sentence was matched against
	 * @param originalSentence seed sentence of the essay
	 * @param sentsAll other seed sentences, tried when the main one scores low
	 * @return an accepted Fragment, or null when the candidate is rejected
	 */
	private Fragment verifyCandidateSentencesAndFormParagraph(
			String[] candidateSentences, HitBase item, String fragment, String originalSentence, List<String> sentsAll) {
		Fragment result = null;	

		String pageSentence = candidateSentences[0];
		String followSent = "";
		for(int i = 1; i< candidateSentences.length; i++)
			followSent+= candidateSentences[i];
		String title = item.getTitle();

		// resultant sentence SHOULD NOT be longer than for times the size of
		// snippet fragment
		if (!(pageSentence != null && pageSentence.length()>50 
				&& (float) pageSentence.length() / (float) fragment.length() < 4.0) )
			return null;


		try { // get score from syntactic match between sentence in
			// original text and mined sentence
			double measScore = 0.0, syntScore = 0.0, mentalScore = 0.0;

			SentencePairMatchResult matchRes = sm.assessRelevance(pageSentence
					+ " " + title, originalSentence);
			List<List<ParseTreeChunk>> match = matchRes.getMatchResult();
			// sentences without a verb, or imperative ones, are not essay material
			if (!matchRes.isVerbExists() || matchRes.isImperativeVerb()) {
				System.out
				.println("Rejected Sentence : No verb OR Yes imperative verb :"
						+ pageSentence);
				return null;
			}

			syntScore = parseTreeChunkListScorer
					.getParseTreeChunkListScore(match);
			System.out.println(parseTreeChunk.listToString(match) + " "
					+ syntScore + "\n pre-processed sent = '" + pageSentence);

			if (syntScore < RELEVANCE_THRESHOLD){ // 1.5) { // trying other sents
				// the seed may be multi-sentence: keep the best score over them
				for (String currSent : sentsAll) {
					if (currSent.startsWith(originalSentence))
						continue;
					match = sm.assessRelevance(currSent, pageSentence)
							.getMatchResult();
					double syntScoreCurr = parseTreeChunkListScorer
							.getParseTreeChunkListScore(match);
					if (syntScoreCurr > syntScore) {
						syntScore = syntScoreCurr;
					}
				}
				if (syntScore > RELEVANCE_THRESHOLD) {
					System.out.println("Got match with other sent: "
							+ parseTreeChunk.listToString(match) + " " + syntScore);
				}
			}

			measScore = stringDistanceMeasurer.measureStringDistance(
					originalSentence, pageSentence);

			// accept when syntactically or lexically relevant, but not a near
			// verbatim copy (measScore < 0.8) and long enough to be a sentence
			if ((syntScore > RELEVANCE_THRESHOLD || measScore > 0.5)
					&& measScore < 0.8 && pageSentence.length() > 40) // >70
			{
				String pageSentenceProc = GeneratedSentenceProcessor
						.acceptableMinedSentence(pageSentence);
				if (pageSentenceProc != null) {
					pageSentenceProc = GeneratedSentenceProcessor
							.processSentence(pageSentenceProc);
					followSent = GeneratedSentenceProcessor.processSentence(followSent);
					if (followSent != null) {
						pageSentenceProc += " "+ followSent;
					}

					pageSentenceProc = Utils.convertToASCII(pageSentenceProc);
					// final score favors longer sentences via the length/50 term
					result = new Fragment(pageSentenceProc, syntScore + measScore
							+ mentalScore + (double) pageSentenceProc.length()
							/ (double) 50);
					result.setSourceURL(item.getUrl());
					result.fragment = fragment;

					System.out.println("Accepted sentence: " + pageSentenceProc
							+ "| with title= " + title);
					System.out.println("For fragment = " + fragment);
				} else
					System.out
					.println("Rejected sentence due to wrong area at webpage: "
							+ pageSentence);
			} else
				System.out.println("Rejected sentence due to low score: "
						+ pageSentence);
			// }
		} catch (Throwable t) {
			t.printStackTrace();
		}

	return result;
}

+	/**

+	 * Takes single search result for an entity which is the subject of the essay

+	 * to be written and forms essey sentences from the title, abstract, and

+	 * possibly original page

+	 * 

+	 * @param HitBase

+	 *          item : search result

+	 * @param originalSentence

+	 *          : seed for the essay to be written

+	 * @param sentsAll

+	 *          : list<String> of other sentences in the seed if it is

+	 *          multi-sentence

+	 * @return search result

+	 */

+	public HitBase buildParagraphOfGeneratedText(HitBase item,

+			String originalSentence, List<String> sentsAll) {

+		List<Fragment> results = new ArrayList<Fragment>() ;

+		

+		Triple<List<String>, String, String[]> fragmentExtractionResults = formCandidateFragmentsForPage(item, originalSentence, sentsAll);

+

+		List<String> allFragms = (List<String>)fragmentExtractionResults.getFirst();

+		String downloadedPage = (String)fragmentExtractionResults.getSecond();

+		String[] sents = (String[])fragmentExtractionResults.getThird();

+

+		for (String fragment : allFragms) {

+			String[] candidateSentences = formCandidateSentences(fragment, fragmentExtractionResults);

+			if (candidateSentences == null)

+				continue;

+			Fragment res = verifyCandidateSentencesAndFormParagraph(candidateSentences, item, fragment, originalSentence, sentsAll);

+			if (res!=null)

+				results.add(res);

+

+		}

+		

+		item.setFragments(results );

+		return item;

+	}

+

+

+

+

// Demo entry point: generates essay content about a hard-coded seed phrase
// and prints both the raw hits and their resultant string form.
public static void main(String[] args) {
	ContentGenerator f = new ContentGenerator();

	List<HitBase> hits = null;
	try {
		// uncomment the sentence you would like to serve as a seed sentence for
		// content generation for an event description
		hits = f.generateContentAbout("Albert Einstein"
				// "Britney Spears - The Femme Fatale Tour"
				// "Rush Time Machine",
				// "Blue Man Group" ,
				// "Belly Dance With Zaharah",
				// "Hollander Musicology Lecture: Danielle Fosler-Lussier, Guest Lecturer",
				// "Jazz Master and arguably the most famous jazz musician alive, trumpeter Wynton Marsalis",
				);
		System.out.println(HitBase.toString(hits));
		System.out.println(HitBase.toResultantString(hits));
		// WordFileGenerator.createWordDoc("Essey about Albert Einstein",
		// hits.get(0).getTitle(), hits);

	} catch (Exception e) {
		e.printStackTrace();
	}

}

+

+

+

+}
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorRunner.java
new file mode 100644
index 0000000..4cc36a5
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorRunner.java
@@ -0,0 +1,99 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.similarity.apps;

+

+import java.util.List;

+

+import javax.mail.internet.AddressException;

+import javax.mail.internet.InternetAddress;

+

+import opennlp.tools.apps.utils.email.EmailSender;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

+public class ContentGeneratorRunner {

+	public static void main(String[] args) {

+		ParserChunker2MatcherProcessor sm = null;

+	    	    

+	    try {

+			String resourceDir = args[2];

+			if (resourceDir!=null)

+				sm = ParserChunker2MatcherProcessor.getInstance(resourceDir);

+			else

+				sm = ParserChunker2MatcherProcessor.getInstance();

+	

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+	    

+	    String bingKey = args[7];

+	    if (bingKey == null){

+	    	bingKey = "e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=";

+	    }

+	    

+	    RelatedSentenceFinder f = null;

+	    String lang = args[6];

+	    if (lang.startsWith("es")){

+	    	f = new RelatedSentenceFinderML(Integer.parseInt(args[3]), Integer.parseInt(args[4]), Float.parseFloat(args[5]), bingKey);

+	    	f.setLang(lang);

+	    } else	    

+	    

+		    if (args.length>4 && args[4]!=null)

+		    	f = new RelatedSentenceFinder(Integer.parseInt(args[3]), Integer.parseInt(args[4]), Float.parseFloat(args[5]), bingKey);

+		    else

+		    	f = new RelatedSentenceFinder();

+		    

+	    List<HitBase> hits = null;

+	    try {

+	      

+	      hits = f.generateContentAbout(args[0].replace('+', ' ').replace('"', ' ').trim());

+	      System.out.println(HitBase.toString(hits));

+	      String generatedContent = HitBase.toResultantString(hits);

+	      

+	      opennlp.tools.apps.utils.email.EmailSender s = new opennlp.tools.apps.utils.email.EmailSender();

+			

+			try {

+				s.sendMail("smtp.live.com", "bgalitsky@hotmail.com", "borgalor", new InternetAddress("bgalitsky@hotmail.com"), new InternetAddress[]{new InternetAddress(args[1])}, new InternetAddress[]{}, new InternetAddress[]{}, 

+						"Generated content for you on '"+args[0].replace('+', ' ')+"'", generatedContent, null);

+			} catch (AddressException e) {

+				// TODO Auto-generated catch block

+				e.printStackTrace();

+			} catch (Exception e) {

+		

+				e.printStackTrace();

+				try {

+					s.sendMail("smtp.live.com", "bgalitsky@hotmail.com", "borgalor", new InternetAddress("bgalitsky@hotmail.com"), new InternetAddress[]{new InternetAddress(args[1])}, new InternetAddress[]{}, new InternetAddress[]{}, 

+							"Generated content for you on '"+args[0].replace('+', ' ')+"'", generatedContent, null);

+				} catch (Exception e1) {

+					// TODO Auto-generated catch block

+					e1.printStackTrace();

+				}

+			}

+	      

+	      

+	    } catch (Exception e) {

+	      e.printStackTrace();

+	    }

+

+	  }

+}

+

+/*

+ * C:\stanford-corenlp>java -Xmx1g -jar pt.jar albert+einstein bgalitsky@hotmail.com C:/stanford-corenlp/src/test/resources

+ * 

+ * http://173.255.254.250:8983/solr/contentgen/?q=albert+einstein&email=bgalitsky@hotmail.com&resourceDir=/home/solr/solr-4.4.0/example/src/test/resources&workDir=/home/solr/solr-4.4.0/example/solr-webapp/webapp/WEB-INF/lib&stepsNum=20&searchResultsNum=100&relevanceThreshold=0.5&lang=es-US&bingKey=e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=

+ */

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
new file mode 100644
index 0000000..428cd4e
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
@@ -0,0 +1,478 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.similarity.apps;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.Collections;

+import java.util.Comparator;

+import java.util.List;

+import java.util.logging.Logger;

+

+import opennlp.tools.parse_thicket.Triple;

+import opennlp.tools.parse_thicket.apps.SnippetToParagraph;

+import opennlp.tools.parse_thicket.apps.SnippetToParagraph.TextChunk;

+import opennlp.tools.parse_thicket.apps.SnippetToParagraph.TextChunkComparable;

+import opennlp.tools.similarity.apps.utils.PageFetcher;

+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;

+import opennlp.tools.similarity.apps.utils.Utils;

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;

+import opennlp.tools.textsimilarity.SentencePairMatchResult;

+import opennlp.tools.textsimilarity.TextProcessor;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

+import org.apache.commons.lang.StringUtils;

+

+/*

+ * This class supports content generation by static functions

+ * 

+ */

+

+public class ContentGeneratorSupport {

+	private static Logger LOG = Logger

+			.getLogger("opennlp.tools.similarity.apps.ContentGeneratorSupport");

+

+	/**

+	 * Takes a sentence and extracts noun phrases and entity names to from search

+	 * queries for finding relevant sentences on the web, which are then subject

+	 * to relevance assessment by Similarity. Search queries should not be too

+	 * general (irrelevant search results) or too specific (too few search

+	 * results)

+	 * 

+	 * @param String

+	 *          input sentence to form queries

+	 * @return List<String> of search expressions

+	 */

+	public static List<String> buildSearchEngineQueryFromSentence(String sentence) {

+		ParseTreeChunk matcher = new ParseTreeChunk();

+		ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor

+				.getInstance();

+		List<List<ParseTreeChunk>> sent1GrpLst = null;

+

+		List<ParseTreeChunk> nPhrases = pos

+				.formGroupedPhrasesFromChunksForSentence(sentence).get(0);

+		List<String> queryArrayStr = new ArrayList<String>();

+		for (ParseTreeChunk ch : nPhrases) {

+			String query = "";

+			int size = ch.getLemmas().size();

+

+			for (int i = 0; i < size; i++) {

+				if (ch.getPOSs().get(i).startsWith("N")

+						|| ch.getPOSs().get(i).startsWith("J")) {

+					query += ch.getLemmas().get(i) + " ";

+				}

+			}

+			query = query.trim();

+			int len = query.split(" ").length;

+			if (len < 2 || len > 5)

+				continue;

+			if (len < 4) { // every word should start with capital

+				String[] qs = query.split(" ");

+				boolean bAccept = true;

+				for (String w : qs) {

+					if (w.toLowerCase().equals(w)) // idf only two words then

+						// has to be person name,

+						// title or geo location

+						bAccept = false;

+				}

+				if (!bAccept)

+					continue;

+			}

+

+			query = query.trim().replace(" ", " +");

+			query = " +" + query;

+

+			queryArrayStr.add(query);

+

+		}

+		if (queryArrayStr.size() < 1) { // release constraints on NP down to 2

+			// keywords

+			for (ParseTreeChunk ch : nPhrases) {

+				String query = "";

+				int size = ch.getLemmas().size();

+

+				for (int i = 0; i < size; i++) {

+					if (ch.getPOSs().get(i).startsWith("N")

+							|| ch.getPOSs().get(i).startsWith("J")) {

+						query += ch.getLemmas().get(i) + " ";

+					}

+				}

+				query = query.trim();

+				int len = query.split(" ").length;

+				if (len < 2)

+					continue;

+

+				query = query.trim().replace(" ", " +");

+				query = " +" + query;

+

+				queryArrayStr.add(query);

+

+			}

+		}

+

+		queryArrayStr = removeDuplicatesFromQueries(queryArrayStr);

+		queryArrayStr.add(sentence);

+

+		return queryArrayStr;

+

+	}

+	

+	public static String[] cleanListOfSents(String[] sents) {

+		List<String> sentsClean = new ArrayList<String>();

+		for (String s : sents) {

+			if (s == null || s.trim().length() < 30 || s.length() < 20)

+				continue;

+			sentsClean.add(s);

+		}

+		return (String[]) sentsClean.toArray(new String[0]);

+	}

+

+	public static String cleanSpacesInCleanedHTMLpage(String pageContent){ //was 4 spaces 

+		 //was 3 spaces => now back to 2

+		//TODO - verify regexp!!

+		pageContent = pageContent.trim().replaceAll("([a-z])(\\s{2,3})([A-Z])", "$1. $3")

+				//replaceAll("[a-z]  [A-Z]", ". $0")// .replace("  ",

+				// ". ")

+				.replace("..", ".").replace(". . .", " ").

+				replace(".    .",". ").trim(); // sometimes   html breaks are converted into ' ' (two spaces), so

+		// we need to put '.'

+		return pageContent;

+	}

+

+	/**

+	 * remove dupes from queries to easy cleaning dupes and repetitive search

+	 * afterwards

+	 * 

+	 * @param List

+	 *          <String> of sentences (search queries, or search results

+	 *          abstracts, or titles

+	 * @return List<String> of sentences where dupes are removed

+	 */

+	public static List<String> removeDuplicatesFromQueries(List<String> hits) {

+		StringDistanceMeasurer meas = new StringDistanceMeasurer();

+		double dupeThresh = 0.8; // if more similar, then considered dupes was

+		// 0.7

+		List<Integer> idsToRemove = new ArrayList<Integer>();

+		List<String> hitsDedup = new ArrayList<String>();

+		try {

+			for (int i = 0; i < hits.size(); i++)

+				for (int j = i + 1; j < hits.size(); j++) {

+					String title1 = hits.get(i);

+					String title2 = hits.get(j);

+					if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))

+						continue;

+					if (meas.measureStringDistance(title1, title2) > dupeThresh) {

+						idsToRemove.add(j); // dupes found, later list member to

+						// be deleted

+

+					}

+				}

+

+			for (int i = 0; i < hits.size(); i++)

+				if (!idsToRemove.contains(i))

+					hitsDedup.add(hits.get(i));

+

+			if (hitsDedup.size() < hits.size()) {

+				LOG.info("Removed duplicates from formed query, including "

+						+ hits.get(idsToRemove.get(0)));

+			}

+

+		} catch (Exception e) {

+			LOG.severe("Problem removing duplicates from query list");

+		}

+

+		return hitsDedup;

+

+	}

+

+	/**

+	 * remove dupes from search results

+	 * 

+	 * @param List

+	 *          <HitBase> of search results objects

+	 * @return List<String> of search results objects where dupes are removed

+	 */

+	public static List<HitBase> removeDuplicatesFromResultantHits(

+			List<HitBase> hits) {

+		StringDistanceMeasurer meas = new StringDistanceMeasurer();

+		double dupeThresh = // 0.8; // if more similar, then considered dupes was

+				0.7;

+		List<Integer> idsToRemove = new ArrayList<Integer>();

+		List<HitBase> hitsDedup = new ArrayList<HitBase>();

+		try {

+			for (int i = 0; i < hits.size(); i++)

+				for (int j = i + 1; j < hits.size(); j++) {

+					HitBase hit2 = hits.get(j);

+					List<Fragment> fragmList1 = hits.get(i).getFragments();

+					List<Fragment> fragmList2 = hits.get(j).getFragments();

+					List<Fragment> fragmList2Results = new ArrayList<Fragment>(fragmList2);

+					for (Fragment f1 : fragmList1)

+						for (Fragment f2 : fragmList2) {

+							String sf1 = f1.getResultText();

+							String sf2 = f2.getResultText();

+							if (StringUtils.isEmpty(sf1) || StringUtils.isEmpty(sf1))

+								continue;

+							if (meas.measureStringDistance(sf1, sf2) > dupeThresh) {

+								fragmList2Results.remove(f2);

+								LOG.info("Removed duplicates from formed fragments list: "

+										+ sf2);

+							}

+						}

+

+					hit2.setFragments(fragmList2Results);

+					hits.set(j, hit2);

+				}

+		} catch (Exception e) {

+			LOG.severe("Problem removing duplicates from list of fragment");

+		}

+		return hits;

+	}

+

+

+

+	// given a fragment from snippet, finds an original sentence at a webpage by

+	// optimizing alignmemt score

+	public static String[] getFullOriginalSentenceFromWebpageBySnippetFragment(

+			String fragment, String[] sents) {

+		if (fragment.trim().length() < 15)

+			return null;

+

+		StringDistanceMeasurer meas = new StringDistanceMeasurer();

+		Double dist = 0.0;

+		String result = null, followSent = "";

+		for (int i = 0; i < sents.length; i++) {

+			String s = sents[i];

+			if (s == null || s.length() < 30)

+				continue;

+			Double distCurr = meas.measureStringDistance(s, fragment);

+			if (distCurr > dist && distCurr > 0.4) {

+				result = s;

+				dist = distCurr;

+				try {

+					if (i < sents.length - 1 && sents[i + 1].length() > 60) { 

+						String f1 = GeneratedSentenceProcessor.acceptableMinedSentence(sents[i+1]);

+						if (f1!=null){

+							followSent = f1;

+						}

+					}

+

+					if (i < sents.length - 2 && sents[i + 2].length() > 60) {

+						String f2 = GeneratedSentenceProcessor.acceptableMinedSentence(sents[i+2]);

+						if (f2!=null){

+							followSent += " "+f2;

+						}

+					}

+				} catch (Exception e) {

+					// TODO Auto-generated catch block

+					e.printStackTrace();

+				}

+			}

+		}

+		return new String[] { result, followSent };

+	}

+

+	// given a fragment from snippet, finds an original sentence at a webpage by

+	// optimizing alignmemt score

+	public static String[] getBestFullOriginalSentenceFromWebpageBySnippetFragment(

+			String fragment, String[] sents) {

+		if (fragment.trim().length() < 15)

+			return null;

+		int bestSentIndex = -1;

+		StringDistanceMeasurer meas = new StringDistanceMeasurer();

+		Double distBest = 10.0; // + sup

+		String result = null, followSent = null;

+		for (int i = 0; i < sents.length; i++) {

+			String s = sents[i];

+			if (s == null || s.length() < 30)

+				continue;

+			Double distCurr = meas.measureStringDistance(s, fragment);

+			if (distCurr > distBest) {

+				distBest = distCurr;

+				bestSentIndex = i;

+			}

+

+		}

+		if (distBest > 0.4) {

+			result = sents[bestSentIndex];

+

+			if (bestSentIndex < sents.length - 1

+					&& sents[bestSentIndex + 1].length() > 60) {

+				followSent = sents[bestSentIndex + 1];

+			}

+

+		}

+

+		return new String[] { result, followSent };

+	}

+

+	public String[] extractSentencesFromPage(String downloadedPage)

+	{

+

+		int maxSentsFromPage= 100;

+		List<String[]> results = new ArrayList<String[]>();

+

+		//String pageOrigHTML = pFetcher.fetchOrigHTML(url);

+

+		downloadedPage= downloadedPage.replace("     ", "&");

+		downloadedPage = downloadedPage.replaceAll("(?:&)+", "#");

+		String[] sents = downloadedPage.split("#");

+		List<TextChunk> sentsList = new ArrayList<TextChunk>();

+		for(String s: sents){

+			s = ContentGeneratorSupport.cleanSpacesInCleanedHTMLpage(s);

+			sentsList.add(new TextChunk(s, s.length()));

+		}

+

+		Collections.sort(sentsList, new TextChunkComparable());

+		String[] longestSents = new String[maxSentsFromPage];

+		int j=0;

+		int initIndex = sentsList.size()-1 -maxSentsFromPage;

+		if (initIndex<0)

+			initIndex = 0;

+		for(int i=initIndex; i< sentsList.size() && j<maxSentsFromPage ; i++){

+			longestSents[j] = sentsList.get(i).text;

+			j++;

+		}

+

+		sents = cleanSplitListOfSents(longestSents);

+

+		//sents = removeDuplicates(sents);

+		//sents = verifyEnforceStartsUpperCase(sents);

+

+		return sents;

+	}

+

	/**
	 * A text span paired with its length, used to rank page fragments by size.
	 * NOTE(review): shadows the imported SnippetToParagraph.TextChunk within
	 * this class — confirm which one callers intend.
	 */
	public class TextChunk {
		public TextChunk(String s, int length) {
			this.text = s;
			this.len = length;
		}
		// the fragment text
		public String text;
		// cached length of the fragment text
		public int len;
	}

+

+	public class TextChunkComparable implements Comparator<TextChunk>

+	{

+		public int compare(TextChunk ch1, TextChunk ch2)

+		{

+			if (ch1.len>ch2.len)

+				return 1;

+			else if (ch1.len<ch2.len)

+				return  -1;

+			else return 0;

+

+		}

+	}

+

+	protected String[] cleanSplitListOfSents(String[] longestSents){

+		float minFragmentLength = 40, minFragmentLengthSpace=4;

+

+		List<String> sentsClean = new ArrayList<String>();

+		for (String sentenceOrMultSent : longestSents)

+		{

+			if (sentenceOrMultSent==null || sentenceOrMultSent.length()<20)

+				continue;

+			if (GeneratedSentenceProcessor.acceptableMinedSentence(sentenceOrMultSent)==null){

+				System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+sentenceOrMultSent);

+				continue;

+			}

+			// aaa. hhh hhh.  kkk . kkk ll hhh. lll kkk n.

+			int numOfDots = sentenceOrMultSent.replace('.','&').split("&").length;

+			float avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;

+			if ( avgSentenceLengthInTextPortion<minFragmentLength)

+				continue;

+			// o oo o ooo o o o ooo oo ooo o o oo

+			numOfDots = sentenceOrMultSent.replace(' ','&').split("&").length;

+			avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;

+			if ( avgSentenceLengthInTextPortion<minFragmentLengthSpace)

+				continue;

+

+			List<String> furtherSplit = TextProcessor.splitToSentences(sentenceOrMultSent);

+

+			// forced split by ',' somewhere in the middle of sentence

+			// disused - Feb 26 13

+			//furtherSplit = furtherMakeSentencesShorter(furtherSplit);

+			furtherSplit.remove(furtherSplit.size()-1);

+			for(String s : furtherSplit){

+				if (s.indexOf('|')>-1)

+					continue;

+				s = s.replace("<em>"," ").replace("</em>"," ");

+				s = Utils.convertToASCII(s);

+				sentsClean.add(s);

+			}

+		}

+		return (String[]) sentsClean.toArray(new String[0]);

+	}	

+

+	protected String[] cleanSplitListOfSentsFirstSplit(String[] longestSents){

+		float minFragmentLength = 40, minFragmentLengthSpace=4;

+

+		List<String> sentsClean = new ArrayList<String>();

+		for (String sentenceOrMultSent : longestSents)

+		{

+			if (sentenceOrMultSent==null || sentenceOrMultSent.length()<minFragmentLength)

+				continue;

+			List<String> furtherSplit = TextProcessor.splitToSentences(sentenceOrMultSent);

+			for(String sentence: furtherSplit ){

+				if (sentence==null || sentence.length()<20)

+					continue;

+				if (GeneratedSentenceProcessor.acceptableMinedSentence(sentence)==null){

+					//System.out.println("Rejected sentence by GeneratedSentenceProcessor.acceptableMinedSentence = "+sentenceOrMultSent);

+					continue;

+				}

+				// aaa. hhh hhh.  kkk . kkk ll hhh. lll kkk n.

+				int numOfDots = sentence.replace('.','&').split("&").length;

+				float avgSentenceLengthInTextPortion = (float)sentenceOrMultSent.length() /(float) numOfDots;

+				if ( avgSentenceLengthInTextPortion<minFragmentLength)

+					continue;

+				// o oo o ooo o o o ooo oo ooo o o oo

+				numOfDots = sentence.replace(' ','&').split("&").length;

+				avgSentenceLengthInTextPortion = (float)sentence.length() /(float) numOfDots;

+				if ( avgSentenceLengthInTextPortion<minFragmentLengthSpace)

+					continue;

+

+

+

+				// forced split by ',' somewhere in the middle of sentence

+				// disused - Feb 26 13

+				//furtherSplit = furtherMakeSentencesShorter(furtherSplit);

+				//furtherSplit.remove(furtherSplit.size()-1);

+

+				if (sentence.indexOf('|')>-1)

+					continue;

+				sentence = Utils.convertToASCII(sentence);

+				sentsClean.add(sentence);

+			}

+		}

+		return (String[]) sentsClean.toArray(new String[0]);

+	}

+	

	// Scratch entry point used to experiment with the sentence-boundary regex
	// applied in cleanSpacesInCleanedHTMLpage(); the results are intentionally
	// unused. TODO(review): convert into a proper unit test or remove.
	public static void main(String[] args){
		String s = "You can grouP   parts  Of your regular expression  In your pattern   You grouP  elements";
		//with round brackets, e.g., ()." +
		//		" This allows you to assign a repetition operator to a complete group.";
		String sr = s.replaceAll("([a-z])(\\s{2,3})([A-Z])", "$1. $3");
		String sr1 = s.replaceAll("  [A-Z]", ". $0");
		sr = s.replaceAll("[a-z]  [A-Z]", ". $1");
		sr1 = s.replaceAll("  [A-Z]", ". $1");
	}

+

+}

+

+

+

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
index e1f6d77..3e79b7a 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
@@ -25,189 +25,297 @@
 import org.apache.commons.lang.StringUtils;

 

 public class GeneratedSentenceProcessor {

-  public static String acceptableMinedSentence(String sent) {

-    // if too many commas => seo text

 

-    String[] commas = StringUtils.split(sent, ',');

-    String[] spaces = StringUtils.split(sent, ' ');

-    if ((float) commas.length / (float) spaces.length > 0.7) {

-      System.out.println("Rejection: too many commas");

-      return null;

-    }

+	public static String[] occurs = new String[]{ "click here", "wikipedia", "retrieved", "isbn",

+		"http", "www.",

+		"copyright", "advertise",  "(accessed", "[edit]", "[citation needed]",

+		"site map",  "email updates",  "contact us", "rss feeds",  "cite this site",

+		"operating hours", "last modified", "product catalog",

+		"days per week", "leave a comment", "corporate information",  

+		"employment opportunities", "terms of use", "private policy", "parental guidelines", "copyright policy",  "ad choices",

+		"about us",  "about our ads",  "privacy policy",  "terms of use",

+		"click for", "photos",

+		"find the latest",		       

+		"terms of service",

+		"clicking here",

+		"skip to", "sidebar",

+		"Tags:", 

+		"available online",

+		"get online",

+		"buy online",

+		"not valid", "get discount",

+		"official site",

+		"this video",

+		//"this book",

+		"this product",

+		"paperback", "hardcover",

+		"audio cd",

+		"related searches",

+		"permission is granted",

+		"[edit",

+		"edit categories",

+		"free license",

+		"permission is granted",

+		"under the terms",

+		"rights reserved",

+		"wikipedia", 

+		"recipient of", "this message", 

+		"mailing list",  "purchase order",

+		"mon-fri",  "email us",  "privacy pol",  "back to top", 

+		"click here",  "for details",  "assistance?",  "chat live",

+		"free shipping",  "company info",  "satisfaction g",  "contact us",

+		"menu.", "search.",  "sign in", "home.",

+		"additional terms", "may apply"};

 

-    String[] pipes = StringUtils.split(sent, '|');

-    if (StringUtils.split(sent, '|').length > 2

-        || StringUtils.split(sent, '>').length > 2) {

-      System.out.println("Rejection: too many |s or >s ");

-      return null;

-    }

-    String sentTry = sent.toLowerCase();

-    // if too many long spaces

-    String sentSpaces = sentTry.replace("   ", "");

-    if (sentSpaces.length() - sentTry.length() > 10) // too many spaces -

-      // suspicious

-      return null;

+	public static String[] occursStartsWith = new String[]{

+		"fax",  "write","email", "contact",  "conditions",  "chat live",

+		"we ",  "the recipient",  "day return",  "days return",

+		"refund it",  "your money",

+		"purchase orders",

+		"exchange it ",  "return it",  "day return",  "days return",

+		"subscribe","posted by", "below" , "corporate",

+		"this book"};

+	public static String acceptableMinedSentence(String sent) {

+		if (sent==null || sent.length()<40)

+			return null;

+		// if too many commas => seo text

 

-    if (sentTry.indexOf("click here") > -1 || sentTry.indexOf(" wikip") > -1

-        || sentTry.indexOf("copyright") > -1

-        || sentTry.indexOf("operating hours") > -1

-        || sentTry.indexOf("days per week") > -1

-        || sentTry.indexOf("click for") > -1 || sentTry.indexOf("photos") > -1

-        || sentTry.indexOf("find the latest") > -1

-        || sentTry.startsWith("subscribe")

-        || sentTry.indexOf("Terms of Service") > -1

-        || sentTry.indexOf("clicking here") > -1

-        || sentTry.indexOf("skip to") > -1 || sentTry.indexOf("sidebar") > -1

-        || sentTry.indexOf("Tags:") > -1 || sentTry.startsWith("Posted by")

-        || sentTry.indexOf("available online") > -1

-        || sentTry.indexOf("get online") > -1

-        || sentTry.indexOf("buy online") > -1

-        || sentTry.indexOf("not valid") > -1 || sentTry.indexOf("discount") > -1

-        || sentTry.indexOf("official site") > -1

-        || sentTry.indexOf("this video") > -1

-        || sentTry.indexOf("this book") > -1

-        || sentTry.indexOf("this product") > -1

-        || sentTry.indexOf("paperback") > -1 || sentTry.indexOf("hardcover") > -1

-        || sentTry.indexOf("audio cd") > -1

-        || sentTry.indexOf("related searches") > -1

-        || sentTry.indexOf("permission is granted") > -1

-        || sentTry.indexOf("[edit") > -1

-        || sentTry.indexOf("edit categories") > -1

-        || sentTry.indexOf("free license") > -1

-        || sentTry.indexOf("permission is granted") > -1

-        || sentTry.indexOf("under the terms") > -1

-        || sentTry.indexOf("rights reserved") > -1

-        || sentTry.indexOf("wikipedia") > -1 || sentTry.endsWith("the")

-        || sentTry.endsWith("the.") || sentTry.startsWith("below") 

-        || sentTry.indexOf("recipient of")>-1 || sentTry.indexOf("this message")>-1 

-        ||sentTry.indexOf( "mailing list")>-1 ||sentTry.indexOf( "purchase order")>-1

-        ||sentTry.indexOf( "mon-fri")>-1 ||sentTry.indexOf( "email us")>-1 ||sentTry.indexOf( "privacy pol")>-1 ||sentTry.indexOf( "back to top")>-1 

-        ||sentTry.indexOf( "click here")>-1 ||sentTry.indexOf( "for details")>-1 ||sentTry.indexOf( "assistance?")>-1 ||sentTry.indexOf( "chat live")>-1

-        ||sentTry.indexOf( "free shipping")>-1 ||sentTry.indexOf( "company info")>-1 ||sentTry.indexOf( "satisfaction g")>-1 ||sentTry.indexOf( "contact us")>-1

-        ||sentTry.startsWith( "fax") ||sentTry.startsWith( "write") || sentTry.startsWith( "email")||sentTry.indexOf( "conditions")>-1 ||sentTry.indexOf( "chat live")>-1

-        ||sentTry.startsWith( "we ") ||sentTry.indexOf( "the recipient")>-1 ||sentTry.indexOf( "day return")>-1 ||sentTry.indexOf( "days return")>-1

-        

-        ||sentTry.startsWith( "fax") ||sentTry.indexOf( "refund it")>-1 || sentTry.indexOf( "your money")>-1

-        ||sentTry.indexOf( "cond???")>-1 ||sentTry.indexOf( "purchase orders")>-1

-        ||sentTry.startsWith( "exchange it ") ||sentTry.indexOf( "return it")>-1 ||sentTry.indexOf( "day return")>-1 ||sentTry.indexOf( "days return")>-1

-    )

-      return null;

+		String[] commas = StringUtils.split(sent, ',');

+		String[] spaces = StringUtils.split(sent, ' ');

+		if ((float) commas.length / (float) spaces.length > 0.5) {

+			System.out.println("Rejection: too many commas  in sent ='"+sent);

+			return null;

+		}

 

-    // count symbols indicating wrong parts of page to mine for text

-    // if short and contains too many symbols indicating wrong area: reject

-    String sentWrongSym = sentTry.replace(">", "&&&").replace("�", "&&&")

-        .replace("|", "&&&").replace(":", "&&&").replace("/", "&&&")

-        .replace("-", "&&&").replace("%", "&&&");

-    if ((sentWrongSym.length() - sentTry.length()) >= 4

-        && sentTry.length() < 200) // twice ot more

-      return null;

+		String[] periods = StringUtils.split(sent.replace('.', '#'), '#');

+		if ((float) periods.length / (float) spaces.length > 0.2) {

+			System.out.println("Rejection: too many periods in sent ='"+sent);

+			return null;

+		}

+		// commented [x], to avoid rejection sentences with refs[]

+		String[] brakets = StringUtils.split(sent.replace('(', '#').replace(')', '#')/*.replace('[', '#').replace(']', '#')*/, '#');

+		if ((float) brakets.length / (float) spaces.length > 0.2) { // fixed: was periods.length, leaving brakets unused and this check a duplicate of the one above

+			System.out.println("Rejection: too many brakets in sent ='"+sent);

+			return null;

+		}

+		

+		String[] pipes = StringUtils.split(sent, '|');

+		if (StringUtils.split(sent, '|').length > 2

+				|| StringUtils.split(sent, '>').length > 2) {

+			System.out.println("Rejection: too many |s or >s in sent ='"+sent);

+			return null;

+		}

+		String sentTry = sent.toLowerCase();

+		// if too many long spaces

+		String sentSpaces = sentTry.replace("   ", "");

+		if (sentSpaces.length() - sentTry.length() > 10) // too many spaces -

+			// suspicious

+			return null;

+		if (isProhibitiveWordsOccurOrStartWith(sentTry))

+			return null;

 

-    sent = sent.replace('[', ' ').replace(']', ' ')

-        .replace("_should_find_orig_", "").replace(".   .", ". ")

-        .replace("amp;", " ").replace("1.", " ").replace("2.", " ")

-        .replace("3.", " ").replace("4.", " ").replace("2009", "2011")

-        .replace("2008", "2011").replace("2006", "2011")

-        .replace("2007", "2011").replace("VIDEO:", " ").replace("Video:", " ")

-        .replace("no comments", " ").replace("  ", " ").replace("  ", " ")

-        .replace("(more.)", "").replace("more.", "").replace("<more>", "")

-        .replace("[more]", "").replace(".,", ".").replace("&lt;", "")

-        .replace("p&gt;", "").replace("product description", "");

+		

 

-    // TODO .replace("a.", ".");

+		// count symbols indicating wrong parts of page to mine for text

+		// if short and contains too many symbols indicating wrong area: reject

+		String sentWrongSym = sentTry.replace(">", "&&&").replace("�", "&&&")

+				.replace("|", "&&&").replace(":", "&&&").replace("/", "&&&")

+				.replace("-", "&&&").replace("%", "&&&");

+		if ((sentWrongSym.length() - sentTry.length()) >= 4

+				&& sentTry.length() < 200) // twice or more

+			return null;

 

-    int endIndex = sent.indexOf(" posted");

-    if (endIndex > 0)

-      sent = sent.substring(0, endIndex);

+		sent = sent.replace('[', ' ').replace(']', ' ')

+				.replace("_should_find_orig_", "").replace(".   .", ". ")

+				.replace("amp;", " ").replace("1.", " ").replace("2.", " ")

+				.replace("3.", " ").replace("4.", " ").

+			/*	.replace("2009", "2011")

+				.replace("2008", "2011").replace("2006", "2011")

+				.replace("2007", "2011").

+			*/	replace("VIDEO:", " ").replace("Video:", " ")

+				.replace("no comments", " ").replace("  ", " ").replace("  ", " ")

+				.replace("(more.)", "").replace("more.", "").replace("<more>", "")

+				.replace("[more]", "").replace(".,", ".").replace("&lt;", "")

+				.replace("p&gt;", "").replace("product description", "");

 

-    return sent;

-  }

+		//sent = sent.replace("Click here. ","").replace("Share this:.","").replace("Facebook.",""). 

+		//		replace("Twitter." Email. Google. Print. Tumblr. Pinterest. More. Digg. LinkedIn. StumbleUpon. Reddit. Like this: Like Loading.. ")

 

-  public static String processSentence(String pageSentence) {

-    if (pageSentence == null)

-      return "";

-    pageSentence = Utils.fullStripHTML(pageSentence);

-    pageSentence = StringUtils.chomp(pageSentence, "..");

-    pageSentence = StringUtils.chomp(pageSentence, ". .");

-    pageSentence = StringUtils.chomp(pageSentence, " .");

-    pageSentence = StringUtils.chomp(pageSentence, ".");

-    pageSentence = StringUtils.chomp(pageSentence, "...");

-    pageSentence = StringUtils.chomp(pageSentence, " ....");

-    pageSentence = pageSentence.replace("::", ":").replace(".,", ". ")

-        .replace("(.)", "");

+		// TODO .replace("a.", ".");

 

-    pageSentence = pageSentence.trim();

-    pageSentence = pageSentence.replaceAll("\\s+", " "); // make single

-    // spaces

-    // everywhere

+		int endIndex = sent.indexOf(" posted");

+		if (endIndex > 0)

+			sent = sent.substring(0, endIndex);

 

-    String[] pipes = StringUtils.split(pageSentence, '|'); // removed

-    // shorter part

-    // of sentence

-    // at the end

-    // after pipe

-    if (pipes.length == 2

-        && ((float) pipes[0].length() / (float) pipes[1].length() > 3.0)) {

-      int pipePos = pageSentence.indexOf("|");

-      if (pipePos > -1)

-        pageSentence = pageSentence.substring(0, pipePos - 1).trim();

+		return sent;

+	}

 

-    }

+	public static String processSentence(String pageSentence) {

+		if (acceptableMinedSentence(pageSentence)==null){

+			System.out.println("Rejected sentence by GenerSentProc.processSentence.acceptableMinedSentence()");

+			return "";

+		}

+		if (pageSentence == null)

+			return "";

+		pageSentence = Utils.fullStripHTML(pageSentence);

+		pageSentence = StringUtils.chomp(pageSentence, "..");

+		pageSentence = StringUtils.chomp(pageSentence, ". .");

+		pageSentence = StringUtils.chomp(pageSentence, " .");

+		pageSentence = StringUtils.chomp(pageSentence, ".");

+		pageSentence = StringUtils.chomp(pageSentence, "...");

+		pageSentence = StringUtils.chomp(pageSentence, " ....");

+		pageSentence = pageSentence.replace("::", ":").replace(".,", ". ")

+				.replace("(.)", "");

 

-    if (!StringUtils.contains(pageSentence, '.')

-        && !StringUtils.contains(pageSentence, '?')

-        && !StringUtils.contains(pageSentence, '!'))

-      pageSentence = pageSentence + ". ";

+		pageSentence = pageSentence.trim();

+		pageSentence = pageSentence.replaceAll("\\s+", " "); // make single

+		// spaces

+		// everywhere

 

-    pageSentence = pageSentence.replace(" .", ".").replace("..", ".").trim();

-    if (!pageSentence.endsWith("."))

-      pageSentence += ". ";

-    return pageSentence;

-  }

+		String[] pipes = StringUtils.split(pageSentence, '|'); // removed

+		// shorter part

+		// of sentence

+		// at the end

+		// after pipe

+		if (pipes.length == 2

+				&& ((float) pipes[0].length() / (float) pipes[1].length() > 3.0)) {

+			int pipePos = pageSentence.indexOf("|");

+			if (pipePos > -1)

+				pageSentence = pageSentence.substring(0, pipePos - 1).trim();

 

-  public static void main(String[] args) {

+		}

 

-    String para = "About Albert Einstein     15 External links  16 Credits         Youth and schooling  Albert Einstein was born into a Jewish family";

-    para = "inventions of albert einstein                            what was albert einsteins invention                            invention of einstein                            what were albert einsteins inventions ";

+		if (!StringUtils.contains(pageSentence, '.')

+				&& !StringUtils.contains(pageSentence, '?')

+				&& !StringUtils.contains(pageSentence, '!'))

+			pageSentence = pageSentence + ". ";

 

-    para = para.replaceAll("  [A-Z]", ". $0");

-    System.out.println(para);

+		pageSentence = pageSentence.replace(" .", ".").replace("..", ".").trim();

+		if (!pageSentence.endsWith(".") && !pageSentence.endsWith(":") 

+				&&!pageSentence.endsWith("!") &&!pageSentence.endsWith("?")) // fixed: last check duplicated "." instead of "?", so "?"-terminated sentences got an extra ". "

+			pageSentence += ". ";

+		return pageSentence;

+	}

 

-    para = "Page 2 of 93";

+	public static boolean isProhibitiveWordsOccurOrStartWith(String sentenceLowercase){

+		for(String o: occurs){

+			if (sentenceLowercase.indexOf(o)>-1){

+				System.out.println("Found prohibited occurrence "+ o +" \n in sentence = "+  sentenceLowercase);

+				return true;

+			}

+		}

 

-    System.exit(0);

-    RelatedSentenceFinder f = new RelatedSentenceFinder();

-    try {

-      List<HitBase> hits = f

-          .findRelatedOpinionsForSentence(

-              "Give me a break, there is no reason why you can't retire in ten years if you had been a rational investor and not a crazy trader",

-              Arrays

-                  .asList(new String[] { "Give me a break there is no reason why you can't retire in ten years if you had been a rational investor and not a crazy trader. For example you went to cash in 2008 and stay in cash until now you made nothing. Whereas people who rode out the storm are doing fine so let's quit focusing on the loser who think they are so smart and went to 100% cash and are wondering what happen. Its a market that always moves unlike your mattress.", }));

-      StringBuffer buf = new StringBuffer();

+		for(String o: occursStartsWith){

+			if (sentenceLowercase.startsWith(o)){

+				System.out.println("Found prohibited occurrence Start With  "+ o +" \n in sentence = "+  sentenceLowercase);

+				return true;

+			}

+		}

 

-      for (HitBase h : hits) {

-        List<Fragment> frags = h.getFragments();

-        for (Fragment fr : frags) {

-          if (fr.getResultText() != null && fr.getResultText().length() > 3)

-            buf.append(fr.getResultText());

-        }

-      }

 

-    } catch (Exception e) {

-      // TODO Auto-generated catch block

-      e.printStackTrace();

-    }

 

-  }

+		//  || sentTry.endsWith("the")

+		//  || sentTry.endsWith("the.") || sentTry.startsWith("below") 

+		return false;

+	}

 

-  public static String normalizeForSentenceSplitting(String pageContent) {

-    pageContent.replace("Jan.", "January").replace("Feb.", "February")

-        .replace("Mar.", "March").replace("Apr.", "April")

-        .replace("Jun.", "June").replace("Jul.", "July")

-        .replace("Aug.", "August").replace("Sep.", "September")

-        .replace("Oct.", "October").replace("Nov.", "November")

-        .replace("Dec.", "December");

+	public static void main(String[] args) {

+		

+		String sentence = "Accepted sentence: Educational. Video. About Us menu. Home. Nobel Prizes and Laureates. Nobel Prizes and Laureates. Physics Prize. Chemistry Prize. Medicine Prize. Literature Prize. Peace Prize. Prize in Economic Sciences. Quick Facts. Nomination. Nomination. Physics Prize. Chemistry Prize. Medicine Prize. Literature Prize. Peace Prize. Prize in Economic Sciences. Nomination Archive. Ceremonies. Ceremonies. Ceremony Archive. Nobel Banquet Menus. Nobel Banquet Dress Code. The Queen's Gowns. Eyewitness Reports. Alfred Nobel. Alfred Nobel. Alfred Nobel's Will. Alfred Nobel's Life. Private Library of Alfred Nobel. Books on Alfred Nobel. Events. Events. Nobel Week Dialogue. Nobel Prize Inspiration Initiative. Nobel Prize Concert. Exhibitions at the Nobel Museum. Exhibitions at the Nobel Peace Center. About Us. Nobel Prizes and Laureates. Physics PrizesChemistry PrizesMedicine PrizesLiterature PrizesPeace PrizesPrize in Economic Sciences. About the Nobel Prize in Physics 1921. Albert Einstein. Facts. Biographical. Nobel Lecture. Banquet Speech. Documentary. Photo Gallery. Questions and Answers. Other Resources. All Nobel Prizes in Physics. All Nobel Prizes in 1921. The Nobel Prize in Physics 1921. Albert Einstein. Questions and Answers. Question: When was Albert Einstein born . Answer: Albert Einstein was born on 14 March 1879. Question: Where was he born . Answer: He was born in Ulm, Germany. Question: When did he die . Answer: He died 18 April 1955 in Princeton, New Jersey, USA. Question: Who were his parents . Answer: His father was Hermann Einstein and his mother was Pauline Einstein (born Koch). Question: Did he have any sisters and brothers . Answer: He had one sister named Maja. Question: Did he marry and have children . Answer: He was married to Mileva Mari between 1903 and 1919. They had three children, Lieserl (born 1902), Hans Albert (born 1904) and Eduard (born 1910). 
He married Elsa L Kwenthal in 1919 and they lived together until her death in 1936. Question: Where did he receive his education . Answer: He received his main education at the following schools:. Catholic elementary school in Munich, Germany (1885-1888). Luitpold Gymnasium in Munich, Germany (1888-1894). Cantonal school in Aarau, Switzerland (1895-1896). Swiss Federal Institute of Technology in Zurich, Switzerland (1896-1900). Ph.D. from Zurich University, Switzerland (1905). Question: When was Albert Einstein awarded the Nobel Prize in Physics . Answer: The Nobel Prize Awarding Institution, the Royal Swedish Academy of Sciences, decided to reserve the Nobel Prize in Physics in 1921, and therefore no Physics Prize was awarded that year.";

+		

+		String res = GeneratedSentenceProcessor.acceptableMinedSentence(sentence);

 

-    return pageContent;

+		String para = "About Albert Einstein     15 External links  16 Credits         Youth and schooling  Albert Einstein was born into a Jewish family";

+		para = "inventions of albert einstein                            what was albert einsteins invention                            invention of einstein                            what were albert einsteins inventions ";

 

-  }

-}
\ No newline at end of file
+		para = para.replaceAll("  [A-Z]", ". $0");

+		System.out.println(para);

+

+		para = "Page 2 of 93";

+

+		System.exit(0);

+		RelatedSentenceFinder f = new RelatedSentenceFinder();

+		try {

+			List<HitBase> hits = f

+					.findRelatedOpinionsForSentence(

+							"Give me a break, there is no reason why you can't retire in ten years if you had been a rational investor and not a crazy trader",

+							Arrays

+							.asList(new String[] { "Give me a break there is no reason why you can't retire in ten years if you had been a rational investor and not a crazy trader. For example you went to cash in 2008 and stay in cash until now you made nothing. Whereas people who rode out the storm are doing fine so let's quit focusing on the loser who think they are so smart and went to 100% cash and are wondering what happen. Its a market that always moves unlike your mattress.", }));

+			StringBuffer buf = new StringBuffer();

+

+			for (HitBase h : hits) {

+				List<Fragment> frags = h.getFragments();

+				for (Fragment fr : frags) {

+					if (fr.getResultText() != null && fr.getResultText().length() > 3)

+						buf.append(fr.getResultText());

+				}

+			}

+

+		} catch (Exception e) {

+			// TODO Auto-generated catch block

+			e.printStackTrace();

+		}

+

+	}

+

+	public static String normalizeForSentenceSplitting(String pageContent) {

+		pageContent.replace("Jan.", "January").replace("Feb.", "February")

+		.replace("Mar.", "March").replace("Apr.", "April")

+		.replace("Jun.", "June").replace("Jul.", "July")

+		.replace("Aug.", "August").replace("Sep.", "September")

+		.replace("Oct.", "October").replace("Nov.", "November")

+		.replace("Dec.", "December");

+

+		return pageContent;

+

+	}

+}

+

+/*

+

+if (sentTry.indexOf("click here")>-1 || sentTry.indexOf(" wikip") > -1

+|| sentTry.indexOf("copyright") > -1

+|| sentTry.indexOf("operating hours") > -1

+|| sentTry.indexOf("days per week") > -1

+|| sentTry.indexOf("click for") > -1 || sentTry.indexOf("photos") > -1

+|| sentTry.indexOf("find the latest") > -1

+|| sentTry.startsWith("subscribe")

+|| sentTry.indexOf("Terms of Service") > -1

+|| sentTry.indexOf("clicking here") > -1

+|| sentTry.indexOf("skip to") > -1 || sentTry.indexOf("sidebar") > -1

+|| sentTry.indexOf("Tags:") > -1 || sentTry.startsWith("Posted by")

+|| sentTry.indexOf("available online") > -1

+|| sentTry.indexOf("get online") > -1

+|| sentTry.indexOf("buy online") > -1

+|| sentTry.indexOf("not valid") > -1 || sentTry.indexOf("discount") > -1

+|| sentTry.indexOf("official site") > -1

+|| sentTry.indexOf("this video") > -1

+|| sentTry.indexOf("this book") > -1

+|| sentTry.indexOf("this product") > -1

+|| sentTry.indexOf("paperback") > -1 || sentTry.indexOf("hardcover") > -1

+|| sentTry.indexOf("audio cd") > -1

+|| sentTry.indexOf("related searches") > -1

+|| sentTry.indexOf("permission is granted") > -1

+|| sentTry.indexOf("[edit") > -1

+|| sentTry.indexOf("edit categories") > -1

+|| sentTry.indexOf("free license") > -1

+|| sentTry.indexOf("permission is granted") > -1

+|| sentTry.indexOf("under the terms") > -1

+|| sentTry.indexOf("rights reserved") > -1

+|| sentTry.indexOf("wikipedia") > -1 || sentTry.endsWith("the")

+|| sentTry.endsWith("the.") || sentTry.startsWith("below") 

+|| sentTry.indexOf("recipient of")>-1 || sentTry.indexOf("this message")>-1 

+||sentTry.indexOf( "mailing list")>-1 ||sentTry.indexOf( "purchase order")>-1

+||sentTry.indexOf( "mon-fri")>-1 ||sentTry.indexOf( "email us")>-1 ||sentTry.indexOf( "privacy pol")>-1 ||sentTry.indexOf( "back to top")>-1 

+||sentTry.indexOf( "click here")>-1 ||sentTry.indexOf( "for details")>-1 ||sentTry.indexOf( "assistance?")>-1 ||sentTry.indexOf( "chat live")>-1

+||sentTry.indexOf( "free shipping")>-1 ||sentTry.indexOf( "company info")>-1 ||sentTry.indexOf( "satisfaction g")>-1 ||sentTry.indexOf( "contact us")>-1

+||sentTry.startsWith( "fax") ||sentTry.startsWith( "write") || sentTry.startsWith( "email")||sentTry.indexOf( "conditions")>-1 ||sentTry.indexOf( "chat live")>-1

+||sentTry.startsWith( "we ") ||sentTry.indexOf( "the recipient")>-1 ||sentTry.indexOf( "day return")>-1 ||sentTry.indexOf( "days return")>-1

+

+||sentTry.startsWith( "fax") ||sentTry.indexOf( "refund it")>-1 || sentTry.indexOf( "your money")>-1

+||sentTry.indexOf( "cond???")>-1 ||sentTry.indexOf( "purchase orders")>-1

+||sentTry.startsWith( "exchange it ") ||sentTry.indexOf( "return it")>-1 ||sentTry.indexOf( "day return")>-1 ||sentTry.indexOf( "days return")>-1

+)

+return null;

+

+*/
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBase.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBase.java
index c8d4d6a..42c1e3b 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBase.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBase.java
@@ -26,214 +26,236 @@
 import org.apache.commons.lang.StringUtils;

 

 public class HitBase {

-  private static final Logger LOG = Logger

-      .getLogger("opennlp.tools.similarity.apps.HitBase");

+	private static final Logger LOG = Logger

+			.getLogger("opennlp.tools.similarity.apps.HitBase");

 

-  private String abstractText;

+	private String abstractText;

 

-  private String clickUrl;

+	private String clickUrl;

 

-  private String displayUrl;

+	private String displayUrl;

 

-  private String url;

+	private String url;

 

-  private String date;

+	private String date;

 

-  private String title;

+	private String title;

 

-  private Double generWithQueryScore;

+	private Double generWithQueryScore;

 

-  private String source;

+	private String source;

 

-  private List<String> originalSentences;

+	private List<String> originalSentences;

 

-  private String pageContent;

+	private String pageContent;

 

-  private List<Fragment> fragments;

+	private List<Fragment> fragments;

 

-  public HitBase() {

-    super();

-  }

+	public HitBase() {

+		super();

+	}

 

-  public String getPageContent() {

-    return pageContent;

-  }

+	public String getPageContent() {

+		return pageContent;

+	}

 

-  public HitBase(String orig, String[] generateds) {

-    originalSentences = new ArrayList<String>();

-    originalSentences.add(orig);

+	public HitBase(String orig, String[] generateds) {

+		originalSentences = new ArrayList<String>();

+		originalSentences.add(orig);

 

-    fragments = new ArrayList<Fragment>();

-    for (String sent : generateds) {

-      Fragment f = new Fragment(sent, 0.0);

-      fragments.add(f);

-    }

-    // the rest of params are null

-  }

+		fragments = new ArrayList<Fragment>();

+		for (String sent : generateds) {

+			Fragment f = new Fragment(sent, 0.0);

+			fragments.add(f);

+		}

+		// the rest of params are null

+	}

 

-  public void setPageContent(String pageContent) {

-    this.pageContent = pageContent;

-  }

+	public void setPageContent(String pageContent) {

+		this.pageContent = pageContent;

+	}

 

-  public List<Fragment> getFragments() {

-    return fragments;

-  }

+	public List<Fragment> getFragments() {

+		return fragments;

+	}

 

-  public void setFragments(List<Fragment> fragments) {

-    this.fragments = fragments;

-  }

+	public void setFragments(List<Fragment> fragments) {

+		this.fragments = fragments;

+	}

 

-  public String getSource() {

-    return source;

-  }

+	public String getSource() {

+		return source;

+	}

 

-  public void setSource(String source) {

-    this.source = source;

-  }

+	public void setSource(String source) {

+		this.source = source;

+	}

 

-  public List<String> getOriginalSentences() {

-    return originalSentences;

-  }

+	public List<String> getOriginalSentences() {

+		return originalSentences;

+	}

 

-  public void setOriginalSentences(List<String> originalSentences) {

-    this.originalSentences = originalSentences;

-  }

+	public void setOriginalSentences(List<String> originalSentences) {

+		this.originalSentences = originalSentences;

+	}

 

-  public String getTitle() {

-    return title;

-  }

+	public String getTitle() {

+		return title;

+	}

 

-  public void setTitle(String title) {

-    this.title = title;

-  }

+	public void setTitle(String title) {

+		this.title = title;

+	}

 

-  public String getAbstractText() {

-    return abstractText;

-  }

+	public String getAbstractText() {

+		return abstractText;

+	}

 

-  public void setAbstractText(String abstractText) {

-    this.abstractText = abstractText;

-  }

+	public void setAbstractText(String abstractText) {

+		this.abstractText = abstractText;

+	}

 

-  public String getClickUrl() {

-    return clickUrl;

-  }

+	public String getClickUrl() {

+		return clickUrl;

+	}

 

-  public void setClickUrl(String clickUrl) {

-    this.clickUrl = clickUrl;

-  }

+	public void setClickUrl(String clickUrl) {

+		this.clickUrl = clickUrl;

+	}

 

-  public String getDisplayUrl() {

-    return displayUrl;

-  }

+	public String getDisplayUrl() {

+		return displayUrl;

+	}

 

-  public void setDisplayUrl(String displayUrl) {

-    this.displayUrl = displayUrl;

-  }

+	public void setDisplayUrl(String displayUrl) {

+		this.displayUrl = displayUrl;

+	}

 

-  public String getUrl() {

-    return url;

-  }

+	public String getUrl() {

+		return url;

+	}

 

-  public void setUrl(String url) {

-    this.url = url;

-  }

+	public void setUrl(String url) {

+		this.url = url;

+	}

 

-  public String getDate() {

-    return date;

-  }

+	public String getDate() {

+		return date;

+	}

 

-  public void setDate(String date) {

-    this.date = date;

-  }

+	public void setDate(String date) {

+		this.date = date;

+	}

 

-  public Double getGenerWithQueryScore() {

-    return generWithQueryScore;

-  }

+	public Double getGenerWithQueryScore() {

+		return generWithQueryScore;

+	}

 

-  public void setGenerWithQueryScore(Double generWithQueryScore) {

-    this.generWithQueryScore = generWithQueryScore;

-  }

+	public void setGenerWithQueryScore(Double generWithQueryScore) {

+		this.generWithQueryScore = generWithQueryScore;

+	}

 

-  public String toString() {

-    // return "\n"+this.getUrl()+" | " +this.getTitle()+ " | "+

-    // this.abstractText ;

-    if (this.getFragments() != null && this.getFragments().size() > 0)

-      return this.getFragments().toString();

-    else

-      return this.title;

-  }

+	public String toString() {

+		// return "\n"+this.getUrl()+" | " +this.getTitle()+ " | "+

+				// this.abstractText ;

+		if (this.getFragments() != null && this.getFragments().size() > 0)

+			return this.getFragments().toString();

+		else

+			return this.title;

+	}

 

-  public static String toString(List<HitBase> hits) {

-    StringBuffer buf = new StringBuffer();

-    Boolean pBreak = true;

-    for (HitBase hit : hits) {

-      String fragm = (hit.toString());

-      if (fragm.length() > 15) {

-        if (pBreak)

-          buf.append(fragm + " | ");

-        else

-          buf.append(fragm + " | \n");

-        // switch to opposite

-        if (pBreak)

-          pBreak = false;

-        else

-          pBreak = true;

-      }

+	public static String toString(List<HitBase> hits) {

+		StringBuffer buf = new StringBuffer();

+		Boolean pBreak = true;

+		for (HitBase hit : hits) {

+			String fragm = (hit.toString());

+			if (fragm.length() > 15) {

+				if (pBreak)

+					buf.append(fragm + " | ");

+				else

+					buf.append(fragm + " | \n");

+				// switch to opposite

+				if (pBreak)

+					pBreak = false;

+				else

+					pBreak = true;

+			}

 

-    }

-    return buf.toString();

-  }

+		}

+		return buf.toString();

+	}

 

-  public static String toResultantString(List<HitBase> hits) {

-    StringBuffer buf = new StringBuffer();

-    Boolean pBreak = true;

-    for (HitBase hit : hits) {

-      String fragm = hit.getFragments().toString();

-      if (fragm.length() > 15) {

-        if (pBreak)

-          buf.append(fragm + " | 	");

-        else

-          buf.append(fragm + " | \n");

-        // switch to opposite

-        if (pBreak)

-          pBreak = false;

-        else

-          pBreak = true;

-      }

+	public static String toResultantString(List<HitBase> hits) {

+		StringBuffer buf = new StringBuffer();

+		Boolean pBreak = true;

+		for (HitBase hit : hits) {

+			try {

+				if (hit.getFragments()==null)	

+					continue;

+				String fragm = hit.getFragments().toString();

+				if (fragm.length() > 15) {

+					if (pBreak)

+						buf.append(fragm + " | 	");

+					else

+						buf.append(fragm + " | <br>\n");

+					// switch to opposite

+					if (pBreak)

+						pBreak = false;

+					else

+						pBreak = true;

+				}

+			} catch (Exception e) {

+				e.printStackTrace();

+			}

 

-    }

-    return buf.toString().replace("[", "").replace("]", "").replace(" | ", "")

-        .replace(".,", ".").replace(".\"", "\"").replace(". .", ".")

-        .replace(",.", ".");

-  }

+		}

+		return buf.toString().replace("[", "").replace("]", "").replace(" | ", "")

+				.replace(".,", ".").replace(".\"", "\"").replace(". .", ".")

+				.replace(",.", ".");

+	}

+	

+	public static String produceReferenceSection(List<HitBase> hits) {

+		StringBuffer buf = new StringBuffer();

+		for (HitBase hit : hits) {

+			try {

+				if (hit.getUrl()==null)	

+					continue;

+				buf.append(hit.getUrl());					

+			

+			} catch (Exception e) {

+				e.printStackTrace();

+			}

 

-  public static List<HitBase> removeDuplicates(List<HitBase> hits) {

-    StringDistanceMeasurer meas = new StringDistanceMeasurer();

-    double imageDupeThresh = 0.8; // if more similar, then considered dupes

-    List<Integer> idsToRemove = new ArrayList<Integer>();

-    List<HitBase> hitsDedup = new ArrayList<HitBase>();

-    try {

-      for (int i = 0; i < hits.size(); i++)

-        for (int j = i + 1; j < hits.size(); j++) {

-          String title1 = hits.get(i).getTitle();

-          String title2 = hits.get(j).getTitle();

-          if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))

-            continue;

-          if (meas.measureStringDistance(title1, title2) > imageDupeThresh) {

-            idsToRemove.add(j); // dupes found, later list member to be deleted

-          }

-        }

-      for (int i = 0; i < hits.size(); i++)

-        if (!idsToRemove.contains(i))

-          hitsDedup.add(hits.get(i));

-      if (hitsDedup.size() < hits.size()) {

-        LOG.info("Removed duplicates from relevant search results, including "

-            + hits.get(idsToRemove.get(0)).getTitle());

-      }

-    } catch (Exception e) {

-      LOG.severe("Problem removing duplicates from relevant images: " + e);

-    }

-    return hitsDedup;

-  }

+		}

+		return buf.toString();

+	}

+

+	public static List<HitBase> removeDuplicates(List<HitBase> hits) {

+		StringDistanceMeasurer meas = new StringDistanceMeasurer();

+		double imageDupeThresh = 0.8; // if more similar, then considered dupes

+		List<Integer> idsToRemove = new ArrayList<Integer>();

+		List<HitBase> hitsDedup = new ArrayList<HitBase>();

+		try {

+			for (int i = 0; i < hits.size(); i++)

+				for (int j = i + 1; j < hits.size(); j++) {

+					String title1 = hits.get(i).getTitle();

+					String title2 = hits.get(j).getTitle();

+					if (StringUtils.isEmpty(title1) || StringUtils.isEmpty(title2))

+						continue;

+					if (meas.measureStringDistance(title1, title2) > imageDupeThresh) {

+						idsToRemove.add(j); // dupes found, later list member to be deleted

+					}

+				}

+			for (int i = 0; i < hits.size(); i++)

+				if (!idsToRemove.contains(i))

+					hitsDedup.add(hits.get(i));

+			if (hitsDedup.size() < hits.size()) {

+				LOG.info("Removed duplicates from relevant search results, including "

+						+ hits.get(idsToRemove.get(0)).getTitle());

+			}

+		} catch (Exception e) {

+			LOG.severe("Problem removing duplicates from relevant images: " + e);

+		}

+		return hitsDedup;

+	}

 }
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBaseComparable.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBaseComparable.java
index 1f1fcc6..b41f8ec 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBaseComparable.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBaseComparable.java
@@ -1,3 +1,19 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

 package opennlp.tools.similarity.apps;

 

 import java.util.Comparator;

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
index 7ff9fc3..bfeff62 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
@@ -19,15 +19,24 @@
 

 import java.util.ArrayList;

 import java.util.Arrays;

+import java.util.Collections;

+import java.util.Comparator;

+import java.util.HashSet;

 import java.util.List;

+import java.util.Set;

 import java.util.logging.Logger;

 

+import opennlp.tools.parse_thicket.Triple;

+import opennlp.tools.parse_thicket.apps.SnippetToParagraph;

+import opennlp.tools.parse_thicket.apps.SnippetToParagraph.TextChunk;

+import opennlp.tools.parse_thicket.apps.SnippetToParagraph.TextChunkComparable;

 import opennlp.tools.similarity.apps.utils.PageFetcher;

 import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;

 import opennlp.tools.similarity.apps.utils.Utils;

 import opennlp.tools.textsimilarity.ParseTreeChunk;

 import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;

 import opennlp.tools.textsimilarity.SentencePairMatchResult;

+import opennlp.tools.textsimilarity.TextProcessor;

 import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

 

 import org.apache.commons.lang.StringUtils;

@@ -43,575 +52,952 @@
  */

 

 public class RelatedSentenceFinder {

-  private static Logger LOG = Logger

-      .getLogger("opennlp.tools.similarity.apps.RelatedSentenceFinder");

-  PageFetcher pFetcher = new PageFetcher();

+	private static Logger LOG = Logger

+			.getLogger("opennlp.tools.similarity.apps.RelatedSentenceFinder");

+	PageFetcher pFetcher = new PageFetcher();

+	ParserChunker2MatcherProcessor sm = ParserChunker2MatcherProcessor

+			.getInstance();

+	protected ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

+	protected ParseTreeChunk parseTreeChunk = new ParseTreeChunk();

+	protected static StringDistanceMeasurer stringDistanceMeasurer = new StringDistanceMeasurer();

+	protected BingQueryRunner yrunner = new BingQueryRunner();

+	protected int MAX_STEPS = 1;

+	protected int MAX_SEARCH_RESULTS = 1;

+	protected float RELEVANCE_THRESHOLD = 1.1f;

+	protected Set<String> visitedURLs = new HashSet();

 

-  private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

-  private ParseTreeChunk parseTreeChunk = new ParseTreeChunk();

	// Verbs expressing a mental attitude; a sentence containing one of them is
	// treated as an opinion and therefore as a more appropriate candidate.
	static List<String> MENTAL_VERBS = new ArrayList<String>(
			Arrays.asList(new String[] { "want", "know", "believe", "appeal", "ask",
					"accept", "agree", "allow", "appeal", "ask", "assume", "believe",
					"check", "confirm", "convince", "deny", "disagree", "explain",
					"ignore", "inform", "remind", "request", "suggest", "suppose",
					"think", "threaten", "try", "understand" }));

 

-  static StringDistanceMeasurer STRING_DISTANCE_MEASURER = new StringDistanceMeasurer();

+	private static final int MAX_FRAGMENT_SENTS = 10;

 

-  // used to indicate that a sentence is an opinion, so more appropriate

-  static List<String> MENTAL_VERBS = new ArrayList<String>(

-      Arrays.asList(new String[] { "want", "know", "believe", "appeal", "ask",

-          "accept", "agree", "allow", "appeal", "ask", "assume", "believe",

-          "check", "confirm", "convince", "deny", "disagree", "explain",

-          "ignore", "inform", "remind", "request", "suggest", "suppose",

-          "think", "threaten", "try", "understand" }));

+	public RelatedSentenceFinder(int ms, int msr, float thresh, String key) {

+		this.MAX_STEPS = ms;

+		this.MAX_SEARCH_RESULTS = msr;

+		this.RELEVANCE_THRESHOLD=thresh;

+		yrunner.setKey(key);

+	}

 

-  private static final int MAX_FRAGMENT_SENTS = 10;

	public RelatedSentenceFinder() {
		// Intentionally empty: the field initializers provide the default
		// configuration (single step, single result, threshold 1.1).
	}

+	public void setLang(String lang) {

+		yrunner.setLang(lang);

 

-  public RelatedSentenceFinder() {

+	}

+	public List<HitBase> findRelatedOpinionsForSentenceFastAndDummy(String word,

+			List<String> sents) throws Exception {

 

-  }

+		List<HitBase> searchResult = yrunner.runSearch(word, 100);

+		return searchResult;

+	}

 

-  public List<HitBase> findRelatedOpinionsForSentenceFastAndDummy(String word,

-      List<String> sents) throws Exception {

-    BingWebQueryRunner yrunner = new BingWebQueryRunner();

-    List<HitBase> searchResult = yrunner.runSearch(word, 100);

-    return searchResult;

-  }

+	public List<HitBase> findRelatedOpinionsForSentence(String sentence,

+			List<String> sents) throws Exception {

+		List<HitBase> opinionSentencesToAdd = new ArrayList<HitBase>();

+		System.out.println(" \n\n=== Sentence  = " + sentence);

+		List<String> nounPhraseQueries = buildSearchEngineQueryFromSentence(sentence);

 

-  public List<HitBase> findRelatedOpinionsForSentence(String sentence,

-      List<String> sents) thr