OPENNLP-419
readme.txt + more code comments for similarity component
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
index cad3e85..ee3cfb1 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
@@ -49,7 +49,10 @@
   public BingQueryRunner() {

 

   }

-

+  /*

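+   * Constructs the Bing search URL from the query (URL-encoded as UTF-8), domain, language and number of hits.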

+   */

+  

   private String constructBingUrl(String query, String domainWeb, String lang,

       int numbOfHits) throws Exception {

     String codedQuery = URLEncoder.encode(query, "UTF-8");

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
index dca0211..cd53d9f 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
@@ -33,7 +33,15 @@
 

 import org.apache.commons.lang.StringUtils;

 

-

+/**

+ * This class does content generation by using web mining and syntactic generalization to get sentences from the web, convert and combine them in the form 

+ * expected to be readable by humans.

+ * 

+ * These are examples of generated articles, given the article title

+ * http://www.allvoices.com/contributed-news/9423860/content/81937916-ichie-sings-jazz-blues-contemporary-tunes

+ * http://www.allvoices.com/contributed-news/9415063-britney-spears-femme-fatale-in-north-sf-bay-area

+ * 

+ */

 

 public class RelatedSentenceFinder

 {

@@ -95,12 +103,16 @@
 		opinionSentencesToAdd = removeDuplicatesFromResultantHits(opinionSentencesToAdd);

 		return opinionSentencesToAdd;

 	}

-	/*

-	 * Main content generation function which takes a seed as a rock group name and produce a list of text fragments by web mining for

-	 * this rock group (or other similar entity). 

-	 */

+	

+	/**

+	   * Main content generation function which takes a seed as a person, rock group, or other entity name and produces a list of text fragments by web mining for that entity.

+	   *  <br>

+	   * @param sentence entity name

+	   * @return List<HitBase> of text fragment structures which contain approved (in terms of relevance) mined sentences, as well as original search results objects

+	   * such as doc titles, abstracts, and urls.

+	   */

 

-	public List<HitBase> findActivityDetailsForEventGroupName(String sentence) throws Exception

+	public List<HitBase> generateContentAbout(String sentence) throws Exception

 	{

 		List<HitBase> opinionSentencesToAdd = new ArrayList<HitBase>();

 		System.out.println(" \n=== Entity to write about = " + sentence);

@@ -129,6 +141,13 @@
 		return opinionSentencesToAdd;

 	}

 

+	/**

+	   * Takes a sentence and extracts noun phrases and entity names to form search queries for finding relevant sentences on the web, which are 

+	   * then subject to relevance assessment by Similarity. Search queries should not be too general (irrelevant search results) or too specific (too few 

+	   * search results)

+	   * @param sentence input sentence to form queries

+	   * @return List<String> of search expressions 

+	   */

 	public static List<String> buildSearchEngineQueryFromSentence(String sentence)

 	{

 		ParseTreeChunk matcher = new ParseTreeChunk();

@@ -209,8 +228,12 @@
 

 	}

 

-	// remove dupes from queries to easy cleaning dupes and repetitive search

-	// afterwards

+	/** remove dupes from queries to ease cleaning dupes and repetitive search

+	 * afterwards

+	 * 

+	 * @param hits List<String> of sentences (search queries, or search results abstracts, or titles)

+	 * @return List<String> of sentences where dupes are removed

+	 */

 	public static List<String> removeDuplicatesFromQueries(List<String> hits)

 	{

 		StringDistanceMeasurer meas = new StringDistanceMeasurer();

@@ -254,6 +277,11 @@
 

 	}

 

+	/** remove dupes from search results

+	 * 

+	 * @param hits List<HitBase> of search results objects

+	 * @return List<HitBase> of search results objects where dupes are removed

+	 */

 	public static List<HitBase> removeDuplicatesFromResultantHits(List<HitBase> hits)

 	{

 		StringDistanceMeasurer meas = new StringDistanceMeasurer();

@@ -293,7 +321,15 @@
 		}

 		return hits;

 	}

-

+	/**

+	 * Takes a single search result for an entity which is the subject of the essay to be written and forms essay sentences 

+	 * from the title, abstract, and possibly original page

+	 * @param item search result (HitBase)

+	 * @param originalSentence seed for the essay to be written

+	 * @param sentsAll list<String> of other sentences in the seed if it is multi-sentence

+	 * @return search result 

+	 */

+	

 	public HitBase augmentWithMinedSentencesAndVerifyRelevance(HitBase item, String originalSentence,

 			List<String> sentsAll)

 	{

@@ -551,7 +587,7 @@
 			// uncomment the sentence you would like to serve as a seed sentence for content generation for an event description

 

 			// uncomment the sentence you would like to serve as a seed sentence for content generation for an event description

-			hits = f.findActivityDetailsForEventGroupName(

+			hits = f.generateContentAbout(

 					"Albert Einstein"

 					//"Britney Spears - The Femme Fatale Tour"

 					// "Rush Time Machine",

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessor.java
new file mode 100644
index 0000000..2463769
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessor.java
@@ -0,0 +1,149 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+package opennlp.tools.similarity.apps;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.Collections;

+import java.util.List;

+import java.util.logging.Logger;

+

+import opennlp.tools.textsimilarity.ParseTreeChunk;

+import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;

+import opennlp.tools.textsimilarity.SentencePairMatchResult;

+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

+

+

+public class SpeechRecognitionResultsProcessor extends BingWebQueryRunner {

+	private static Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.SpeechRecognitionResultsProcessor");

+	private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();

+

+	/**

+	 * Gets an expression and tries to find it on the web. If search results are syntactically similar to this phrase, then 

+	 * we conclude that this phrase is meaningful (makes sense, someone has said something similar). If search results are not similar 

+	 * to this phrase, we conclude that the phrase is meaningless (does not make sense, nobody has ever said something like that)

+	 * @param resp BingResponse, search results for a phrase being assessed with respect to meaningfulness

+	 * @param searchQuery the phrase we are assessing

+	 * @return total similarity score for all search results

+	 */

+	private	double calculateTotalMatchScoreForHits(BingResponse resp, String searchQuery){

+		

+		ParserChunker2MatcherProcessor sm = ParserChunker2MatcherProcessor.getInstance();

+		double totalMatchScore = 0;

+		for(HitBase hit: resp.getHits()){

+			String snapshot = hit.getAbstractText().replace("<b>...</b>", ". ").replace("<b>", "").replace("</b>","");

+			snapshot=snapshot.replace("</B>", "").replace("<B>", "").replace("<br>", "").replace("</br>", "").replace("...", ". ").replace("|", " ").replace(">", " ");

+			snapshot+=" . "+hit.getTitle();

+			Double score = 0.0;

+			try {

+				SentencePairMatchResult matchRes = sm.assessRelevance(snapshot, searchQuery);

+				List<List<ParseTreeChunk>> match = matchRes.getMatchResult();

+				score = parseTreeChunkListScorer.getParseTreeChunkListScore(match);

+				if (score > 1.5) {

+					LOG.info(score + " | "+ match );

+				}

+			}

+			catch (Exception e){

+				LOG.severe("Problem processing snapshot "+snapshot);

+				e.printStackTrace();

+			}

+			 totalMatchScore+=score;

+			

+		}

+		return  totalMatchScore; 

+	}

+	

+	/**

+	 * Phrase meaningfulness assessment function which takes a list of phrases which are speech recognition results and 

+	 * scores each phrase with the meaningfulness score which is determined by 'calculateTotalMatchScoreForHits'.

+	 * @param sents list of phrases which are speech recognition results

+	 * @return list of scored phrases in the same order as the input (no re-ranking is applied here); null if processing any phrase throws an exception

+	 */

+	public List<SentenceMeaningfullnessScore> runSearchAndScoreMeaningfulness(List<String> sents) {

+		List<SentenceMeaningfullnessScore> res = new ArrayList<SentenceMeaningfullnessScore>();

+		double bestSentScore = -1; 

+		String bestSent = null;

+		for(String sentence : sents){

+			BingResponse resp = null, // obtained from bing

+			newResp = null; // re-sorted based on similarity

+			try {

+				List<String> resultList = search(sentence, "", "", 10);

+				resp = populateBingHit(resultList.get(0));

+				double scoreForSentence = calculateTotalMatchScoreForHits(resp, sentence);

+				System.out.println("Total meaningfulness score = "+scoreForSentence + " for sentence = "+sentence);

+				if (scoreForSentence > bestSentScore){

+					bestSentScore = scoreForSentence;

+					bestSent = sentence;

+				}

+				res.add(new SentenceMeaningfullnessScore(sentence, scoreForSentence));

+			} catch (Exception e) {

+				// e.printStackTrace();

+				LOG.info("No search results for query '" + sentence);

+				e.printStackTrace();

+				return null;

+			}

+		}

+		return res;

+		

+	}

+

+	public class SentenceMeaningfullnessScore{

+		String sentence;

+		double score;

+		public SentenceMeaningfullnessScore(String sent, double sc){

+			sentence = sent;

+			score = sc;

+		}

+		public String toString(){

+			return "Total meaningfulness score = "+score + " for sentence = "+sentence +"\n";

+		}

+		public double getScore(){

+			return score;

+		}

+	}

+ public static void main(String[] args){

+	 SpeechRecognitionResultsProcessor proc = new  SpeechRecognitionResultsProcessor();

+	 proc.runSearchAndScoreMeaningfulness( Arrays.asList(new String[]{

+			 "meeting with alex at you for not to come over to 8 pm",

+			 "meeting with alex at you for not to come over to eat",

+			 "meeting with alex at il fornaio tomorrow to 8 pm"

+	 }));

+	 

+	 proc.runSearchAndScoreMeaningfulness( Arrays.asList(new String[]{

+			 "remember to buy milk tomorrow for details",

+			 "remember to buy milk tomorrow from trader joes",

+			 "remember to buy milk tomorrow from 3 to jones",

+			 "remember to buy milk tomorrow for for details",

+			 "remember to buy milk tomorrow from third to joes",

+			 "remember to buy milk tomorrow from third to jones",

+			 "remember to buy milk tomorrow from for d jones"

+	 }));

+	 

+	 proc.runSearchAndScoreMeaningfulness( Arrays.asList(new String[]{

+	 "I'm off tomorrow to shop at trader joes",

+	 "number to get milk tomorrow trader joes",

+	 "number 2 finals tomorrow from trader joes",

+	 "number 2 finals tomorrow trader joes",

+	 "number to buy move tomorrow from trader joes",

+	 "number to buy move tomorrow trader joes",

+	 "define move tomorrow from trader joes",

+	 "define move tomorrow trader joes",

+	 }));

+ }

+

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/gen.txt b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/gen.txt
index 4f12309..6d2e186 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/gen.txt
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/gen.txt
@@ -1,24 +1,55 @@
-Albert Einstein was a German-born theoretical physicist who developed the theory of general relativity, effecting a revolution in physics.	Albert Einstein was one of the greatest minds in world history. Einstein is known as a brilliant physicist.

-Their son attended a Catholic elementary school from the age of five until ten. 9 Although Einstein had early speech difficulties, he.	The Einstein refrigerator was an Albert Einstein invention that was patented in 1930.

-A short account of Albert Einstein inventions is presented through the following article. Different theories of this scientists, notable achievements, etc. are mentioned.	Einstein was a theoretical physicist and not an inventor. He gave us the special and general theories of relativity, and he did some important work on the photoelectric.

-Although he was one of the most influential scientists in history, Albert Einstein was not a prolific inventor, in the common sense.	Albert Einstein biography includes facts, inventions, life, accomplishments, childhood & timeline of Albert Einstein. This short biography of Albert Einstein gives an. This short biography of Albert Einstein gives an _should_find_orig_.

-Albert Einstein, a famous scientist is well known for his brilliant contributions in the field of physics and in particular, famous for the theory of relativity.	Albert Einstein E = M C 2 Albert Einstein The most beautiful thing we can experience is the mysterious.

-Quotes from the genius Albert Einstein, Einstein Quotes on relativity, religion, and war.	New discussion topics go at the.

-A hundred times every day I remind myself that my inner and outer life are based on the labors of other men, living and dead, and that I must exert myself in order to.	For this achievement, Einstein.

-He did not have an patents or inventions, though he did work in the Swiss Patent Office for a number of years.	How Albert Einstein Saw Things A Little Differently. _should_find_orig_ Albert Einstein had just administered an examination to an advanced class of Physics students. Albert Einstein had just administered an examination to an advanced class of Physics students.

-For more information and related articles about Albert Einstein, check out and explore these links.	The Albert Einstein Award (sometimes mistakenly called the Albert Einstein Medal because it was accompanied with a gold medal) was an award in theoretical physics.

-Of course, he made a great contribution into the development.	A series of Albert Einstein quotes collected by the staff at Quotes and Sayigns.com.

-Albert Einstein From Wikiquote Albert Einstein (14 March 1879 18 April 1955) was a German. Albert Einstein From Wikiquote Albert Einstein (14 March 1879 18 April 1955) was a German _should_find_orig_.	"I have started to write before many times, only to tear the letter into bits. For you are such a brillant sic person. _should_find_orig_ I am just an average twelve year old girl _should_find_orig_. I am just an average twelve year old girl.

-Biography Average: 0 Born on March 14, 1879, in Ulm, Germany to Hermann Einstein, a salesman and engineer and Pauline Einstein (n e Koch).	Best Answer: On his deathbed he announced that he felt God might exist. It is possible to merge beliefs and just not.

-Albert Einstein on WN Network delivers the latest Videos and Editable pages for News & Events, including Entertainment, Music, Sports, Science and more, Sign up and share.	Learn English Online 4 FREE - learn english with games, grammar tests, american slang, TOEFL, esl forum, efl chat, pictures, puzzles.

-Popularly regarded as the most important scientist of the 20th.	Text of the Letters Einstein's First Letter to Roosevelt Notes: The letter that launched the arms race.

-Too few of my compatriots truly recognize that while theoretical physics may have been what Albert Einstein is best known for, he was also an incredible philosopher and.	Six weeks later the family moved to Munich, where he later on began his.

-March 14, 1879 - April 18, 1955 Physicist and Mathematician Nobel Laureate for Physics 1921 "There are only two ways to live your life.	He is best known for his theories of special relativity and general relativity.

-And then you have to play better than anyone else" - Albert Einstein.	Albert Einstein commended that the fourth world war would be fought with sticks and stones Do you agree with his comment Give reason for your answer .

-Just noticed that a link to List of Pantheists was added and removed. From what I know of Einstein's religion, it's probably misleading to associate him with any.	We are looking for a sharp motivated person that enjoys.

-Einstein the Creationist Dispatches from the Culture Wars (ScienceBlogs Channel : Politics) This is what happens when we elect the virulently ignorant to public.	by Laura Knight-Jadczyk I want to talk about death here. But I have in mind some very interesting deaths that.

-Great Minds That Shaped Our Civilisation: Albert Einstein.	This website, www.alberteinsteinsite.com, is dedicated to the brilliant physicist.

-Albert Einstein Theory of Relativity, Physics: Albert Einstein's Theory of Special and General Relativity is explained by the Spherical Standing Wave Structure of Matter.	A happy man is too satisfied with the present to dwell too much on the future. "My Future Plans" an essay written at age 17 for school exam (18 September 1896) The.

-Our primary mission is to provide.	His gift to the world was infinite knowledge, and his name.

-YouTube Videos matching query: Einstein Albert. _should_find_orig_ ALBERT EINSTEIN - JESSE VENTURA " STRIKING SIMILARITY "May 16, 2008 7:03 PM.	First collected on Vodpod.com by MCMM on Feb 9, 2011. Sign Up Now Watch the best videos collected by MCMM. Join 5 others following their collection of 155 videos.

-This site is mainly about fiction and non fiction regarding Albert Einstein, Niels Bohr, and comic book maven Stan Lee.	

+Albert Einstein was born on March 14, 1879 in Ulm, Germany., As it turned out, Albert preferred to learn on his own and had taught himself advanced mathematics and science by the time he was a teenager. Another interesting fact is that between the ages of six to thirteen, he studied the violin.	Albert Einstein Biography - In this video, learn about the life of the Nobel Prize winner, Albert Einstein.

+Einstein's high school transcript Albert Einstein was born in Ulm, in the Kingdom of Württemberg in the German Empire on 14 March 1879. 8., With a few friends he met in Bern, Einstein started a small discussion group, self-mockingly named "The Olympia Academy", which met regularly to discuss science and philosophy. Their readings included the works of Henri Poincaré, Ernst Mach, and David Hume, which influenced his scientific and philosophical outlook.	Albert Einstein biography includes facts, inventions, life, accomplishments, childhood & timeline of Albert Einstein.

+Albert Einstein , Niels Bohr, Physics: Wave Structure of Matter (WSM) explains Bohr and Einstein's Discussion on Epistemology of Physics.	With friends he met in Bern, Einstein formed a weekly discussion club on. science and philosophy, which he jokingly named "The Olympia Academy.".

+Albert Einstein was born into a Jewish family in Ulm, Württemberg, Germany. He is best known for his theory of relativity and specifically the equation E = mc2, which indicates the relationship between mass and energy (or mass-energy equivalence)., In 1946, Einstein collaborated with Rabbi Israel Goldstein, Middlesex heir C. Ruggles Smith, and activist attorney George Alpert on the Albert Einstein Foundation for Higher Learning, Inc. which was formed to create a Jewish-sponsored secular university, open to all students, on the grounds of the former Middlesex College in Waltham, Massachusetts. Middlesex was chosen in part because it was accessible from both Boston and New York City, Jewish cultural centers of the USA.	Einstein was born in Germany on March 14, 1879.As a kid he had trouble learning to speak.

+German-American physicist who, in 1905, published three papers, each of which had a profound effect on the development of physics.	"Why is it that nobody understands me and everybody likes me " ' Albert Einstein Well, this very quote from the man himself,.

+Interview with Albert Einstein.	Albert Einstein was born into a Jewish family in Ulm, W lrttemberg, Germany.

+Einstein's God Albert Einstein's Quest As a Scientist and As a Jew to Replace a Forsaken God. First given in 1979, the award is presented to people who have "rendered outstanding services" in connection with Einstein.	Albert graduated in 1900 with a degree in physics., Albert Einstein's most noted contribution to the world is his theory of relativity. By 1902, Einstein was working on combining time and space, matter and energy., secondary school at Aarau planning to use this route to enter the ETH in. While at Aarau he wrote an essay (for which was only given a.

+Brief and Straightforward Guide: Who Was Albert Einstein ., This led to him going back to secondary school in Aarau, Switzerland, which he graduated in 1896.	Albert Einstein was best known for his theory of relativity, E=mc., Albert Einstein was born in Germany in the late 1800's., When he was in school there he was always getting into trouble. He was constantly playing pranks on his teachers and classmates in school.

+Albert Einstein is universally recognized as the greatest physicist of our age., theory of relativity, Einstein's other work would have made him the second greatest physicist of his time. theory of relativity, Einstein's other work would have made him the second greatest physicist of his time.	All free essays submitted and graded by high school and college students.

+The Great Scientist Albert Einstein was born on 14th march 1879 in Ulm, Kingdom of Wurttemberg German Empire.	What is the invention of (has been invented by) Albert Einstein (14 March 1879 18 April 1955), the German theoretical physicist who discovered the theory of general relativity, effecting a revolution in physics . special relativity (the physical theory of the relation between space and time).

+Although his professors did not think highly of him, Einstein graduated from school in 1900 by studying the notes of a classmate.	Albert Einstein was born on March 14, 1879, in Ulm, Germany, but he grew., interested in and better suited for physics than mathematics Einstein passed his examination to graduate from the FIT in 1900, but.

+Einstein was now able to move to Prague with his wife and two sons, Hans Albert and Eduard. Finally, after being promoted to a professor, Einstein and his family were able to enjoy a good standard of living, but the job's main advantage was that it allowed Einstein to have access to an enormous library., In 1900 he graduated from the Institute and then achieved citizenship to Switzerland.	Summary Fifty-four love letters portray the caring relationship between Albert Einstein and his first wife by showing how Maric acted as the genius's intellectual confidant during his isolated years at Princeton. Amazon.com description: Product Description: In 1903, despite the vehement objections of his parents, Albert Einstein married Mileva Maric, the companion, colleague and confidante whose influence on his most creative years has given rise to much speculation.

+2011 Albert Einstein College of Medicine of Yeshiva University. The research, which appears in the January 2 advance online issue of The Journal of Clinical Investigation, could lead to new strategies for treating Parkinson's and other neurodegenerative diseases.	This book explores the life and work of Albert Einstein , the Nobel Prize-winning scientist.

+The Life and Works of Albert Einstein essays written by students., Edited and approved by our internal editing staff.	The WP entry merely says "Einstein graduated in 1900 from ETH with a degree in physics.". The entry's source footnote links to a biographical site which states is no more helpful: he ".enrolled at the Swiss Federal Polytechnic School in Zurich.

+Biography Albert Einstein, a German-American physicist, was born in Ulm (Württemberg, Germany) on March 14, 1879, and spent his youth in Munich, where his family owned a small electric machinery shop. He didn't talk until the age of 3, but even as a youth he showed a briljant curiosity about nature and a great understandinf of difficult mathematical concepts.	Albert Einstein Quotes Quotations on Philosophy, Physics, Religion, Science, Metaphysics, Humanity, War, Peace, Education, Knowledge, Morality and Freedom.

+These two "wonders" ' the encounter with a compass and a geometry book have strongly influenced his further way of life.	- Exchange of letters in 1932, which postulated both biological and social factors encouraging war and aggression.

+Albert Einstein 'Moral Decay', Out of My Later Years (1937, 1995), 9., For example, it is found without citation in Albert Einstein , Jerry Mayer and John P.	Albert Einstein Online: The collected papers of Albert Einstein and Information about Albert Einstein., The written works of Albert Einstein known as The World as I see it.

+Too few of my compatriots truly recognize that while theoretical physics may have been what Albert Einstein is best known for, he was also an incredible philosopher and wise man. A happy man is too satisfied with the present to dwell too much on the future.	on March 22, 2010, 3:09 PM Dr. Kaku, I also have great respect for Albert Einstein and his mind bending capacity for out side of the box thinking. It was my respect for Einstein's creative thinking ability and that of many other great creative thinkers that inspired me to seek out creative thinking methods that the rest of us could apply to unleash our creative thinking abilities.

+The insight of Albert Einstein into the Theory of relativity occurred when he created a visual image of chasing after and matching the speed of a beam light (Kosslyn and Koenig, 1992). Later he turned this visual image into "words and symbols.".	This is definitely an improvement in some respects (my prose was not beautiful :), but it's also potentially misleading, because it could be interpreted as saying "MM shows: where there is no medium, there is no light". I've tried to improve on the original wording, while avoiding the misinterpretation, with "light waves could not be travelling through a medium".

+Albert Einstein not only changed the scientific community forever, but changed everyday life as we know it. Bonus.com (Beakman and Jack), the PBS website, and Joe's room have several activities related to Einstein and his theories that have solved centuries old problems in physics and rocked even non physicists' view of the world., Albert Einstein not only changed the scientific community forever, but changed everyday life as we know it. Bonus.com (Beakman and Jack), the PBS website, and Joe's room have several activities related to Einstein and his theories that have solved centuries old problems in physics and rocked even non physicists' view of the world.	A hundred times every day I remind myself that my inner and outer life are based on the labors of other men, living and dead, and that I must exert myself in order to give in the same measure as I have received and am still receiving.. Albert Einstein (14 March 1879 - 18 April 1955) German-born Swiss-American physicist.

+sign up to the Free 'The Power' reports. This larger intelligence field is monitored by such experiments as the Global Consciousness Project run at Princeton University.and if enough people feel positive emotions such as love and peace, it becomes easier for everyone, like the 100th monkey theory (or the holographic mind theory).	In the middle 1920s, Einstein and another giant of modern physics, Leo Szilard, collaborated to invent a novel method of refrigeration which was a hit at trade shows but lacked financial backing. Other essays cover the idea of 'gravitational lensing, confirmation of which brought Einstein world renown after the Eddington eclipse expedition of 1919.

+Relevancy Score: 54 Albert Einstein said that "if at first the idea is not absurd, then there is no hope for it.". In this vein, here's a list of some of my favourite ideas and innovations from the past 18 months, together with a few comments and.READL., Imagination and innovation are our greatest weapons., Lack of using them is the only thing strong enough to stop any of us from achieving our goals. Factually, we can each get more done in less time if we simply take note on establishing WHERE we are going.READL.	Albert Einstein had a part in alerting the United States government to the possibility of building an atomic bomb, but his theory of relativity is not required in discussing fission. The theory of fission is what physicists call a non-relativistic theory, meaning that relativistic effects are too small to affect the dynamics of the fission process significantly. . -Robert Serber, Manhattan project scientist., It's multiple stylish innovation and technologies are far-off human understanding even until recently. I am in contentious with myself that the gene (DNA) of Tesela is most probably something different from MANKIND.

+with Sir Isaac Newton and Albert Einstein for the fundamental nature of his contributions.	And if you pay attention to his work and his most famous statements about it, you might just think he was talking about us,.

+During his time in Berlin he was working on numerous technical inventions.	7.5 Alternative 2A: Albert Einstein should have main credit for General Relativity. 2.7.6 Alternative 2B: David Hilbert should have main credit for General Relativity., In Feb 2011, the talk page of Albert Einstein filled up with a discussion of an alleged relativity priority dispute. Mainstream historians say this has been put to rest; others hotly disagree, as became very evident.

+The Albert Einstein Award (sometimes called the Albert Einstein Medal because it is accompanied with a gold medal is an award in Theoretical physics, that was established The Albert Einstein Peace Prize is given yearly by the Chicago -based Albert Einstein Peace Prize Foundation. The Walhalla Hall of Fame and Honor is a neo-classical Hall of fame located on the Danube River 10 km east of Regensburg, in Bavaria 102.	Einstein's high school transcript Albert Einstein was born in Ulm, in the Kingdom of Württemberg in the German Empire on 14 March 1879. 8., Einstein later gave his impressions of the Japanese in a letter to his sons: 39 :307 "Of all the people I have met, I like the Japanese most, as they are modest, intelligent, considerate, and have a feel for art." 39 :308. On his return voyage, he also visited Palestine for 12 days in what would become his only visit to that region.

+Albert Einstein was born in Ulm, in the Kingdom of W lrttemberg in the German Empire on 14., Einstein later gave his impressions of the Japanese in a letter to.	The Albert Einstein Medal is an award presented by the Albert Einstein Society in Bern, Switzerland.

+Albert Einstein was born into a Jewish family in Ulm, Württemberg, Germany. Youth and schooling Young Albert before the Einsteins moved from Germany to Italy.	Around 1886 Albert Einstein began his school career in Munich.

+What Bergson believed, the physicist Albert Einstein demonstrated in his.	Personally, I would put him on the podium along with these other science popularisers:Albert Einstein, Richard P. Feynman and Carl Sagan. (Also, NASA's shining glory of the 1960's and pre-twentieth/twenty-first century scientists).

+And we're farther than ever, it seems, from a momentously difficult truth that Albert Einstein uttered during its first years, when the U.S. government still held a monopoly on the split atom. 'This basic power of the universe cannot be fitted into the outmoded concept of narrow nationalisms, he wrote.	Einstein's answer to a conferee at a meeting at.

+Walt Disney and Albert Einstein were two great men who applied their. creative imaginative power to change their lives and the world.	Albert Einstein explaining energy and mass:. Albert EinsteinenergyGodmassquotesReligionspirituality Tweet All religions, arts and sciences are branches of the same tree.

+Albert Einstein's clarity on the terms cold and darkness were important distinctions and ones that cannot be ignored. . They affect how we view those terms and how we deal with them.	Albert Einstein knew the value of the unconscious mind.

+There is no genius in the world as revered and as loved as Albert Einstein., Albert Einstein himself admitted that the reason he was so smart was because he played the violin. One friend, G.J. Withrow, confided that the way Einstein dealt with problems and equations was by improvising on the violin .	(But Time does not exist as a 'thing in itself' like Newton thought!) Let us now consider Albert Einstein's analysis of Newton's Mechanics (which is lucid and logical as reflects the greatness of Albert Einstein) The first attempt to lay a uniform theoretical foundation was the work of Newton. In his system everything is reduced to the following concepts:., (Albert Einstein, 1954) Albert Einstein considered matter to be spatially extended (and represented by Spherical Force Fields) thus he did not believe in the existence of a fundamental Space or Time that was separate from Matter. As with Leibniz and Mach, Albert Einstein believed that all motion of matter in Space could instead be understood as motion of matter relative to other matter, thus the concept of an absolute Space became unnecessary In Newtonian physics the elementary theoretical concept on which the theoretical description of material bodies is based is the material point, or particle.

+Albert Einstein I am enough of an artist to draw freely upon my imagination., - Albert Einstein Imagination is more important than knowledge, for knowledge is limited while imagination embraces the entire world. - Albert Einstein In order to be an immaculate member of a flock of sheep, one must above all be a sheep oneself.	in the Hand by Paul Gabriel Tesla and Albert Einstein came via.

+when you admire Charlie Chaplin and Albert Einstein , when you follow the democracy of the.	Sushmita Dutta "I'm not an atheist and I don't think I can call myself a pantheist.

+Nearly a century ago, Albert Einstein suggested that time should move.	The experiments will celebrate the 100th anniversary of Albert Einstein's discoveries.

+state of matter was first theorized by Albert Einstein.	between Albert Einstein and Pablo Picasso in this clever and humorous play set in the famed Lapin Agile bar in the Montmartre section of Paris. between Albert Einstein and Pablo Picasso in this clever and humorous play set in the famed Lapin Agile bar in the Montmartre section of Paris.

+he was trying to think of who the third person (besides Albert Einstein and himself) was.	It has since been used to refer to other years, especially to 1905, when Albert Einstein made equally revolutionary discoveries concerning the photoelectric effect, Brownian motion and the special theory of relativity. annus terribilis dreadful year Used to describe 1348, the year the Black Death began to afflict Europe.

+Since 1974, the scientist has worked on marrying the two cornerstones of modern physics -- Albert Einstein's General Theory of Relativity, which concerns gravity and large-scale phenomena, and quantum theory, which covers subatomic particles. His latest comments suggest he has broken away from previous views he has expressed on religion.	Matter (WSM) explains Albert Einstein's Light Quanta 'Photon' / Photoelectric Effect of Quantum Theory.

+Thanks to a generous grant, the Hebrew University of Jerusalem is beginning to digitize their Einstein collection, making it available to everyone, everywhere.	Many people's last words are not heard or are not captured for posterity. DAGwyn 00:42, 23 October 2007 (UTC) I'd argue it is true (in the Wikipedia sense of the word) and it's not useless informationit's interesting.

+location and attach with a string as you discuss each time Albert Einstein moved.	view original Albert Einstein Best Quotes on Success. Albert Einstein Quotes on Success. Question by Anouk : What are your thoughts on this Albert Einstein quote .

+Indigo Books & Music is a Canadian bookseller committed to providing a stress-free approach to satisfying the booklover.	The talk will introduce the concept of harms reduction with a focus on hepatitis C and how community-based screenings have affected the current situation. City has a higher prevalence of hepatitis C than the entire United States overall.

+The works of Albert Einstein caused an upheaval throughout the science world by dispensing with Newton's 'clockwork universe and visualizing a relativistic universe where time and space become one. I have taught science and mathematics for twenty years, but rarely seize the opportunity to introduce Einstein's world to my students.	The most comprehensive collection of Albert Einstein quotes online., I am not interested in this or that phenomenon, in the spectrum of this or that element. I want to know His thoughts; the rest are details." - Albert Einstein.

+The following article by Albert Einstein appeared in the New York Times Magazine on November 9, 1930 pp 1-. It has been reprinted in Ideas and Opinions, Crown Publishers, Inc. 1954, pp 36 - 40., A little consideration will suffice to show us that the most varying emotions preside over the birth of religious thought and experience. With primitive man it is above all fear that evokes religious notions - fear of hunger, wild beasts, sickness, death.	Einstein's high school transcript Albert Einstein was born in Ulm, in the Kingdom of Württemberg in the German Empire on 14 March 1879. 8., ^ Schilpp (Ed.), P. A. (1979), Albert Einstein Autobiographical Notes, Open Court Publishing Company, pp. 89 . ^ Dudley Herschbach, "Einstein as a Student", Department of Chemistry and Chemical Biology, Harvard University, Cambridge, MA, USA, page 3, web: HarvardChem-Einstein-PDF: Max Talmud visited on Thursdays for six years.

+Einstein accepted the position, and in a show of support to the new democracy, Einstein reacquired German citizenship. In the years after the war, Einstein was very vocal in his support for Germany.	dice with the universe,& quot; gets quoted for vastly different purposes., I wanted to understand what Einstein meant as a physicist when he said that.

+The film footage showing what appears to be a woman on a cell phone from Charlie Chaplin's 1928 movie, The Circus, is causing a lot of people to ask questions.	Einstein's Cosmos has 196 ratings and 38 reviews., Gary said: Yes, it's taken me this long to get round to reading it., I bought it as cheap, damaged stock around 2005. Yesterday I slid it off the shelf, blew off the dust and started reading.

+Albert Einstein Middle School staff recognizes. that full participation in all promotion ceremonies is a privilege to be earned.	List of all the questions and answers.

+A few weeks ago, I visited the school where Albert Einstein got his first steady job., He was an Aushilfslehrer, an assistant lecturer.	following article appeared in the New York Times Magazine on November 9,. following article appeared in the New York Times Magazine on November 9,., A little consideration will suffice to show us that the most. varying emotions preside over the birth of religious thought and.

+Albert Einstein led an interesting life, from his beginnings as a mathematical prodigy, to his heyday when he popularized physics, to his old age where his status as a living legend afforded him many opportunities. Folsing does a great job detailing Einstein the man in each of these sections.	In September 1915, Einstein met Romain Rolland in Switzerland., This speech was met with enthusiasm by pacifists, and Einstein quickly became an international hero in the eyes of the peace movement. There were however also critical comments, such as the remark made by Romain Rolland: "Einstein seems to overlook the fact that the technique of war has changed since 1914, and is still changing.

+6. 7 Let's have a poll on Hilbert & Poincare!. 6. 7.1 Alternative 1A: Albert Einstein should have main credit for Special Relativity. 6.2.7.2 Alternative 1B: Henri Poincare should have main credit for Special Relativity.	Sharp founded the Albert Einstein Institution in 1983, dedicated to advancing the study and utilization of nonviolent conflict in defense of freedom, justice, and democracy. Long considered the foremost authority in his field, Sharp has inspired generations of progressive peace, labor, feminist, environmental, and social justice activists in the United States and around the world.

+I intend to insert material into the Albert Einstein entry near the existing claim that Albert Einstein was a zionist. I intend to elaborate further, citing the volume of his writings titled About Zionism:.	Like this: Like Be the first to like this post Tagged with: albert einstein, albert einstein famous quotes, albert einstein most famous quote, albert einstein most famous quotes, albert einstein most popular quote, einstein, famous quotes by einstein about love, most famous quote by einstein, most famous quote by einstein about life, most famous quotes by albert einstein, most popular quote by albert einstein 1 comment. Most Famous William Shakespeare Quotes in William Shakespeare Quote. Friedrich Nietzsche Most Famous Quote in Quotes by Friedrich Nietzsche L.

+Einstein quote: The most beautiful thing we can experience is the.	There are no authorities to follow blindly: Facts are valid if they can be realized in the spiritual realm or can be reproduced in experimental science. A remarkable characteristic of Einstein's and the Dalai Lama's writings and speeches is that words come from their own minds and experiences.

+A group of experienced writers will produce any term paper you request in a matter of hours. Research Links Top 50 Essay Sites Contact Us Order your custom term paper for only $12.95 a page!., Albert Einstein: Man of Vision Albert Einstein, perhaps the greatest mind ever to have walked the face of the earth, was born on March 14, 1879 in Ulm, Germany. As a boy, he hated school, and felt that the regimented and repetitive nature of schooling in Germany at that time had any promise of helping his future.	Richard B. Lipton, M.D. the Lotti and Bernard Benson Faculty Scholar in Alzheimer's Disease and professor and vice chair of The Saul R. Korey Department of Neurology, will continue to lead an interdisciplinary team of researchers and health care professionals working to discover dementia's causes and potential therapies. 'We are grateful for the longstanding support of this research program from the NIA and Einstein, said Dr. Lipton, who has been the director of EAS since 1992.

+Albert Einstein was probably the greatest physicist of the 20th century., Roni Grosz, curator of the Albert Einstein Archives of the Hebrew University in Jerusalem, tells Gelf, "There is no proof of Einstein ever having said or written it.". While Grosz notes that it is extremely difficult to disprove a quote, he "could not remember even one reference to bees in Einstein's writings.".	Einstein's high school transcript Albert Einstein was born in Ulm, in the Kingdom of Württemberg in the German Empire on 14 March 1879. 8., Einstein collaborated with others to produce a model of a wormhole.

+of the future would be able to discover what made Einstein so intelligent.	It is worth to continue to read on as Albert Einstein was both a Scientist as well as a British Process or Perennial Philosopher and mathematician like Whitehead. Frankly put, a Scientist must be a precise process or perennial philosopher in order to analyse and synthesize his invention or novelty., Both Gotama Buddha and Albert Einstein discover the dynamic Unity of Reality of the necessary interconnection of all myriad things in the universe. . . 'Human being is part of the whole called by us universe, a part limited in time and space.

+further more increase the company s competitiveness and quality of its produce.

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/stemmer/PorterStemmer.java b/opennlp-similarity/src/main/java/opennlp/tools/stemmer/PorterStemmer.java
new file mode 100644
index 0000000..e23da90
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/stemmer/PorterStemmer.java
@@ -0,0 +1,521 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.stemmer;

+

+

+	import java.io.IOException;

+	import java.io.InputStream;

+	import java.io.FileInputStream;

+

+	import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR;

+	import org.apache.lucene.util.ArrayUtil;

+

+	/**

+	 *

+	 * Stemmer, implementing the Porter Stemming Algorithm

+	 *

+	 * The Stemmer class transforms a word into its root form.  The input

+	 * word can be provided a character at a time (by calling add()), or at once

+	 * by calling one of the various stem(something) methods.

+	 */

+

+	public class PorterStemmer

+	{

+	  private char[] b;

+	  private int i,    /* offset into b */

+	    j, k, k0;

+	  private boolean dirty = false;

+	  private static final int INITIAL_SIZE = 50;

+

+	  public PorterStemmer() {

+	    b = new char[INITIAL_SIZE];

+	    i = 0;

+	  }

+

+	  /**

+	   * reset() resets the stemmer so it can stem another word.  If you invoke

+	   * the stemmer by calling add(char) and then stem(), you must call reset()

+	   * before starting another word.

+	   */

+	  public void reset() { i = 0; dirty = false; }

+

+	  /**

+	   * Add a character to the word being stemmed.  When you are finished

+	   * adding characters, you can call stem() to process the word.

+	   */

+	  public void add(char ch) {

+	    if (b.length <= i) {

+	      b = ArrayUtil.grow(b, i+1);

+	    }

+	    b[i++] = ch;

+	  }

+

+	  /**

+	   * After a word has been stemmed, it can be retrieved by toString(),

+	   * or a reference to the internal buffer can be retrieved by getResultBuffer

+	   * and getResultLength (which is generally more efficient.)

+	   */

+	  @Override

+	  public String toString() { return new String(b,0,i); }

+

+	  /**

+	   * Returns the length of the word resulting from the stemming process.

+	   */

+	  public int getResultLength() { return i; }

+

+	  /**

+	   * Returns a reference to a character buffer containing the results of

+	   * the stemming process.  You also need to consult getResultLength()

+	   * to determine the length of the result.

+	   */

+	  public char[] getResultBuffer() { return b; }

+

+	  /* cons(i) is true <=> b[i] is a consonant. */

+

+	  private final boolean cons(int i) {

+	    switch (b[i]) {

+	    case 'a': case 'e': case 'i': case 'o': case 'u':

+	      return false;

+	    case 'y':

+	      return (i==k0) ? true : !cons(i-1);

+	    default:

+	      return true;

+	    }

+	  }

+

+	  /* m() measures the number of consonant sequences between k0 and j. if c is

+	     a consonant sequence and v a vowel sequence, and <..> indicates arbitrary

+	     presence,

+

+	          <c><v>       gives 0

+	          <c>vc<v>     gives 1

+	          <c>vcvc<v>   gives 2

+	          <c>vcvcvc<v> gives 3

+	          ....

+	  */

+

+	  private final int m() {

+	    int n = 0;

+	    int i = k0;

+	    while(true) {

+	      if (i > j)

+	        return n;

+	      if (! cons(i))

+	        break;

+	      i++;

+	    }

+	    i++;

+	    while(true) {

+	      while(true) {

+	        if (i > j)

+	          return n;

+	        if (cons(i))

+	          break;

+	        i++;

+	      }

+	      i++;

+	      n++;

+	      while(true) {

+	        if (i > j)

+	          return n;

+	        if (! cons(i))

+	          break;

+	        i++;

+	      }

+	      i++;

+	    }

+	  }

+

+	  /* vowelinstem() is true <=> k0,...j contains a vowel */

+

+	  private final boolean vowelinstem() {

+	    int i;

+	    for (i = k0; i <= j; i++)

+	      if (! cons(i))

+	        return true;

+	    return false;

+	  }

+

+	  /* doublec(j) is true <=> j,(j-1) contain a double consonant. */

+

+	  private final boolean doublec(int j) {

+	    if (j < k0+1)

+	      return false;

+	    if (b[j] != b[j-1])

+	      return false;

+	    return cons(j);

+	  }

+

+	  /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant

+	     and also if the second c is not w,x or y. this is used when trying to

+	     restore an e at the end of a short word. e.g.

+

+	          cav(e), lov(e), hop(e), crim(e), but

+	          snow, box, tray.

+

+	  */

+

+	  private final boolean cvc(int i) {

+	    if (i < k0+2 || !cons(i) || cons(i-1) || !cons(i-2))

+	      return false;

+	    else {

+	      int ch = b[i];

+	      if (ch == 'w' || ch == 'x' || ch == 'y') return false;

+	    }

+	    return true;

+	  }

+

+	  private final boolean ends(String s) {

+	    int l = s.length();

+	    int o = k-l+1;

+	    if (o < k0)

+	      return false;

+	    for (int i = 0; i < l; i++)

+	      if (b[o+i] != s.charAt(i))

+	        return false;

+	    j = k-l;

+	    return true;

+	  }

+

+	  /* setto(s) sets (j+1),...k to the characters in the string s, readjusting

+	     k. */

+

+	  void setto(String s) {

+	    int l = s.length();

+	    int o = j+1;

+	    for (int i = 0; i < l; i++)

+	      b[o+i] = s.charAt(i);

+	    k = j+l;

+	    dirty = true;

+	  }

+

+	  /* r(s) calls setto(s) only if m() > 0; it is used by step3() and step4(). */

+

+	  void r(String s) { if (m() > 0) setto(s); }

+

+	  /* step1() gets rid of plurals and -ed or -ing. e.g.

+

+	           caresses  ->  caress

+	           ponies    ->  poni

+	           ties      ->  ti

+	           caress    ->  caress

+	           cats      ->  cat

+

+	           feed      ->  feed

+	           agreed    ->  agree

+	           disabled  ->  disable

+

+	           matting   ->  mat

+	           mating    ->  mate

+	           meeting   ->  meet

+	           milling   ->  mill

+	           messing   ->  mess

+

+	           meetings  ->  meet

+

+	  */

+

+	  private final void step1() {

+	    if (b[k] == 's') {

+	      if (ends("sses")) k -= 2;

+	      else if (ends("ies")) setto("i");

+	      else if (b[k-1] != 's') k--;

+	    }

+	    if (ends("eed")) {

+	      if (m() > 0)

+	        k--;

+	    }

+	    else if ((ends("ed") || ends("ing")) && vowelinstem()) {

+	      k = j;

+	      if (ends("at")) setto("ate");

+	      else if (ends("bl")) setto("ble");

+	      else if (ends("iz")) setto("ize");

+	      else if (doublec(k)) {

+	        int ch = b[k--];

+	        if (ch == 'l' || ch == 's' || ch == 'z')

+	          k++;

+	      }

+	      else if (m() == 1 && cvc(k))

+	        setto("e");

+	    }

+	  }

+

+	  /* step2() turns terminal y to i when there is another vowel in the stem. */

+

+	  private final void step2() {

+	    if (ends("y") && vowelinstem()) {

+	      b[k] = 'i';

+	      dirty = true;

+	    }

+	  }

+

+	  /* step3() maps double suffixes to single ones. so -ization ( = -ize plus

+	     -ation) maps to -ize etc. note that the string before the suffix must give

+	     m() > 0. */

+

+	  private final void step3() {

+	    if (k == k0) return; /* For Bug 1 */

+	    switch (b[k-1]) {

+	    case 'a':

+	      if (ends("ational")) { r("ate"); break; }

+	      if (ends("tional")) { r("tion"); break; }

+	      break;

+	    case 'c':

+	      if (ends("enci")) { r("ence"); break; }

+	      if (ends("anci")) { r("ance"); break; }

+	      break;

+	    case 'e':

+	      if (ends("izer")) { r("ize"); break; }

+	      break;

+	    case 'l':

+	      if (ends("bli")) { r("ble"); break; }

+	      if (ends("alli")) { r("al"); break; }

+	      if (ends("entli")) { r("ent"); break; }

+	      if (ends("eli")) { r("e"); break; }

+	      if (ends("ousli")) { r("ous"); break; }

+	      break;

+	    case 'o':

+	      if (ends("ization")) { r("ize"); break; }

+	      if (ends("ation")) { r("ate"); break; }

+	      if (ends("ator")) { r("ate"); break; }

+	      break;

+	    case 's':

+	      if (ends("alism")) { r("al"); break; }

+	      if (ends("iveness")) { r("ive"); break; }

+	      if (ends("fulness")) { r("ful"); break; }

+	      if (ends("ousness")) { r("ous"); break; }

+	      break;

+	    case 't':

+	      if (ends("aliti")) { r("al"); break; }

+	      if (ends("iviti")) { r("ive"); break; }

+	      if (ends("biliti")) { r("ble"); break; }

+	      break;

+	    case 'g':

+	      if (ends("logi")) { r("log"); break; }

+	    }

+	  }

+

+	  /* step4() deals with -ic-, -ful, -ness etc. similar strategy to step3. */

+

+	  private final void step4() {

+	    switch (b[k]) {

+	    case 'e':

+	      if (ends("icate")) { r("ic"); break; }

+	      if (ends("ative")) { r(""); break; }

+	      if (ends("alize")) { r("al"); break; }

+	      break;

+	    case 'i':

+	      if (ends("iciti")) { r("ic"); break; }

+	      break;

+	    case 'l':

+	      if (ends("ical")) { r("ic"); break; }

+	      if (ends("ful")) { r(""); break; }

+	      break;

+	    case 's':

+	      if (ends("ness")) { r(""); break; }

+	      break;

+	    }

+	  }

+

+	  /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */

+

+	  private final void step5() {

+	    if (k == k0) return; /* for Bug 1 */

+	    switch (b[k-1]) {

+	    case 'a':

+	      if (ends("al")) break;

+	      return;

+	    case 'c':

+	      if (ends("ance")) break;

+	      if (ends("ence")) break;

+	      return;

+	    case 'e':

+	      if (ends("er")) break; return;

+	    case 'i':

+	      if (ends("ic")) break; return;

+	    case 'l':

+	      if (ends("able")) break;

+	      if (ends("ible")) break; return;

+	    case 'n':

+	      if (ends("ant")) break;

+	      if (ends("ement")) break;

+	      if (ends("ment")) break;

+	      /* element etc. not stripped before the m */

+	      if (ends("ent")) break;

+	      return;

+	    case 'o':

+	      if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break;

+	      /* j >= 0 fixes Bug 2 */

+	      if (ends("ou")) break;

+	      return;

+	      /* takes care of -ous */

+	    case 's':

+	      if (ends("ism")) break;

+	      return;

+	    case 't':

+	      if (ends("ate")) break;

+	      if (ends("iti")) break;

+	      return;

+	    case 'u':

+	      if (ends("ous")) break;

+	      return;

+	    case 'v':

+	      if (ends("ive")) break;

+	      return;

+	    case 'z':

+	      if (ends("ize")) break;

+	      return;

+	    default:

+	      return;

+	    }

+	    if (m() > 1)

+	      k = j;

+	  }

+

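+	  /* step6() removes a final -e if m() > 1, or if m() == 1 and the letters before it are not cvc; it then turns a final -ll into -l if m() > 1. */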

+

+	  private final void step6() {

+	    j = k;

+	    if (b[k] == 'e') {

+	      int a = m();

+	      if (a > 1 || a == 1 && !cvc(k-1))

+	        k--;

+	    }

+	    if (b[k] == 'l' && doublec(k) && m() > 1)

+	      k--;

+	  }

+

+

+	  /**

+	   * Stem a word provided as a String.  Returns the result as a String.

+	   */

+	  public String stem(String s) {

+	    if (stem(s.toCharArray(), s.length()))

+	      return toString();

+	    else

+	      return s;

+	  }

+

+	  /** Stem a word contained in a char[].  Returns true if the stemming process

+	   * resulted in a word different from the input.  You can retrieve the

+	   * result with getResultLength()/getResultBuffer() or toString().

+	   */

+	  public boolean stem(char[] word) {

+	    return stem(word, word.length);

+	  }

+

+	  /** Stem a word contained in a portion of a char[] array.  Returns

+	   * true if the stemming process resulted in a word different from

+	   * the input.  You can retrieve the result with

+	   * getResultLength()/getResultBuffer() or toString().

+	   */

+	  public boolean stem(char[] wordBuffer, int offset, int wordLen) {

+	    reset();

+	    if (b.length < wordLen) {

+	      b = new char[ArrayUtil.oversize(wordLen, NUM_BYTES_CHAR)];

+	    }

+	    System.arraycopy(wordBuffer, offset, b, 0, wordLen);

+	    i = wordLen;

+	    return stem(0);

+	  }

+

+	  /** Stem a word contained in a leading portion of a char[] array.

+	   * Returns true if the stemming process resulted in a word different

+	   * from the input.  You can retrieve the result with

+	   * getResultLength()/getResultBuffer() or toString().

+	   */

+	  public boolean stem(char[] word, int wordLen) {

+	    return stem(word, 0, wordLen);

+	  }

+

+	  /** Stem the word placed into the Stemmer buffer through calls to add().

+	   * Returns true if the stemming process resulted in a word different

+	   * from the input.  You can retrieve the result with

+	   * getResultLength()/getResultBuffer() or toString().

+	   */

+	  public boolean stem() {

+	    return stem(0);

+	  }

+

+	  public boolean stem(int i0) {

+	    k = i - 1;

+	    k0 = i0;

+	    if (k > k0+1) {

+	      step1(); step2(); step3(); step4(); step5(); step6();

+	    }

+	    // Also, a word is considered dirty if we lopped off letters

+	    // Thanks to Ifigenia Vairelles for pointing this out.

+	    if (i != k+1)

+	      dirty = true;

+	    i = k+1;

+	    return dirty;

+	  }

+

+	  /** Test program for demonstrating the Stemmer.  It reads a file and

+	   * stems each word, writing the result to standard out.

+	   * Usage: PorterStemmer file-name [file-name ...]

+	   */

+	  public static void main(String[] args) {

+	    PorterStemmer s = new PorterStemmer();

+

+	    for (int i = 0; i < args.length; i++) {

+	      try {

+	        InputStream in = new FileInputStream(args[i]);

+	        byte[] buffer = new byte[1024];

+	        int bufferLen, offset, ch;

+

+	        bufferLen = in.read(buffer);

+	        offset = 0;

+	        s.reset();

+

+	        while(true) {

+	          if (offset < bufferLen)

+	            ch = buffer[offset++];

+	          else {

+	            bufferLen = in.read(buffer);

+	            offset = 0;

+	            if (bufferLen < 0)

+	              ch = -1;

+	            else

+	              ch = buffer[offset++];

+	          }

+

+	          if (Character.isLetter((char) ch)) {

+	            s.add(Character.toLowerCase((char) ch));

+	          }

+	          else {

+	             s.stem();

+	             System.out.print(s.toString());

+	             s.reset();

+	             if (ch < 0)

+	               break;

+	             else {

+	               System.out.print((char) ch);

+	             }

+	           }

+	        }

+

+	        in.close();

+	      }

+	      catch (IOException e) {

+	        System.out.println("error reading " + args[i]);

+	      }

+	    }

+	  }

+	}

+

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
index f9f8e0d..b0bd02b 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
@@ -28,6 +28,12 @@
   private LemmaFormManager lemmaFormManager = new LemmaFormManager();

 

   private POSManager posManager = new POSManager();

+  /**

+   * Key matching function which takes two phrases, aligns them and finds a set of maximum common sub-phrases

+   * @param chunk1

+   * @param chunk2

+   * @return

+   */

 

   public List<ParseTreeChunk> generalizeTwoGroupedPhrasesDeterministic(

       ParseTreeChunk chunk1, ParseTreeChunk chunk2) {

@@ -196,13 +202,18 @@
     return results;

   }

 

-  // main function to generalize two expressions grouped by phrase types

-  // returns a list of generalizations for each phrase type with filtered

-  // sub-expressions

+  /** main function to generalize two expressions grouped by phrase types

+   * returns a list of generalizations for each phrase type with filtered

+   * sub-expressions

+   * 

+   * @param sent1

+   * @param sent2

+   * @return  {@code List<List<ParseTreeChunk>>} list of lists of POS-words pairs for each resultant matched / overlapped phrase

+   */

   public List<List<ParseTreeChunk>> matchTwoSentencesGroupedChunksDeterministic(

       List<List<ParseTreeChunk>> sent1, List<List<ParseTreeChunk>> sent2) {

     List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();

-    // first irerate through component

+    // first iterate through component

     for (int comp = 0; comp < 2 && // just np & vp

         comp < sent1.size() && comp < sent2.size(); comp++) {

       List<ParseTreeChunk> resultComps = new ArrayList<ParseTreeChunk>();

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextSimilarityBagOfWords.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextSimilarityBagOfWords.java
new file mode 100644
index 0000000..66aa9c0
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextSimilarityBagOfWords.java
@@ -0,0 +1,898 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+package opennlp.tools.textsimilarity;

+

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.List;

+

+public class TextSimilarityBagOfWords

+{

+	public double assessRelevanceAndGetScore(String para1, String para2) // bag-of-words overlap score between two texts

+	{

+		List<String> wordsOfPara1 = TextProcessor.fastTokenize(para1, false); // tokens of the first text (meaning of the 'false' flag is not visible here -- TODO confirm)

+		List<String> wordsOfPara2 = TextProcessor.fastTokenize(para2, false); // tokens of the second text, tokenized the same way

+		List<String> overlap = new ArrayList<String>(wordsOfPara1); // copy, so the token list returned for para1 is not modified

+		overlap.retainAll(wordsOfPara2); // keep only para1 tokens that also occur in para2; repeated para1 tokens are all kept

+		overlap.removeAll(Arrays.asList(stopList)); // drop every token that exactly equals an entry of stopList

+		// the score is the raw count of remaining shared tokens (not normalized by text length)

+		return overlap.size(); // int count, implicitly widened to double

+	}

+	

+	public String[] stopList = new String[]{

+			"a",

+

+			"about",

+

+			"above",

+

+			"across",

+

+			"after",

+

+			"again",

+

+			"against",

+

+			"all",

+

+			"almost",

+

+			"alone",

+

+			"along",

+

+			"already",

+

+			"also",

+

+			"although",

+

+			"always",

+

+			"among",

+

+			"an",

+

+			"and",

+

+			"another",

+

+			"any",

+

+			"anybody",

+

+			"anyone",

+

+			"anything",

+

+			"anywhere",

+

+			"are",

+

+			"area",

+

+			"areas",

+

+			"around",

+

+			"as",

+

+			"ask",

+

+			"asked",

+

+			"asking",

+

+			"asks",

+

+			"at",

+

+			"away",

+

+			"b",

+

+			"back",

+

+			"backed",

+

+			"backing",

+

+			"backs",

+

+			"be",

+

+			"became",

+

+			"because",

+

+			"become",

+

+			"becomes",

+

+			"been",

+

+			"before",

+

+			"began",

+

+			"behind",

+

+			"being",

+

+			"beings",

+

+			"best",

+

+			"better",

+

+			"between",

+

+			"big",

+

+			"both",

+

+			"but",

+

+			"by",

+

+			"c",

+

+			"came",

+

+			"can",

+

+			"cannot",

+

+			"case",

+

+			"cases",

+

+			"certain",

+

+			"certainly",

+

+			"clear",

+

+			"clearly",

+

+			"come",

+

+			"could",

+

+			"d",

+

+			"did",

+

+			"differ",

+

+			"different",

+

+			"differently",

+

+			"do",

+

+			"does",

+

+			"done",

+

+			"down",

+

+			"down",

+

+			"downed",

+

+			"downing",

+

+			"downs",

+

+			"during",

+

+			"e",

+

+			"each",

+

+			"early",

+

+			"either",

+

+			"end",

+

+			"ended",

+

+			"ending",

+

+			"ends",

+

+			"enough",

+

+			"even",

+

+			"evenly",

+

+			"ever",

+

+			"every",

+

+			"everybody",

+

+			"everyone",

+

+			"everything",

+

+			"everywhere",

+

+			"f",

+

+			"face",

+

+			"faces",

+

+			"fact",

+

+			"facts",

+

+			"far",

+

+			"felt",

+

+			"few",

+

+			"find",

+

+			"finds",

+

+			"first",

+

+			"for",

+

+			"four",

+

+			"from",

+

+			"full",

+

+			"fully",

+

+			"further",

+

+			"furthered",

+

+			"furthering",

+

+			"furthers",

+

+			"g",

+

+			"gave",

+

+			"general",

+

+			"generally",

+

+			"get",

+

+			"gets",

+

+			"give",

+

+			"given",

+

+			"gives",

+

+			"go",

+

+			"going",

+

+			"good",

+

+			"goods",

+

+			"got",

+

+			"great",

+

+			"greater",

+

+			"greatest",

+

+			"group",

+

+			"grouped",

+

+			"grouping",

+

+			"groups",

+

+			"h",

+

+			"had",

+

+			"has",

+

+			"have",

+

+			"having",

+

+			"he",

+

+			"her",

+

+			"here",

+

+			"herself",

+

+			"high",

+

+			"high",

+

+			"high",

+

+			"higher",

+

+			"highest",

+

+			"him",

+

+			"himself",

+

+			"his",

+

+			"how",

+

+			"however",

+

+			"i",

+

+			"if",

+

+			"important",

+

+			"in",

+

+			"interest",

+

+			"interested",

+

+			"interesting",

+

+			"interests",

+

+			"into",

+

+			"is",

+

+			"it",

+

+			"its",

+

+			"itself",

+

+			"j",

+

+			"just",

+

+			"k",

+

+			"keep",

+

+			"keeps",

+

+			"kind",

+

+			"knew",

+

+			"know",

+

+			"known",

+

+			"knows",

+

+			"l",

+

+			"large",

+

+			"largely",

+

+			"last",

+

+			"later",

+

+			"latest",

+

+			"least",

+

+			"less",

+

+			"let",

+

+			"lets",

+

+			"like",

+

+			"likely",

+

+			"long",

+

+			"longer",

+

+			"longest",

+

+			"m",

+

+			"made",

+

+			"make",

+

+			"making",

+

+			"man",

+

+			"many",

+

+			"may",

+

+			"me",

+

+			"member",

+

+			"members",

+

+			"men",

+

+			"might",

+

+			"more",

+

+			"most",

+

+			"mostly",

+

+			"mr",

+

+			"mrs",

+

+			"much",

+

+			"must",

+

+			"my",

+

+			"myself",

+

+			"n",

+

+			"necessary",

+

+			"need",

+

+			"needed",

+

+			"needing",

+

+			"needs",

+

+			"never",

+

+			"new",

+

+			"new",

+

+			"newer",

+

+			"newest",

+

+			"next",

+

+			"no",

+

+			"nobody",

+

+			"non",

+

+			"noone",

+

+			"not",

+

+			"nothing",

+

+			"now",

+

+			"nowhere",

+

+			"number",

+

+			"numbers",

+

+			"o",

+

+			"of",

+

+			"off",

+

+			"often",

+

+			"old",

+

+			"older",

+

+			"oldest",

+

+			"on",

+

+			"once",

+

+			"one",

+

+			"only",

+

+			"open",

+

+			"opened",

+

+			"opening",

+

+			"opens",

+

+			"or",

+

+			"order",

+

+			"ordered",

+

+			"ordering",

+

+			"orders",

+

+			"other",

+

+			"others",

+

+			"our",

+

+			"out",

+

+			"over",

+

+			"p",

+

+			"part",

+

+			"parted",

+

+			"parting",

+

+			"parts",

+

+			"per",

+

+			"perhaps",

+

+			"place",

+

+			"places",

+

+			"point",

+

+			"pointed",

+

+			"pointing",

+

+			"points",

+

+			"possible",

+

+			"present",

+

+			"presented",

+

+			"presenting",

+

+			"presents",

+

+			"problem",

+

+			"problems",

+

+			"put",

+

+			"puts",

+

+			"q",

+

+			"quite",

+

+			"r",

+

+			"rather",

+

+			"really",

+

+			"right",

+

+			"right",

+

+			"room",

+

+			"rooms",

+

+			"s",

+

+			"said",

+

+			"same",

+

+			"saw",

+

+			"say",

+

+			"says",

+

+			"second",

+

+			"seconds",

+

+			"see",

+

+			"seem",

+

+			"seemed",

+

+			"seeming",

+

+			"seems",

+

+			"sees",

+

+			"several",

+

+			"shall",

+

+			"she",

+

+			"should",

+

+			"show",

+

+			"showed",

+

+			"showing",

+

+			"shows",

+

+			"side",

+

+			"sides",

+

+			"since",

+

+			"small",

+

+			"smaller",

+

+			"smallest",

+

+			"so",

+

+			"some",

+

+			"somebody",

+

+			"someone",

+

+			"something",

+

+			"somewhere",

+

+			"state",

+

+			"states",

+

+			"still",

+

+			"still",

+

+			"such",

+

+			"sure",

+

+			"t",

+

+			"take",

+

+			"taken",

+

+			"than",

+

+			"that",

+

+			"the",

+

+			"their",

+

+			"them",

+

+			"then",

+

+			"there",

+

+			"therefore",

+

+			"these",

+

+			"they",

+

+			"thing",

+

+			"things",

+

+			"think",

+

+			"thinks",

+

+			"this",

+

+			"those",

+

+			"though",

+

+			"thought",

+

+			"thoughts",

+

+			"three",

+

+			"through",

+

+			"thus",

+

+			"to",

+

+			"today",

+

+			"together",

+

+			"too",

+

+			"took",

+

+			"toward",

+

+			"turn",

+

+			"turned",

+

+			"turning",

+

+			"turns",

+

+			"two",

+

+			"u",

+

+			"under",

+

+			"until",

+

+			"up",

+

+			"upon",

+

+			"us",

+

+			"use",

+

+			"used",

+

+			"uses",

+

+			"v",

+

+			"very",

+

+			"w",

+

+			"want",

+

+			"wanted",

+

+			"wanting",

+

+			"wants",

+

+			"was",

+

+			"way",

+

+			"ways",

+

+			"we",

+

+			"well",

+

+			"wells",

+

+			"went",

+

+			"were",

+

+			"what",

+

+			"when",

+

+			"where",

+

+			"whether",

+

+			"which",

+

+			"while",

+

+			"who",

+

+			"whole",

+

+			"whose",

+

+			"why",

+

+			"will",

+

+			"with",

+

+			"within",

+

+			"without",

+

+			"work",

+

+			"worked",

+

+			"working",

+

+			"works",

+

+			"would",

+

+			"x",

+

+			"y",

+

+			"year",

+

+			"years",

+

+			"yet",

+

+			"you",

+

+			"young",

+

+			"younger",

+

+			"youngest",

+

+			"your",

+

+			"yours",

+

+			"z" };

+

+	

+

+}

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
index a4033e0..0de06b4 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
@@ -92,6 +92,10 @@
 		initializeChunker();
 	}
 
+	/**
+	 * singleton method of instantiating the processor
+	 * @return the instance
+	 */
 	public synchronized static ParserChunker2MatcherProcessor getInstance() {
 		if (instance == null)
 			instance = new ParserChunker2MatcherProcessor();
@@ -99,6 +103,11 @@
 		return instance;
 	}
 
+	/**
+	 * General parsing function, which returns lists of parses for a portion of text
+	 * @param text to be parsed
+	 * @return lists of parses
+	 */
 	public List<List<Parse>> parseTextNlp(String text) {
 		if (text == null || text.trim().length() == 0)
 			return null;
@@ -178,6 +187,11 @@
 			return null;
 	}
 
+	/**
+	 * 
+	 * @param para input text string which is assumed to be a paragraph and is split into sentences
+	 * @return a list of lists of phrases with their POS tags for each phrase type (noun, verb etc.)
+	 */
 
 	public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(String para){
 		List<List<ParseTreeChunk>> listOfChunksAccum = new ArrayList<List<ParseTreeChunk>>();
@@ -202,7 +216,11 @@
 		return listOfChunksAccum;
 	}
 
-
+	/**
+	 * 
+	 * @param sentence input text string which is assumed to be a sentence
+	 * @return a list of lists of phrases with their POS tags for each phrase type (noun, verb etc.)
+	 */
 	public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForSentence(String sentence) {
 		if (sentence == null || sentence.trim().length() < MIN_SENTENCE_LENGTH)
 			return null;
@@ -619,7 +637,13 @@
 		return childrenNodeList;
 	}
 
-	
+	/**
+	 * The key function of similarity component which takes two portions of text and does similarity assessment by finding the set of all maximum common subtrees
+	 * of the set of parse trees for each portion of text
+	 * @param para1 input text 1
+	 * @param para2 input text 2
+	 * @return the matching results structure, which includes the similarity score
+	 */
 	public SentencePairMatchResult assessRelevance(String para1, String para2)
 	{
 		List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(para1), 
@@ -633,6 +657,7 @@
 		return new SentencePairMatchResult(res, origChunks1);
 
 	}
+	
 	protected List<LemmaPair> listListParseTreeChunk2ListLemmaPairs(
 			List<List<ParseTreeChunk>> sent1GrpLst) {
 		List<LemmaPair>  results = new ArrayList<LemmaPair>();
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/readme.txt b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/readme.txt
new file mode 100644
index 0000000..41765dd
--- /dev/null
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/readme.txt
@@ -0,0 +1,120 @@
+/*

+ * Licensed to the Apache Software Foundation (ASF) under one or more

+ * contributor license agreements.  See the NOTICE file distributed with

+ * this work for additional information regarding copyright ownership.

+ * The ASF licenses this file to You under the Apache License, Version 2.0

+ * (the "License"); you may not use this file except in compliance with

+ * the License. You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+

+

+SIMILARITY COMPONENT of OpenNLP

+

+1. Introduction

+This component does text relevance assessment. It takes two portions of text (phrases, sentences, paragraphs) and returns a similarity score.

+Similarity component can be used on top of search to improve relevance, computing similarity score between a question and all search results (snippets). 

+Also, this component is useful for web mining of images, videos, forums, blogs, and other media with textual descriptions. Such applications as content generation 

+and filtering meaningless speech recognition results are included in the sample applications of this component.

+   Relevance assessment is based on machine learning of syntactic parse trees (constituency trees, http://en.wikipedia.org/wiki/Parse_tree). 

+The similarity score is calculated as the size of all maximal common sub-trees for sentences from a pair of texts (

+www.aaai.org/ocs/index.php/WS/AAAIW11/paper/download/3971/4187, www.aaai.org/ocs/index.php/FLAIRS/FLAIRS11/paper/download/2573/3018,

+www.aaai.org/ocs/index.php/SSS/SSS10/paper/download/1146/1448).

+   The objective of Similarity component is to give an application engineer a tool for text relevance which can be used as a black box, with no need to understand 

+ computational linguistics or machine learning. 

+ 

+ 2. Installation

+ Please refer to OpenNLP installation instructions

+ 

+ 3. First use case of Similarity component: search

+ 

+ To start with this component, please refer to SearchResultsProcessorTest.java in package opennlp.tools.similarity.apps

+   public void testSearchOrder() runs web search using Bing API and improves search relevance.

+   Look at the code of 

+      public List<HitBase> runSearch(String query) 

+   and then at 

+      private	BingResponse calculateMatchScoreResortHits(BingResponse resp, String searchQuery)

+   which gets search results from Bing and re-ranks them based on computed similarity score.

+ 

+   The main entry to Similarity component is 

+    SentencePairMatchResult matchRes = sm.assessRelevance(snapshot, searchQuery);

+    where we pass the search query and the snapshot and obtain the similarity assessment structure which includes the similarity score.

+   

+   To run this test you need to obtain search API key from Bing at www.bing.com/developers/s/APIBasics.html and specify it in public class BingQueryRunner in

+  protected static final String APP_ID. 

+  

+  4. Solving a unique problem: content generation

+  To demonstrate the usability of Similarity component to tackle a problem which is hard to solve without a linguistic-based technology, 

+  we introduce a content generation component:

+   RelatedSentenceFinder.java

+   

+   The entry point here is the function call

+   hits = f.generateContentAbout("Albert Einstein");

+   which writes a biography of Albert Einstein by finding sentences on the web about various kinds of his activities (such as 'born', 'graduate', 'invented' etc.).

+   The key here is to compute similarity between the seed expression like "Albert Einstein invented relativity theory" and search result like 

+   "Albert Einstein College of Medicine | Medical Education | Biomedical ...

+    www.einstein.yu.edu/Albert Einstein College of Medicine is one of the nation's premier institutions for medical education, ..."

+    and filter out irrelevant search results.

+   

+   This is done in function 

+   public HitBase augmentWithMinedSentencesAndVerifyRelevance(HitBase item, String originalSentence,

+			List<String> sentsAll)

+			

+   	  SentencePairMatchResult matchRes = sm.assessRelevance(pageSentence + " " + title, originalSentence);

+   You can consult the results in gen.txt, where an essay on Einstein bio is written.

+   

+   These are examples of generated articles, given the article title

+     http://www.allvoices.com/contributed-news/9423860/content/81937916-ichie-sings-jazz-blues-contemporary-tunes

+     http://www.allvoices.com/contributed-news/9415063-britney-spears-femme-fatale-in-north-sf-bay-area

+     

+  5. Solving a high-importance problem: filtering out meaningless speech recognition results.

+  Speech recognition SDKs usually produce a number of phrases as results, such as 

+  			 "remember to buy milk tomorrow from trader joes",

+			 "remember to buy milk tomorrow from 3 to jones"

+  One can see that the former is meaningful, and the latter is meaningless (although similar in terms of how it is pronounced).

+  We use web mining and the Similarity component to detect the meaningful option (a mistake caused by trying to interpret a meaningless 

+  request by a query understanding system such as Siri for iPhone can be costly).

+ 

+  SpeechRecognitionResultsProcessor.java does the job:

+  public List<SentenceMeaningfullnessScore> runSearchAndScoreMeaningfulness(List<String> sents)

+  re-ranks the phrases in order of decreasing meaningfulness.

+  

+  6. Similarity component internals

+  in the package   opennlp.tools.textsimilarity.chunker2matcher

+  ParserChunker2MatcherProcessor.java does parsing of two portions of text and matching the resultant parse trees to assess similarity between 

+  these portions of text.

+  To run ParserChunker2MatcherProcessor

+     private static String MODEL_DIR = "resources/models";

+  needs to be specified

+  

+  The key function

+  public SentencePairMatchResult assessRelevance(String para1, String para2)

+  takes two portions of text and does similarity assessment by finding the set of all maximum common subtrees 

+  of the set of parse trees for each portion of text

+  

+  It splits paragraphs into sentences, parses them, obtains chunking information and produces grouped phrases (noun, verb, prepositional etc.):

+  public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(String para)

+  

+  and then attempts to find common subtrees:

+  in ParseTreeMatcherDeterministic.java

+		List<List<ParseTreeChunk>> res = md.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst)

+  

+  Phrase matching functionality is in package opennlp.tools.textsimilarity;

+  ParseTreeMatcherDeterministic.java:

+  Here's the key matching function which takes two phrases, aligns them and finds a set of maximum common sub-phrases

+  public List<ParseTreeChunk> generalizeTwoGroupedPhrasesDeterministic

+  

+  7. Package structure

+  	opennlp.tools.similarity.apps : 3 main applications

+	opennlp.tools.similarity.apps.utils: utilities for above applications

+	

+	opennlp.tools.textsimilarity.chunker2matcher: parser which converts text into a form for matching parse trees

+	opennlp.tools.textsimilarity: parse tree matching functionality

+