opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java - opennlp-sandbox - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package opennlp.tools.similarity.apps.utils;

 import java.util.ArrayList;
 import java.util.List;

 import opennlp.tools.stemmer.PStemmer;

 public class StringDistanceMeasurer {
   // external tools
   private final PStemmer ps; // stemmer

   private static final int MIN_STRING_LENGTH_FOR_WORD = 4;

   protected final int MIN_STRING_LENGTH_FOR_DISTORTED_WORD = 6;

   protected static final int ACCEPTABLE_DEVIATION_IN_CHAR = 2;

   private static final double MIN_SCORE_FOR_LING = 100; // 0.7;

   public StringDistanceMeasurer() {
     // first get stemmer
     ps = new PStemmer();
     if (MIN_SCORE_FOR_LING > 1.0)
       return;

   }

   // gets string array and process numbers, applies stemming and forms a list
   protected List<String> filterWordArray(String[] strWords) {
     List<String> strList = new ArrayList<>();
     for (String w : strWords) {
       boolean bInteger = true;
       try {
         Integer.parseInt(w);
       } catch (Exception e) {
         bInteger = false;
       }
       if (w.length() < MIN_STRING_LENGTH_FOR_WORD && !bInteger) // only
                                                                 // non-integer
                                                                 // short
         // string like preposition is uninteresting
         continue;
       try {
         w = ps.stem(w.toLowerCase()).toString();
       } catch (Exception e) {
         // do nothing, just have original term
       }
       if (w.startsWith("Invalid"))
         continue;
       strList.add(w);
     }
     return strList;
   }

   protected List<String> filterWordArrayNoStem(String[] strWords) {
     List<String> strList = new ArrayList<>();
     for (String w : strWords) {
       boolean bInteger = true;
       try {
         Integer.parseInt(w);
       } catch (Exception e) {
         bInteger = false;
       }
       if (w.length() < MIN_STRING_LENGTH_FOR_WORD && !bInteger) // only
                                                                 // non-integer
                                                                 // short
         // string like preposition is uninteresting
         continue;
       w = w.toLowerCase();

       strList.add(w);
     }
     return strList;
   }

   // main entry point. Gets two strings and applies string match
   // and also linguistic match if score > a threshold
   public double measureStringDistance(String str1, String str2) {
     double result;
     try {
       str1 = StringCleaner.processSnapshotForMatching(str1);
       str2 = StringCleaner.processSnapshotForMatching(str2);
       if (str1.equals(str2)) // || str1.endsWith(str2) || str2.endsWith(str1))
                              // bg 03-2011
         return 1.0;

       String[] str1Words = str1.split(" ");
       String[] str2Words = str2.split(" ");
       List<String> str1List = filterWordArray(str1Words), str2List = filterWordArray(str2Words);

       int l1 = str1List.size(), l2 = str2List.size();
       if (l1 < 2)
         l1 = str1Words.length;
       if (l2 < 2)
         l2 = str2Words.length;

       int lOverlap = 0;
       List<String> strListOverlap = new ArrayList<>(str1List);
       strListOverlap.retainAll(str2List);
       for (String w : strListOverlap) {
         if (w.toLowerCase().equals(w)) // no special interest word
           lOverlap++;
         else
           lOverlap += 2; // if capitalized, or specific word => important so
                          // double score
       }
       result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
           / (double) l2, 0.4);

       // now we try to find similar words which are long or Upper case
       int countSimilar = 0;
       str1List.removeAll(strListOverlap);
       str2List.removeAll(strListOverlap);
       for (String w1 : str1List) {
         for (String w2 : str2List) {
           if (w1.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
               || !w1.toLowerCase().equals(w1))
             if (w2.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
                 || !w2.toLowerCase().equals(w2))
               if (LevensteinDistanceFinder.levensteinDistance(w1, w2, 1, 10, 1,
                   10) <= ACCEPTABLE_DEVIATION_IN_CHAR)
                 countSimilar++;
         }
       }
       lOverlap += countSimilar;
       result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
           / (double) l2, 0.4);
       if (result > 1)
         result = 1.0;

       // double ld = LevensteinDistanceFinder. levensteinDistance(str1, str2, 1,
       // 10, 1, 10);
       // System.out.println(ld);

     } catch (Exception e) {
       e.printStackTrace();
       return -1.0;
     }

     Double linguisticScore = -1.0;
     // to be developed - employs linguistic processor
     /*
      * if (result>MIN_SCORE_FOR_LING) { List<List<ParseTreeChunk>> matchResult =
      * pos.matchOrigSentencesCache(str1, str2); linguisticScore =
      * ParseTreeChunkListScorer.getParseTreeChunkListScore(matchResult);
      * System.out.println(matchResult);
      *
      * // magic formula for 0.7 string match and 0.3 linguistic match result =
      * result*0.7 + linguisticScore/6.0* 0.3; }
      */
     return result;
   }

   public double measureStringDistanceNoStemming(String str1, String str2) {
     double result;
     try {
       str1 = StringCleaner.processSnapshotForMatching(str1);
       str2 = StringCleaner.processSnapshotForMatching(str2);
       if (str1.equals(str2)) // || str1.endsWith(str2) || str2.endsWith(str1))
                              // bg 03-2011
         return 1.0;

       String[] str1Words = str1.split(" ");
       String[] str2Words = str2.split(" ");
       List<String> str1List = filterWordArrayNoStem(str1Words), str2List = filterWordArrayNoStem(str2Words);

       int l1 = str1List.size(), l2 = str2List.size();
       if (l1 < 2)
         l1 = str1Words.length;
       if (l2 < 2)
         l2 = str2Words.length;

       int lOverlap = 0;
       List<String> strListOverlap = new ArrayList<>(str1List);
       strListOverlap.retainAll(str2List);
       for (String w : strListOverlap) {
         if (w.toLowerCase().equals(w)) // no special interest word
           lOverlap++;
         else
           lOverlap += 2; // if capitalized, or specific word => important so
                          // double score
       }
       result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
           / (double) l2, 0.4);

       // now we try to find similar words which are long or Upper case
       int countSimilar = 0;
       str1List.removeAll(strListOverlap);
       str2List.removeAll(strListOverlap);
       for (String w1 : str1List) {
         for (String w2 : str2List) {
           if (w1.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
               || !w1.toLowerCase().equals(w1))
             if (w2.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
                 || !w2.toLowerCase().equals(w2))
               if (LevensteinDistanceFinder.levensteinDistance(w1, w2, 1, 10, 1,
                   10) <= ACCEPTABLE_DEVIATION_IN_CHAR)
                 countSimilar++;
         }
       }
       lOverlap += countSimilar;
       result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
           / (double) l2, 0.4);
       if (result > 1)
         result = 1.0;

       // double ld = LevensteinDistanceFinder. levensteinDistance(str1, str2, 1,
       // 10, 1, 10);
       // System.out.println(ld);

     } catch (Exception e) {
       e.printStackTrace();
       return -1.0;
     }

     Double linguisticScore = -1.0;
     // to be developed - employs linguistic processor
     /*
      * if (result>MIN_SCORE_FOR_LING) { List<List<ParseTreeChunk>> matchResult =
      * pos.matchOrigSentencesCache(str1, str2); linguisticScore =
      * ParseTreeChunkListScorer.getParseTreeChunkListScore(matchResult);
      * System.out.println(matchResult);
      *
      * // magic formula for 0.7 string match and 0.3 linguistic match result =
      * result*0.7 + linguisticScore/6.0* 0.3; }
      */
     return result;
   }

   public static void main(String[] args) {
     StringDistanceMeasurer meas = new StringDistanceMeasurer();

     // String sent1 =
     // "estoy en LA,California y no encuentro tu album en NINGUNA parte!!! " +
     // "NO MANCHES!!!lo tengo que comprar por internet!! " ;

     // redunction of announcement
     String sent2a = "Tomarow come check us out if your in the area show starts at 6:00pm "
         + "2404 E. La Palma Anaheim, California 92806 Cost:$3";
     String sent2b = "Tomorrow you can check us if you area will show start at 6 pm "
         + "2404 East La Palma Anaheim, $3";
     // common sub-chunk = [VBZ-starts IN-at NNP-* NNP-* ]

     // original posting and its yahoo.com search snapshot
     String sent4a = "Fliers may have to wait years for new liquid screening equipment";
     String sent4b = "for screening checkpoints and equipment; improving ... "
         + "Wait times are not just a problem at large airports";

     // slang and search snapshot
     String sent5a = "hell yea i stay in california. and hell no lol LA sucks hella bad, "
         + "i lived there for a while and hated it sooo much, so boring! ";

     String sent5b = "My life is so boring without Tree Hill and the OC. America is sooo "
         + "racist I LOVE YOU SO MUCH. TO everyone that has hurt me...no one in the ..... Yeah sucks I know";

     String sent6a = "I think its gonna be in the east coast as well. California is pretty but way "
         + "to close to LA and helicopters are gonna ruin it";
     String sent6b = "could be in east coast as well. California is pretty but way "
         + "to close to LA and choppers will ruin it";
     // common sub-chunk = [JJ-east NN-coast ]

     String sent7a = "Iran nuke document called 'alarming'. Their Program started in the 50s with our help!";
     String sent7b = "nuke project of Iran is alarming' Program started in 1950s with our help";
     // common sub-chunk = [VBD-started IN-in NNS-50s IN-with PRP$-our NN-help ]

     // News title for the same event
     String sent8a = "Pakistan slaps travel ban on defence minister";
     String sent8b = "Pakistan corruption fall-out threatens stability";
     String sent8c = "Pakistan defence minister 'barred from leaving country'";
     String sent8d = "Pakistani defence minister banned from travel";
     String sent8dd = "Pakistani defence minister banned from travel"; // to
                                                                       // check
                                                                       // the
                                                                       // case of
                                                                       // 1.0

     // common sub-chunk = [NN-defence NN-minister ]

     List<Double> matchRes = new ArrayList<>();
     matchRes.add(meas.measureStringDistance(sent2a, sent2b));
     matchRes.add(meas.measureStringDistance(sent4a, sent4b));
     matchRes.add(meas.measureStringDistance(sent5a, sent5b));
     matchRes.add(meas.measureStringDistance(sent6a, sent6b));
     matchRes.add(meas.measureStringDistance(sent7a, sent7b));

     System.out.println(matchRes);
     // [0.8178702752867737, 0.21082473737065027, 0.27594593229224296,
     // 0.7517586466500455, 0.9100766715907641]

     matchRes = new ArrayList<>();
     matchRes.add(meas.measureStringDistance(sent8a, sent8b));
     matchRes.add(meas.measureStringDistance(sent8a, sent8c));
     matchRes.add(meas.measureStringDistance(sent8a, sent8d));
     matchRes.add(meas.measureStringDistance(sent8b, sent8c));
     matchRes.add(meas.measureStringDistance(sent8b, sent8d));
     matchRes.add(meas.measureStringDistance(sent8c, sent8d));

     System.out.println(matchRes);
     // [0.48044977359257246, 0.8365116420730185, 0.8365116420730185,
     // 0.48044977359257246, 0.27594593229224296,
     // 0.6391010941257969]

     matchRes = new ArrayList<>();
     // to verify that the same sentence gives 1
     matchRes.add(meas.measureStringDistance(sent8dd, sent8d));
     // to verify that totally different sentences give 0
     matchRes.add(meas.measureStringDistance(sent2a, sent8d));

     System.out.println("Now testing 1 and 0: \n" + matchRes);
     // Now testing 1 and 0:
     // [1.0, 0.0]
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package opennlp.tools.similarity.apps.utils;

	import java.util.ArrayList;
	import java.util.List;

	import opennlp.tools.stemmer.PStemmer;

	public class StringDistanceMeasurer {
	// external tools
	private final PStemmer ps; // stemmer

	private static final int MIN_STRING_LENGTH_FOR_WORD = 4;

	protected final int MIN_STRING_LENGTH_FOR_DISTORTED_WORD = 6;

	protected static final int ACCEPTABLE_DEVIATION_IN_CHAR = 2;

	private static final double MIN_SCORE_FOR_LING = 100; // 0.7;

	public StringDistanceMeasurer() {
	// first get stemmer
	ps = new PStemmer();
	if (MIN_SCORE_FOR_LING > 1.0)
	return;

	}

	// gets string array and process numbers, applies stemming and forms a list
	protected List<String> filterWordArray(String[] strWords) {
	List<String> strList = new ArrayList<>();
	for (String w : strWords) {
	boolean bInteger = true;
	try {
	Integer.parseInt(w);
	} catch (Exception e) {
	bInteger = false;
	}
	if (w.length() < MIN_STRING_LENGTH_FOR_WORD && !bInteger) // only
	// non-integer
	// short
	// string like preposition is uninteresting
	continue;
	try {
	w = ps.stem(w.toLowerCase()).toString();
	} catch (Exception e) {
	// do nothing, just have original term
	}
	if (w.startsWith("Invalid"))
	continue;
	strList.add(w);
	}
	return strList;
	}

	protected List<String> filterWordArrayNoStem(String[] strWords) {
	List<String> strList = new ArrayList<>();
	for (String w : strWords) {
	boolean bInteger = true;
	try {
	Integer.parseInt(w);
	} catch (Exception e) {
	bInteger = false;
	}
	if (w.length() < MIN_STRING_LENGTH_FOR_WORD && !bInteger) // only
	// non-integer
	// short
	// string like preposition is uninteresting
	continue;
	w = w.toLowerCase();

	strList.add(w);
	}
	return strList;
	}

	// main entry point. Gets two strings and applies string match
	// and also linguistic match if score > a threshold
	public double measureStringDistance(String str1, String str2) {
	double result;
	try {
	str1 = StringCleaner.processSnapshotForMatching(str1);
	str2 = StringCleaner.processSnapshotForMatching(str2);
	if (str1.equals(str2)) // \|\| str1.endsWith(str2) \|\| str2.endsWith(str1))
	// bg 03-2011
	return 1.0;

	String[] str1Words = str1.split(" ");
	String[] str2Words = str2.split(" ");
	List<String> str1List = filterWordArray(str1Words), str2List = filterWordArray(str2Words);

	int l1 = str1List.size(), l2 = str2List.size();
	if (l1 < 2)
	l1 = str1Words.length;
	if (l2 < 2)
	l2 = str2Words.length;

	int lOverlap = 0;
	List<String> strListOverlap = new ArrayList<>(str1List);
	strListOverlap.retainAll(str2List);
	for (String w : strListOverlap) {
	if (w.toLowerCase().equals(w)) // no special interest word
	lOverlap++;
	else
	lOverlap += 2; // if capitalized, or specific word => important so
	// double score
	}
	result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
	/ (double) l2, 0.4);

	// now we try to find similar words which are long or Upper case
	int countSimilar = 0;
	str1List.removeAll(strListOverlap);
	str2List.removeAll(strListOverlap);
	for (String w1 : str1List) {
	for (String w2 : str2List) {
	if (w1.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
	\|\| !w1.toLowerCase().equals(w1))
	if (w2.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
	\|\| !w2.toLowerCase().equals(w2))
	if (LevensteinDistanceFinder.levensteinDistance(w1, w2, 1, 10, 1,
	10) <= ACCEPTABLE_DEVIATION_IN_CHAR)
	countSimilar++;
	}
	}
	lOverlap += countSimilar;
	result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
	/ (double) l2, 0.4);
	if (result > 1)
	result = 1.0;

	// double ld = LevensteinDistanceFinder. levensteinDistance(str1, str2, 1,
	// 10, 1, 10);
	// System.out.println(ld);

	} catch (Exception e) {
	e.printStackTrace();
	return -1.0;
	}

	Double linguisticScore = -1.0;
	// to be developed - employs linguistic processor
	/*
	* if (result>MIN_SCORE_FOR_LING) { List<List<ParseTreeChunk>> matchResult =
	* pos.matchOrigSentencesCache(str1, str2); linguisticScore =
	* ParseTreeChunkListScorer.getParseTreeChunkListScore(matchResult);
	* System.out.println(matchResult);
	*
	* // magic formula for 0.7 string match and 0.3 linguistic match result =
	* result0.7 + linguisticScore/6.0 0.3; }
	*/
	return result;
	}

	public double measureStringDistanceNoStemming(String str1, String str2) {
	double result;
	try {
	str1 = StringCleaner.processSnapshotForMatching(str1);
	str2 = StringCleaner.processSnapshotForMatching(str2);
	if (str1.equals(str2)) // \|\| str1.endsWith(str2) \|\| str2.endsWith(str1))
	// bg 03-2011
	return 1.0;

	String[] str1Words = str1.split(" ");
	String[] str2Words = str2.split(" ");
	List<String> str1List = filterWordArrayNoStem(str1Words), str2List = filterWordArrayNoStem(str2Words);

	int l1 = str1List.size(), l2 = str2List.size();
	if (l1 < 2)
	l1 = str1Words.length;
	if (l2 < 2)
	l2 = str2Words.length;

	int lOverlap = 0;
	List<String> strListOverlap = new ArrayList<>(str1List);
	strListOverlap.retainAll(str2List);
	for (String w : strListOverlap) {
	if (w.toLowerCase().equals(w)) // no special interest word
	lOverlap++;
	else
	lOverlap += 2; // if capitalized, or specific word => important so
	// double score
	}
	result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
	/ (double) l2, 0.4);

	// now we try to find similar words which are long or Upper case
	int countSimilar = 0;
	str1List.removeAll(strListOverlap);
	str2List.removeAll(strListOverlap);
	for (String w1 : str1List) {
	for (String w2 : str2List) {
	if (w1.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
	\|\| !w1.toLowerCase().equals(w1))
	if (w2.length() > MIN_STRING_LENGTH_FOR_DISTORTED_WORD
	\|\| !w2.toLowerCase().equals(w2))
	if (LevensteinDistanceFinder.levensteinDistance(w1, w2, 1, 10, 1,
	10) <= ACCEPTABLE_DEVIATION_IN_CHAR)
	countSimilar++;
	}
	}
	lOverlap += countSimilar;
	result = Math.pow((double) (lOverlap * lOverlap) / (double) l1
	/ (double) l2, 0.4);
	if (result > 1)
	result = 1.0;

	// double ld = LevensteinDistanceFinder. levensteinDistance(str1, str2, 1,
	// 10, 1, 10);
	// System.out.println(ld);

	} catch (Exception e) {
	e.printStackTrace();
	return -1.0;
	}

	Double linguisticScore = -1.0;
	// to be developed - employs linguistic processor
	/*
	* if (result>MIN_SCORE_FOR_LING) { List<List<ParseTreeChunk>> matchResult =
	* pos.matchOrigSentencesCache(str1, str2); linguisticScore =
	* ParseTreeChunkListScorer.getParseTreeChunkListScore(matchResult);
	* System.out.println(matchResult);
	*
	* // magic formula for 0.7 string match and 0.3 linguistic match result =
	* result0.7 + linguisticScore/6.0 0.3; }
	*/
	return result;
	}

	public static void main(String[] args) {
	StringDistanceMeasurer meas = new StringDistanceMeasurer();

	// String sent1 =
	// "estoy en LA,California y no encuentro tu album en NINGUNA parte!!! " +
	// "NO MANCHES!!!lo tengo que comprar por internet!! " ;

	// redunction of announcement
	String sent2a = "Tomarow come check us out if your in the area show starts at 6:00pm "
	+ "2404 E. La Palma Anaheim, California 92806 Cost:$3";
	String sent2b = "Tomorrow you can check us if you area will show start at 6 pm "
	+ "2404 East La Palma Anaheim, $3";
	// common sub-chunk = [VBZ-starts IN-at NNP-* NNP-* ]

	// original posting and its yahoo.com search snapshot
	String sent4a = "Fliers may have to wait years for new liquid screening equipment";
	String sent4b = "for screening checkpoints and equipment; improving ... "
	+ "Wait times are not just a problem at large airports";

	// slang and search snapshot
	String sent5a = "hell yea i stay in california. and hell no lol LA sucks hella bad, "
	+ "i lived there for a while and hated it sooo much, so boring! ";

	String sent5b = "My life is so boring without Tree Hill and the OC. America is sooo "
	+ "racist I LOVE YOU SO MUCH. TO everyone that has hurt me...no one in the ..... Yeah sucks I know";

	String sent6a = "I think its gonna be in the east coast as well. California is pretty but way "
	+ "to close to LA and helicopters are gonna ruin it";
	String sent6b = "could be in east coast as well. California is pretty but way "
	+ "to close to LA and choppers will ruin it";
	// common sub-chunk = [JJ-east NN-coast ]

	String sent7a = "Iran nuke document called 'alarming'. Their Program started in the 50s with our help!";
	String sent7b = "nuke project of Iran is alarming' Program started in 1950s with our help";
	// common sub-chunk = [VBD-started IN-in NNS-50s IN-with PRP$-our NN-help ]

	// News title for the same event
	String sent8a = "Pakistan slaps travel ban on defence minister";
	String sent8b = "Pakistan corruption fall-out threatens stability";
	String sent8c = "Pakistan defence minister 'barred from leaving country'";
	String sent8d = "Pakistani defence minister banned from travel";
	String sent8dd = "Pakistani defence minister banned from travel"; // to
	// check
	// the
	// case of
	// 1.0

	// common sub-chunk = [NN-defence NN-minister ]

	List<Double> matchRes = new ArrayList<>();
	matchRes.add(meas.measureStringDistance(sent2a, sent2b));
	matchRes.add(meas.measureStringDistance(sent4a, sent4b));
	matchRes.add(meas.measureStringDistance(sent5a, sent5b));
	matchRes.add(meas.measureStringDistance(sent6a, sent6b));
	matchRes.add(meas.measureStringDistance(sent7a, sent7b));

	System.out.println(matchRes);
	// [0.8178702752867737, 0.21082473737065027, 0.27594593229224296,
	// 0.7517586466500455, 0.9100766715907641]

	matchRes = new ArrayList<>();
	matchRes.add(meas.measureStringDistance(sent8a, sent8b));
	matchRes.add(meas.measureStringDistance(sent8a, sent8c));
	matchRes.add(meas.measureStringDistance(sent8a, sent8d));
	matchRes.add(meas.measureStringDistance(sent8b, sent8c));
	matchRes.add(meas.measureStringDistance(sent8b, sent8d));
	matchRes.add(meas.measureStringDistance(sent8c, sent8d));

	System.out.println(matchRes);
	// [0.48044977359257246, 0.8365116420730185, 0.8365116420730185,
	// 0.48044977359257246, 0.27594593229224296,
	// 0.6391010941257969]

	matchRes = new ArrayList<>();
	// to verify that the same sentence gives 1
	matchRes.add(meas.measureStringDistance(sent8dd, sent8d));
	// to verify that totally different sentences give 0
	matchRes.add(meas.measureStringDistance(sent2a, sent8d));

	System.out.println("Now testing 1 and 0: \n" + matchRes);
	// Now testing 1 and 0:
	// [1.0, 0.0]
	}
	}