src/main/java/org/apache/commons/text/similarity/JaroWrinklerSimilarity.java - commons-text - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.commons.text.similarity;

 /**
  * A similarity algorithm indicating the percentage of matched characters between two character sequences.
  *
  * <p>
  * The Jaro measure is the weighted sum of percentage of matched characters
  * from each file and transposed characters. Winkler increased this measure
  * for matching initial characters.
  * </p>
  *
  * <p>
  * This implementation is based on the Jaro Winkler similarity algorithm
  * from <a href="http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance">
  * http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance</a>.
  * </p>
  *
  * <p>
  * This code has been adapted from Apache Commons Lang 3.3.
  * </p>
  */
 public class JaroWrinklerSimilarity implements EditDistance<Double> {

     /**
      * The default prefix length limit set to four.
      */
     private static final int PREFIX_LENGTH_LIMIT = 4;
     /**
      * Represents a failed index search.
      */
     public static final int INDEX_NOT_FOUND = -1;

     /**
      * Find the Jaro Winkler Distance which indicates the similarity score
      * between two CharSequences.
      *
      * <pre>
      * distance.apply(null, null)          = IllegalArgumentException
      * distance.apply("","")               = 0.0
      * distance.apply("","a")              = 0.0
      * distance.apply("aaapppp", "")       = 0.0
      * distance.apply("frog", "fog")       = 0.93
      * distance.apply("fly", "ant")        = 0.0
      * distance.apply("elephant", "hippo") = 0.44
      * distance.apply("hippo", "elephant") = 0.44
      * distance.apply("hippo", "zzzzzzzz") = 0.0
      * distance.apply("hello", "hallo")    = 0.88
      * distance.apply("ABC Corporation", "ABC Corp") = 0.91
      * distance.apply("D N H Enterprises Inc", "D &amp; H Enterprises, Inc.") = 0.93
      * distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.94
      * distance.apply("PENNSYLVANIA", "PENNCISYLVNIA")    = 0.9
      * </pre>
      *
      * @param left the first String, must not be null
      * @param right the second String, must not be null
      * @return result distance
      * @throws IllegalArgumentException if either String input {@code null}
      */
     @Override
     public Double apply(CharSequence left, CharSequence right) {
         final double defaultScalingFactor = 0.1;
         final double percentageRoundValue = 100.0;

         if (left == null || right == null) {
             throw new IllegalArgumentException("Strings must not be null");
         }

         final double jaro = score(left, right);
         final int cl = commonPrefixLength(left, right);
         final double matchScore = Math.round((jaro + defaultScalingFactor
                 * cl * (1.0 - jaro)) * percentageRoundValue) / percentageRoundValue;

         return matchScore;
     }

     /**
      * Calculates the number of characters from the beginning of the strings
      * that match exactly one-to-one, up to a maximum of four (4) characters.
      *
      * @param first The first string.
      * @param second The second string.
      * @return A number between 0 and 4.
      */
     private static int commonPrefixLength(final CharSequence first,
             final CharSequence second) {
         final int result = getCommonPrefix(first.toString(), second.toString())
                 .length();

         // Limit the result to 4.
         return result > PREFIX_LENGTH_LIMIT ? PREFIX_LENGTH_LIMIT : result;
     }

     /**
      * Compares all Strings in an array and returns the initial sequence of
      * characters that is common to all of them.
      *
      * <p>
      * For example,
      * <code>getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) -&gt; "i am a "</code>
      * </p>
      *
      * <pre>
      * getCommonPrefix(null) = ""
      * getCommonPrefix(new String[] {}) = ""
      * getCommonPrefix(new String[] {"abc"}) = "abc"
      * getCommonPrefix(new String[] {null, null}) = ""
      * getCommonPrefix(new String[] {"", ""}) = ""
      * getCommonPrefix(new String[] {"", null}) = ""
      * getCommonPrefix(new String[] {"abc", null, null}) = ""
      * getCommonPrefix(new String[] {null, null, "abc"}) = ""
      * getCommonPrefix(new String[] {"", "abc"}) = ""
      * getCommonPrefix(new String[] {"abc", ""}) = ""
      * getCommonPrefix(new String[] {"abc", "abc"}) = "abc"
      * getCommonPrefix(new String[] {"abc", "a"}) = "a"
      * getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab"
      * getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab"
      * getCommonPrefix(new String[] {"abcde", "xyz"}) = ""
      * getCommonPrefix(new String[] {"xyz", "abcde"}) = ""
      * getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) = "i am a "
      * </pre>
      *
      * @param strs array of String objects, entries may be null
      * @return the initial sequence of characters that are common to all Strings
      *         in the array; empty String if the array is null, the elements are
      *         all null or if there is no common prefix.
      */
     public static String getCommonPrefix(final String... strs) {
         if (strs == null || strs.length == 0) {
             return "";
         }
         final int smallestIndexOfDiff = indexOfDifference(strs);
         if (smallestIndexOfDiff == INDEX_NOT_FOUND) {
             // all strings were identical
             if (strs[0] == null) {
                 return "";
             }
             return strs[0];
         } else if (smallestIndexOfDiff == 0) {
             // there were no common initial characters
             return "";
         } else {
             // we found a common initial character sequence
             return strs[0].substring(0, smallestIndexOfDiff);
         }
     }

     /**
      * This method returns the Jaro-Winkler score for string matching.
      *
      * @param first the first string to be matched
      * @param second the second string to be machted
      * @return matching score without scaling factor impact
      */
     protected static double score(final CharSequence first,
             final CharSequence second) {
         String shorter;
         String longer;

         // Determine which String is longer.
         if (first.length() > second.length()) {
             longer = first.toString().toLowerCase();
             shorter = second.toString().toLowerCase();
         } else {
             longer = second.toString().toLowerCase();
             shorter = first.toString().toLowerCase();
         }

         // Calculate the half length() distance of the shorter String.
         final int halflength = shorter.length() / 2 + 1;

         // Find the set of matching characters between the shorter and longer
         // strings. Note that
         // the set of matching characters may be different depending on the
         // order of the strings.
         final String m1 = getSetOfMatchingCharacterWithin(shorter, longer,
                 halflength);
         final String m2 = getSetOfMatchingCharacterWithin(longer, shorter,
                 halflength);

         // If one or both of the sets of common characters is empty, then
         // there is no similarity between the two strings.
         if (m1.length() == 0 || m2.length() == 0) {
             return 0.0;
         }

         // If the set of common characters is not the same size, then
         // there is no similarity between the two strings, either.
         if (m1.length() != m2.length()) {
             return 0.0;
         }

         // Calculate the number of transposition between the two sets
         // of common characters.
         final int transpositions = transpositions(m1, m2);

         final double defaultDenominator = 3.0;

         // Calculate the distance.
         final double dist = (m1.length() / ((double) shorter.length())
                 + m2.length() / ((double) longer.length()) + (m1.length() - transpositions)
                 / ((double) m1.length())) / defaultDenominator;
         return dist;
     }

     /**
      * Calculates the number of transposition between two strings.
      *
      * @param first The first string.
      * @param second The second string.
      * @return The number of transposition between the two strings.
      */
     protected static int transpositions(final CharSequence first,
             final CharSequence second) {
         int transpositions = 0;
         for (int i = 0; i < first.length(); i++) {
             if (first.charAt(i) != second.charAt(i)) {
                 transpositions++;
             }
         }
         return transpositions / 2;
     }

     /**
      * Compares all CharSequences in an array and returns the index at which the
      * CharSequences begin to differ.
      *
      * <p>
      * For example,
      * <code>indexOfDifference(new String[] {"i am a machine", "i am a robot"}) -&gt; 7</code>
      * </p>
      *
      * <pre>
      * distance.indexOfDifference(null) = -1
      * distance.indexOfDifference(new String[] {}) = -1
      * distance.indexOfDifference(new String[] {"abc"}) = -1
      * distance.indexOfDifference(new String[] {null, null}) = -1
      * distance.indexOfDifference(new String[] {"", ""}) = -1
      * distance.indexOfDifference(new String[] {"", null}) = 0
      * distance.indexOfDifference(new String[] {"abc", null, null}) = 0
      * distance.indexOfDifference(new String[] {null, null, "abc"}) = 0
      * distance.indexOfDifference(new String[] {"", "abc"}) = 0
      * distance.indexOfDifference(new String[] {"abc", ""}) = 0
      * distance.indexOfDifference(new String[] {"abc", "abc"}) = -1
      * distance.indexOfDifference(new String[] {"abc", "a"}) = 1
      * distance.indexOfDifference(new String[] {"ab", "abxyz"}) = 2
      * distance.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2
      * distance.indexOfDifference(new String[] {"abcde", "xyz"}) = 0
      * distance.indexOfDifference(new String[] {"xyz", "abcde"}) = 0
      * distance.indexOfDifference(new String[] {"i am a machine", "i am a robot"}) = 7
      * </pre>
      *
      * @param css array of CharSequences, entries may be null
      * @return the index where the strings begin to differ; -1 if they are all
      *         equal
      */
     protected static int indexOfDifference(final CharSequence... css) {
         if (css == null || css.length <= 1) {
             return INDEX_NOT_FOUND;
         }
         boolean anyStringNull = false;
         boolean allStringsNull = true;
         final int arrayLen = css.length;
         int shortestStrLen = Integer.MAX_VALUE;
         int longestStrLen = 0;

         // find the min and max string lengths; this avoids checking to make
         // sure we are not exceeding the length of the string each time through
         // the bottom loop.
         for (int i = 0; i < arrayLen; i++) {
             if (css[i] == null) {
                 anyStringNull = true;
                 shortestStrLen = 0;
             } else {
                 allStringsNull = false;
                 shortestStrLen = Math.min(css[i].length(), shortestStrLen);
                 longestStrLen = Math.max(css[i].length(), longestStrLen);
             }
         }

         // handle lists containing all nulls or all empty strings
         if (allStringsNull || longestStrLen == 0 && !anyStringNull) {
             return INDEX_NOT_FOUND;
         }

         // handle lists containing some nulls or some empty strings
         if (shortestStrLen == 0) {
             return 0;
         }

         // find the position with the first difference across all strings
         int firstDiff = -1;
         for (int stringPos = 0; stringPos < shortestStrLen; stringPos++) {
             final char comparisonChar = css[0].charAt(stringPos);
             for (int arrayPos = 1; arrayPos < arrayLen; arrayPos++) {
                 if (css[arrayPos].charAt(stringPos) != comparisonChar) {
                     firstDiff = stringPos;
                     break;
                 }
             }
             if (firstDiff != -1) {
                 break;
             }
         }

         if (firstDiff == -1 && shortestStrLen != longestStrLen) {
             // we compared all of the characters up to the length of the
             // shortest string and didn't find a match, but the string lengths
             // vary, so return the length of the shortest string.
             return shortestStrLen;
         }
         return firstDiff;
     }

     /**
      * Gets a set of matching characters between two strings.
      *
      * <p>
      * Two characters from the first string and the second string are
      * considered matching if the character's respective positions are no
      * farther than the limit value.
      * </p>
      *
      * @param first The first string.
      * @param second The second string.
      * @param limit The maximum distance to consider.
      * @return A string contain the set of common characters.
      */
     protected static String getSetOfMatchingCharacterWithin(
             final CharSequence first, final CharSequence second, final int limit) {
         final StringBuilder common = new StringBuilder();
         final StringBuilder copy = new StringBuilder(second);

         for (int i = 0; i < first.length(); i++) {
             final char ch = first.charAt(i);
             boolean found = false;

             // See if the character is within the limit positions away from the
             // original position of that character.
             for (int j = Math.max(0, i - limit); !found
                     && j < Math.min(i + limit, second.length()); j++) {
                 if (copy.charAt(j) == ch) {
                     found = true;
                     common.append(ch);
                     copy.setCharAt(j, '*');
                 }
             }
         }
         return common.toString();
     }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.commons.text.similarity;

	/**
	* A similarity algorithm indicating the percentage of matched characters between two character sequences.
	*
	* <p>
	* The Jaro measure is the weighted sum of percentage of matched characters
	* from each file and transposed characters. Winkler increased this measure
	* for matching initial characters.
	* </p>
	*
	* <p>
	* This implementation is based on the Jaro Winkler similarity algorithm
	* from <a href="http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance">
	* http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance</a>.
	* </p>
	*
	* <p>
	* This code has been adapted from Apache Commons Lang 3.3.
	* </p>
	*/
	public class JaroWrinklerSimilarity implements EditDistance<Double> {

	/**
	* The default prefix length limit set to four.
	*/
	private static final int PREFIX_LENGTH_LIMIT = 4;
	/**
	* Represents a failed index search.
	*/
	public static final int INDEX_NOT_FOUND = -1;

	/**
	* Find the Jaro Winkler Distance which indicates the similarity score
	* between two CharSequences.
	*
	* <pre>
	* distance.apply(null, null) = IllegalArgumentException
	* distance.apply("","") = 0.0
	* distance.apply("","a") = 0.0
	* distance.apply("aaapppp", "") = 0.0
	* distance.apply("frog", "fog") = 0.93
	* distance.apply("fly", "ant") = 0.0
	* distance.apply("elephant", "hippo") = 0.44
	* distance.apply("hippo", "elephant") = 0.44
	* distance.apply("hippo", "zzzzzzzz") = 0.0
	* distance.apply("hello", "hallo") = 0.88
	* distance.apply("ABC Corporation", "ABC Corp") = 0.91
	* distance.apply("D N H Enterprises Inc", "D & H Enterprises, Inc.") = 0.93
	* distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.94
	* distance.apply("PENNSYLVANIA", "PENNCISYLVNIA") = 0.9
	* </pre>
	*
	* @param left the first String, must not be null
	* @param right the second String, must not be null
	* @return result distance
	* @throws IllegalArgumentException if either String input {@code null}
	*/
	@Override
	public Double apply(CharSequence left, CharSequence right) {
	final double defaultScalingFactor = 0.1;
	final double percentageRoundValue = 100.0;

	if (left == null \|\| right == null) {
	throw new IllegalArgumentException("Strings must not be null");
	}

	final double jaro = score(left, right);
	final int cl = commonPrefixLength(left, right);
	final double matchScore = Math.round((jaro + defaultScalingFactor
	* cl * (1.0 - jaro)) * percentageRoundValue) / percentageRoundValue;

	return matchScore;
	}

	/**
	* Calculates the number of characters from the beginning of the strings
	* that match exactly one-to-one, up to a maximum of four (4) characters.
	*
	* @param first The first string.
	* @param second The second string.
	* @return A number between 0 and 4.
	*/
	private static int commonPrefixLength(final CharSequence first,
	final CharSequence second) {
	final int result = getCommonPrefix(first.toString(), second.toString())
	.length();

	// Limit the result to 4.
	return result > PREFIX_LENGTH_LIMIT ? PREFIX_LENGTH_LIMIT : result;
	}

	/**
	* Compares all Strings in an array and returns the initial sequence of
	* characters that is common to all of them.
	*
	* <p>
	* For example,
	* <code>getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) -> "i am a "</code>
	* </p>
	*
	* <pre>
	* getCommonPrefix(null) = ""
	* getCommonPrefix(new String[] {}) = ""
	* getCommonPrefix(new String[] {"abc"}) = "abc"
	* getCommonPrefix(new String[] {null, null}) = ""
	* getCommonPrefix(new String[] {"", ""}) = ""
	* getCommonPrefix(new String[] {"", null}) = ""
	* getCommonPrefix(new String[] {"abc", null, null}) = ""
	* getCommonPrefix(new String[] {null, null, "abc"}) = ""
	* getCommonPrefix(new String[] {"", "abc"}) = ""
	* getCommonPrefix(new String[] {"abc", ""}) = ""
	* getCommonPrefix(new String[] {"abc", "abc"}) = "abc"
	* getCommonPrefix(new String[] {"abc", "a"}) = "a"
	* getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab"
	* getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab"
	* getCommonPrefix(new String[] {"abcde", "xyz"}) = ""
	* getCommonPrefix(new String[] {"xyz", "abcde"}) = ""
	* getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) = "i am a "
	* </pre>
	*
	* @param strs array of String objects, entries may be null
	* @return the initial sequence of characters that are common to all Strings
	* in the array; empty String if the array is null, the elements are
	* all null or if there is no common prefix.
	*/
	public static String getCommonPrefix(final String... strs) {
	if (strs == null \|\| strs.length == 0) {
	return "";
	}
	final int smallestIndexOfDiff = indexOfDifference(strs);
	if (smallestIndexOfDiff == INDEX_NOT_FOUND) {
	// all strings were identical
	if (strs[0] == null) {
	return "";
	}
	return strs[0];
	} else if (smallestIndexOfDiff == 0) {
	// there were no common initial characters
	return "";
	} else {
	// we found a common initial character sequence
	return strs[0].substring(0, smallestIndexOfDiff);
	}
	}

	/**
	* This method returns the Jaro-Winkler score for string matching.
	*
	* @param first the first string to be matched
	* @param second the second string to be machted
	* @return matching score without scaling factor impact
	*/
	protected static double score(final CharSequence first,
	final CharSequence second) {
	String shorter;
	String longer;

	// Determine which String is longer.
	if (first.length() > second.length()) {
	longer = first.toString().toLowerCase();
	shorter = second.toString().toLowerCase();
	} else {
	longer = second.toString().toLowerCase();
	shorter = first.toString().toLowerCase();
	}

	// Calculate the half length() distance of the shorter String.
	final int halflength = shorter.length() / 2 + 1;

	// Find the set of matching characters between the shorter and longer
	// strings. Note that
	// the set of matching characters may be different depending on the
	// order of the strings.
	final String m1 = getSetOfMatchingCharacterWithin(shorter, longer,
	halflength);
	final String m2 = getSetOfMatchingCharacterWithin(longer, shorter,
	halflength);

	// If one or both of the sets of common characters is empty, then
	// there is no similarity between the two strings.
	if (m1.length() == 0 \|\| m2.length() == 0) {
	return 0.0;
	}

	// If the set of common characters is not the same size, then
	// there is no similarity between the two strings, either.
	if (m1.length() != m2.length()) {
	return 0.0;
	}

	// Calculate the number of transposition between the two sets
	// of common characters.
	final int transpositions = transpositions(m1, m2);

	final double defaultDenominator = 3.0;

	// Calculate the distance.
	final double dist = (m1.length() / ((double) shorter.length())
	+ m2.length() / ((double) longer.length()) + (m1.length() - transpositions)
	/ ((double) m1.length())) / defaultDenominator;
	return dist;
	}

	/**
	* Calculates the number of transposition between two strings.
	*
	* @param first The first string.
	* @param second The second string.
	* @return The number of transposition between the two strings.
	*/
	protected static int transpositions(final CharSequence first,
	final CharSequence second) {
	int transpositions = 0;
	for (int i = 0; i < first.length(); i++) {
	if (first.charAt(i) != second.charAt(i)) {
	transpositions++;
	}
	}
	return transpositions / 2;
	}

	/**
	* Compares all CharSequences in an array and returns the index at which the
	* CharSequences begin to differ.
	*
	* <p>
	* For example,
	* <code>indexOfDifference(new String[] {"i am a machine", "i am a robot"}) -> 7</code>
	* </p>
	*
	* <pre>
	* distance.indexOfDifference(null) = -1
	* distance.indexOfDifference(new String[] {}) = -1
	* distance.indexOfDifference(new String[] {"abc"}) = -1
	* distance.indexOfDifference(new String[] {null, null}) = -1
	* distance.indexOfDifference(new String[] {"", ""}) = -1
	* distance.indexOfDifference(new String[] {"", null}) = 0
	* distance.indexOfDifference(new String[] {"abc", null, null}) = 0
	* distance.indexOfDifference(new String[] {null, null, "abc"}) = 0
	* distance.indexOfDifference(new String[] {"", "abc"}) = 0
	* distance.indexOfDifference(new String[] {"abc", ""}) = 0
	* distance.indexOfDifference(new String[] {"abc", "abc"}) = -1
	* distance.indexOfDifference(new String[] {"abc", "a"}) = 1
	* distance.indexOfDifference(new String[] {"ab", "abxyz"}) = 2
	* distance.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2
	* distance.indexOfDifference(new String[] {"abcde", "xyz"}) = 0
	* distance.indexOfDifference(new String[] {"xyz", "abcde"}) = 0
	* distance.indexOfDifference(new String[] {"i am a machine", "i am a robot"}) = 7
	* </pre>
	*
	* @param css array of CharSequences, entries may be null
	* @return the index where the strings begin to differ; -1 if they are all
	* equal
	*/
	protected static int indexOfDifference(final CharSequence... css) {
	if (css == null \|\| css.length <= 1) {
	return INDEX_NOT_FOUND;
	}
	boolean anyStringNull = false;
	boolean allStringsNull = true;
	final int arrayLen = css.length;
	int shortestStrLen = Integer.MAX_VALUE;
	int longestStrLen = 0;

	// find the min and max string lengths; this avoids checking to make
	// sure we are not exceeding the length of the string each time through
	// the bottom loop.
	for (int i = 0; i < arrayLen; i++) {
	if (css[i] == null) {
	anyStringNull = true;
	shortestStrLen = 0;
	} else {
	allStringsNull = false;
	shortestStrLen = Math.min(css[i].length(), shortestStrLen);
	longestStrLen = Math.max(css[i].length(), longestStrLen);
	}
	}

	// handle lists containing all nulls or all empty strings
	if (allStringsNull \|\| longestStrLen == 0 && !anyStringNull) {
	return INDEX_NOT_FOUND;
	}

	// handle lists containing some nulls or some empty strings
	if (shortestStrLen == 0) {
	return 0;
	}

	// find the position with the first difference across all strings
	int firstDiff = -1;
	for (int stringPos = 0; stringPos < shortestStrLen; stringPos++) {
	final char comparisonChar = css[0].charAt(stringPos);
	for (int arrayPos = 1; arrayPos < arrayLen; arrayPos++) {
	if (css[arrayPos].charAt(stringPos) != comparisonChar) {
	firstDiff = stringPos;
	break;
	}
	}
	if (firstDiff != -1) {
	break;
	}
	}

	if (firstDiff == -1 && shortestStrLen != longestStrLen) {
	// we compared all of the characters up to the length of the
	// shortest string and didn't find a match, but the string lengths
	// vary, so return the length of the shortest string.
	return shortestStrLen;
	}
	return firstDiff;
	}

	/**
	* Gets a set of matching characters between two strings.
	*
	* <p>
	* Two characters from the first string and the second string are
	* considered matching if the character's respective positions are no
	* farther than the limit value.
	* </p>
	*
	* @param first The first string.
	* @param second The second string.
	* @param limit The maximum distance to consider.
	* @return A string contain the set of common characters.
	*/
	protected static String getSetOfMatchingCharacterWithin(
	final CharSequence first, final CharSequence second, final int limit) {
	final StringBuilder common = new StringBuilder();
	final StringBuilder copy = new StringBuilder(second);

	for (int i = 0; i < first.length(); i++) {
	final char ch = first.charAt(i);
	boolean found = false;

	// See if the character is within the limit positions away from the
	// original position of that character.
	for (int j = Math.max(0, i - limit); !found
	&& j < Math.min(i + limit, second.length()); j++) {
	if (copy.charAt(j) == ch) {
	found = true;
	common.append(ch);
	copy.setCharAt(j, '*');
	}
	}
	}
	return common.toString();
	}

	}