blob: e7b67f4d73e75aba47ab5beafce2549a8a674954 [file] [log] [blame]
package joshua.util;
public final class Algorithms {
/**
* Calculates the Levenshtein Distance for a candidate paraphrase given the source.
*
* The code is based on the example by Michael Gilleland found at
* http://www.merriampark.com/ld.htm.
*
*/
public static final int levenshtein(String[] candidate, String[] source) {
// First check to see whether either of the arrays
// is empty, in which case the least cost is simply
// the length of the other array (which would correspond
// to inserting that many elements.
if (source.length == 0) return candidate.length;
if (candidate.length == 0) return source.length;
// Initialize a table to the minimum edit distances between
// any two points in the arrays. The size of the table is set
// to be one beyond the lengths of the two arrays, and the first
// row and first column are set to be zero to avoid complicated
// checks for out of bounds exceptions.
int distances[][] = new int[source.length + 1][candidate.length + 1];
for (int i = 0; i <= source.length; i++)
distances[i][0] = i;
for (int j = 0; j <= candidate.length; j++)
distances[0][j] = j;
// Walk through each item in the source and target arrays
// and find the minimum cost to move from the previous points
// to here.
for (int i = 1; i <= source.length; i++) {
Object sourceItem = source[i - 1];
for (int j = 1; j <= candidate.length; j++) {
Object targetItem = candidate[j - 1];
int cost;
if (sourceItem.equals(targetItem))
cost = 0;
else
cost = 1;
int deletionCost = distances[i - 1][j] + 1;
int insertionCost = distances[i][j - 1] + 1;
int substitutionCost = distances[i - 1][j - 1] + cost;
distances[i][j] = minimum(insertionCost, deletionCost, substitutionCost);
}
}
// The point at the end will be the minimum edit distance.
return distances[source.length][candidate.length];
}
/**
* Returns the minimum of the three values.
*/
private static final int minimum(int a, int b, int c) {
int minimum;
minimum = a;
if (b < minimum) minimum = b;
if (c < minimum) minimum = c;
return minimum;
}
}