SANDBOX-488 Use an interface for EditDistance, and leave separate classes as utility objects
diff --git a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java index f9fcf39..98ef49e 100644 --- a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
@@ -28,9 +28,9 @@ * <p>It utilizes the CosineSimilarity to compute the distance. Character sequences * are converted into vectors through a simple tokenizer that works with </p> * - * @since 0.1 + * @since 1.0 */ -public class CosineDistance implements StringMetric<Double> { +public class CosineDistance implements EditDistance<Double> { /** * Tokenizer used to convert the character sequence into a vector. */
diff --git a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java index 4b29a04..cf21186 100644 --- a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java +++ b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
@@ -29,7 +29,7 @@ * http://en.wikipedia.org/wiki/Cosine_similarity. * </p> * - * @since 0.1 + * @since 1.0 */ public class CosineSimilarity {
diff --git a/src/main/java/org/apache/commons/text/similarity/StringMetric.java b/src/main/java/org/apache/commons/text/similarity/EditDistance.java similarity index 78% rename from src/main/java/org/apache/commons/text/similarity/StringMetric.java rename to src/main/java/org/apache/commons/text/similarity/EditDistance.java index 2d1adfa..824522a 100644 --- a/src/main/java/org/apache/commons/text/similarity/StringMetric.java +++ b/src/main/java/org/apache/commons/text/similarity/EditDistance.java
@@ -17,11 +17,11 @@ package org.apache.commons.text.similarity; /** - * Interface for <a href='http://en.wikipedia.org/wiki/String_metric'>String Metrics</a>. + * Interface for <a href="http://en.wikipedia.org/wiki/Edit_distance">Edit Distances</a>. * * <p> - * A string metric measures the similarity between two character sequences. Depending on - * the algorithm, higher values can mean closer strings, or more distant strings. + * An edit distance measures the similarity between two character sequences. Closer strings + * have shorter distances, and vice-versa. * </p> * * <p> @@ -31,9 +31,10 @@ * and returns an <code>R</code> type similarity score. * </p> * - * @param <R> The type of similarity score unit used by this StringMetric. + * @param <R> The type of similarity score unit used by this EditDistance. + * @since 1.0 */ -public interface StringMetric<R> { +public interface EditDistance<R> { /** * Compares two CharSequences.
diff --git a/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java b/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java similarity index 66% rename from src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java rename to src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java index 3b2a871..710eace 100644 --- a/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java +++ b/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java
@@ -18,7 +18,7 @@ /** * <p> - * This stores a {@link StringMetric} implementation and a {@link CharSequence} "left" string. + * This stores a {@link EditDistance} implementation and a {@link CharSequence} "left" string. * The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the * comparison function for the pair of strings. * </p> @@ -27,15 +27,15 @@ * The following is an example which finds the most similar string: * </p> * <pre> - * StringMetric<Integer> metric = new LevenshteinDistance(); + * EditDistance<Integer> editDistance = new LevenshteinDistance(); * String target = "Apache"; - * StringMetricFrom<Integer> metricFrom = - * new StringMetricFrom<Integer>(metric, target); + * EditDistanceFrom<Integer> editDistanceFrom = + * new EditDistanceFrom<Integer>(editDistance, target); * String mostSimilar = null; * Integer shortestDistance = null; * * for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) { - * Integer distance = metricFrom.apply(test); + * Integer distance = editDistanceFrom.apply(test); * if (shortestDistance == null || distance < shortestDistance) { * shortestDistance = distance; * mostSimilar = test; @@ -47,47 +47,48 @@ * + "its distance is only " + shortestDistance + "."); * </pre> * - * @param <R> This is the type of similarity score used by the StringMetric function. + * @param <R> This is the type of similarity score used by the EditDistance function. + * @since 1.0 */ -public class StringMetricFrom<R> { +public class EditDistanceFrom<R> { /** - * String metric. + * Edit distance. */ - private final StringMetric<R> metric; + private final EditDistance<R> editDistance; /** * Left parameter used in distance function. */ private final CharSequence left; /** - * <p>This accepts the metric implementation and the "left" string.</p> + * <p>This accepts the edit distance implementation and the "left" string.</p> * - * @param metric This may not be null. + * @param editDistance This may not be null. 
* @param left This may be null here, - * but the StringMetric#compare(CharSequence left, CharSequence right) + * but the EditDistance#compare(CharSequence left, CharSequence right) * implementation may not accept nulls. */ - public StringMetricFrom(final StringMetric<R> metric, final CharSequence left) { - if (metric == null) { - throw new IllegalArgumentException("The metric may not be null."); + public EditDistanceFrom(final EditDistance<R> editDistance, final CharSequence left) { + if (editDistance == null) { + throw new IllegalArgumentException("The edit distance may not be null."); } - this.metric = metric; + this.editDistance = editDistance; this.left = left; } /** * <p> * This compares "left" field against the "right" parameter - * using the "metric" implementation. + * using the "edit distance" implementation. * </p> * * @param right the second CharSequence * @return the similarity score between two CharSequences */ public R apply(CharSequence right) { - return metric.apply(left, right); + return editDistance.apply(left, right); } /** @@ -100,12 +101,12 @@ } /** - * Gets the right parameter. + * Gets the edit distance. * - * @return the right parameter + * @return the edit distance */ - public StringMetric<R> getMetric() { - return metric; + public EditDistance<R> getEditDistance() { + return editDistance; } }
diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java index 73b282a..32b557a 100644 --- a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java +++ b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
@@ -30,8 +30,10 @@ * <p> * This code has been adapted from Apache Commons Lang 3.3. * </p> + * + * @since 1.0 */ -public class FuzzyScore implements StringMetric<Integer> { +public class FuzzyScore { /** * Locale used to change the case of text. @@ -61,14 +63,14 @@ * </p> * * <pre> - * score.apply(null, null, null) = IllegalArgumentException - * score.apply("", "", Locale.ENGLISH) = 0 - * score.apply("Workshop", "b", Locale.ENGLISH) = 0 - * score.apply("Room", "o", Locale.ENGLISH) = 1 - * score.apply("Workshop", "w", Locale.ENGLISH) = 1 - * score.apply("Workshop", "ws", Locale.ENGLISH) = 2 - * score.apply("Workshop", "wo", Locale.ENGLISH) = 4 - * score.apply("Apache Software Foundation", "asf", Locale.ENGLISH) = 3 + * score.fuzzyScore(null, null, null) = IllegalArgumentException + * score.fuzzyScore("", "", Locale.ENGLISH) = 0 + * score.fuzzyScore("Workshop", "b", Locale.ENGLISH) = 0 + * score.fuzzyScore("Room", "o", Locale.ENGLISH) = 1 + * score.fuzzyScore("Workshop", "w", Locale.ENGLISH) = 1 + * score.fuzzyScore("Workshop", "ws", Locale.ENGLISH) = 2 + * score.fuzzyScore("Workshop", "wo", Locale.ENGLISH) = 4 + * score.fuzzyScore("Apache Software Foundation", "asf", Locale.ENGLISH) = 3 * </pre> * * @param term a full term that should be matched against, must not be null @@ -78,8 +80,7 @@ * @throws IllegalArgumentException if either String input {@code null} or * Locale input {@code null} */ - @Override - public Integer apply(CharSequence term, CharSequence query) { + public Integer fuzzyScore(CharSequence term, CharSequence query) { if (term == null || query == null) { throw new IllegalArgumentException("Strings must not be null"); }
diff --git a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java index 94d0aad..a62cfa5 100644 --- a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
@@ -24,8 +24,10 @@ * For further explanation about the Hamming Distance, take a look at its * Wikipedia page at http://en.wikipedia.org/wiki/Hamming_distance. * </p> + * + * @since 1.0 */ -public class HammingDistance implements StringMetric<Integer> { +public class HammingDistance implements EditDistance<Integer> { /** * Find the Hamming Distance between two strings with the same
diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java index b96b83b..df9d6b2 100644 --- a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
@@ -34,8 +34,10 @@ * <p> * This code has been adapted from Apache Commons Lang 3.3. * </p> + * + * @since 1.0 */ -public class JaroWrinklerDistance implements StringMetric<Double> { +public class JaroWrinklerDistance implements EditDistance<Double> { /** * The default prefix length limit set to four. @@ -83,8 +85,8 @@ final double jaro = score(left, right); final int cl = commonPrefixLength(left, right); - final double matchScore = Math.round((jaro + (defaultScalingFactor - * cl * (1.0 - jaro))) * percentageRoundValue) / percentageRoundValue; + final double matchScore = Math.round((jaro + defaultScalingFactor + * cl * (1.0 - jaro)) * percentageRoundValue) / percentageRoundValue; return matchScore; }
diff --git a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java index f776cce..d94fa47 100644 --- a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
@@ -30,8 +30,10 @@ * <p> * This code has been adapted from Apache Commons Lang 3.3. * </p> + * + * @since 1.0 */ -public class LevenshteinDistance implements StringMetric<Integer> { +public class LevenshteinDistance implements EditDistance<Integer> { /** * Default instance.
diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java index 44c2eeb..60bc802 100644 --- a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java +++ b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
@@ -31,29 +31,29 @@ @Test public void testGetFuzzyScore() throws Exception { - assertEquals(0, (int) ENGLISH_SCORE.apply("", "")); - assertEquals(0, (int) ENGLISH_SCORE.apply("Workshop", "b")); - assertEquals(1, (int) ENGLISH_SCORE.apply("Room", "o")); - assertEquals(1, (int) ENGLISH_SCORE.apply("Workshop", "w")); - assertEquals(2, (int) ENGLISH_SCORE.apply("Workshop", "ws")); - assertEquals(4, (int) ENGLISH_SCORE.apply("Workshop", "wo")); - assertEquals(3, (int) ENGLISH_SCORE.apply( + assertEquals(0, (int) ENGLISH_SCORE.fuzzyScore("", "")); + assertEquals(0, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "b")); + assertEquals(1, (int) ENGLISH_SCORE.fuzzyScore("Room", "o")); + assertEquals(1, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "w")); + assertEquals(2, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "ws")); + assertEquals(4, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "wo")); + assertEquals(3, (int) ENGLISH_SCORE.fuzzyScore( "Apache Software Foundation", "asf")); } @Test(expected = IllegalArgumentException.class) public void testGetFuzzyScore_StringNullLocale() throws Exception { - ENGLISH_SCORE.apply("not null", null); + ENGLISH_SCORE.fuzzyScore("not null", null); } @Test(expected = IllegalArgumentException.class) public void testGetFuzzyScore_NullStringLocale() throws Exception { - ENGLISH_SCORE.apply(null, "not null"); + ENGLISH_SCORE.fuzzyScore(null, "not null"); } @Test(expected = IllegalArgumentException.class) public void testGetFuzzyScore_NullNullLocale() throws Exception { - ENGLISH_SCORE.apply(null, null); + ENGLISH_SCORE.fuzzyScore(null, null); } @Test(expected = IllegalArgumentException.class)
diff --git a/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java b/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java similarity index 82% rename from src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java rename to src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java index 36c03bb..5a4d6d1 100644 --- a/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java +++ b/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
@@ -27,24 +27,24 @@ import org.junit.runners.Parameterized.Parameters; /** - * Unit tests for {@link org.apache.commons.text.similarity.StringMetricFrom}. + * Unit tests for {@link org.apache.commons.text.similarity.EditDistanceFrom}. * - * @param <R> The {@link StringMetric} return type. + * @param <R> The {@link EditDistance} return type. */ @RunWith(Parameterized.class) -public class ParameterizedStringMetricFromTest<R> { +public class ParameterizedEditDistanceFromTest<R> { - private final StringMetric<R> metric; + private final EditDistance<R> editDistance; private final CharSequence left; private final CharSequence right; private final R distance; - public ParameterizedStringMetricFromTest( - final StringMetric<R> metric, + public ParameterizedEditDistanceFromTest( + final EditDistance<R> editDistance, final CharSequence left, final CharSequence right, final R distance) { - this.metric = metric; + this.editDistance = editDistance; this.left = left; this.right = right; this.distance = distance; @@ -70,7 +70,7 @@ { new LevenshteinDistance(), "go", "go", 0 }, { - new StringMetric<Boolean>() { + new EditDistance<Boolean>() { public Boolean apply(CharSequence left, CharSequence right) { return left == right || (left != null && left.equals(right)); } @@ -85,8 +85,8 @@ @Test public void test() { - StringMetricFrom<R> metricFrom = new StringMetricFrom<R>(metric, left); - assertThat(metricFrom.apply(right), equalTo(distance)); + EditDistanceFrom<R> editDistanceFrom = new EditDistanceFrom<R>(editDistance, left); + assertThat(editDistanceFrom.apply(right), equalTo(distance)); } }
diff --git a/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java b/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java index e268366..de59452 100644 --- a/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java +++ b/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java
@@ -22,17 +22,17 @@ import org.junit.Test; /** - * Unit tests for {@link org.apache.commons.text.similarity.StringMetricFrom}. + * Unit tests for {@link org.apache.commons.text.similarity.EditDistanceFrom}. */ public class StringMetricFromTest { @Test public void testEquivalence() { - StringMetric<Integer> metric = new LevenshteinDistance(); + EditDistance<Integer> metric = new LevenshteinDistance(); String left = "Apache"; String right = "a patchy"; Integer distance = 4; - StringMetricFrom<Integer> metricFrom = new StringMetricFrom<Integer>(metric, left); + EditDistanceFrom<Integer> metricFrom = new EditDistanceFrom<Integer>(metric, left); assertThat(metricFrom.apply(right), equalTo(distance)); assertThat(metricFrom.apply(right), equalTo(metric.apply(left, right))); @@ -40,10 +40,10 @@ @Test public void testJavadocExample() { - StringMetric<Integer> metric = new LevenshteinDistance(); + EditDistance<Integer> metric = new LevenshteinDistance(); String target = "Apache"; - StringMetricFrom<Integer> metricFrom = - new StringMetricFrom<Integer>(metric, target); + EditDistanceFrom<Integer> metricFrom = + new EditDistanceFrom<Integer>(metric, target); String mostSimilar = null; Integer shortestDistance = null; @@ -65,7 +65,7 @@ @Test(expected = IllegalArgumentException.class) public void testMissingMetric() { - new StringMetricFrom<Number>(null, "no go"); + new EditDistanceFrom<Number>(null, "no go"); } }