SANDBOX-488 Use an interface for EditDistance, and leave separate classes as utility objects
diff --git a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
index f9fcf39..98ef49e 100644
--- a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
@@ -28,9 +28,9 @@
* <p>It utilizes the CosineSimilarity to compute the distance. Character sequences
* are converted into vectors through a simple tokenizer that works with </p>
*
- * @since 0.1
+ * @since 1.0
*/
-public class CosineDistance implements StringMetric<Double> {
+public class CosineDistance implements EditDistance<Double> {
/**
* Tokenizer used to convert the character sequence into a vector.
*/
diff --git a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
index 4b29a04..cf21186 100644
--- a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
+++ b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
@@ -29,7 +29,7 @@
* http://en.wikipedia.org/wiki/Cosine_similarity.
* </p>
*
- * @since 0.1
+ * @since 1.0
*/
public class CosineSimilarity {
diff --git a/src/main/java/org/apache/commons/text/similarity/StringMetric.java b/src/main/java/org/apache/commons/text/similarity/EditDistance.java
similarity index 78%
rename from src/main/java/org/apache/commons/text/similarity/StringMetric.java
rename to src/main/java/org/apache/commons/text/similarity/EditDistance.java
index 2d1adfa..824522a 100644
--- a/src/main/java/org/apache/commons/text/similarity/StringMetric.java
+++ b/src/main/java/org/apache/commons/text/similarity/EditDistance.java
@@ -17,11 +17,11 @@
package org.apache.commons.text.similarity;
/**
- * Interface for <a href='http://en.wikipedia.org/wiki/String_metric'>String Metrics</a>.
+ * Interface for <a href="http://en.wikipedia.org/wiki/Edit_distance">Edit Distances</a>.
*
* <p>
- * A string metric measures the similarity between two character sequences. Depending on
- * the algorithm, higher values can mean closer strings, or more distant strings.
+ * An edit distance measures the similarity between two character sequences. Closer strings
+ * have shorter distances, and vice-versa.
* </p>
*
* <p>
@@ -31,9 +31,10 @@
* and returns an <code>R</code> type similarity score.
* </p>
*
- * @param <R> The type of similarity score unit used by this StringMetric.
+ * @param <R> The type of similarity score unit used by this EditDistance.
+ * @since 1.0
*/
-public interface StringMetric<R> {
+public interface EditDistance<R> {
/**
* Compares two CharSequences.
diff --git a/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java b/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java
similarity index 66%
rename from src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java
rename to src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java
index 3b2a871..710eace 100644
--- a/src/main/java/org/apache/commons/text/similarity/StringMetricFrom.java
+++ b/src/main/java/org/apache/commons/text/similarity/EditDistanceFrom.java
@@ -18,7 +18,7 @@
/**
* <p>
- * This stores a {@link StringMetric} implementation and a {@link CharSequence} "left" string.
+ * This stores a {@link EditDistance} implementation and a {@link CharSequence} "left" string.
* The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the
* comparison function for the pair of strings.
* </p>
@@ -27,15 +27,15 @@
* The following is an example which finds the most similar string:
* </p>
* <pre>
- * StringMetric<Integer> metric = new LevenshteinDistance();
+ * EditDistance<Integer> editDistance = new LevenshteinDistance();
* String target = "Apache";
- * StringMetricFrom<Integer> metricFrom =
- * new StringMetricFrom<Integer>(metric, target);
+ * EditDistanceFrom<Integer> editDistanceFrom =
+ * new EditDistanceFrom<Integer>(editDistance, target);
* String mostSimilar = null;
* Integer shortestDistance = null;
*
* for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) {
- * Integer distance = metricFrom.apply(test);
+ * Integer distance = editDistanceFrom.apply(test);
* if (shortestDistance == null || distance < shortestDistance) {
* shortestDistance = distance;
* mostSimilar = test;
@@ -47,47 +47,48 @@
* + "its distance is only " + shortestDistance + ".");
* </pre>
*
- * @param <R> This is the type of similarity score used by the StringMetric function.
+ * @param <R> This is the type of similarity score used by the EditDistance function.
+ * @since 1.0
*/
-public class StringMetricFrom<R> {
+public class EditDistanceFrom<R> {
/**
- * String metric.
+ * Edit distance.
*/
- private final StringMetric<R> metric;
+ private final EditDistance<R> editDistance;
/**
* Left parameter used in distance function.
*/
private final CharSequence left;
/**
- * <p>This accepts the metric implementation and the "left" string.</p>
+ * <p>This accepts the edit distance implementation and the "left" string.</p>
*
- * @param metric This may not be null.
+ * @param editDistance This may not be null.
* @param left This may be null here,
- * but the StringMetric#compare(CharSequence left, CharSequence right)
+ * but the EditDistance#compare(CharSequence left, CharSequence right)
* implementation may not accept nulls.
*/
- public StringMetricFrom(final StringMetric<R> metric, final CharSequence left) {
- if (metric == null) {
- throw new IllegalArgumentException("The metric may not be null.");
+ public EditDistanceFrom(final EditDistance<R> editDistance, final CharSequence left) {
+ if (editDistance == null) {
+ throw new IllegalArgumentException("The edit distance may not be null.");
}
- this.metric = metric;
+ this.editDistance = editDistance;
this.left = left;
}
/**
* <p>
* This compares "left" field against the "right" parameter
- * using the "metric" implementation.
+ * using the "edit distance" implementation.
* </p>
*
* @param right the second CharSequence
* @return the similarity score between two CharSequences
*/
public R apply(CharSequence right) {
- return metric.apply(left, right);
+ return editDistance.apply(left, right);
}
/**
@@ -100,12 +101,12 @@
}
/**
- * Gets the right parameter.
+ * Gets the edit distance.
*
- * @return the right parameter
+ * @return the edit distance
*/
- public StringMetric<R> getMetric() {
- return metric;
+ public EditDistance<R> getEditDistance() {
+ return editDistance;
}
}
diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
index 73b282a..32b557a 100644
--- a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
+++ b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java
@@ -30,8 +30,10 @@
* <p>
* This code has been adapted from Apache Commons Lang 3.3.
* </p>
+ *
+ * @since 1.0
*/
-public class FuzzyScore implements StringMetric<Integer> {
+public class FuzzyScore {
/**
* Locale used to change the case of text.
@@ -61,14 +63,14 @@
* </p>
*
* <pre>
- * score.apply(null, null, null) = IllegalArgumentException
- * score.apply("", "", Locale.ENGLISH) = 0
- * score.apply("Workshop", "b", Locale.ENGLISH) = 0
- * score.apply("Room", "o", Locale.ENGLISH) = 1
- * score.apply("Workshop", "w", Locale.ENGLISH) = 1
- * score.apply("Workshop", "ws", Locale.ENGLISH) = 2
- * score.apply("Workshop", "wo", Locale.ENGLISH) = 4
- * score.apply("Apache Software Foundation", "asf", Locale.ENGLISH) = 3
+ * score.fuzzyScore(null, null) = IllegalArgumentException
+ * score.fuzzyScore("", "") = 0
+ * score.fuzzyScore("Workshop", "b") = 0
+ * score.fuzzyScore("Room", "o") = 1
+ * score.fuzzyScore("Workshop", "w") = 1
+ * score.fuzzyScore("Workshop", "ws") = 2
+ * score.fuzzyScore("Workshop", "wo") = 4
+ * score.fuzzyScore("Apache Software Foundation", "asf") = 3
* </pre>
*
* @param term a full term that should be matched against, must not be null
@@ -78,8 +80,7 @@
* @throws IllegalArgumentException if either String input {@code null} or
* Locale input {@code null}
*/
- @Override
- public Integer apply(CharSequence term, CharSequence query) {
+ public Integer fuzzyScore(CharSequence term, CharSequence query) {
if (term == null || query == null) {
throw new IllegalArgumentException("Strings must not be null");
}
diff --git a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
index 94d0aad..a62cfa5 100644
--- a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java
@@ -24,8 +24,10 @@
* For further explanation about the Hamming Distance, take a look at its
* Wikipedia page at http://en.wikipedia.org/wiki/Hamming_distance.
* </p>
+ *
+ * @since 1.0
*/
-public class HammingDistance implements StringMetric<Integer> {
+public class HammingDistance implements EditDistance<Integer> {
/**
* Find the Hamming Distance between two strings with the same
diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
index b96b83b..df9d6b2 100644
--- a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java
@@ -34,8 +34,10 @@
* <p>
* This code has been adapted from Apache Commons Lang 3.3.
* </p>
+ *
+ * @since 1.0
*/
-public class JaroWrinklerDistance implements StringMetric<Double> {
+public class JaroWrinklerDistance implements EditDistance<Double> {
/**
* The default prefix length limit set to four.
@@ -83,8 +85,8 @@
final double jaro = score(left, right);
final int cl = commonPrefixLength(left, right);
- final double matchScore = Math.round((jaro + (defaultScalingFactor
- * cl * (1.0 - jaro))) * percentageRoundValue) / percentageRoundValue;
+ final double matchScore = Math.round((jaro + defaultScalingFactor
+ * cl * (1.0 - jaro)) * percentageRoundValue) / percentageRoundValue;
return matchScore;
}
diff --git a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
index f776cce..d94fa47 100644
--- a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java
@@ -30,8 +30,10 @@
* <p>
* This code has been adapted from Apache Commons Lang 3.3.
* </p>
+ *
+ * @since 1.0
*/
-public class LevenshteinDistance implements StringMetric<Integer> {
+public class LevenshteinDistance implements EditDistance<Integer> {
/**
* Default instance.
diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
index 44c2eeb..60bc802 100644
--- a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java
@@ -31,29 +31,29 @@
@Test
public void testGetFuzzyScore() throws Exception {
- assertEquals(0, (int) ENGLISH_SCORE.apply("", ""));
- assertEquals(0, (int) ENGLISH_SCORE.apply("Workshop", "b"));
- assertEquals(1, (int) ENGLISH_SCORE.apply("Room", "o"));
- assertEquals(1, (int) ENGLISH_SCORE.apply("Workshop", "w"));
- assertEquals(2, (int) ENGLISH_SCORE.apply("Workshop", "ws"));
- assertEquals(4, (int) ENGLISH_SCORE.apply("Workshop", "wo"));
- assertEquals(3, (int) ENGLISH_SCORE.apply(
+ assertEquals(0, (int) ENGLISH_SCORE.fuzzyScore("", ""));
+ assertEquals(0, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "b"));
+ assertEquals(1, (int) ENGLISH_SCORE.fuzzyScore("Room", "o"));
+ assertEquals(1, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "w"));
+ assertEquals(2, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "ws"));
+ assertEquals(4, (int) ENGLISH_SCORE.fuzzyScore("Workshop", "wo"));
+ assertEquals(3, (int) ENGLISH_SCORE.fuzzyScore(
"Apache Software Foundation", "asf"));
}
@Test(expected = IllegalArgumentException.class)
public void testGetFuzzyScore_StringNullLocale() throws Exception {
- ENGLISH_SCORE.apply("not null", null);
+ ENGLISH_SCORE.fuzzyScore("not null", null);
}
@Test(expected = IllegalArgumentException.class)
public void testGetFuzzyScore_NullStringLocale() throws Exception {
- ENGLISH_SCORE.apply(null, "not null");
+ ENGLISH_SCORE.fuzzyScore(null, "not null");
}
@Test(expected = IllegalArgumentException.class)
public void testGetFuzzyScore_NullNullLocale() throws Exception {
- ENGLISH_SCORE.apply(null, null);
+ ENGLISH_SCORE.fuzzyScore(null, null);
}
@Test(expected = IllegalArgumentException.class)
diff --git a/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java b/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
similarity index 82%
rename from src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java
rename to src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
index 36c03bb..5a4d6d1 100644
--- a/src/test/java/org/apache/commons/text/similarity/ParameterizedStringMetricFromTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/ParameterizedEditDistanceFromTest.java
@@ -27,24 +27,24 @@
import org.junit.runners.Parameterized.Parameters;
/**
- * Unit tests for {@link org.apache.commons.text.similarity.StringMetricFrom}.
+ * Unit tests for {@link org.apache.commons.text.similarity.EditDistanceFrom}.
*
- * @param <R> The {@link StringMetric} return type.
+ * @param <R> The {@link EditDistance} return type.
*/
@RunWith(Parameterized.class)
-public class ParameterizedStringMetricFromTest<R> {
+public class ParameterizedEditDistanceFromTest<R> {
- private final StringMetric<R> metric;
+ private final EditDistance<R> editDistance;
private final CharSequence left;
private final CharSequence right;
private final R distance;
- public ParameterizedStringMetricFromTest(
- final StringMetric<R> metric,
+ public ParameterizedEditDistanceFromTest(
+ final EditDistance<R> editDistance,
final CharSequence left, final CharSequence right,
final R distance) {
- this.metric = metric;
+ this.editDistance = editDistance;
this.left = left;
this.right = right;
this.distance = distance;
@@ -70,7 +70,7 @@
{ new LevenshteinDistance(), "go", "go", 0 },
{
- new StringMetric<Boolean>() {
+ new EditDistance<Boolean>() {
public Boolean apply(CharSequence left, CharSequence right) {
return left == right || (left != null && left.equals(right));
}
@@ -85,8 +85,8 @@
@Test
public void test() {
- StringMetricFrom<R> metricFrom = new StringMetricFrom<R>(metric, left);
- assertThat(metricFrom.apply(right), equalTo(distance));
+ EditDistanceFrom<R> editDistanceFrom = new EditDistanceFrom<R>(editDistance, left);
+ assertThat(editDistanceFrom.apply(right), equalTo(distance));
}
}
diff --git a/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java b/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java
index e268366..de59452 100644
--- a/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/StringMetricFromTest.java
@@ -22,17 +22,17 @@
import org.junit.Test;
/**
- * Unit tests for {@link org.apache.commons.text.similarity.StringMetricFrom}.
+ * Unit tests for {@link org.apache.commons.text.similarity.EditDistanceFrom}.
*/
public class StringMetricFromTest {
@Test
public void testEquivalence() {
- StringMetric<Integer> metric = new LevenshteinDistance();
+ EditDistance<Integer> metric = new LevenshteinDistance();
String left = "Apache";
String right = "a patchy";
Integer distance = 4;
- StringMetricFrom<Integer> metricFrom = new StringMetricFrom<Integer>(metric, left);
+ EditDistanceFrom<Integer> metricFrom = new EditDistanceFrom<Integer>(metric, left);
assertThat(metricFrom.apply(right), equalTo(distance));
assertThat(metricFrom.apply(right), equalTo(metric.apply(left, right)));
@@ -40,10 +40,10 @@
@Test
public void testJavadocExample() {
- StringMetric<Integer> metric = new LevenshteinDistance();
+ EditDistance<Integer> metric = new LevenshteinDistance();
String target = "Apache";
- StringMetricFrom<Integer> metricFrom =
- new StringMetricFrom<Integer>(metric, target);
+ EditDistanceFrom<Integer> metricFrom =
+ new EditDistanceFrom<Integer>(metric, target);
String mostSimilar = null;
Integer shortestDistance = null;
@@ -65,7 +65,7 @@
@Test(expected = IllegalArgumentException.class)
public void testMissingMetric() {
- new StringMetricFrom<Number>(null, "no go");
+ new EditDistanceFrom<Number>(null, "no go");
}
}