[TEXT-158]: two empty strings must have a Jaccard similarity of 1 and a Jaccard distance of 0 (i.e. they are identical)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 109d740..a88d0f1 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -45,6 +45,7 @@
</properties>
<body>
<release version="1.9.1" date="202Y-MM-DD" description="Release 1.9.1. Requires Java 8.">
+ <action issue="TEXT-158" type="fix" dev="kinow">Incorrect values for Jaccard similarity with empty strings</action>
<action issue="TEXT-185" type="add" dev="ggregory" due-to="Larry West, Gary Gregory">Release Notes page hasn't been updated for 1.9 release yet.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Update spotbugs.plugin.version 4.0.0 to 4.0.4.</action>
</release>
diff --git a/src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java b/src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java
index d1478cb..4f29139 100644
--- a/src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java
+++ b/src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java
@@ -64,6 +64,9 @@
private Double calculateJaccardSimilarity(final CharSequence left, final CharSequence right) {
final int leftLength = left.length();
final int rightLength = right.length();
+ if (leftLength == 0 && rightLength == 0) {
+ return 1d;
+ }
if (leftLength == 0 || rightLength == 0) {
return 0d;
}
diff --git a/src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java
index 979354f..7b43665 100644
--- a/src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/JaccardDistanceTest.java
@@ -37,7 +37,7 @@
@Test
public void testGettingJaccardDistance() {
// Expected Jaccard distance = 1.0 - (intersect / union)
- assertEquals(1.0, classBeingTested.apply("", ""));
+ assertEquals(0.0, classBeingTested.apply("", ""));
assertEquals(1.0, classBeingTested.apply("left", ""));
assertEquals(1.0, classBeingTested.apply("", "right"));
assertEquals(1.0 - (3.0 / 4), classBeingTested.apply("frog", "fog"));
diff --git a/src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java b/src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java
index bb46122..827a35f 100644
--- a/src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java
+++ b/src/test/java/org/apache/commons/text/similarity/JaccardSimilarityTest.java
@@ -37,7 +37,7 @@
@Test
public void testGettingJaccardSimilarity() {
// Expected Jaccard similarity = (intersect / union)
- assertEquals(0.0, classBeingTested.apply("", ""));
+ assertEquals(1.0, classBeingTested.apply("", ""));
assertEquals(0.0, classBeingTested.apply("left", ""));
assertEquals(0.0, classBeingTested.apply("", "right"));
assertEquals(3.0 / 4, classBeingTested.apply("frog", "fog"));