[GH-2406] Make ST_Distance return null instead of 0.0 when arg is empty geom (#2447)
diff --git a/common/src/main/java/org/apache/sedona/common/Functions.java b/common/src/main/java/org/apache/sedona/common/Functions.java
index eb67674..a9d59b2 100644
--- a/common/src/main/java/org/apache/sedona/common/Functions.java
+++ b/common/src/main/java/org/apache/sedona/common/Functions.java
@@ -575,7 +575,10 @@
return geometry.getEnvelope();
}
- public static double distance(Geometry left, Geometry right) {
+ public static Double distance(Geometry left, Geometry right) {
+ if (left.isEmpty() || right.isEmpty()) {
+ return null;
+ }
return left.distance(right);
}
diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
index cdf1e5d..15bff34 100644
--- a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
@@ -1473,6 +1473,19 @@
}
@Test
+ public void distance_empty_geometries() throws ParseException {
+ Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(90, 0));
+ LineString lineString = GEOMETRY_FACTORY.createLineString(coordArray(0, 0, 0, 90));
+
+ // Ensure operations involving empty geometries return null
+ Geometry emptyGeom1 = GEOMETRY_FACTORY.createPoint();
+ Geometry emptyGeom2 = GEOMETRY_FACTORY.createGeometryCollection();
+ assertNull(Functions.distance(emptyGeom1, emptyGeom2));
+ assertNull(Functions.distance(point, emptyGeom1));
+ assertNull(Functions.distance(emptyGeom1, lineString));
+ }
+
+ @Test
public void haversineDistance() {
// Basic check
Point p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(90, 0));
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index e1be0cf..e0f713c 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -1044,11 +1044,6 @@
def test_distance(self):
for geom, geom2 in self.pairs:
- if geom[0].is_empty or geom2[0].is_empty:
- # Sedona's ST_Distance returns 0.0 instead of null
- # when an empty geometry is involved
- # https://github.com/apache/sedona/issues/2406
- continue
sgpd_result = GeoSeries(geom).distance(GeoSeries(geom2), align=True)
gpd_result = gpd.GeoSeries(geom).distance(gpd.GeoSeries(geom2), align=True)
self.check_pd_series_equal(sgpd_result, gpd_result)
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/SpatialJoinSuite.scala b/spark/common/src/test/scala/org/apache/sedona/sql/SpatialJoinSuite.scala
index 543cf5e..9f4fd30 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/SpatialJoinSuite.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/SpatialJoinSuite.scala
@@ -268,6 +268,18 @@
assert(resultRows.isEmpty)
}
}
+
+ it("ST_Distance involving empty geometries should work as a predicate") {
+ // ST_Distance returns null when either argument is an empty geometry.
+ // Although this test does not involve an actual spatial join, it checks that
+ // a distance-based spatial join does not fail due to this edge case.
+ val result1 = sparkSession.sql(
+ "SELECT * FROM df1 WHERE ST_Distance(df1.geom, ST_GeomFromText('POINT EMPTY')) < 1")
+ assert(result1.count() == 0)
+ val result2 = sparkSession.sql(
+ "SELECT * FROM df2 WHERE ST_Distance(df2.geom, ST_GeomFromText('POINT EMPTY')) < 1")
+ assert(result2.count() == 0)
+ }
}
private def withOptimizationMode(mode: String)(body: => Unit): Unit = {