[SPARK-48150][SQL] try_parse_json output should be declared as nullable
### What changes were proposed in this pull request?
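The `try_parse_json` expression added in https://github.com/apache/spark/pull/46141 declares its output as non-nullable. That is incorrect: the `try_` variant returns NULL for input it cannot parse instead of raising an error, so its output must be marked as nullable. This PR makes the declared nullability follow the `failOnError` flag (`returnNullable = !failOnError`) and adds a test.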
### Why are the changes needed?
Incorrectly declaring an expression's output as non-nullable when it is actually nullable may lead to crashes.
### Does this PR introduce _any_ user-facing change?
Yes, it affects output nullability and thus may affect query result schemas.
### How was this patch tested?
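A new unit test in `VariantExpressionSuite` ("SPARK-48150: ParseJson expression nullability"), plus the updated `function_try_parse_json.explain` golden file.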
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #46409 from JoshRosen/fix-try-parse-json-nullability.
Authored-by: Josh Rosen <joshrosen@databricks.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain
index 1772b5d..5c6b21a 100644
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain
@@ -1,2 +1,2 @@
-Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils$, VariantType, parseJson, g#0, false, StringType, BooleanType, true, false, true) AS try_parse_json(g)#0]
+Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils$, VariantType, parseJson, g#0, false, StringType, BooleanType, true, true, true) AS try_parse_json(g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala
index 3dbc724..5026d8e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala
@@ -59,7 +59,7 @@
"parseJson",
Seq(child, Literal(failOnError, BooleanType)),
inputTypes :+ BooleanType,
- returnNullable = false)
+ returnNullable = !failOnError)
override def inputTypes: Seq[AbstractDataType] = StringType :: Nil
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala
index f4a6a14..73abf80 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala
@@ -810,6 +810,15 @@
"Hello")
}
+ test("SPARK-48150: ParseJson expression nullability") {
+ assert(!ParseJson(Literal("["), failOnError = true).replacement.nullable)
+ assert(ParseJson(Literal("["), failOnError = false).replacement.nullable)
+ checkEvaluation(
+ ParseJson(Literal("["), failOnError = false).replacement,
+ null
+ )
+ }
+
test("cast to variant") {
def check[T : TypeTag](input: T, expectedJson: String): Unit = {
val cast = Cast(Literal.create(input), VariantType, evalMode = EvalMode.ANSI)