[SPARK-45661][SQL][PYTHON] Add toNullable in StructType, MapType and ArrayType
### What changes were proposed in this pull request?
This PR proposes to add:
- `StructType.toNullable`
- `MapType.toNullable`
- `ArrayType.toNullable`
Each of them returns the same schema with all nullability fields set to true.
### Why are the changes needed?
See https://stackoverflow.com/questions/33193958/change-nullable-property-of-column-in-spark-dataframe as an example.
### Does this PR introduce _any_ user-facing change?
Yes, it adds new APIs in both Scala and Python:
- `StructType.toNullable`
- `MapType.toNullable`
- `ArrayType.toNullable`
### How was this patch tested?
For Scala, it just adds a public alias (`toNullable`) for the existing `asNullable`.
For the Python side, doctests were added.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43523 from HyukjinKwon/SPARK-45661.
Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 01db75b..d6862d7 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -139,6 +139,9 @@
"""
return obj
+ def _as_nullable(self) -> "DataType":
+ return self
+
@classmethod
def fromDDL(cls, ddl: str) -> "DataType":
"""
@@ -593,6 +596,41 @@
def simpleString(self) -> str:
return "array<%s>" % self.elementType.simpleString()
+ def _as_nullable(self) -> "ArrayType":
+ return ArrayType(self.elementType._as_nullable(), containsNull=True)
+
+ def toNullable(self) -> "ArrayType":
+ """
+ Returns the same data type but with all nullability fields set to true
+ (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
+
+ .. versionadded:: 4.0.0
+
+ Returns
+ -------
+ :class:`ArrayType`
+
+ Examples
+ --------
+ Example 1: Simple nullability conversion
+
+ >>> ArrayType(IntegerType(), containsNull=False).toNullable()
+ ArrayType(IntegerType(), True)
+
+ Example 2: Nested nullability conversion
+
+ >>> ArrayType(
+ ... StructType([
+ ... StructField("b", IntegerType(), nullable=False),
+ ... StructField("c", ArrayType(IntegerType(), containsNull=False))
+ ... ]),
+ ... containsNull=False
+ ... ).toNullable()
+ ArrayType(StructType([StructField('b', IntegerType(), True),
+ StructField('c', ArrayType(IntegerType(), True), True)]), True)
+ """
+ return self._as_nullable()
+
def __repr__(self) -> str:
return "ArrayType(%s, %s)" % (self.elementType, str(self.containsNull))
@@ -671,6 +709,44 @@
def simpleString(self) -> str:
return "map<%s,%s>" % (self.keyType.simpleString(), self.valueType.simpleString())
+ def _as_nullable(self) -> "MapType":
+ return MapType(
+ self.keyType._as_nullable(), self.valueType._as_nullable(), valueContainsNull=True
+ )
+
+ def toNullable(self) -> "MapType":
+ """
+ Returns the same data type but with all nullability fields set to true
+ (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
+
+ .. versionadded:: 4.0.0
+
+ Returns
+ -------
+ :class:`MapType`
+
+ Examples
+ --------
+ Example 1: Simple nullability conversion
+
+ >>> MapType(IntegerType(), StringType(), valueContainsNull=False).toNullable()
+ MapType(IntegerType(), StringType(), True)
+
+ Example 2: Nested nullability conversion
+
+ >>> MapType(
+ ... StringType(),
+ ... MapType(
+ ... IntegerType(),
+ ... ArrayType(IntegerType(), containsNull=False),
+ ... valueContainsNull=False
+ ... ),
+ ... valueContainsNull=False
+ ... ).toNullable()
+ MapType(StringType(), MapType(IntegerType(), ArrayType(IntegerType(), True), True), True)
+ """
+ return self._as_nullable()
+
def __repr__(self) -> str:
return "MapType(%s, %s, %s)" % (self.keyType, self.valueType, str(self.valueContainsNull))
@@ -978,6 +1054,54 @@
def simpleString(self) -> str:
return "struct<%s>" % (",".join(f.simpleString() for f in self))
+ def _as_nullable(self) -> "StructType":
+ fields = []
+ for field in self.fields:
+ fields.append(
+ StructField(
+ field.name,
+ field.dataType._as_nullable(),
+ nullable=True,
+ metadata=field.metadata,
+ )
+ )
+ return StructType(fields)
+
+ def toNullable(self) -> "StructType":
+ """
+ Returns the same data type but with all nullability fields set to true
+ (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
+
+ .. versionadded:: 4.0.0
+
+ Returns
+ -------
+ :class:`StructType`
+
+ Examples
+ --------
+ Example 1: Simple nullability conversion
+
+ >>> StructType([StructField("a", IntegerType(), nullable=False)]).toNullable()
+ StructType([StructField('a', IntegerType(), True)])
+
+ Example 2: Nested nullability conversion
+
+ >>> StructType([
+ ... StructField("a",
+ ... StructType([
+ ... StructField("b", IntegerType(), nullable=False),
+ ... StructField("c", StructType([
+ ... StructField("d", IntegerType(), nullable=False)
+ ... ]))
+ ... ]),
+ ... nullable=False)
+ ... ]).toNullable()
+ StructType([StructField('a', StructType([StructField('b', IntegerType(), True),
+ StructField('c', StructType([StructField('d', IntegerType(), True)]), True)]), True)])
+ """
+ return self._as_nullable()
+
def __repr__(self) -> str:
return "StructType([%s])" % ", ".join(str(field) for field in self)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
index a522687..e5af472 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
@@ -96,6 +96,14 @@
override private[spark] def asNullable: ArrayType =
ArrayType(elementType.asNullable, containsNull = true)
+ /**
* Returns the same data type but with all nullability fields set to true
+ * (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
+ *
+ * @since 4.0.0
+ */
+ def toNullable: ArrayType = asNullable
+
override private[spark] def existsRecursively(f: (DataType) => Boolean): Boolean = {
f(this) || elementType.existsRecursively(f)
}
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala
index ce0c76d..dba8704 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala
@@ -76,6 +76,14 @@
override private[spark] def asNullable: MapType =
MapType(keyType.asNullable, valueType.asNullable, valueContainsNull = true)
+ /**
* Returns the same data type but with all nullability fields set to true
+ * (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
+ *
+ * @since 4.0.0
+ */
+ def toNullable: MapType = asNullable
+
override private[spark] def existsRecursively(f: (DataType) => Boolean): Boolean = {
f(this) || keyType.existsRecursively(f) || valueType.existsRecursively(f)
}
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
index f1771d9..5fe6b0a 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -490,6 +490,14 @@
StructType(newFields)
}
+ /**
* Returns the same data type but with all nullability fields set to true
+ * (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
+ *
+ * @since 4.0.0
+ */
+ def toNullable: StructType = asNullable
+
override private[spark] def existsRecursively(f: (DataType) => Boolean): Boolean = {
f(this) || fields.exists(field => field.dataType.existsRecursively(f))
}