[MINOR][PYTHON][TESTS] Remove the dot in error message tests to allow other PyArrow versions in tests
### What changes were proposed in this pull request?
This PR is a minor change to support more PyArrow versions in the test.
### Why are the changes needed?
To support more PyArrow versions in the test. Without this change, the test can fail: (https://github.com/HyukjinKwon/spark/actions/runs/8994639538/job/24708397027)
```
Traceback (most recent call last):
File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py", line 585, in _test_merge_error
self.__test_merge_error(
File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py", line 606, in __test_merge_error
with self.assertRaisesRegex(error_class, error_message_regex):
AssertionError: "Return type of the user-defined function should be pandas.DataFrame, but is int64." does not match "
An exception was thrown from the Python worker. Please see the stack trace below.
Traceback (most recent call last):
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1834, in main
process()
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1826, in process
serializer.dump_stream(out_iter, outfile)
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 531, in dump_stream
return ArrowStreamSerializer.dump_stream(self, init_stream_yield_batches(), stream)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 104, in dump_stream
for batch in iterator:
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 524, in init_stream_yield_batches
for series in iterator:
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1694, in mapper
return f(df1_keys, df1_vals, df2_keys, df2_vals)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 370, in <lambda>
return lambda kl, vl, kr, vr: [(wrapped(kl, vl, kr, vr), to_arrow_type(return_type))]
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 364, in wrapped
verify_pandas_result(
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 234, in verify_pandas_result
raise PySparkTypeError(
pyspark.errors.exceptions.base.PySparkTypeError: [UDF_RETURN_TYPE] Return type of the user-defined function should be pandas.DataFrame, but is int.
```
### Does this PR introduce _any_ user-facing change?
No, test-only.
### How was this patch tested?
CI should validate it.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #46453 from HyukjinKwon/minor-test.
Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py
index 0e7d0e7..b1060ef 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py
@@ -165,7 +165,7 @@
fn=lambda lft, rgt: lft.size + rgt.size,
error_class=PythonException,
error_message_regex="Return type of the user-defined function "
- "should be pandas.DataFrame, but is int.",
+ "should be pandas.DataFrame, but is int",
)
def test_apply_in_pandas_returning_column_names(self):
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py
index 37e52d4..692f970 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_map.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py
@@ -151,14 +151,14 @@
with self.assertRaisesRegex(
PythonException,
"Return type of the user-defined function should be iterator of pandas.DataFrame, "
- "but is int.",
+ "but is int",
):
(self.spark.range(10, numPartitions=3).mapInPandas(no_iter, "a int").count())
with self.assertRaisesRegex(
PythonException,
"Return type of the user-defined function should be iterator of pandas.DataFrame, "
- "but is iterator of int.",
+ "but is iterator of int",
):
(self.spark.range(10, numPartitions=3).mapInPandas(bad_iter_elem, "a int").count())
diff --git a/python/pyspark/sql/tests/test_arrow_map.py b/python/pyspark/sql/tests/test_arrow_map.py
index f5fc2ea..2e82869 100644
--- a/python/pyspark/sql/tests/test_arrow_map.py
+++ b/python/pyspark/sql/tests/test_arrow_map.py
@@ -103,14 +103,14 @@
with self.assertRaisesRegex(
PythonException,
"Return type of the user-defined function should be iterator "
- "of pyarrow.RecordBatch, but is int.",
+ "of pyarrow.RecordBatch, but is int",
):
(self.spark.range(10, numPartitions=3).mapInArrow(not_iter, "a int").count())
with self.assertRaisesRegex(
PythonException,
"Return type of the user-defined function should be iterator "
- "of pyarrow.RecordBatch, but is iterator of int.",
+ "of pyarrow.RecordBatch, but is iterator of int",
):
(self.spark.range(10, numPartitions=3).mapInArrow(bad_iter_elem, "a int").count())