[MINOR][PYTHON][TESTS] Remove the dot in error message tests to allow other PyArrow versions in tests
### What changes were proposed in this pull request?
This PR is a minor change to support more PyArrow versions in the test.
### Why are the changes needed?
To support more PyArrow versions in the test. Without this change, the test can fail: (https://github.com/HyukjinKwon/spark/actions/runs/8994639538/job/24708397027)
```
Traceback (most recent call last):
File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py", line 585, in _test_merge_error
self.__test_merge_error(
File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py", line 606, in __test_merge_error
with self.assertRaisesRegex(error_class, error_message_regex):
AssertionError: "Return type of the user-defined function should be pandas.DataFrame, but is int64." does not match "
An exception was thrown from the Python worker. Please see the stack trace below.
Traceback (most recent call last):
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1834, in main
process()
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1826, in process
serializer.dump_stream(out_iter, outfile)
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 531, in dump_stream
return ArrowStreamSerializer.dump_stream(self, init_stream_yield_batches(), stream)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 104, in dump_stream
for batch in iterator:
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 524, in init_stream_yield_batches
for series in iterator:
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1694, in mapper
return f(df1_keys, df1_vals, df2_keys, df2_vals)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 370, in <lambda>
return lambda kl, vl, kr, vr: [(wrapped(kl, vl, kr, vr), to_arrow_type(return_type))]
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 364, in wrapped
verify_pandas_result(
File "/home/runner/work/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 234, in verify_pandas_result
raise PySparkTypeError(
pyspark.errors.exceptions.base.PySparkTypeError: [UDF_RETURN_TYPE] Return type of the user-defined function should be pandas.DataFrame, but is int.
```
### Does this PR introduce _any_ user-facing change?
No, test-only.
### How was this patch tested?
CI should validate it.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #46453 from HyukjinKwon/minor-test.
Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py
index 0e7d0e7..b1060ef 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py
@@ -165,7 +165,7 @@
fn=lambda lft, rgt: lft.size + rgt.size,
error_class=PythonException,
error_message_regex="Return type of the user-defined function "
- "should be pandas.DataFrame, but is int.",
+ "should be pandas.DataFrame, but is int",
)
def test_apply_in_pandas_returning_column_names(self):
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py
index 37e52d4..692f970 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_map.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py
@@ -151,14 +151,14 @@
with self.assertRaisesRegex(
PythonException,
"Return type of the user-defined function should be iterator of pandas.DataFrame, "
- "but is int.",
+ "but is int",
):
(self.spark.range(10, numPartitions=3).mapInPandas(no_iter, "a int").count())
with self.assertRaisesRegex(
PythonException,
"Return type of the user-defined function should be iterator of pandas.DataFrame, "
- "but is iterator of int.",
+ "but is iterator of int",
):
(self.spark.range(10, numPartitions=3).mapInPandas(bad_iter_elem, "a int").count())
diff --git a/python/pyspark/sql/tests/test_arrow_map.py b/python/pyspark/sql/tests/test_arrow_map.py
index f5fc2ea..2e82869 100644
--- a/python/pyspark/sql/tests/test_arrow_map.py
+++ b/python/pyspark/sql/tests/test_arrow_map.py
@@ -103,14 +103,14 @@
with self.assertRaisesRegex(
PythonException,
"Return type of the user-defined function should be iterator "
- "of pyarrow.RecordBatch, but is int.",
+ "of pyarrow.RecordBatch, but is int",
):
(self.spark.range(10, numPartitions=3).mapInArrow(not_iter, "a int").count())
with self.assertRaisesRegex(
PythonException,
"Return type of the user-defined function should be iterator "
- "of pyarrow.RecordBatch, but is iterator of int.",
+ "of pyarrow.RecordBatch, but is iterator of int",
):
(self.spark.range(10, numPartitions=3).mapInArrow(bad_iter_elem, "a int").count())