#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import unittest
import uuid
from collections import defaultdict
from pyspark.util import is_remote_only
from pyspark.errors import (
    PySparkException,
    PySparkValueError,
    RetriesExceeded,
)
from pyspark.sql import SparkSession as PySparkSession
from pyspark.sql.connect.client.retries import RetryPolicy
from pyspark.testing.connectutils import (
    should_test_connect,
    ReusedConnectTestCase,
    connect_requirement_message,
)
from pyspark.errors.exceptions.connect import (
    AnalysisException,
    SparkConnectException,
    SparkUpgradeException,
)
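
# These imports need the optional Spark Connect client dependencies (such as
# grpcio), so they are only attempted when the environment can run Connect tests.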
if should_test_connect:
    import grpc
    from pyspark.sql.connect.session import SparkSession as RemoteSparkSession
    from pyspark.sql.connect.client import DefaultChannelBuilder, ChannelBuilder
    from pyspark.sql.connect.client.core import Retrying, SparkConnectClient


@unittest.skipIf(is_remote_only(), "Session creation different from local mode")
class SparkConnectSessionTests(ReusedConnectTestCase):
    def setUp(self) -> None:
        self.spark = (
            PySparkSession.builder.config(conf=self.conf())
            .appName(self.__class__.__name__)
            .remote("local[4]")
            .getOrCreate()
        )

    def tearDown(self):
        self.spark.stop()
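
    # Progress handlers receive keyword arguments describing query progress
    # while a query runs; registering, removing, and clearing them should take
    # effect immediately, as checked below.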
    def test_progress_handler(self):
        handler_called = []

        def handler(**kwargs):
            nonlocal handler_called
            handler_called.append(kwargs)

        self.spark.registerProgressHandler(handler)
        self.spark.sql("select 1").collect()
        self.assertGreaterEqual(len(handler_called), 1)

        handler_called = []
        self.spark.removeProgressHandler(handler)
        self.spark.sql("select 1").collect()
        self.assertEqual(len(handler_called), 0)

        # After clearing all handlers, no further calls should be recorded.
        self.spark.registerProgressHandler(handler)
        self.spark.clearProgressHandlers()
        self.spark.sql("select 1").collect()
        self.assertEqual(len(handler_called), 0)

    def _check_no_active_session_error(self, e: PySparkException):
        self.check_error(exception=e, error_class="NO_ACTIVE_SESSION", message_parameters=dict())

    def test_stop_session(self):
        df = self.spark.sql("select 1 as a, 2 as b")
        catalog = self.spark.catalog
        self.spark.stop()

        # _execute_and_fetch
        with self.assertRaises(SparkConnectException) as e:
            self.spark.sql("select 1")
        self._check_no_active_session_error(e.exception)

        with self.assertRaises(SparkConnectException) as e:
            catalog.tableExists("table")
        self._check_no_active_session_error(e.exception)

        # _execute
        with self.assertRaises(SparkConnectException) as e:
            self.spark.udf.register("test_func", lambda x: x + 1)
        self._check_no_active_session_error(e.exception)

        # _analyze
        with self.assertRaises(SparkConnectException) as e:
            df._explain_string(extended=True)
        self._check_no_active_session_error(e.exception)

        # Config
        with self.assertRaises(SparkConnectException) as e:
            self.spark.conf.get("some.conf")
        self._check_no_active_session_error(e.exception)
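
    # When spark.sql.connect.enrichError.enabled is set, the client enriches
    # errors with full details from the server (message parameters and,
    # depending on spark.sql.connect.serverStacktrace.enabled, the JVM stack
    # trace), as the next two tests verify.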
    def test_error_enrichment_message(self):
        with self.sql_conf(
            {
                "spark.sql.connect.enrichError.enabled": True,
                "spark.sql.connect.serverStacktrace.enabled": False,
                "spark.sql.pyspark.jvmStacktrace.enabled": False,
            }
        ):
            name = "test" * 10000
            with self.assertRaises(AnalysisException) as e:
                self.spark.sql("select " + name).collect()
            self.assertTrue(name in e.exception._message)
            self.assertFalse("JVM stacktrace" in e.exception._message)

    def test_error_enrichment_jvm_stacktrace(self):
        with self.sql_conf(
            {
                "spark.sql.connect.enrichError.enabled": True,
                "spark.sql.pyspark.jvmStacktrace.enabled": False,
                "spark.sql.legacy.timeParserPolicy": "EXCEPTION",
            }
        ):
            with self.sql_conf({"spark.sql.connect.serverStacktrace.enabled": False}):
                with self.assertRaises(SparkUpgradeException) as e:
                    self.spark.sql(
                        """select from_json(
                            '{"d": "02-29"}', 'd date', map('dateFormat', 'MM-dd'))"""
                    ).collect()
                self.assertFalse("JVM stacktrace" in e.exception._message)

            with self.sql_conf({"spark.sql.connect.serverStacktrace.enabled": True}):
                with self.assertRaises(SparkUpgradeException) as e:
                    self.spark.sql(
                        """select from_json(
                            '{"d": "02-29"}', 'd date', map('dateFormat', 'MM-dd'))"""
                    ).collect()
                self.assertTrue("JVM stacktrace" in str(e.exception))
                self.assertTrue("org.apache.spark.SparkUpgradeException" in str(e.exception))
                self.assertTrue(
                    "at org.apache.spark.sql.errors.ExecutionErrors"
                    ".failToParseDateTimeInNewParserError" in str(e.exception)
                )
                self.assertTrue("Caused by: java.time.DateTimeException:" in str(e.exception))

    def test_not_hitting_netty_header_limit(self):
        with self.sql_conf({"spark.sql.pyspark.jvmStacktrace.enabled": True}):
            with self.assertRaises(AnalysisException):
                self.spark.sql("select " + "test" * 1).collect()

    def test_error_stack_trace(self):
        with self.sql_conf({"spark.sql.connect.enrichError.enabled": False}):
            with self.sql_conf({"spark.sql.pyspark.jvmStacktrace.enabled": True}):
                with self.assertRaises(AnalysisException) as e:
                    self.spark.sql("select x").collect()
                self.assertTrue("JVM stacktrace" in str(e.exception))
                self.assertIsNotNone(e.exception.getStackTrace())
                self.assertTrue(
                    "at org.apache.spark.sql.catalyst.analysis.CheckAnalysis" in str(e.exception)
                )

            with self.sql_conf({"spark.sql.pyspark.jvmStacktrace.enabled": False}):
                with self.assertRaises(AnalysisException) as e:
                    self.spark.sql("select x").collect()
                self.assertFalse("JVM stacktrace" in str(e.exception))
                self.assertIsNone(e.exception.getStackTrace())
                self.assertFalse(
                    "at org.apache.spark.sql.catalyst.analysis.CheckAnalysis" in str(e.exception)
                )

        # Create a new session with a different stack trace size.
        self.spark.stop()
        spark = (
            PySparkSession.builder.config(conf=self.conf())
            .config("spark.connect.grpc.maxMetadataSize", 128)
            .remote("local[4]")
            .getOrCreate()
        )
        spark.conf.set("spark.sql.connect.enrichError.enabled", False)
        spark.conf.set("spark.sql.pyspark.jvmStacktrace.enabled", True)
        with self.assertRaises(AnalysisException) as e:
            spark.sql("select x").collect()
        self.assertTrue("JVM stacktrace" in str(e.exception))
        self.assertIsNotNone(e.exception.getStackTrace())
        self.assertFalse(
            "at org.apache.spark.sql.catalyst.analysis.CheckAnalysis" in str(e.exception)
        )
        spark.stop()

    def test_can_create_multiple_sessions_to_different_remotes(self):
        self.spark.stop()
        self.assertIsNotNone(self.spark._client)
        # Creates a new remote session.
        other = PySparkSession.builder.remote("sc://other.remote:114/").create()
        self.assertNotEqual(self.spark, other)

        # Gets the currently active session, regardless of the remote address.
        same = PySparkSession.builder.remote("sc://other.remote.host:114/").getOrCreate()
        self.assertEqual(other, same)
        same.release_session_on_close = False  # avoid sending release to dummy connection
        same.stop()

        # Make sure the environment is clean.
        self.spark.stop()
        with self.assertRaises(RuntimeError) as e:
            PySparkSession.builder.create()
        self.assertIn(
            "Create a new SparkSession is only supported with SparkConnect.", str(e.exception)
        )

    def test_get_message_parameters_without_enriched_error(self):
        with self.sql_conf({"spark.sql.connect.enrichError.enabled": False}):
            exception = None
            try:
                self.spark.sql("""SELECT a""")
            except AnalysisException as e:
                exception = e

            self.assertIsNotNone(exception)
            self.assertEqual(exception.getMessageParameters(), {"objectName": "`a`"})

    def test_custom_channel_builder(self):
        # Access self.spark's DefaultChannelBuilder to reuse the same endpoint.
        endpoint = self.spark._client._builder.endpoint

        class CustomChannelBuilder(ChannelBuilder):
            def toChannel(self):
                return self._insecure_channel(endpoint)

        session = RemoteSparkSession.builder.channelBuilder(CustomChannelBuilder()).create()
        session.sql("select 1 + 1")
    def test_reset_when_server_and_client_sessionids_mismatch(self):
        session = RemoteSparkSession.builder.remote("sc://localhost").getOrCreate()
        # Run a simple query so the session id is synchronized.
        session.range(3).collect()

        # Trigger a mismatch between client session id and server session id.
        session._client._session_id = str(uuid.uuid4())
        with self.assertRaises(SparkConnectException):
            session.range(3).collect()

        # Assert that getOrCreate() generates a new session.
        session = RemoteSparkSession.builder.remote("sc://localhost").getOrCreate()
        session.range(3).collect()

    def test_reset_when_server_session_id_mismatch(self):
        session = RemoteSparkSession.builder.remote("sc://localhost").getOrCreate()
        # Run a simple query so the session id is synchronized.
        session.range(3).collect()

        # Trigger a mismatch.
        session._client._server_session_id = str(uuid.uuid4())
        with self.assertRaises(SparkConnectException):
            session.range(3).collect()

        # Assert that getOrCreate() generates a new session.
        session = RemoteSparkSession.builder.remote("sc://localhost").getOrCreate()
        session.range(3).collect()


class SparkConnectSessionWithOptionsTest(unittest.TestCase):
    def setUp(self) -> None:
        self.spark = (
            PySparkSession.builder.config("string", "foo")
            .config("integer", 1)
            .config("boolean", False)
            .appName(self.__class__.__name__)
            .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]"))
            .getOrCreate()
        )

    def tearDown(self):
        self.spark.stop()

    def test_config(self):
        # Config values set via the builder come back as strings.
        self.assertEqual(self.spark.conf.get("string"), "foo")
        self.assertEqual(self.spark.conf.get("boolean"), "false")
        self.assertEqual(self.spark.conf.get("integer"), "1")
if should_test_connect:

    class TestError(grpc.RpcError, Exception):
        def __init__(self, code: grpc.StatusCode):
            self._code = code

        def code(self):
            return self._code

    class TestPolicy(RetryPolicy):
        # Use a small initial backoff so that tests don't spend time waiting.
        def __init__(self, initial_backoff=10, **kwargs):
            super().__init__(initial_backoff=initial_backoff, **kwargs)

        def can_retry(self, exception: BaseException):
            return isinstance(exception, TestError)

    class TestPolicySpecificError(TestPolicy):
        def __init__(self, specific_code: grpc.StatusCode, **kwargs):
            super().__init__(**kwargs)
            self.specific_code = specific_code

        def can_retry(self, exception: BaseException):
            return exception.code() == self.specific_code


@unittest.skipIf(not should_test_connect, connect_requirement_message)
class RetryTests(unittest.TestCase):
    def setUp(self) -> None:
        self.call_wrap = defaultdict(int)

    def stub(self, retries, code):
        # Fails with a TestError until `retries` attempts have been made.
        self.call_wrap["attempts"] += 1
        if self.call_wrap["attempts"] < retries:
            self.call_wrap["raised"] += 1
            raise TestError(code)
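
    # The tests below exercise the client's Retrying helper: each loop
    # iteration is one attempt, and the `with attempt:` block consults the
    # policy (or policies) to decide whether a raised error is retried.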
    def test_simple(self):
        # Check that max_retries=1 means one retry, so two attempts in total.
        for attempt in Retrying(TestPolicy(max_retries=1)):
            with attempt:
                self.stub(2, grpc.StatusCode.INTERNAL)

        self.assertEqual(2, self.call_wrap["attempts"])
        self.assertEqual(1, self.call_wrap["raised"])

    def test_below_limit(self):
        # Check that if we have fewer than 4 retries all is ok.
        for attempt in Retrying(TestPolicy(max_retries=4)):
            with attempt:
                self.stub(2, grpc.StatusCode.INTERNAL)

        self.assertLess(self.call_wrap["attempts"], 4)
        self.assertEqual(self.call_wrap["raised"], 1)

    def test_exceed_retries(self):
        # Exceed the retries.
        with self.assertRaises(RetriesExceeded):
            for attempt in Retrying(TestPolicy(max_retries=2)):
                with attempt:
                    self.stub(5, grpc.StatusCode.INTERNAL)

        self.assertLess(self.call_wrap["attempts"], 5)
        self.assertEqual(self.call_wrap["raised"], 3)

    def test_throw_not_retriable_error(self):
        with self.assertRaises(ValueError):
            for attempt in Retrying(TestPolicy(max_retries=2)):
                with attempt:
                    raise ValueError

    def test_specific_exception(self):
        # Check that only the specific status code is retried, and that
        # staying below the retry limit succeeds.
        policy = TestPolicySpecificError(max_retries=4, specific_code=grpc.StatusCode.UNAVAILABLE)

        for attempt in Retrying(policy):
            with attempt:
                self.stub(2, grpc.StatusCode.UNAVAILABLE)

        self.assertLess(self.call_wrap["attempts"], 4)
        self.assertEqual(self.call_wrap["raised"], 1)

    def test_specific_exception_exceed_retries(self):
        # Exceed the retries.
        policy = TestPolicySpecificError(max_retries=2, specific_code=grpc.StatusCode.UNAVAILABLE)
        with self.assertRaises(RetriesExceeded):
            for attempt in Retrying(policy):
                with attempt:
                    self.stub(5, grpc.StatusCode.UNAVAILABLE)

        self.assertLess(self.call_wrap["attempts"], 4)
        self.assertEqual(self.call_wrap["raised"], 3)

    def test_rejected_by_policy(self):
        # An error the policy does not cover is always thrown immediately.
        policy = TestPolicySpecificError(max_retries=4, specific_code=grpc.StatusCode.UNAVAILABLE)

        with self.assertRaises(TestError):
            for attempt in Retrying(policy):
                with attempt:
                    self.stub(5, grpc.StatusCode.INTERNAL)

        self.assertEqual(self.call_wrap["attempts"], 1)
        self.assertEqual(self.call_wrap["raised"], 1)

    def test_multiple_policies(self):
        policy1 = TestPolicySpecificError(max_retries=2, specific_code=grpc.StatusCode.UNAVAILABLE)
        policy2 = TestPolicySpecificError(max_retries=4, specific_code=grpc.StatusCode.INTERNAL)

        # Tolerate 2 UNAVAILABLE errors and 4 INTERNAL errors.
        error_supply = iter([grpc.StatusCode.UNAVAILABLE] * 2 + [grpc.StatusCode.INTERNAL] * 4)

        for attempt in Retrying([policy1, policy2]):
            with attempt:
                error = next(error_supply, None)
                if error:
                    raise TestError(error)

        self.assertEqual(next(error_supply, None), None)

    def test_multiple_policies_exceed(self):
        policy1 = TestPolicySpecificError(max_retries=2, specific_code=grpc.StatusCode.INTERNAL)
        policy2 = TestPolicySpecificError(max_retries=4, specific_code=grpc.StatusCode.INTERNAL)

        with self.assertRaises(RetriesExceeded):
            for attempt in Retrying([policy1, policy2]):
                with attempt:
                    self.stub(10, grpc.StatusCode.INTERNAL)

        self.assertEqual(self.call_wrap["attempts"], 7)
        self.assertEqual(self.call_wrap["raised"], 7)
@unittest.skipIf(not should_test_connect, connect_requirement_message)
class ChannelBuilderTests(unittest.TestCase):
    def test_invalid_connection_strings(self):
        invalid = [
            "scc://host:12",
            "http://host",
            "sc:/host:1234/path",
            "sc://host/path",
            "sc://host/;parm1;param2",
        ]
        for i in invalid:
            self.assertRaises(PySparkValueError, DefaultChannelBuilder, i)

    def test_sensible_defaults(self):
        chan = DefaultChannelBuilder("sc://host")
        self.assertFalse(chan.secure, "Default URL is not secure")

        chan = DefaultChannelBuilder("sc://host/;token=abcs")
        self.assertTrue(chan.secure, "specifying a token must set the channel to secure")
        self.assertRegex(
            chan.userAgent, r"^_SPARK_CONNECT_PYTHON spark/[^ ]+ os/[^ ]+ python/[^ ]+$"
        )
        chan = DefaultChannelBuilder("sc://host/;use_ssl=abcs")
        self.assertFalse(chan.secure, "Garbage in, false out")

    def test_user_agent(self):
        chan = DefaultChannelBuilder("sc://host/;user_agent=Agent123%20%2F3.4")
        self.assertIn("Agent123 /3.4", chan.userAgent)

    def test_user_agent_len(self):
        user_agent = "x" * 2049
        chan = DefaultChannelBuilder(f"sc://host/;user_agent={user_agent}")
        with self.assertRaises(SparkConnectException) as err:
            chan.userAgent
        self.assertRegex(err.exception._message, "'user_agent' parameter should not exceed")

        user_agent = "%C3%A4" * 341  # "%C3%A4" decodes to "ä"; (341 * 6 = 2046) < 2048
        expected = "ä" * 341
        chan = DefaultChannelBuilder(f"sc://host/;user_agent={user_agent}")
        self.assertIn(expected, chan.userAgent)

    def test_valid_channel_creation(self):
        chan = DefaultChannelBuilder("sc://host").toChannel()
        self.assertIsInstance(chan, grpc.Channel)

        # With use_ssl=true and a token, a secure channel is created as well.
        chan = DefaultChannelBuilder("sc://host/;use_ssl=true;token=abc").toChannel()
        self.assertIsInstance(chan, grpc.Channel)

        chan = DefaultChannelBuilder("sc://host/;use_ssl=true").toChannel()
        self.assertIsInstance(chan, grpc.Channel)

    def test_channel_properties(self):
        chan = DefaultChannelBuilder(
            "sc://host/;use_ssl=true;token=abc;user_agent=foo;param1=120%2021"
        )
        self.assertEqual("host:15002", chan.endpoint)
        self.assertIn("foo", chan.userAgent.split(" "))
        self.assertEqual(True, chan.secure)
        self.assertEqual("120 21", chan.get("param1"))

    def test_metadata(self):
        chan = DefaultChannelBuilder(
            "sc://host/;use_ssl=true;token=abc;param1=120%2021;x-my-header=abcd"
        )
        md = chan.metadata()
        self.assertEqual([("param1", "120 21"), ("x-my-header", "abcd")], md)

    def test_metadata_with_session_id(self):
        id = str(uuid.uuid4())
        chan = DefaultChannelBuilder(f"sc://host/;session_id={id}")
        self.assertEqual(id, chan.session_id)

        chan = DefaultChannelBuilder(
            f"sc://host/;session_id={id};user_agent=acbd;token=abcd;use_ssl=true"
        )
        md = chan.metadata()
        for kv in md:
            self.assertNotIn(
                kv[0],
                [
                    ChannelBuilder.PARAM_SESSION_ID,
                    ChannelBuilder.PARAM_TOKEN,
                    ChannelBuilder.PARAM_USER_ID,
                    ChannelBuilder.PARAM_USER_AGENT,
                    ChannelBuilder.PARAM_USE_SSL,
                ],
                "Metadata must not contain fixed params",
            )

        with self.assertRaises(ValueError) as ve:
            chan = DefaultChannelBuilder("sc://host/;session_id=abcd")
            SparkConnectClient(chan)
        self.assertIn("Parameter value session_id must be a valid UUID format", str(ve.exception))

        chan = DefaultChannelBuilder("sc://host/")
        self.assertIsNone(chan.session_id)

    def test_channel_options(self):
        # SPARK-47694
        chan = DefaultChannelBuilder(
            "sc://host", [("grpc.max_send_message_length", 1860), ("test", "robert")]
        )
        options = chan._channel_options
        self.assertEqual(
            [k for k, _ in options].count("grpc.max_send_message_length"),
            1,
            "only one occurrence for defaults",
        )
        self.assertEqual(
            next(v for k, v in options if k == "grpc.max_send_message_length"),
            1860,
            "overwrites defaults",
        )
        self.assertEqual(
            next(v for k, v in options if k == "test"), "robert", "new values are picked up"
        )


if __name__ == "__main__":
    from pyspark.sql.tests.connect.test_connect_session import *  # noqa: F401

    try:
        import xmlrunner

        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
    except ImportError:
        testRunner = None
    unittest.main(testRunner=testRunner, verbosity=2)