#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Either of the following builds a source distribution:
#   cd python
#   python packaging/client/setup.py sdist
# or:
#   cd python/packaging/client
#   python setup.py sdist
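#
# The package can also be installed in editable mode for development (as the branch below
# assumes), e.g.:
#   pip install -e python/packaging/client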
import sys
from setuptools import setup
import os
from shutil import copyfile, move
import glob
from pathlib import Path
if (
    # When we package, the current working directory is this file's parent 'client' directory
    # (as we pip install -e python/packaging/client).
    os.getcwd() == str(Path(__file__).parent.absolute())
    and str(Path(__file__).parent.name) == "client"
):
    # This branch runs for:
    # - pip install -e python/packaging/client
    #   (it sets the current working directory to 'client')
    # - cd python/packaging/client; python setup.py sdist
    #
    # It does not run for:
    # - python packaging/client/setup.py sdist
    #
    # Move to spark/python.
    os.chdir(Path(__file__).parent.parent.parent.absolute())
try:
    exec(open("pyspark/version.py").read())
except IOError:
    print(
        "Failed to load PySpark version file for packaging. You must be in Spark's python dir.",
        file=sys.stderr,
    )
    sys.exit(-1)
VERSION = __version__ # noqa
# Check whether we are under the Spark path, in which case we need to build the symlink farm.
# This is important because we only want to build the symlink farm while under Spark; otherwise we
# want to use the existing symlink farm. And if the symlink farm already exists while we are under
# Spark (e.g. from a partially built sdist), we should error and have the user sort it out.
in_spark = os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or (
    os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1
)
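# (The first check matches a Spark source checkout; the second matches an unpacked binary
# release, which ships a RELEASE file and a single spark*core*.jar under jars/.)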
test_packages = []
if "SPARK_TESTING" in os.environ:
    test_packages = [
        "pyspark.errors.tests.connect",
        "pyspark.tests",  # for Memory profiler parity tests
        "pyspark.resource.tests",
        "pyspark.sql.tests",
        "pyspark.sql.tests.arrow",
        "pyspark.sql.tests.connect",
        "pyspark.sql.tests.connect.arrow",
        "pyspark.sql.tests.connect.streaming",
        "pyspark.sql.tests.connect.client",
        "pyspark.sql.tests.connect.pandas",
        "pyspark.sql.tests.connect.shell",
        "pyspark.sql.tests.pandas",
        "pyspark.sql.tests.pandas.helper",
        "pyspark.sql.tests.plot",
        "pyspark.sql.tests.streaming",
        "pyspark.ml.tests",
        "pyspark.ml.tests.connect",
        "pyspark.pandas.tests",
        "pyspark.pandas.tests.computation",
        "pyspark.pandas.tests.data_type_ops",
        "pyspark.pandas.tests.diff_frames_ops",
        "pyspark.pandas.tests.frame",
        "pyspark.pandas.tests.groupby",
        "pyspark.pandas.tests.indexes",
        "pyspark.pandas.tests.io",
        "pyspark.pandas.tests.plot",
        "pyspark.pandas.tests.resample",
        "pyspark.pandas.tests.reshape",
        "pyspark.pandas.tests.series",
        "pyspark.pandas.tests.window",
        "pyspark.pandas.tests.connect",
        "pyspark.pandas.tests.connect.computation",
        "pyspark.pandas.tests.connect.data_type_ops",
        "pyspark.pandas.tests.connect.diff_frames_ops",
        "pyspark.pandas.tests.connect.frame",
        "pyspark.pandas.tests.connect.groupby",
        "pyspark.pandas.tests.connect.indexes",
        "pyspark.pandas.tests.connect.io",
        "pyspark.pandas.tests.connect.plot",
        "pyspark.pandas.tests.connect.resample",
        "pyspark.pandas.tests.connect.reshape",
        "pyspark.pandas.tests.connect.series",
        "pyspark.pandas.tests.connect.window",
        "pyspark.pipelines.tests",
        "pyspark.logger.tests",
        "pyspark.logger.tests.connect",
    ]
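
# A usage sketch: any value works since the code only checks that the key is present, e.g.
#   SPARK_TESTING=1 python packaging/client/setup.py sdist
# builds a source distribution that also includes the test packages listed above.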
try:
    if in_spark:
        # !!HACK ALERT!!
        # 1. `setup.py` has to be located in the same directory as the package.
        #    Therefore, we copy this file into the `spark/python` directory and
        #    remove it at the end.
        # 2. `lib` is renamed to `lib.back` so MANIFEST.in does not pick `py4j` up.
        #    It is renamed back at the end.
        move("lib", "lib.back")
        copyfile("packaging/client/setup.py", "setup.py")
        copyfile("packaging/client/setup.cfg", "setup.cfg")

    # If you are changing the versions here, please also change ./python/pyspark/sql/pandas/utils.py.
    # For Arrow, you should also check ./pom.xml and ensure there are no breaking changes in the
    # binary format protocol with the Java version; see ARROW_HOME/format/* for specifications.
    # Also don't forget to update python/docs/source/getting_started/install.rst,
    # python/packaging/classic/setup.py, and python/packaging/connect/setup.py.
    _minimum_pandas_version = "2.2.0"
    _minimum_numpy_version = "1.21"
    _minimum_pyarrow_version = "15.0.0"
    _minimum_grpc_version = "1.67.0"
    _minimum_googleapis_common_protos_version = "1.65.0"
    _minimum_pyyaml_version = "3.11"

    with open("README.md") as f:
        long_description = f.read()
    connect_packages = [
        "pyspark",
        "pyspark.cloudpickle",
        "pyspark.mllib",
        "pyspark.mllib.linalg",
        "pyspark.mllib.stat",
        "pyspark.ml",
        "pyspark.ml.connect",
        "pyspark.ml.linalg",
        "pyspark.ml.param",
        "pyspark.ml.torch",
        "pyspark.ml.deepspeed",
        "pyspark.sql",
        "pyspark.sql.avro",
        "pyspark.sql.connect",
        "pyspark.sql.connect.avro",
        "pyspark.sql.connect.client",
        "pyspark.sql.connect.functions",
        "pyspark.sql.connect.proto",
        "pyspark.sql.connect.protobuf",
        "pyspark.sql.connect.resource",
        "pyspark.sql.connect.shell",
        "pyspark.sql.connect.streaming",
        "pyspark.sql.connect.streaming.worker",
        "pyspark.sql.functions",
        "pyspark.sql.pandas",
        "pyspark.sql.plot",
        "pyspark.sql.protobuf",
        "pyspark.sql.streaming",
        "pyspark.sql.streaming.proto",
        "pyspark.sql.worker",
        "pyspark.streaming",
        "pyspark.pandas",
        "pyspark.pandas.data_type_ops",
        "pyspark.pandas.indexes",
        "pyspark.pandas.missing",
        "pyspark.pandas.plot",
        "pyspark.pandas.spark",
        "pyspark.pandas.typedef",
        "pyspark.pandas.usage_logging",
        "pyspark.pipelines",
        "pyspark.testing",
        "pyspark.resource",
        "pyspark.errors",
        "pyspark.errors.exceptions",
        "pyspark.logger",
    ]
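
    # Only the runtime packages above are always shipped; the test packages defined earlier are
    # appended below solely when SPARK_TESTING is set in the environment.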
    setup(
        name="pyspark-client",
        version=VERSION,
        description="Python Spark Connect client for Apache Spark",
        long_description=long_description,
        long_description_content_type="text/markdown",
        author="Spark Developers",
        author_email="dev@spark.apache.org",
        url="https://github.com/apache/spark/tree/master/python",
        packages=connect_packages + test_packages,
        include_package_data=True,
        license="Apache-2.0",
        # Don't forget to update python/docs/source/getting_started/install.rst
        # if you're updating the versions or dependencies.
        install_requires=[
            "pandas>=%s" % _minimum_pandas_version,
            "pyarrow>=%s" % _minimum_pyarrow_version,
            "grpcio>=%s" % _minimum_grpc_version,
            "grpcio-status>=%s" % _minimum_grpc_version,
            "googleapis-common-protos>=%s" % _minimum_googleapis_common_protos_version,
            "numpy>=%s" % _minimum_numpy_version,
            "pyyaml>=%s" % _minimum_pyyaml_version,
        ],
        python_requires=">=3.10",
        classifiers=[
            "Development Status :: 5 - Production/Stable",
            "Programming Language :: Python :: 3.10",
            "Programming Language :: Python :: 3.11",
            "Programming Language :: Python :: 3.12",
            "Programming Language :: Python :: 3.13",
            "Programming Language :: Python :: Implementation :: CPython",
            "Programming Language :: Python :: Implementation :: PyPy",
            "Typing :: Typed",
        ],
    )
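
    # A usage sketch: `sdist` writes the pyspark-client source archive into dist/ (relative to
    # spark/python, the working directory at this point), and the resulting .tar.gz can be
    # installed with pip.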
finally:
    if in_spark:
        move("lib.back", "lib")
        os.remove("setup.py")
        os.remove("setup.cfg")