| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| from typing import ( |
| Any, |
| Callable, |
| Dict, |
| Iterable, |
| List, |
| NoReturn, |
| Optional, |
| Tuple, |
| Type, |
| TypeVar, |
| ) |
| from types import TracebackType |
| |
| from py4j.java_gateway import JavaGateway, JavaObject # type: ignore[import] |
| |
| from pyspark.accumulators import Accumulator, AccumulatorParam |
| from pyspark.broadcast import Broadcast |
| from pyspark.conf import SparkConf |
| from pyspark.profiler import Profiler # noqa: F401 |
| from pyspark.resource.information import ResourceInformation |
| from pyspark.rdd import RDD |
| from pyspark.serializers import Serializer |
| from pyspark.status import StatusTracker |
| |
| T = TypeVar("T") |
| U = TypeVar("U") |
| |
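# Stub for pyspark.context.SparkContext, the main entry point for Spark
# functionality in Python. A minimal construction sketch (illustrative only;
# assumes a local Spark installation, and the master/app-name values are
# arbitrary):
#
#   conf = SparkConf().setMaster("local[2]").setAppName("stub-demo")
#   with SparkContext.getOrCreate(conf) as sc:
#       print(sc.version, sc.defaultParallelism)   # sc.stop() runs on exit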
| class SparkContext: |
| master: str |
| appName: str |
| sparkHome: str |
| PACKAGE_EXTENSIONS: Iterable[str] |
| def __init__( |
| self, |
| master: Optional[str] = ..., |
| appName: Optional[str] = ..., |
| sparkHome: Optional[str] = ..., |
| pyFiles: Optional[List[str]] = ..., |
| environment: Optional[Dict[str, str]] = ..., |
| batchSize: int = ..., |
| serializer: Serializer = ..., |
| conf: Optional[SparkConf] = ..., |
| gateway: Optional[JavaGateway] = ..., |
| jsc: Optional[JavaObject] = ..., |
| profiler_cls: type = ..., |
| ) -> None: ... |
| def __getnewargs__(self) -> NoReturn: ... |
| def __enter__(self) -> SparkContext: ... |
| def __exit__( |
| self, |
| type: Optional[Type[BaseException]], |
| value: Optional[BaseException], |
| trace: Optional[TracebackType], |
| ) -> None: ... |
| @classmethod |
| def getOrCreate(cls, conf: Optional[SparkConf] = ...) -> SparkContext: ... |
| def setLogLevel(self, logLevel: str) -> None: ... |
| @classmethod |
| def setSystemProperty(cls, key: str, value: str) -> None: ... |
| @property |
| def version(self) -> str: ... |
| @property |
| def applicationId(self) -> str: ... |
| @property |
| def uiWebUrl(self) -> str: ... |
| @property |
| def startTime(self) -> int: ... |
| @property |
| def defaultParallelism(self) -> int: ... |
| @property |
| def defaultMinPartitions(self) -> int: ... |
| def stop(self) -> None: ... |
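    # RDD creation: emptyRDD/range/parallelize build RDDs from driver-side
    # data, while pickleFile/textFile/wholeTextFiles/binaryFiles/binaryRecords
    # read from storage. A minimal sketch (values and path are illustrative):
    #
    #   rdd = sc.parallelize(range(10), numSlices=2)
    #   squares = rdd.map(lambda x: x * x).collect()
    #   lines = sc.textFile("data.txt")   # placeholder path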
| def emptyRDD(self) -> RDD[Any]: ... |
| def range( |
| self, |
| start: int, |
| end: Optional[int] = ..., |
| step: int = ..., |
| numSlices: Optional[int] = ..., |
| ) -> RDD[int]: ... |
| def parallelize(self, c: Iterable[T], numSlices: Optional[int] = ...) -> RDD[T]: ... |
| def pickleFile(self, name: str, minPartitions: Optional[int] = ...) -> RDD[Any]: ... |
| def textFile( |
| self, name: str, minPartitions: Optional[int] = ..., use_unicode: bool = ... |
| ) -> RDD[str]: ... |
| def wholeTextFiles( |
| self, path: str, minPartitions: Optional[int] = ..., use_unicode: bool = ... |
| ) -> RDD[Tuple[str, str]]: ... |
| def binaryFiles( |
| self, path: str, minPartitions: Optional[int] = ... |
| ) -> RDD[Tuple[str, bytes]]: ... |
| def binaryRecords(self, path: str, recordLength: int) -> RDD[bytes]: ... |
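    # Hadoop-format readers: sequenceFile and the hadoop*/newAPIHadoop*
    # methods take fully qualified Java class names for the input format,
    # key and value classes, plus optional converter classes. A hedged
    # sketch (the path is a placeholder; the class names are the standard
    # Hadoop text-input classes):
    #
    #   pairs = sc.newAPIHadoopFile(
    #       "/data/input",
    #       "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
    #       "org.apache.hadoop.io.LongWritable",
    #       "org.apache.hadoop.io.Text",
    #   )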
| def sequenceFile( |
| self, |
| path: str, |
| keyClass: Optional[str] = ..., |
| valueClass: Optional[str] = ..., |
| keyConverter: Optional[str] = ..., |
| valueConverter: Optional[str] = ..., |
| minSplits: Optional[int] = ..., |
| batchSize: int = ..., |
| ) -> RDD[Tuple[T, U]]: ... |
| def newAPIHadoopFile( |
| self, |
| path: str, |
| inputFormatClass: str, |
| keyClass: str, |
| valueClass: str, |
| keyConverter: Optional[str] = ..., |
| valueConverter: Optional[str] = ..., |
| conf: Optional[Dict[str, str]] = ..., |
| batchSize: int = ..., |
| ) -> RDD[Tuple[T, U]]: ... |
| def newAPIHadoopRDD( |
| self, |
| inputFormatClass: str, |
| keyClass: str, |
| valueClass: str, |
| keyConverter: Optional[str] = ..., |
| valueConverter: Optional[str] = ..., |
| conf: Optional[Dict[str, str]] = ..., |
| batchSize: int = ..., |
| ) -> RDD[Tuple[T, U]]: ... |
| def hadoopFile( |
| self, |
| path: str, |
| inputFormatClass: str, |
| keyClass: str, |
| valueClass: str, |
| keyConverter: Optional[str] = ..., |
| valueConverter: Optional[str] = ..., |
| conf: Optional[Dict[str, str]] = ..., |
| batchSize: int = ..., |
| ) -> RDD[Tuple[T, U]]: ... |
| def hadoopRDD( |
| self, |
| inputFormatClass: str, |
| keyClass: str, |
| valueClass: str, |
| keyConverter: Optional[str] = ..., |
| valueConverter: Optional[str] = ..., |
| conf: Optional[Dict[str, str]] = ..., |
| batchSize: int = ..., |
| ) -> RDD[Tuple[T, U]]: ... |
| def union(self, rdds: Iterable[RDD[T]]) -> RDD[T]: ... |
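    # Shared variables: broadcast ships a read-only value to every executor;
    # an accumulator can only be added to from tasks and read on the driver.
    # A minimal sketch:
    #
    #   lookup = sc.broadcast({"a": 1, "b": 2})   # lookup.value on executors
    #   acc = sc.accumulator(0)
    #   sc.parallelize([1, 2, 3, 4]).foreach(lambda x: acc.add(x))
    #   acc.value   # 10, read back on the driver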
| def broadcast(self, value: T) -> Broadcast[T]: ... |
| def accumulator( |
| self, value: T, accum_param: Optional[AccumulatorParam[T]] = ... |
| ) -> Accumulator[T]: ... |
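    # Dependency distribution: addFile ships a file (or a directory, with
    # recursive=True) to every node, retrievable via SparkFiles.get();
    # addPyFile also puts the file on the executors' Python path. Sketch
    # with placeholder paths:
    #
    #   sc.addPyFile("deps/helpers.py")
    #   sc.addFile("conf/lookup.csv")   # SparkFiles.get("lookup.csv") in tasks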
| def addFile(self, path: str, recursive: bool = ...) -> None: ... |
| def addPyFile(self, path: str) -> None: ... |
| def setCheckpointDir(self, dirName: str) -> None: ... |
| def getCheckpointDir(self) -> Optional[str]: ... |
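    # Job bookkeeping: setJobGroup/setJobDescription tag jobs submitted from
    # the current thread so they can be grouped in the UI and cancelled as a
    # unit; statusTracker exposes job/stage status. Sketch (the group id is
    # hypothetical):
    #
    #   sc.setJobGroup("nightly-etl", "nightly ETL run")
    #   ...          # trigger actions
    #   sc.cancelJobGroup("nightly-etl")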
| def setJobGroup( |
| self, groupId: str, description: str, interruptOnCancel: bool = ... |
| ) -> None: ... |
| def setLocalProperty(self, key: str, value: str) -> None: ... |
| def getLocalProperty(self, key: str) -> Optional[str]: ... |
| def sparkUser(self) -> str: ... |
| def setJobDescription(self, value: str) -> None: ... |
| def cancelJobGroup(self, groupId: str) -> None: ... |
| def cancelAllJobs(self) -> None: ... |
| def statusTracker(self) -> StatusTracker: ... |
| def runJob( |
| self, |
| rdd: RDD[T], |
| partitionFunc: Callable[[Iterable[T]], Iterable[U]], |
| partitions: Optional[List[int]] = ..., |
| allowLocal: bool = ..., |
| ) -> List[U]: ... |
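    # runJob applies partitionFunc to each selected partition and returns the
    # concatenated results to the driver. A minimal sketch:
    #
    #   rdd = sc.parallelize(range(8), 4)
    #   sc.runJob(rdd, lambda it: [sum(it)], partitions=[0, 1])   # -> [1, 5]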
| def show_profiles(self) -> None: ... |
| def dump_profiles(self, path: str) -> None: ... |
| def getConf(self) -> SparkConf: ... |
| @property |
| def resources(self) -> Dict[str, ResourceInformation]: ... |