| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| from typing import ( |
| Any, |
| Callable, |
| Dict, |
| Iterable, |
| List, |
| NoReturn, |
| Optional, |
| Tuple, |
| Type, |
| TypeVar, |
| ) |
| from types import TracebackType |
| |
| from py4j.java_gateway import JavaGateway, JavaObject # type: ignore[import] |
| |
| from pyspark.accumulators import Accumulator, AccumulatorParam |
| from pyspark.broadcast import Broadcast |
| from pyspark.conf import SparkConf |
| from pyspark.profiler import Profiler # noqa: F401 |
| from pyspark.resource.information import ResourceInformation |
| from pyspark.rdd import RDD |
| from pyspark.serializers import Serializer |
| from pyspark.status import StatusTracker |
| |
| T = TypeVar("T") |
| U = TypeVar("U") |
| |
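# Stub for pyspark.context.SparkContext, the main entry point for Spark
# functionality in Python. A minimal construction sketch (illustrative only;
# assumes a local Spark installation, and the master/app-name values are
# arbitrary):
#
#   conf = SparkConf().setMaster("local[2]").setAppName("stub-demo")
#   with SparkContext.getOrCreate(conf) as sc:
#       print(sc.version, sc.defaultParallelism)   # sc.stop() runs on exit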
| class SparkContext: |
| master: str |
| appName: str |
| sparkHome: str |
| PACKAGE_EXTENSIONS: Iterable[str] |
| def __init__( |
| self, |
| master: Optional[str] = ..., |
| appName: Optional[str] = ..., |
| sparkHome: Optional[str] = ..., |
| pyFiles: Optional[List[str]] = ..., |
| environment: Optional[Dict[str, str]] = ..., |
| batchSize: int = ..., |
| serializer: Serializer = ..., |
| conf: Optional[SparkConf] = ..., |
| gateway: Optional[JavaGateway] = ..., |
| jsc: Optional[JavaObject] = ..., |
| profiler_cls: type = ..., |
| ) -> None: ... |
| def __getnewargs__(self) -> NoReturn: ... |
| def __enter__(self) -> SparkContext: ... |
| def __exit__( |
| self, |
| type: Optional[Type[BaseException]], |
| value: Optional[BaseException], |
| trace: Optional[TracebackType], |
| ) -> None: ... |
| @classmethod |
| def getOrCreate(cls, conf: Optional[SparkConf] = ...) -> SparkContext: ... |
| def setLogLevel(self, logLevel: str) -> None: ... |
| @classmethod |
| def setSystemProperty(cls, key: str, value: str) -> None: ... |
| @property |
| def version(self) -> str: ... |
| @property |
| def applicationId(self) -> str: ... |
| @property |
| def uiWebUrl(self) -> str: ... |
| @property |
| def startTime(self) -> int: ... |
| @property |
| def defaultParallelism(self) -> int: ... |
| @property |
| def defaultMinPartitions(self) -> int: ... |
| def stop(self) -> None: ... |
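    # RDD creation: emptyRDD/range/parallelize build RDDs from driver-side
    # data, while pickleFile/textFile/wholeTextFiles/binaryFiles/binaryRecords
    # read from storage. A minimal sketch (values and path are illustrative):
    #
    #   rdd = sc.parallelize(range(10), numSlices=2)
    #   squares = rdd.map(lambda x: x * x).collect()
    #   lines = sc.textFile("data.txt")   # placeholder path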
| def emptyRDD(self) -> RDD[Any]: ... |
| def range( |
| self, |
| start: int, |
| end: Optional[int] = ..., |
| step: int = ..., |
| numSlices: Optional[int] = ..., |
| ) -> RDD[int]: ... |
| def parallelize(self, c: Iterable[T], numSlices: Optional[int] = ...) -> RDD[T]: ... |
| def pickleFile(self, name: str, minPartitions: Optional[int] = ...) -> RDD[Any]: ... |
| def textFile( |
| self, name: str, minPartitions: Optional[int] = ..., use_unicode: bool = ... |
| ) -> RDD[str]: ... |
| def wholeTextFiles( |
| self, path: str, minPartitions: Optional[int] = ..., use_unicode: bool = ... |
| ) -> RDD[Tuple[str, str]]: ... |
| def binaryFiles( |
| self, path: str, minPartitions: Optional[int] = ... |
| ) -> RDD[Tuple[str, bytes]]: ... |
| def binaryRecords(self, path: str, recordLength: int) -> RDD[bytes]: ... |
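    # Hadoop-format readers: sequenceFile and the hadoop*/newAPIHadoop*
    # methods take fully qualified Java class names for the input format,
    # key and value classes, plus optional converter classes. A hedged
    # sketch (the path is a placeholder; the class names are the standard
    # Hadoop text-input classes):
    #
    #   pairs = sc.newAPIHadoopFile(
    #       "/data/input",
    #       "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
    #       "org.apache.hadoop.io.LongWritable",
    #       "org.apache.hadoop.io.Text",
    #   )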
| def sequenceFile( |
| self, |
| path: str, |
| keyClass: Optional[str] = ..., |
| valueClass: Optional[str] = ..., |
| keyConverter: Optional[str] = ..., |
| valueConverter: Optional[str] = ..., |
| minSplits: Optional[int] = ..., |
| batchSize: int = ..., |
| ) -> RDD[Tuple[T, U]]: ... |
| def newAPIHadoopFile( |
| self, |
| path: str, |
| inputFormatClass: str, |
| keyClass: str, |
| valueClass: str, |
| keyConverter: Optional[str] = ..., |
| valueConverter: Optional[str] = ..., |
| conf: Optional[Dict[str, str]] = ..., |
| batchSize: int = ..., |
| ) -> RDD[Tuple[T, U]]: ... |
| def newAPIHadoopRDD( |
| self, |
| inputFormatClass: str, |
| keyClass: str, |
| valueClass: str, |
| keyConverter: Optional[str] = ..., |
| valueConverter: Optional[str] = ..., |
| conf: Optional[Dict[str, str]] = ..., |
| batchSize: int = ..., |
| ) -> RDD[Tuple[T, U]]: ... |
| def hadoopFile( |
| self, |
| path: str, |
| inputFormatClass: str, |
| keyClass: str, |
| valueClass: str, |
| keyConverter: Optional[str] = ..., |
| valueConverter: Optional[str] = ..., |
| conf: Optional[Dict[str, str]] = ..., |
| batchSize: int = ..., |
| ) -> RDD[Tuple[T, U]]: ... |
| def hadoopRDD( |
| self, |
| inputFormatClass: str, |
| keyClass: str, |
| valueClass: str, |
| keyConverter: Optional[str] = ..., |
| valueConverter: Optional[str] = ..., |
| conf: Optional[Dict[str, str]] = ..., |
| batchSize: int = ..., |
| ) -> RDD[Tuple[T, U]]: ... |
| def union(self, rdds: Iterable[RDD[T]]) -> RDD[T]: ... |
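    # Shared variables: broadcast ships a read-only value to every executor;
    # an accumulator can only be added to from tasks and read on the driver.
    # A minimal sketch:
    #
    #   lookup = sc.broadcast({"a": 1, "b": 2})   # lookup.value on executors
    #   acc = sc.accumulator(0)
    #   sc.parallelize([1, 2, 3, 4]).foreach(lambda x: acc.add(x))
    #   acc.value   # 10, read back on the driver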
| def broadcast(self, value: T) -> Broadcast[T]: ... |
| def accumulator( |
| self, value: T, accum_param: Optional[AccumulatorParam[T]] = ... |
| ) -> Accumulator[T]: ... |
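    # Dependency distribution: addFile ships a file (or a directory, with
    # recursive=True) to every node, retrievable via SparkFiles.get();
    # addPyFile also puts the file on the executors' Python path. Sketch
    # with placeholder paths:
    #
    #   sc.addPyFile("deps/helpers.py")
    #   sc.addFile("conf/lookup.csv")   # SparkFiles.get("lookup.csv") in tasks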
| def addFile(self, path: str, recursive: bool = ...) -> None: ... |
| def addPyFile(self, path: str) -> None: ... |
| def setCheckpointDir(self, dirName: str) -> None: ... |
| def getCheckpointDir(self) -> Optional[str]: ... |
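    # Job bookkeeping: setJobGroup/setJobDescription tag jobs submitted from
    # the current thread so they can be grouped in the UI and cancelled as a
    # unit; statusTracker exposes job/stage status. Sketch (the group id is
    # hypothetical):
    #
    #   sc.setJobGroup("nightly-etl", "nightly ETL run")
    #   ...          # trigger actions
    #   sc.cancelJobGroup("nightly-etl")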
| def setJobGroup( |
| self, groupId: str, description: str, interruptOnCancel: bool = ... |
| ) -> None: ... |
| def setLocalProperty(self, key: str, value: str) -> None: ... |
| def getLocalProperty(self, key: str) -> Optional[str]: ... |
| def sparkUser(self) -> str: ... |
| def setJobDescription(self, value: str) -> None: ... |
| def cancelJobGroup(self, groupId: str) -> None: ... |
| def cancelAllJobs(self) -> None: ... |
| def statusTracker(self) -> StatusTracker: ... |
| def runJob( |
| self, |
| rdd: RDD[T], |
| partitionFunc: Callable[[Iterable[T]], Iterable[U]], |
| partitions: Optional[List[int]] = ..., |
| allowLocal: bool = ..., |
| ) -> List[U]: ... |
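    # runJob applies partitionFunc to each selected partition and returns the
    # concatenated results to the driver. A minimal sketch:
    #
    #   rdd = sc.parallelize(range(8), 4)
    #   sc.runJob(rdd, lambda it: [sum(it)], partitions=[0, 1])   # -> [1, 5]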
| def show_profiles(self) -> None: ... |
| def dump_profiles(self, path: str) -> None: ... |
| def getConf(self) -> SparkConf: ... |
| @property |
| def resources(self) -> Dict[str, ResourceInformation]: ... |