#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
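# Type stub: declares the public interface of SparkContext with signatures
# only; all bodies are elided with `...`.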
from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    NoReturn,
    Optional,
    Tuple,
    Type,
    TypeVar,
)
from types import TracebackType
from py4j.java_gateway import JavaGateway, JavaObject # type: ignore[import]
from pyspark.accumulators import Accumulator, AccumulatorParam
from pyspark.broadcast import Broadcast
from pyspark.conf import SparkConf
from pyspark.profiler import Profiler # noqa: F401
from pyspark.resource.information import ResourceInformation
from pyspark.rdd import RDD
from pyspark.serializers import Serializer
from pyspark.status import StatusTracker
T = TypeVar("T")
U = TypeVar("U")
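# T and U are generic placeholders for RDD element types; the key/value
# readers below return RDD[Tuple[T, U]].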
class SparkContext:
    master: str
    appName: str
    sparkHome: str
    PACKAGE_EXTENSIONS: Iterable[str]
    def __init__(
        self,
        master: Optional[str] = ...,
        appName: Optional[str] = ...,
        sparkHome: Optional[str] = ...,
        pyFiles: Optional[List[str]] = ...,
        environment: Optional[Dict[str, str]] = ...,
        batchSize: int = ...,
        serializer: Serializer = ...,
        conf: Optional[SparkConf] = ...,
        gateway: Optional[JavaGateway] = ...,
        jsc: Optional[JavaObject] = ...,
        profiler_cls: type = ...,
    ) -> None: ...
    def __getnewargs__(self) -> NoReturn: ...
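    # SparkContext instances are not picklable (__getnewargs__ raises), but
    # they support the context-manager protocol: leaving a `with` block
    # stops the context, like calling stop().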
    def __enter__(self) -> SparkContext: ...
    def __exit__(
        self,
        type: Optional[Type[BaseException]],
        value: Optional[BaseException],
        trace: Optional[TracebackType],
    ) -> None: ...
    @classmethod
    def getOrCreate(cls, conf: Optional[SparkConf] = ...) -> SparkContext: ...
    def setLogLevel(self, logLevel: str) -> None: ...
    @classmethod
    def setSystemProperty(cls, key: str, value: str) -> None: ...
    @property
    def version(self) -> str: ...
    @property
    def applicationId(self) -> str: ...
    @property
    def uiWebUrl(self) -> str: ...
    @property
    def startTime(self) -> int: ...
    @property
    def defaultParallelism(self) -> int: ...
    @property
    def defaultMinPartitions(self) -> int: ...
    def stop(self) -> None: ...
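    # RDD creation from local collections, integer ranges, and
    # text/pickle/binary files.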
    def emptyRDD(self) -> RDD[Any]: ...
    def range(
        self,
        start: int,
        end: Optional[int] = ...,
        step: int = ...,
        numSlices: Optional[int] = ...,
    ) -> RDD[int]: ...
    def parallelize(self, c: Iterable[T], numSlices: Optional[int] = ...) -> RDD[T]: ...
    def pickleFile(self, name: str, minPartitions: Optional[int] = ...) -> RDD[Any]: ...
    def textFile(
        self, name: str, minPartitions: Optional[int] = ..., use_unicode: bool = ...
    ) -> RDD[str]: ...
    def wholeTextFiles(
        self, path: str, minPartitions: Optional[int] = ..., use_unicode: bool = ...
    ) -> RDD[Tuple[str, str]]: ...
    def binaryFiles(
        self, path: str, minPartitions: Optional[int] = ...
    ) -> RDD[Tuple[str, bytes]]: ...
    def binaryRecords(self, path: str, recordLength: int) -> RDD[bytes]: ...
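    # Hadoop-format readers. keyClass/valueClass and the optional
    # keyConverter/valueConverter are fully qualified Java class names;
    # converters translate Writables into Python-serializable values.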
    def sequenceFile(
        self,
        path: str,
        keyClass: Optional[str] = ...,
        valueClass: Optional[str] = ...,
        keyConverter: Optional[str] = ...,
        valueConverter: Optional[str] = ...,
        minSplits: Optional[int] = ...,
        batchSize: int = ...,
    ) -> RDD[Tuple[T, U]]: ...
    def newAPIHadoopFile(
        self,
        path: str,
        inputFormatClass: str,
        keyClass: str,
        valueClass: str,
        keyConverter: Optional[str] = ...,
        valueConverter: Optional[str] = ...,
        conf: Optional[Dict[str, str]] = ...,
        batchSize: int = ...,
    ) -> RDD[Tuple[T, U]]: ...
    def newAPIHadoopRDD(
        self,
        inputFormatClass: str,
        keyClass: str,
        valueClass: str,
        keyConverter: Optional[str] = ...,
        valueConverter: Optional[str] = ...,
        conf: Optional[Dict[str, str]] = ...,
        batchSize: int = ...,
    ) -> RDD[Tuple[T, U]]: ...
    def hadoopFile(
        self,
        path: str,
        inputFormatClass: str,
        keyClass: str,
        valueClass: str,
        keyConverter: Optional[str] = ...,
        valueConverter: Optional[str] = ...,
        conf: Optional[Dict[str, str]] = ...,
        batchSize: int = ...,
    ) -> RDD[Tuple[T, U]]: ...
    def hadoopRDD(
        self,
        inputFormatClass: str,
        keyClass: str,
        valueClass: str,
        keyConverter: Optional[str] = ...,
        valueConverter: Optional[str] = ...,
        conf: Optional[Dict[str, str]] = ...,
        batchSize: int = ...,
    ) -> RDD[Tuple[T, U]]: ...
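    # Union of RDDs, shared variables (broadcasts, accumulators), shipping
    # files and .py dependencies to executors, and checkpoint directories.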
    def union(self, rdds: Iterable[RDD[T]]) -> RDD[T]: ...
    def broadcast(self, value: T) -> Broadcast[T]: ...
    def accumulator(
        self, value: T, accum_param: Optional[AccumulatorParam[T]] = ...
    ) -> Accumulator[T]: ...
    def addFile(self, path: str, recursive: bool = ...) -> None: ...
    def addPyFile(self, path: str) -> None: ...
    def setCheckpointDir(self, dirName: str) -> None: ...
    def getCheckpointDir(self) -> Optional[str]: ...
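    # Job and execution controls: job groups, local properties,
    # cancellation, status tracking, profiling, and resource information.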
    def setJobGroup(
        self, groupId: str, description: str, interruptOnCancel: bool = ...
    ) -> None: ...
    def setLocalProperty(self, key: str, value: str) -> None: ...
    def getLocalProperty(self, key: str) -> Optional[str]: ...
    def sparkUser(self) -> str: ...
    def setJobDescription(self, value: str) -> None: ...
    def cancelJobGroup(self, groupId: str) -> None: ...
    def cancelAllJobs(self) -> None: ...
    def statusTracker(self) -> StatusTracker: ...
    def runJob(
        self,
        rdd: RDD[T],
        partitionFunc: Callable[[Iterable[T]], Iterable[U]],
        partitions: Optional[List[int]] = ...,
        allowLocal: bool = ...,
    ) -> List[U]: ...
    def show_profiles(self) -> None: ...
    def dump_profiles(self, path: str) -> None: ...
    def getConf(self) -> SparkConf: ...
    @property
    def resources(self) -> Dict[str, ResourceInformation]: ...
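
# A minimal usage sketch (illustrative only, not part of the stub); the
# "local[*]" master and "example" app name below are placeholder assumptions:
#
#     from pyspark import SparkConf, SparkContext
#
#     conf = SparkConf().setMaster("local[*]").setAppName("example")
#     with SparkContext(conf=conf) as sc:  # __exit__ stops the context
#         rdd = sc.parallelize(range(10), numSlices=2)
#         total = rdd.sum()  # 0 + 1 + ... + 9 == 45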