| # |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| from abc import ABCMeta, abstractproperty, abstractmethod |
| |
| |
| class JobContext: |
| """ |
| An abstract class that holds runtime information about the job execution |
| context. |
| |
| An instance of this class is kept on the node hosting a remote Spark |
| context and is made available to jobs being executed via |
| RemoteSparkContext#submit(). |
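
    Examples
    --------
    A minimal illustrative sketch of a job function; as in the property
    examples below, the job receives a JobContext instance as its only
    argument when it is submitted (the name simple_job is arbitrary):

    >>> def simple_job(context):
    ...     return context.sc.parallelize(range(10), 2).count()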
| |
| """ |
| |
| __metaclass__ = ABCMeta |
| |
| @abstractproperty |
| def sc(self): |
| """ |
| The shared SparkContext instance. |
| |
| Returns |
| ------- |
| sc : pyspark.context.SparkContext |
| A SparkContext instance |
| |
| Examples |
| ------- |
| |
| >>> def simple_spark_job(context): |
| >>> elements = [10, 20, 30] |
| >>> sc = context.sc |
| >>> return sc.parallelize(elements, 2).count() |
| """ |
| pass |
| |
| @abstractproperty |
| def sql_ctx(self): |
| """ |
| The shared SQLContext instance. |
| |
| Returns |
| ------- |
| sql_ctx : pyspark.sql.SQLContext |
| A SQLContext instance |
| |
| Examples |
| ------- |
| |
| >>> def simple_spark_sql_job(context): |
| >>> sql_ctx = context.sql_ctx |
| >>> df1 = sql_ctx.read.json("/sample.json") |
| >>> return df1.dTypes() |
| """ |
| pass |
| |
| @abstractproperty |
| def hive_ctx(self): |
| """ |
| The shared HiveContext instance. |
| |
| Returns |
| ------- |
| hive_ctx : pyspark.sql.HiveContext |
| A HiveContext instance |
| |
| Examples |
| ------- |
| |
| >>> def simple_spark_hive_job(context): |
| >>> hive_ctx = context.hive_ctx |
| >>> df1 = hive_ctx.read.json("/sample.json") |
| >>> return df1.dTypes() |
| """ |
| pass |
| |
| @abstractproperty |
| def streaming_ctx(self): |
| """ |
        The shared StreamingContext instance, which must already have been
        created via create_streaming_ctx(batch_duration).
| |
| Returns |
| ------- |
| streaming_ctx : pyspark.streaming.StreamingContext |
| A StreamingContext instance |
| |
| Raises |
| ------- |
| ValueError |
| If the streaming_ctx is not already created using the function |
| create_streaming_ctx(batch_duration) |
| |
| Examples |
| ------- |
| |
| >>> def simple_spark_streaming_job(context): |
| >>> context.create_streaming_ctx(30) |
| >>> streaming_ctx = context.streaming_ctx |
| >>> lines = streaming_ctx.socketTextStream('localhost', '8080') |
| >>> filtered_lines = lines.filter(lambda line: 'spark' in line) |
| >>> filtered_lines.pprint() |
| """ |
| pass |
| |
| @abstractmethod |
| def create_streaming_ctx(self, batch_duration): |
| """ |
        Creates the Spark StreamingContext.

        Parameters
        ----------
        batch_duration
            The batch interval to use for the new StreamingContext

        Raises
        ------
        ValueError
            If the streaming context has already been initialized

        Examples
        --------
        See usage in JobContext.streaming_ctx
| |
| """ |
| pass |
| |
| @abstractmethod |
| def stop_streaming_ctx(self): |
| """ |
        Stops the Spark StreamingContext.
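
        Examples
        --------
        A minimal illustrative sketch, assuming a streaming context was
        previously created with create_streaming_ctx:

        >>> def simple_stop_streaming_job(context):
        ...     context.stop_streaming_ctx()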
| """ |
| pass |
| |
| @abstractproperty |
| def local_tmp_dir_path(self): |
| """" |
| Returns |
| ------- |
| local_tmp_dir_path : string |
| Returns a local tmp dir path specific to the context |
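
        Examples
        --------
        A minimal illustrative sketch; the job simply returns the path so the
        caller can inspect it:

        >>> def simple_tmp_dir_job(context):
        ...     return context.local_tmp_dir_path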
| """ |
| pass |
| |
| @abstractproperty |
| def spark_session(self): |
| """ |
| The shared SparkSession instance. |
| |
| Returns |
| ------- |
        spark_session : pyspark.sql.SparkSession
| A SparkSession instance |
| |
| Examples |
| ------- |
| |
| >>> def simple_spark_job(context): |
| >>> session = context.spark_session |
| >>> df1 = session.read.json('/sample.json') |
| >>> return df1.dTypes() |
| """ |
| pass |