Add support for a custom Python executable (e.g. a virtualenv)
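
In short: the kernelspec's "env" block gains a PYTHON_EXEC entry, `jupyter toree install` grows a matching --python_exec flag that writes it, and the PySpark interpreter reads the variable at startup, falling back to plain "python" when it is unset (a standalone sketch of that lookup follows the PySparkInterpreter diff below).
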
diff --git a/etc/kernel.json b/etc/kernel.json
index a34b388..624c496 100644
--- a/etc/kernel.json
+++ b/etc/kernel.json
@@ -9,7 +9,8 @@
"CAPTURE_STANDARD_ERR": "true",
"MAX_INTERPRETER_THREADS": "16",
"CAPTURE_STANDARD_OUT": "true",
- "SEND_EMPTY_OUTPUT": "false"
+ "SEND_EMPTY_OUTPUT": "false",
+ "PYTHON_EXEC": "python'
},
"argv": [
"/usr/local/share/jupyter/kernels/toree/bin/run.sh",
@@ -17,4 +18,4 @@
"{connection_file}"
],
"codemirror_mode": "scala"
-}
\ No newline at end of file
+}
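
Jupyter exports a kernelspec's "env" entries into the kernel's environment at launch, which is how the PYTHON_EXEC value above reaches the JVM-side lookup in PySparkInterpreter.
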
diff --git a/etc/pip_install/toree/toreeapp.py b/etc/pip_install/toree/toreeapp.py
index 41a6334..e7be0ac 100644
--- a/etc/pip_install/toree/toreeapp.py
+++ b/etc/pip_install/toree/toreeapp.py
@@ -39,6 +39,7 @@
TOREE_SPARK_OPTS = '__TOREE_SPARK_OPTS__'
TOREE_OPTS = '__TOREE_OPTS__'
DEFAULT_INTERPRETER = 'DEFAULT_INTERPRETER'
+PYTHON_EXEC = 'PYTHON_EXEC'
class ToreeInstall(InstallKernelSpec):
'''CLI for extension management.'''
@@ -51,6 +52,7 @@
jupyter toree install --kernel_name=toree_special
jupyter toree install --toree_opts='--nosparkcontext'
jupyter toree install --interpreters=PySpark,SQL
+    jupyter toree install --python_exec=python
'''
spark_home = Unicode('/usr/local/spark', config=True,
@@ -68,12 +70,16 @@
spark_opts = Unicode('', config=True,
help='''Specify command line arguments to proxy for spark config.'''
)
+ python_exec = Unicode('python', config=True,
+        help='''Specify the Python executable. Defaults to "python".'''
+ )
aliases = {
'kernel_name': 'ToreeInstall.kernel_name',
'spark_home': 'ToreeInstall.spark_home',
'toree_opts': 'ToreeInstall.toree_opts',
'spark_opts': 'ToreeInstall.spark_opts',
- 'interpreters' : 'ToreeInstall.interpreters'
+ 'interpreters' : 'ToreeInstall.interpreters',
+ 'python_exec' : 'ToreeInstall.python_exec'
}
aliases.update(base_aliases)
@@ -100,7 +106,8 @@
TOREE_SPARK_OPTS : self.spark_opts,
TOREE_OPTS : self.toree_opts,
SPARK_HOME : self.spark_home,
- PYTHON_PATH : '{0}/python:{0}/python/lib/{1}'.format(self.spark_home, py4j_zip)
+ PYTHON_PATH : '{0}/python:{0}/python/lib/{1}'.format(self.spark_home, py4j_zip),
+ PYTHON_EXEC : self.python_exec
}
kernel_json_file = os.path.join(location, 'kernel.json')
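
With the installer change in place, pointing the kernel at a virtualenv is a one-liner, e.g. `jupyter toree install --python_exec=/opt/venv/bin/python` (the path is hypothetical); the value is written into the generated kernel.json's env block as PYTHON_EXEC.
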
diff --git a/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkInterpreter.scala b/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkInterpreter.scala
index b07200d..22d0274 100644
--- a/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkInterpreter.scala
+++ b/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkInterpreter.scala
@@ -32,11 +32,13 @@
/**
* Represents an interpreter interface to PySpark. Requires a properly-set
* SPARK_HOME, PYTHONPATH pointing to Spark's Python source, and py4j installed
- * where it is accessible to the Spark Kernel.
+ * where it is accessible to the Spark Kernel. Optionally specify PYTHON_EXEC
+ * to override the default Python executable, "python".
*
*/
class PySparkInterpreter(
) extends Interpreter {
+ private val PythonExecEnv = "PYTHON_EXEC"
private val logger = LoggerFactory.getLogger(this.getClass)
private var _kernel:KernelLike = _
@@ -63,6 +65,7 @@
)
private lazy val pySparkService = new PySparkService(
+ Option(System.getenv(PythonExecEnv)).getOrElse("python"),
gatewayServer,
pySparkBridge,
pySparkProcessHandler
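
The new constructor argument is resolved with a null-safe environment read. Here is a minimal, standalone sketch of that pattern (not Toree code; the object and method names are illustrative, while the variable name and default mirror the diff):

```scala
object PythonExecDemo {
  private val PythonExecEnv = "PYTHON_EXEC"

  // Mirror of the lookup above: System.getenv returns null when the
  // variable is absent, so Option(...) turns that into None and
  // getOrElse supplies the historical default, "python".
  def pythonExec: String =
    Option(System.getenv(PythonExecEnv)).getOrElse("python")

  def main(args: Array[String]): Unit =
    // Prints e.g. "/opt/venv/bin/python" when PYTHON_EXEC is exported,
    // and "python" otherwise.
    println(s"PySpark would launch: $pythonExec")
}
```
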
diff --git a/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkProcess.scala b/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkProcess.scala
index 81dde5d..d89865a 100644
--- a/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkProcess.scala
+++ b/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkProcess.scala
@@ -28,6 +28,7 @@
/**
* Represents the Python process used to evaluate PySpark code.
*
+ * @param pythonProcessName The name of the Python executable used to launch the process
* @param pySparkBridge The bridge to use to retrieve kernel output streams
* and the Spark version to be verified
* @param pySparkProcessHandler The handler to use when the process fails or
@@ -37,12 +38,13 @@
* @param sparkVersion The version of Spark that the process will be using
*/
class PySparkProcess(
+ private val pythonProcessName: String,
private val pySparkBridge: PySparkBridge,
private val pySparkProcessHandler: PySparkProcessHandler,
private val port: Int,
private val sparkVersion: String
) extends BrokerProcess(
- processName = "python",
+ processName = pythonProcessName,
entryResource = "PySpark/pyspark_runner.py",
otherResources = Nil,
brokerBridge = pySparkBridge,
diff --git a/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkService.scala b/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkService.scala
index 5cfd59a..8636abc 100644
--- a/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkService.scala
+++ b/pyspark-interpreter/src/main/scala/org/apache/toree/kernel/interpreter/pyspark/PySparkService.scala
@@ -28,6 +28,7 @@
* Represents the service that provides the high-level interface between the
* JVM and Python.
*
+ * @param pythonProcessName The name of the Python executable used to launch the process
* @param gatewayServer The backend to start to communicate between the JVM and
* Python
* @param pySparkBridge The bridge to use for communication between the JVM and
@@ -36,6 +37,7 @@
* the PySpark process
*/
class PySparkService(
+ private val pythonProcessName: String,
private val gatewayServer: GatewayServer,
private val pySparkBridge: PySparkBridge,
private val pySparkProcessHandler: PySparkProcessHandler
@@ -48,6 +50,7 @@
/** Represents the process used to execute Python code via the bridge. */
private lazy val pySparkProcess = {
val p = new PySparkProcess(
+ pythonProcessName,
pySparkBridge,
pySparkProcessHandler,
gatewayServer.getListeningPort,
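
Design note: the environment variable is read exactly once, at the PySparkInterpreter boundary, and then threaded explicitly through PySparkService into PySparkProcess as a constructor argument; the lower layers never consult the environment themselves, so they remain constructible with any executable name (e.g. in tests).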