Install action requirements inside the driver and ship them to the executors
diff --git a/frameworks/spark/dispatcher/src/main/scala/org/apache/amaterasu/frameworks/spark/dispatcher/runners/providers/PySparkRunnerProvider.scala b/frameworks/spark/dispatcher/src/main/scala/org/apache/amaterasu/frameworks/spark/dispatcher/runners/providers/PySparkRunnerProvider.scala
index 0e9fea4..7cac602 100644
--- a/frameworks/spark/dispatcher/src/main/scala/org/apache/amaterasu/frameworks/spark/dispatcher/runners/providers/PySparkRunnerProvider.scala
+++ b/frameworks/spark/dispatcher/src/main/scala/org/apache/amaterasu/frameworks/spark/dispatcher/runners/providers/PySparkRunnerProvider.scala
@@ -11,9 +11,14 @@
log.info(s"===> Cluster manager: ${conf.mode}")
command +
//s" $$SPARK_HOME/conf/spark-env.sh" +
- // s" && env PYSPARK_PYTHON=$getVirtualPythonPath" +
+ // s" && env PYSPARK_PYTHON=$getVirtualPythonPath" +
//s" env PYSPARK_DRIVER_PYTHON=$getVirtualPythonPath" + d
- s" && $$SPARK_HOME/bin/spark-submit --master yarn-cluster --conf spark.pyspark.python=$getVirtualPythonPath --files $$SPARK_HOME/conf/hive-site.xml ${actionData.getSrc}"
+ s" && $$SPARK_HOME/bin/spark-submit --master yarn-client " +
+ s"--conf spark.pyspark.virtualenv.enabled=true " +
+ s"--conf spark.pyspark.virtualenv.type=native " +
+ s"--conf spark.pyspark.virtualenv.bin.path=$getVirtualPythonBin " +
+ s"--conf spark.pyspark.python=$getVirtualPythonPath " +
+ s"--files $$SPARK_HOME/conf/hive-site.xml ${actionData.getSrc}"
}
override def getRunnerResources: Array[String] = {
diff --git a/sdk_python/amaterasu/pyspark/runtime.py b/sdk_python/amaterasu/pyspark/runtime.py
index c51382d..a8fbe3c 100644
--- a/sdk_python/amaterasu/pyspark/runtime.py
+++ b/sdk_python/amaterasu/pyspark/runtime.py
@@ -19,7 +19,6 @@
from amaterasu.base import BaseAmaContextBuilder, LoaderAmaContext
from .datasets import DatasetManager
from pip._internal import main as pip_main
-import zipfile
import os
import sys