#!/usr/bin/python3
# *****************************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# ******************************************************************************
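"""Configure Spark kernels on a TensorFlow notebook node for a standalone Spark cluster (Data Engine).

Uploads the sparkmagic configuration template to the notebook node, installs the
sparkmagic PySpark and Spark kernels under the notebook user's home directory,
replaces the LIVY_HOST placeholder in the sparkmagic config with the Spark master
IP, and records an inactivity timestamp for the cluster master.
"""
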
import argparse
import os
import sys
import datalab.fab
from datalab.fab import *
from datalab.meta_lib import *
from fabric import *
from patchwork.files import exists
from patchwork import files

parser = argparse.ArgumentParser()
parser.add_argument('--cluster_name', type=str, default='')
parser.add_argument('--spark_version', type=str, default='')
parser.add_argument('--hadoop_version', type=str, default='')
parser.add_argument('--os_user', type=str, default='')
parser.add_argument('--spark_master', type=str, default='')
parser.add_argument('--keyfile', type=str, default='')
parser.add_argument('--notebook_ip', type=str, default='')
parser.add_argument('--datalake_enabled', type=str, default='false')
parser.add_argument('--spark_master_ip', type=str, default='')
args = parser.parse_args()


def configure_notebook(keyfile, hoststring):
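    """Prepare the notebook node for the cluster.

    Uploads the sparkmagic config template and, if not already present, the
    tensor_dataengine_create_configs.py script and the datalab Python modules
    (with a /usr/lib64 symlink when that directory exists). Note: the function
    uses the module-level connection; keyfile and hoststring are currently unused.
    """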
    templates_dir = '/root/templates/'
    scripts_dir = '/root/scripts/'
    conn.run('mkdir -p /tmp/{}/'.format(args.cluster_name))
    conn.put(templates_dir + 'sparkmagic_config_template.json', '/tmp/sparkmagic_config_template.json')
    # conn.put(templates_dir + 'pyspark_dataengine_template.json', '/tmp/{}/pyspark_dataengine_template.json'.format(args.cluster_name))
    # conn.put(templates_dir + 'notebook_spark-defaults_local.conf', '/tmp/{}/notebook_spark-defaults_local.conf'.format(args.cluster_name))
    spark_master_ip = args.spark_master.split('//')[1].split(':')[0]
    # spark_memory = get_spark_memory(True, args.os_user, spark_master_ip, keyfile)
    # conn.run('echo "spark.executor.memory {0}m" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(spark_memory, args.cluster_name))
    if not exists(conn, '/usr/local/bin/tensor_dataengine_create_configs.py'):
        conn.put(scripts_dir + 'tensor_dataengine_create_configs.py', '/tmp/tensor_dataengine_create_configs.py')
        conn.sudo('cp -f /tmp/tensor_dataengine_create_configs.py /usr/local/bin/tensor_dataengine_create_configs.py')
        conn.sudo('chmod 755 /usr/local/bin/tensor_dataengine_create_configs.py')
    if not exists(conn, '/usr/lib/python3.8/datalab/'):
        conn.sudo('mkdir -p /usr/lib/python3.8/datalab/')
        conn.local('cd /usr/lib/python3.8/datalab/; tar -zcvf /tmp/datalab.tar.gz *')
        conn.put('/tmp/datalab.tar.gz', '/tmp/datalab.tar.gz')
        conn.sudo('tar -zxvf /tmp/datalab.tar.gz -C /usr/lib/python3.8/datalab/')
        conn.sudo('chmod a+x /usr/lib/python3.8/datalab/*')
        if exists(conn, '/usr/lib64'):
            conn.sudo('mkdir -p /usr/lib64/python3.8')
            conn.sudo('ln -fs /usr/lib/python3.8/datalab /usr/lib64/python3.8/datalab')


def install_sparkmagic_kernels(args):
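    """Install the sparkmagic PySpark and Spark kernels for the cluster.

    Registers the kernels under /home/<os_user>/.local, renames them to include
    the Spark/Scala versions and the cluster name, and writes
    ~/.sparkmagic/config.json with LIVY_HOST replaced by the Spark master IP.
    Exits with code 1 on failure.
    """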
    try:
        datalab.fab.conn.sudo('sudo jupyter nbextension enable --py --sys-prefix widgetsnbextension')
        sparkmagic_dir = datalab.fab.conn.sudo(''' bash -l -c 'pip3 show sparkmagic | grep "Location: "' ''').stdout.rstrip("\n\r").split(' ')[1]
        datalab.fab.conn.sudo('jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --prefix=/home/{}/.local/'.format(sparkmagic_dir, args.os_user))
        datalab.fab.conn.sudo('jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --prefix=/home/{}/.local/'.format(sparkmagic_dir, args.os_user))
        #datalab.fab.conn.sudo('jupyter-kernelspec install {}/sparkmagic/kernels/sparkrkernel --prefix=/home/{}/.local/'.format(sparkmagic_dir, args.os_user))
        pyspark_kernel_name = 'PySpark (Python-{2} / Spark-{0} ) [{1}]'.format(args.spark_version,
                                                                               args.cluster_name,
                                                                               os.environ['notebook_python_venv_version'][:3])
        datalab.fab.conn.sudo('sed -i \'s|PySpark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/pysparkkernel/kernel.json'.format(
            pyspark_kernel_name, args.os_user))
        scala_version = datalab.fab.conn.sudo('''bash -l -c 'spark-submit --version 2>&1 | grep -o -P "Scala version \K.{0,7}"' ''').stdout.rstrip("\n\r")
        spark_kernel_name = 'Spark (Scala-{0} / Spark-{1} ) [{2}]'.format(scala_version, args.spark_version,
                                                                          args.cluster_name)
        datalab.fab.conn.sudo('sed -i \'s|Spark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkkernel/kernel.json'.format(
            spark_kernel_name, args.os_user))
        #r_version = datalab.fab.conn.sudo(''' bash -l -c 'R --version | grep -o -P "R version \K.{0,5}"' ''').stdout.rstrip("\n\r")
        #sparkr_kernel_name = 'SparkR (R-{0} / Spark-{1} ) [{2}]'.format(r_version, args.spark_version,
        #                                                                args.cluster_name)
        #datalab.fab.conn.sudo('sed -i \'s|SparkR|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkrkernel/kernel.json'.format(
        #    sparkr_kernel_name, args.os_user))
        datalab.fab.conn.sudo('sudo mv -f /home/{0}/.local/share/jupyter/kernels/pysparkkernel '
                              '/home/{0}/.local/share/jupyter/kernels/pysparkkernel_{1}'.format(args.os_user, args.cluster_name))
        datalab.fab.conn.sudo('sudo mv -f /home/{0}/.local/share/jupyter/kernels/sparkkernel '
                              '/home/{0}/.local/share/jupyter/kernels/sparkkernel_{1}'.format(args.os_user, args.cluster_name))
        #datalab.fab.conn.run('sudo mv -f /home/{0}/.local/share/jupyter/kernels/sparkrkernel '
        #                     '/home/{0}/.local/share/jupyter/kernels/sparkrkernel_{1}'.format(args.os_user, args.cluster_name))
        datalab.fab.conn.sudo('mkdir -p /home/' + args.os_user + '/.sparkmagic')
        datalab.fab.conn.sudo('cp -f /tmp/sparkmagic_config_template.json /home/' + args.os_user + '/.sparkmagic/config.json')
        spark_master_ip = args.spark_master.split('//')[1].split(':')[0]
        datalab.fab.conn.sudo('sed -i \'s|LIVY_HOST|{0}|g\' /home/{1}/.sparkmagic/config.json'.format(
            spark_master_ip, args.os_user))
        datalab.fab.conn.sudo('sudo chown -R {0}:{0} /home/{0}/.sparkmagic/'.format(args.os_user))
    except Exception as err:
        print(err)
        sys.exit(1)


def create_inactivity_log(master_ip, hoststring):
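    """Record the current epoch timestamp for the cluster master in /opt/inactivity/,
    where it is used to track cluster inactivity (dots in the master IP are replaced
    with dashes to form the file name).
    """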
    reworked_ip = master_ip.replace('.', '-')
    conn.sudo('''bash -l -c "date +%s > /opt/inactivity/{}_inactivity" '''.format(reworked_ip))


if __name__ == "__main__":
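    # Open a connection to the notebook node, then configure it for the cluster:
    # upload templates and scripts, install the sparkmagic kernels, and create
    # the inactivity log entry for the Spark master.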
    global conn
    conn = datalab.fab.init_datalab_connection(args.notebook_ip, args.os_user, args.keyfile)
    try:
        region = os.environ['aws_region']
    except KeyError:
        region = ''
    if 'spark_configurations' not in os.environ:
        os.environ['spark_configurations'] = '[]'
    configure_notebook(args.keyfile, args.notebook_ip)
    install_sparkmagic_kernels(args)
    create_inactivity_log(args.spark_master_ip, args.notebook_ip)
    #conn.sudo('/usr/bin/python3 /usr/local/bin/tensor_dataengine_create_configs.py '
    #          '--cluster_name {} --spark_version {} --hadoop_version {} --os_user {} --spark_master {} --region {} '
    #          '--datalake_enabled {} --spark_configurations "{}"'.
    #          format(args.cluster_name, args.spark_version, args.hadoop_version, args.os_user, args.spark_master, region,
    #                 args.datalake_enabled, os.environ['spark_configurations']))