# *****************************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# ******************************************************************************
import csv
import datetime
import json
import logging
import os
import random
import re
import string
import sys
import time
import traceback
import subprocess
from datalab.actions_lib import *
from datalab.common_lib import *
from datalab.meta_lib import *
from fabric import *
from patchwork.files import exists
from patchwork import files
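# Builds CPython from source under /opt/python/python<version> (with shared libraries for the
# RStudio-based notebook types), creates a virtualenv on top of it and seeds it with a pinned
# pip, ipython/ipykernel and the common scientific stack.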
def ensure_python_venv(python_venv_version):
try:
if not exists(conn, '/opt/python/python{}'.format(python_venv_version)):
conn.sudo('wget https://www.python.org/ftp/python/{0}/Python-{0}.tgz -O /tmp/Python-{0}.tgz'.format(
python_venv_version))
conn.sudo('tar zxvf /tmp/Python-{}.tgz -C /tmp/'.format(python_venv_version))
if os.environ['application'] in ('rstudio', 'tensor-rstudio'):
conn.sudo('''bash -l -c 'cd /tmp/Python-{0} && ./configure --prefix=/opt/python/python{0} '''
'''--with-zlib-dir=/usr/local/lib/ --with-ensurepip=install --enable-shared' '''.format(
python_venv_version))
conn.sudo(
'''bash -l -c 'echo "export LD_LIBRARY_PATH=/opt/python/python{}/lib" >> /etc/profile' '''.format(
python_venv_version))
else:
conn.sudo(
'''bash -l -c 'cd /tmp/Python-{0} && ./configure --prefix=/opt/python/python{0} '''
'''--with-zlib-dir=/usr/local/lib/ --with-ensurepip=install' '''.format(
python_venv_version))
conn.sudo('''bash -l -c 'cd /tmp/Python-{0} && make altinstall' '''.format(python_venv_version))
conn.sudo('''bash -l -c 'cd /tmp && rm -rf Python-{}' '''.format(python_venv_version))
conn.sudo('''bash -l -c 'virtualenv /opt/python/python{0}' '''.format(python_venv_version))
venv_command = 'source /opt/python/python{}/bin/activate'.format(python_venv_version)
pip_command = '/opt/python/python{0}/bin/pip{1}'.format(python_venv_version, python_venv_version[:3])
conn.sudo('''bash -l -c '{0} && {1} install -U pip=={2}' '''.format(venv_command, pip_command, os.environ['conf_pip_version']))
conn.sudo('''bash -l -c '{0} && {1} install ipython ipykernel --no-cache-dir' '''.format(venv_command, pip_command))
conn.sudo('''bash -l -c '{0} && {1} install NumPy=={2} SciPy Matplotlib pandas Sympy Pillow sklearn --no-cache-dir' '''.format(venv_command, pip_command, os.environ['notebook_numpy_version']))
except Exception as err:
print('Error:', str(err))
sys.exit(1)
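# Installs a single pip package (optionally pinned to a version) into the notebook's Python
# virtualenv selected by the notebook_python_venv_version environment variable.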
def install_venv_pip_pkg(pkg_name, pkg_version = ''):
try:
venv_install_command = 'source /opt/python/python{0}/bin/activate && /opt/python/python{0}/bin/pip{1}'.format(
os.environ['notebook_python_venv_version'], os.environ['notebook_python_venv_version'][:3])
if pkg_version:
pip_pkg = '{}=={}'.format(pkg_name,pkg_version)
else:
pip_pkg = pkg_name
conn.sudo('''bash -l -c '{0} install {1} --no-cache-dir' '''.format(venv_install_command, pip_pkg))
except Exception as err:
print('Error:', str(err))
sys.exit(1)
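# One-time pip bootstrap: extends PATH in /etc/profile, pins pip and setuptools, installs the
# requested requisites; guarded by a marker file in the user's .ensure_dir.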
def ensure_pip(requisites):
try:
if not exists(conn,'/home/{}/.ensure_dir/pip_path_added'.format(os.environ['conf_os_user'])):
conn.sudo('bash -l -c "echo PATH=$PATH:/usr/local/bin/:/opt/spark/bin/ >> /etc/profile"')
conn.sudo('bash -l -c "echo export PATH >> /etc/profile"')
conn.sudo('pip3 install -UI pip=={} --no-cache-dir'.format(os.environ['conf_pip_version']))
conn.sudo('pip3 install -U setuptools=={}'.format(os.environ['notebook_setuptools_version']))
conn.sudo('pip3 install -UI {} --no-cache-dir'.format(requisites))
conn.sudo('touch /home/{}/.ensure_dir/pip_path_added'.format(os.environ['conf_os_user']))
except:
sys.exit(1)
def dataengine_dir_prepare(cluster_dir):
subprocess.run('mkdir -p ' + cluster_dir, shell=True, check=True)
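# Installs a list of (name, version) pip packages and returns a per-package status report
# (installed / invalid_name / invalid_version / installation_error) built by parsing pip output
# captured under /tmp.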
def install_pip_pkg(requisites, pip_version, lib_group, dataengine_service = False):
status = list()
error_parser = "Could not|No matching|ImportError:|failed|EnvironmentError:|requires|FileNotFoundError:|RuntimeError:|error:"
try:
if dataengine_service:
install_command = pip_version
elif os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['application'] == 'deeplearning':
install_command = 'conda activate && {}'.format(pip_version)
else:
install_command = 'source /opt/python/python{0}/bin/activate && /opt/python/python{0}/bin/pip{1}'.format(
os.environ['notebook_python_venv_version'], os.environ['notebook_python_venv_version'][:3])
#if pip_version == 'pip3' and not exists(conn, '/bin/pip3'):
# for v in range(4, 8):
# if exists(conn, '/bin/pip3.{}'.format(v)):
# conn.sudo('ln -s /bin/pip3.{} /bin/pip3'.format(v))
#conn.sudo('{} install -U pip=={} setuptools=={}'.format(pip_version, os.environ['conf_pip_version'], os.environ['notebook_setuptools_version']))
#conn.sudo('{} install -U pip=={} --no-cache-dir'.format(pip_version, os.environ['conf_pip_version']))
#conn.sudo('{} install --upgrade pip=={}'.format(pip_version, os.environ['conf_pip_version']))
for pip_pkg in requisites:
name, vers = pip_pkg
if vers == '' or vers == 'N/A':
pip_pkg = name
version = 'N/A'
else:
version = vers
pip_pkg = "{}=={}".format(name, vers)
conn.sudo(
'''bash -l -c '{0} install -U {1} --use-deprecated=legacy-resolver --no-cache-dir 2>&1 | '''
'''tee /tmp/{4}_install_{3}.tmp; if ! grep -w -i -E "({2})" /tmp/{4}_install_{3}.tmp > /tmp/{4}_install_{3}.log; '''
'''then echo "" > /tmp/{4}_install_{3}.log;fi' '''.format(
install_command, pip_pkg, error_parser, name, pip_version))
err = conn.sudo('cat /tmp/{0}_install_{1}.log'.format(pip_version, pip_pkg.split("==")[0])).stdout.replace(
'"', "'").replace('\n', ' ')
conn.sudo(
'''bash -l -c '{0} freeze --all | if ! grep -w -i {1} > /tmp/{2}_install_{1}.list; '''
'''then echo "not_found" > /tmp/{2}_install_{1}.list;fi' '''.format(
install_command, name, pip_version))
res = conn.sudo('''bash -l -c 'cat /tmp/{0}_install_{1}.list' '''.format(pip_version, name)).stdout.replace(
'\n', '')
conn.sudo(
'''bash -l -c 'cat /tmp/{0}_install_{1}.tmp | if ! grep -w -i -E "(Successfully installed|up-to-date)" > '''
'''/tmp/{0}_install_{1}.list; then echo "not_installed" > /tmp/{0}_install_{1}.list;fi' '''.format(
pip_version, name))
installed_out = conn.sudo(
'''bash -l -c 'cat /tmp/{0}_install_{1}.list' '''.format(pip_version, name)).stdout.replace('\n', '')
changed_pip_pkg = False
if 'not_found' in res:
changed_pip_pkg = pip_pkg.split("==")[0].replace("_", "-").split('-')
changed_pip_pkg = changed_pip_pkg[0]
conn.sudo(
'''bash -l -c '{0} freeze --all | if ! grep -w -i {1} > /tmp/{2}_install_{1}.list; then echo "" > '''
'''/tmp/{2}_install_{1}.list;fi' '''.format(
install_command, changed_pip_pkg, pip_version))
res = conn.sudo('cat /tmp/{0}_install_{1}.list'.format(pip_version, changed_pip_pkg)).stdout.replace(
'\n', '')
if err and name not in installed_out:
status_msg = 'installation_error'
if 'ERROR: No matching distribution found for {}'.format(name) in err:
status_msg = 'invalid_name'
elif res:
res = res.lower()
ansi_escape = re.compile(r'\x1b[^m]*m')
ver = ansi_escape.sub('', res).split("\r\n")
if changed_pip_pkg:
version = [i for i in ver if changed_pip_pkg.lower() in i][0].split('==')[1]
else:
version = \
[i for i in ver if pip_pkg.split("==")[0].lower() in i][0].split('==')[1]
status_msg = "installed"
versions = []
if 'Could not find a version that satisfies the requirement' in err and 'ERROR: No matching distribution found for {}=='.format(
name) in err:
versions = err[err.find("(from versions: ") + 16: err.find(") ")]
if versions != '' and versions != 'none':
versions = versions.split(', ')
version = vers
status_msg = 'invalid_version'
else:
versions = []
conn.sudo('cat /tmp/{0}_install_{1}.tmp | if ! grep -w -i -E "Installing collected packages:" > '
'/tmp/{0}_install_{1}.dep; then echo "" > /tmp/{0}_install_{1}.dep;fi'.format(pip_version, name))
dep = conn.sudo('cat /tmp/{0}_install_{1}.dep'.format(pip_version, name)).stdout.replace('\n', '').strip()[31:]
if dep == '':
dep = []
else:
dep = dep.split(', ')
for n, i in enumerate(dep):
if i == name:
dep[n] = ''
else:
conn.sudo('{0} show {1} 2>&1 | if ! grep Version: > '
'/tmp/{0}_install_{1}.log; then echo "" > /tmp/{0}_install_{1}.log;fi'.format(pip_version, i))
dep[n] = conn.sudo('cat /tmp/{0}_install_{1}.log'.format(pip_version, i)).stdout.replace('\n', '').replace('Version: ', '{} v.'.format(i))
dep = [i for i in dep if i]
status.append({"group": lib_group, "name": name, "version": version, "status": status_msg,
"error_message": err, "available_versions": versions, "add_pkgs": dep})
conn.sudo('rm -rf /tmp/*{}*'.format(name))
return status
except Exception as err:
for pip_pkg in requisites:
name, vers = pip_pkg
status.append({"group": lib_group, "name": name, "version": vers, "status": 'installation_error', "error_message": err})
print("Failed to install {} packages: {}".format(pip_version, err))
return status
def id_generator(size=10, chars=string.digits + string.ascii_letters):
return ''.join(random.choice(chars) for _ in range(size))
def ensure_dataengine_tensorflow_jars(jars_dir):
subprocess.run('wget https://dl.bintray.com/spark-packages/maven/tapanalyticstoolkit/spark-tensorflow-connector/1.0.0-s_2.11/spark-tensorflow-connector-1.0.0-s_2.11.jar \
-O {}spark-tensorflow-connector-1.0.0-s_2.11.jar'.format(jars_dir), shell=True, check=True)
def prepare(dataengine_service_dir, yarn_dir):
subprocess.run('mkdir -p ' + dataengine_service_dir, shell=True, check=True)
subprocess.run('mkdir -p ' + yarn_dir, shell=True, check=True)
subprocess.run('sudo mkdir -p /opt/python/', shell=True, check=True)
result = os.path.exists(dataengine_service_dir + 'usr/')
return result
def configuring_notebook(dataengine_service_version):
jars_path = '/opt/' + dataengine_service_version + '/jars/'
subprocess.run("""sudo bash -c "find """ + jars_path + """ -name '*netty*' | xargs rm -f" """, shell=True, check=True)
def append_result(error, exception=''):
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
if exception:
error_message = "[Error-{}]: {}. Exception: {}".format(st, error, str(exception))
print(error_message)
else:
error_message = "[Error-{}]: {}.".format(st, error)
print(error_message)
with open('/root/result.json', 'a+') as f:
text = f.read()
if len(text) == 0:
res = '{"error": ""}'
with open('/root/result.json', 'w') as f:
f.write(res)
with open("/root/result.json") as f:
data = json.load(f)
data['error'] = data['error'] + error_message
with open("/root/result.json", 'w') as f:
json.dump(data, f)
print(data)
def put_resource_status(resource, status, datalab_path, os_user, hostname):
keyfile = os.environ['conf_key_dir'] + os.environ['conf_key_name'] + ".pem"
init_datalab_connection(hostname, os_user, keyfile)
conn.sudo('python3 ' + datalab_path + 'tmp/resource_status.py --resource {} --status {}'.format(resource, status))
conn.close()
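# Installs and configures Jupyter Notebook as the jupyter-notebook systemd service: writes the
# notebook config file, patches the unit template per application type (tensor/deeplearning get
# CUDA/cuDNN library paths), sets JAVA_HOME and enables the service. On repeated calls only the
# base_url is updated and the service is restarted.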
def configure_jupyter(os_user, jupyter_conf_file, templates_dir, jupyter_version, exploratory_name):
if not exists(conn,'/home/' + os_user + '/.ensure_dir/jupyter_ensured'):
try:
if os.environ['conf_deeplearning_cloud_ami'] == 'false' or os.environ['application'] != 'deeplearning':
conn.sudo('pip3 install notebook=={} --no-cache-dir'.format(jupyter_version))
conn.sudo('pip3 install jupyter --no-cache-dir')
conn.sudo('rm -rf {}'.format(jupyter_conf_file))
conn.run('jupyter notebook --generate-config --config {}'.format(jupyter_conf_file))
conn.run('mkdir -p ~/.jupyter/custom/')
conn.run('echo "#notebook-container { width: auto; }" > ~/.jupyter/custom/custom.css')
conn.sudo('echo "c.NotebookApp.ip = \'0.0.0.0\'" >> {}'.format(jupyter_conf_file))
conn.sudo('echo "c.NotebookApp.base_url = \'/{0}/\'" >> {1}'.format(exploratory_name, jupyter_conf_file))
conn.sudo('echo c.NotebookApp.open_browser = False >> {}'.format(jupyter_conf_file))
conn.sudo('echo \'c.NotebookApp.cookie_secret = b"{0}"\' >> {1}'.format(id_generator(), jupyter_conf_file))
conn.sudo('''echo "c.NotebookApp.token = u''" >> {}'''.format(jupyter_conf_file))
conn.sudo('echo \'c.KernelSpecManager.ensure_native_kernel = False\' >> {}'.format(jupyter_conf_file))
if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['application'] == 'deeplearning':
conn.sudo(
'''echo "c.NotebookApp.kernel_spec_manager_class = 'environment_kernels.EnvironmentKernelSpecManager'" >> {}'''.format(
jupyter_conf_file))
conn.sudo(
'''echo "c.EnvironmentKernelSpecManager.conda_env_dirs=['/home/ubuntu/anaconda3/envs']" >> {}'''.format(
jupyter_conf_file))
conn.put(templates_dir + 'jupyter-notebook.service', '/tmp/jupyter-notebook.service')
conn.sudo("chmod 644 /tmp/jupyter-notebook.service")
if os.environ['application'] == 'tensor':
conn.sudo("sed -i '/ExecStart/s|-c \"|-c \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64; |g' /tmp/jupyter-notebook.service")
elif os.environ['application'] == 'deeplearning' and os.environ['conf_deeplearning_cloud_ami'] == 'false':
conn.sudo("sed -i '/ExecStart/s|-c \"|-c \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cudnn/lib64:/usr/local/cuda/lib64:/usr/lib64/openmpi/lib: ; export PYTHONPATH=/home/" + os_user +
"/caffe/python:/home/" + os_user + "/pytorch/build:$PYTHONPATH ; |g' /tmp/jupyter-notebook.service")
conn.sudo("sed -i 's|CONF_PATH|{}|' /tmp/jupyter-notebook.service".format(jupyter_conf_file))
conn.sudo("sed -i 's|OS_USR|{}|' /tmp/jupyter-notebook.service".format(os_user))
http_proxy = conn.run('''bash -l -c 'echo $http_proxy' ''').stdout.replace('\n','')
https_proxy = conn.run('''bash -l -c 'echo $https_proxy' ''').stdout.replace('\n','')
#sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTP_PROXY={}\"\' /tmp/jupyter-notebook.service'.format(
# http_proxy))
#sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTPS_PROXY={}\"\' /tmp/jupyter-notebook.service'.format(
# https_proxy))
java_home = conn.run("update-alternatives --query java | grep -o --color=never \'/.*/java-8.*/jre\'").stdout.splitlines()[0]
conn.sudo('sed -i \'/\[Service\]/ a\Environment=\"JAVA_HOME={}\"\' /tmp/jupyter-notebook.service'.format(
java_home))
conn.sudo('\cp /tmp/jupyter-notebook.service /etc/systemd/system/jupyter-notebook.service')
conn.sudo('chown -R {0}:{0} /home/{0}/.local'.format(os_user))
conn.sudo('mkdir -p /mnt/var')
conn.sudo('chown {0}:{0} /mnt/var'.format(os_user))
if os.environ['application'] == 'jupyter' or os.environ['application'] == 'deeplearning':
try:
conn.sudo('jupyter-kernelspec remove -f python3 || echo "Such kernel does not exist"')
conn.sudo('jupyter-kernelspec remove -f python2 || echo "Such kernel does not exist"')
except Exception as err:
print('Error:', str(err))
conn.sudo("systemctl daemon-reload")
conn.sudo("systemctl enable jupyter-notebook")
conn.sudo("systemctl start jupyter-notebook")
conn.sudo('touch /home/{}/.ensure_dir/jupyter_ensured'.format(os_user))
except:
sys.exit(1)
else:
try:
conn.sudo(
'sed -i "s/c.NotebookApp.base_url =.*/c.NotebookApp.base_url = \'\/{0}\/\'/" {1}'.format(exploratory_name, jupyter_conf_file))
conn.sudo("systemctl restart jupyter-notebook")
except Exception as err:
print('Error:', str(err))
sys.exit(1)
def remove_unexisting_kernel(os_user):
if not exists(conn,'/home/{}/.ensure_dir/unexisting_kernel_removed'.format(os_user)):
try:
conn.sudo('jupyter-kernelspec remove -f python3')
conn.sudo('touch /home/{}/.ensure_dir/unexisting_kernel_removed'.format(os_user))
except Exception as err:
print('Error:', str(err))
sys.exit(1)
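# Adds the upstream Docker apt repository and installs the docker-ce build pinned by
# ssn_docker_version; guarded by the docker_ensured marker file.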
def configure_docker(os_user):
try:
if not exists(conn,'/home/' + os_user + '/.ensure_dir/docker_ensured'):
docker_version = os.environ['ssn_docker_version']
conn.sudo('curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -')
conn.sudo('add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) \
stable"')
#datalab.common_lib.manage_pkg('update', 'remote', '')
conn.sudo('apt-get update')
conn.sudo('apt-cache policy docker-ce')
#datalab.common_lib.manage_pkg('-y install', 'remote', 'docker-ce=5:{}~3-0~ubuntu-focal'.format(docker_version))
conn.sudo('apt-get install -y docker-ce=5:{}~3-0~ubuntu-focal'.format(docker_version))
conn.sudo('touch /home/{}/.ensure_dir/docker_ensured'.format(os_user))
except Exception as err:
print('Failed to configure Docker:', str(err))
sys.exit(1)
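# Prepares the JupyterLab Docker build context: copies the Dockerfile and run/build/start scripts
# into jupyterlab_dir, generates the Jupyter config and routes git SSH traffic through the edge
# node proxy. On repeated calls only the base_url is updated.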
def ensure_jupyterlab_files(os_user, jupyterlab_dir, jupyterlab_image, jupyter_conf_file, jupyterlab_conf_file, exploratory_name, edge_ip):
if not exists(conn,jupyterlab_dir):
try:
conn.sudo('mkdir {}'.format(jupyterlab_dir))
# conn.put(templates_dir + 'pyspark_local_template.json', '/tmp/pyspark_local_template.json')
# conn.put(templates_dir + 'py3spark_local_template.json', '/tmp/py3spark_local_template.json')
conn.put('/root/Dockerfile_jupyterlab', '/tmp/Dockerfile_jupyterlab')
conn.put('/root/scripts/jupyterlab_run.sh', '/tmp/jupyterlab_run.sh')
conn.put('/root/scripts/build.sh', '/tmp/build.sh')
conn.put('/root/scripts/start.sh', '/tmp/start.sh')
# conn.sudo('\cp /tmp/pyspark_local_template.json ' + jupyterlab_dir + 'pyspark_local_template.json')
# conn.sudo('\cp /tmp/py3spark_local_template.json ' + jupyterlab_dir + 'py3spark_local_template.json')
# conn.sudo('sed -i \'s/3.5/3.6/g\' {}py3spark_local_template.json'.format(jupyterlab_dir))
conn.sudo('mv /tmp/jupyterlab_run.sh {}jupyterlab_run.sh'.format(jupyterlab_dir))
conn.sudo('mv /tmp/Dockerfile_jupyterlab {}Dockerfile_jupyterlab'.format(jupyterlab_dir))
conn.sudo('mv /tmp/build.sh {}build.sh'.format(jupyterlab_dir))
conn.sudo('mv /tmp/start.sh {}start.sh'.format(jupyterlab_dir))
# conn.sudo('sed -i \'s/nb_user/{}/g\' {}Dockerfile_jupyterlab'.format(os_user, jupyterlab_dir))
conn.sudo('sed -i \'s/jupyterlab_image/{}/g\' {}Dockerfile_jupyterlab'.format(jupyterlab_image, jupyterlab_dir))
conn.sudo('sed -i \'s/nb_user/{}/g\' {}start.sh'.format(os_user, jupyterlab_dir))
# conn.sudo('sed -i \'s/jup_version/{}/g\' {}Dockerfile_jupyterlab'.format(jupyter_version, jupyterlab_dir))
# conn.sudo('sed -i \'s/hadoop_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_hadoop_version'], jupyterlab_dir))
# conn.sudo('sed -i \'s/tornado_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_tornado_version'], jupyterlab_dir))
# conn.sudo('sed -i \'s/matplotlib_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_matplotlib_version'], jupyterlab_dir))
# conn.sudo('sed -i \'s/numpy_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_numpy_version'], jupyterlab_dir))
# conn.sudo('sed -i \'s/spark_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_spark_version'], jupyterlab_dir))
# conn.sudo('sed -i \'s/scala_version/{}/g\' {}Dockerfile_jupyterlab'.format(os.environ['notebook_scala_version'], jupyterlab_dir))
conn.sudo('sed -i \'s/CONF_PATH/{}/g\' {}jupyterlab_run.sh'.format(jupyterlab_conf_file, jupyterlab_dir))
conn.sudo('touch {}'.format(jupyter_conf_file))
conn.sudo('''bash -l -c "echo 'c.NotebookApp.ip = \\"0.0.0.0\\" ' >> {}" '''.format(jupyter_conf_file))
conn.sudo('''bash -l -c "echo 'c.NotebookApp.base_url = \\"/{0}/\\"' >> {1}" '''.format(exploratory_name, jupyter_conf_file))
conn.sudo('''bash -l -c 'echo "c.NotebookApp.open_browser = False" >> {}' '''.format(jupyter_conf_file))
conn.sudo('''bash -l -c "echo 'c.NotebookApp.cookie_secret = b\\"{0}\\"' >> {1}" '''.format(id_generator(), jupyter_conf_file))
conn.sudo('''bash -l -c "echo \\"c.NotebookApp.token = u''\\" >> {}" '''.format(jupyter_conf_file))
conn.sudo('''bash -l -c 'echo "c.KernelSpecManager.ensure_native_kernel = False" >> {}' '''.format(jupyter_conf_file))
conn.sudo('chown datalab-user:datalab-user /opt')
conn.sudo('''bash -l -c 'echo -e "Host git.epam.com\n HostName git.epam.com\n ProxyCommand nc -X connect -x {}:3128 %h %p\n" > /home/{}/.ssh/config' '''.format(
edge_ip, os_user))
conn.sudo('''bash -l -c 'echo -e "Host github.com\n HostName github.com\n ProxyCommand nc -X connect -x {}:3128 %h %p" >> /home/{}/.ssh/config' '''.format(edge_ip, os_user))
# conn.sudo('touch {}'.format(spark_script))
# conn.sudo('echo "#!/bin/bash" >> {}'.format(spark_script))
# conn.sudo(
# 'echo "PYJ=\`find /opt/spark/ -name \'*py4j*.zip\' | tr \'\\n\' \':\' | sed \'s|:$||g\'\`; sed -i \'s|PY4J|\'$PYJ\'|g\' /tmp/pyspark_local_template.json" >> {}'.format(
# spark_script))
# conn.sudo(
# 'echo "sed -i \'14s/:",/:\\/home\\/datalab-user\\/caffe\\/python:\\/home\\/datalab-user\\/pytorch\\/build:",/\' /tmp/pyspark_local_template.json" >> {}'.format(
# spark_script))
# conn.sudo('echo \'sed -i "s|SP_VER|{}|g" /tmp/pyspark_local_template.json\' >> {}'.format(os.environ['notebook_spark_version'], spark_script))
# conn.sudo(
# 'echo "PYJ=\`find /opt/spark/ -name \'*py4j*.zip\' | tr \'\\n\' \':\' | sed \'s|:$||g\'\`; sed -i \'s|PY4J|\'$PYJ\'|g\' /tmp/py3spark_local_template.json" >> {}'.format(
# spark_script))
# conn.sudo(
# 'echo "sed -i \'14s/:",/:\\/home\\/datalab-user\\/caffe\\/python:\\/home\\/datalab-user\\/pytorch\\/build:",/\' /tmp/py3spark_local_template.json" >> {}'.format(
# spark_script))
# conn.sudo('echo \'sed -i "s|SP_VER|{}|g" /tmp/py3spark_local_template.json\' >> {}'.format(os.environ['notebook_spark_version'], spark_script))
# conn.sudo('echo "cp /tmp/pyspark_local_template.json /home/{}/.local/share/jupyter/kernels/pyspark_local/kernel.json" >> {}'.format(os_user, spark_script))
# conn.sudo(
# 'echo "cp /tmp/py3spark_local_template.json /home/{}/.local/share/jupyter/kernels/py3spark_local/kernel.json" >> {}'.format(
# os_user, spark_script))
# conn.sudo('git clone https://github.com/legion-platform/legion.git')
# conn.sudo('cp {}sdk/Pipfile {}sdk_Pipfile'.format(legion_dir, jupyterlab_dir))
# conn.sudo('cp {}sdk/Pipfile.lock {}sdk_Pipfile.lock'.format(legion_dir, jupyterlab_dir))
# conn.sudo('cp {}toolchains/python/Pipfile {}toolchains_Pipfile'.format(legion_dir, jupyterlab_dir))
# conn.sudo('cp {}toolchains/python/Pipfile.lock {}toolchains_Pipfile.lock'.format(legion_dir, jupyterlab_dir))
# conn.sudo('cp {}cli/Pipfile {}cli_Pipfile'.format(legion_dir, jupyterlab_dir))
# conn.sudo('cp {}cli/Pipfile.lock {}cli_Pipfile.lock'.format(legion_dir, jupyterlab_dir))
# conn.sudo('cp -r {}sdk {}sdk'.format(legion_dir, jupyterlab_dir))
# conn.sudo('cp -r {}toolchains/python {}toolchains_python'.format(legion_dir, jupyterlab_dir))
# conn.sudo('cp -r {}cli {}cli'.format(legion_dir, jupyterlab_dir))
except Exception as err:
print('Error:', str(err))
sys.exit(1)
else:
try:
conn.sudo(
'sed -i "s/c.NotebookApp.base_url =.*/c.NotebookApp.base_url = \'\/{0}\/\'/" {1}'.format(
exploratory_name, jupyter_conf_file))
except Exception as err:
print('Error:', str(err))
sys.exit(1)
def ensure_pyspark_local_kernel(os_user, pyspark_local_path_dir, templates_dir, spark_version):
if not exists(conn,'/home/' + os_user + '/.ensure_dir/pyspark_local_kernel_ensured'):
try:
conn.sudo('mkdir -p ' + pyspark_local_path_dir)
conn.sudo('touch ' + pyspark_local_path_dir + 'kernel.json')
conn.put(templates_dir + 'pyspark_local_template.json', '/tmp/pyspark_local_template.json')
conn.sudo('''bash -l -c "PYJ=`find /opt/spark/ -name '*py4j*.zip' | tr '\\n' ':' | sed 's|:$||g'`; sed -i 's|PY4J|'$PYJ'|g' /tmp/pyspark_local_template.json" ''')
conn.sudo('sed -i "s|SP_VER|' + spark_version + '|g" /tmp/pyspark_local_template.json')
conn.sudo('sed -i \'/PYTHONPATH\"\:/s|\(.*\)"|\\1/home/{0}/caffe/python:/home/{0}/pytorch/build:"|\' /tmp/pyspark_local_template.json'.format(os_user))
conn.sudo('\cp /tmp/pyspark_local_template.json ' + pyspark_local_path_dir + 'kernel.json')
conn.sudo('touch /home/' + os_user + '/.ensure_dir/pyspark_local_kernel_ensured')
except:
sys.exit(1)
def ensure_py3spark_local_kernel(os_user, py3spark_local_path_dir, templates_dir, spark_version, python_venv_path, python_venv_version):
if not exists(conn,'/home/' + os_user + '/.ensure_dir/py3spark_local_kernel_ensured'):
try:
conn.sudo('mkdir -p ' + py3spark_local_path_dir)
conn.sudo('touch ' + py3spark_local_path_dir + 'kernel.json')
conn.put(templates_dir + 'py3spark_local_template.json', '/tmp/py3spark_local_template.json')
conn.sudo(
'''bash -l -c "PYJ=`find /opt/spark/ -name '*py4j*.zip' | tr '\\n' ':' | sed 's|:$||g'`; sed -i 's|PY4J|'$PYJ'|g' /tmp/py3spark_local_template.json" ''')
conn.sudo('sed -i "s|PYTHON_VENV_PATH|' + python_venv_path + '|g" /tmp/py3spark_local_template.json')
conn.sudo('sed -i "s|PYTHON_VENV_VERSION|' + python_venv_version + '|g" /tmp/py3spark_local_template.json')
conn.sudo('sed -i "s|PYTHON_VENV_SHORT_VERSION|' + python_venv_version[:3] + '|g" /tmp/py3spark_local_template.json')
conn.sudo('sed -i "s|SP_VER|' + spark_version + '|g" /tmp/py3spark_local_template.json')
conn.sudo('sed -i \'/PYTHONPATH\"\:/s|\(.*\)"|\\1/home/{0}/caffe/python:/home/{0}/pytorch/build:"|\' /tmp/py3spark_local_template.json'.format(os_user))
conn.sudo('\cp /tmp/py3spark_local_template.json ' + py3spark_local_path_dir + 'kernel.json')
conn.sudo('touch /home/' + os_user + '/.ensure_dir/py3spark_local_kernel_ensured')
except:
sys.exit(1)
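# Generates Jupyter kernel specs for a dataengine-service cluster (pyspark_<cluster> and, when a
# cluster-side Python is detected, py3spark_<cluster>) by filling in the
# pyspark_dataengine-service_template.json template.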
def pyspark_kernel(kernels_dir, dataengine_service_version, cluster_name, spark_version, bucket, user_name, region, os_user='',
application='', pip_mirror='', numpy_version='1.14.3'):
spark_path = '/opt/{0}/{1}/spark/'.format(dataengine_service_version, cluster_name)
subprocess.run('mkdir -p {0}pyspark_{1}/'.format(kernels_dir, cluster_name), shell=True, check=True)
kernel_path = '{0}pyspark_{1}/kernel.json'.format(kernels_dir, cluster_name)
template_file = "/tmp/pyspark_dataengine-service_template.json"
with open(template_file, 'r') as f:
text = f.read()
text = text.replace('CLUSTER_NAME', cluster_name)
text = text.replace('SPARK_VERSION', 'Spark-' + spark_version)
text = text.replace('SPARK_PATH', spark_path)
text = text.replace('PYTHON_SHORT_VERSION', '3.8')
text = text.replace('PYTHON_FULL_VERSION', '3.8')
text = text.replace('PYTHON_PATH', '/usr/bin/python3.8')
text = text.replace('DATAENGINE-SERVICE_VERSION', dataengine_service_version)
with open(kernel_path, 'w') as f:
f.write(text)
subprocess.run('touch /tmp/kernel_var.json', shell=True, check=True)
subprocess.run('''bash -l -c "PYJ=`find /opt/{0}/{1}/spark/ -name '*py4j*.zip' | tr '\\n' ':' | sed 's|:$||g'`; cat {2} | sed 's|PY4J|'$PYJ'|g' | sed \'/PYTHONPATH\"\:/s|\(.*\)\"|\\1/home/{3}/caffe/python:/home/{3}/pytorch/build:\"|\' > /tmp/kernel_var.json" '''.
format(dataengine_service_version, cluster_name, kernel_path, os_user), shell=True, check=True)
subprocess.run('sudo mv /tmp/kernel_var.json ' + kernel_path, shell=True, check=True)
get_cluster_python_version(region, bucket, user_name, cluster_name)
with open('/tmp/python_version') as f:
python_version = f.read()
if python_version != '\n':
installing_python(region, bucket, user_name, cluster_name, application, pip_mirror, numpy_version)
subprocess.run('mkdir -p {0}py3spark_{1}/'.format(kernels_dir, cluster_name), shell=True, check=True)
kernel_path = '{0}py3spark_{1}/kernel.json'.format(kernels_dir, cluster_name)
template_file = "/tmp/pyspark_dataengine-service_template.json"
with open(template_file, 'r') as f:
text = f.read()
text = text.replace('CLUSTER_NAME', cluster_name)
text = text.replace('SPARK_VERSION', 'Spark-' + spark_version)
text = text.replace('SPARK_PATH', spark_path)
text = text.replace('PYTHON_SHORT_VERSION', python_version[0:3])
text = text.replace('PYTHON_FULL_VERSION', python_version[0:3])
text = text.replace('PYTHON_PATH', '/opt/python/python' + python_version[:5] + '/bin/python' +
python_version[:3])
text = text.replace('DATAENGINE-SERVICE_VERSION', dataengine_service_version)
with open(kernel_path, 'w') as f:
f.write(text)
subprocess.run('touch /tmp/kernel_var.json', shell=True, check=True)
subprocess.run('''bash -l -c "PYJ=`find /opt/{0}/{1}/spark/ -name '*py4j*.zip' | tr '\\n' ':' | sed 's|:$||g'`; cat {2} | sed 's|PY4J|'$PYJ'|g' | sed \'/PYTHONPATH\"\:/s|\(.*\)\"|\\1/home/{3}/caffe/python:/home/{3}/pytorch/build:\"|\' > /tmp/kernel_var.json" '''
.format(dataengine_service_version, cluster_name, kernel_path, os_user), shell=True, check=True)
subprocess.run('sudo mv /tmp/kernel_var.json {}'.format(kernel_path), shell=True, check=True)
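# Restricts SSH to a hardened set of key-exchange algorithms and ciphers on both the server
# (sshd_config) and client (ssh_config) side, then reloads the SSH service.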
def ensure_ciphers():
try:
conn.sudo('''bash -c "echo -e '\nKexAlgorithms curve25519-sha256@libssh.org,diffie-hellman-group-exchange-sha256' >> /etc/ssh/sshd_config"''')
conn.sudo('''bash -c "echo -e 'Ciphers aes256-gcm@openssh.com,aes128-gcm@openssh.com,chacha20-poly1305@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr' >> /etc/ssh/sshd_config"''')
conn.sudo('''bash -c "echo -e '\tKexAlgorithms curve25519-sha256@libssh.org,diffie-hellman-group-exchange-sha256' >> /etc/ssh/ssh_config"''')
conn.sudo('''bash -c "echo -e '\tCiphers aes256-gcm@openssh.com,aes128-gcm@openssh.com,chacha20-poly1305@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr' >> /etc/ssh/ssh_config"''')
try:
conn.sudo('service ssh reload')
except:
conn.sudo('service sshd reload')
except Exception as err:
traceback.print_exc()
print('Failed to ensure ciphers: ', str(err))
sys.exit(1)
def ensure_dataengine_service_devtools():
try:
if not exists(conn, '/home/{}/dataengine-service-devtools-ensured'.format(os.environ['conf_os_user'])):
if os.environ['conf_cloud_provider'] in 'aws':
manage_pkg('-y install', 'remote', 'libcurl libcurl-devel')
elif (os.environ['conf_cloud_provider'] in 'gcp') and (
'-w-' in conn.sudo('hostname').stdout.replace('\n', '')):
# manage_pkg('-y build-dep', 'remote', 'libcurl4-gnutls-dev libxml2-dev')
manage_pkg('-y install', 'remote', 'libxml2-dev libcurl4-openssl-dev pkg-config')
conn.sudo('R -e "install.packages(\'devtools\', repos = \'cloud.r-project.org\')"')
if (os.environ['conf_cloud_provider'] in 'gcp') and (
"R_LIBS_SITE" not in conn.sudo('cat /opt/conda/miniconda3/lib/R/etc/Renviron').stdout):
conn.sudo(
'''bash -l -c 'echo "R_LIBS_SITE=${R_LIBS_SITE-'/usr/local/lib/R/site-library:/usr/lib/R/site-library:/usr/lib/R/library'}" >> /opt/conda/miniconda3/lib/R/etc/Renviron' ''')
conn.sudo('touch /home/{}/dataengine-service-devtools-ensured'.format(os.environ['conf_os_user']))
except Exception as err:
print('Failed to ensure devtools for dataproc with err: {}'.format(err))
sys.exit(1)
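# Installs a list of (name, version) R packages via devtools::install_version and returns a
# per-package status report, mirroring install_pip_pkg; pulled-in dependencies are reported
# under add_pkgs.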
def install_r_pkg(requisites):
status = list()
error_parser = "ERROR:|error:|Cannot|failed|Please run|requires|Error|Skipping|couldn't find"
if os.environ['conf_resource'] == 'dataengine-service':
ensure_dataengine_service_devtools()
try:
for r_pkg in requisites:
name, vers = r_pkg
version = vers
if vers == 'N/A':
vers = ''
else:
vers = '"{}"'.format(vers)
if name == 'sparklyr':
conn.run('sudo -i R -e \'devtools::install_version("{0}", version = {1}, repos = "http://cran.us.r-project.org", '
'dependencies = NA)\' 2>&1 | tee /tmp/install_{0}.tmp; if ! grep -w -E "({2})" /tmp/install_{0}.tmp '
'> /tmp/install_{0}.log; then echo "" > /tmp/install_{0}.log;fi'.format(name, vers, error_parser))
else:
conn.sudo('R -e \'devtools::install_version("{0}", version = {1}, repos = "https://cloud.r-project.org", '
'dependencies = NA)\' 2>&1 | tee /tmp/install_{0}.tmp; if ! grep -w -E "({2})" /tmp/install_{0}.tmp > '
'/tmp/install_{0}.log; then echo "" > /tmp/install_{0}.log;fi'.format(name, vers, error_parser))
dep = conn.sudo('grep "(NA.*->". /tmp/install_' + name + '.tmp | awk \'{print $1}\'').stdout.replace('\n', ' ')
dep_ver = conn.sudo('grep "(NA.*->". /tmp/install_' + name + '.tmp | awk \'{print $4}\'').stdout.replace('\n', ' ').replace(')', '').split(' ')
if dep == '':
dep = []
else:
dep = dep.split(' ')
for n, i in enumerate(dep):
if i == name:
dep[n] = ''
else:
dep[n] = '{} v.{}'.format(dep[n], dep_ver[n])
dep = [i for i in dep if i]
conn.sudo('hostname')
err = conn.sudo('cat /tmp/install_{0}.log'.format(name)).stdout.replace('"', "'").replace('\n', '')
conn.sudo('R -e \'installed.packages()[,c(3:4)]\' | if ! grep -w {0} > /tmp/install_{0}.list; then echo "" > /tmp/install_{0}.list;fi'.format(name))
res = conn.sudo('cat /tmp/install_{0}.list'.format(name)).stdout.replace('\n', '')
if err:
status_msg = 'installation_error'
if 'couldn\'t find package \'{}\''.format(name) in err:
status_msg = 'invalid_name'
elif res:
ansi_escape = re.compile(r'\x1b[^m]*m')
version = ansi_escape.sub('', res).split("\n")[0].split('"')[1]
status_msg = 'installed'
if 'Error in download_version_url(package, version, repos, type) :' in err or 'Error in parse_deps(paste(spec,' in err:
conn.sudo('R -e \'install.packages("versions", repos="https://cloud.r-project.org", dep=TRUE)\'')
versions = conn.sudo('R -e \'library(versions); available.versions("' + name + '")\' 2>&1 | grep -A 50 '
'\'date available\' | awk \'{print $2}\'').stdout.strip().replace('\n', ' ')[5:].split(' ')
if versions != ['']:
status_msg = 'invalid_version'
else:
versions = []
else:
versions = []
status.append({"group": "r_pkg", "name": name, "version": version, "status": status_msg, "error_message": err, "available_versions": versions, "add_pkgs": dep})
conn.sudo('rm /tmp/*{}*'.format(name))
return status
except Exception as err:
for r_pkg in requisites:
name, vers = r_pkg
status.append(
{"group": "r_pkg", "name": name, "version": vers, "status": 'installation_error', "error_message": str(err)})
print("Failed to install R packages")
return status
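# Rewrites the spark.jars line in every spark-defaults.conf found on the host so Spark picks up
# all jars currently present in jars_dir (plus dataengine-service jars for -des- configs).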
def update_spark_jars(jars_dir='/opt/jars'):
try:
configs = conn.sudo('find /opt/ /etc/ /usr/lib/ -name spark-defaults.conf -type f').stdout.split('\n')
if exists(conn, jars_dir):
for conf in filter(None, configs):
des_path = ''
all_jars = conn.sudo('find {0} -name "*.jar"'.format(jars_dir)).stdout.split('\n')
if ('-des-' in conf):
des_path = '/'.join(conf.split('/')[:3])
all_jars = find_des_jars(all_jars, des_path)
conn.sudo('''sed -i '/^# Generated\|^spark.jars/d' {0}'''.format(conf))
conn.sudo(''' bash -l -c 'echo "# Generated spark.jars by DataLab from {0}\nspark.jars {1}" >> {2}' '''
.format(','.join(filter(None, [jars_dir, des_path])), ','.join(all_jars), conf))
# conn.sudo("sed -i 's/^[[:space:]]*//' {0}".format(conf))
else:
print("Can't find directory {0} with jar files".format(jars_dir))
except Exception as err:
append_result("Failed to update spark.jars parameter", str(err))
print("Failed to update spark.jars parameter")
sys.exit(1)
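# Resolves Maven artifacts with Apache Ivy (using the ivysettings.xml template and the host's
# http_proxy), copies the fetched jars into /opt/jars/java, refreshes spark-defaults.conf via
# update_spark_jars() and returns a per-artifact status report.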
def install_java_pkg(requisites):
status = list()
error_parser = "ERROR|error|No such|no such|Please run|requires|module not found|Exception"
templates_dir = '/root/templates/'
ivy_dir = '/opt/ivy'
ivy_cache_dir = '{0}/cache/'.format(ivy_dir)
ivy_settings = 'ivysettings.xml'
dest_dir = '/opt/jars/java'
try:
ivy_jar = conn.sudo('find /opt /usr -name "*ivy-{0}.jar" | head -n 1'.format(os.environ['notebook_ivy_version'])).stdout.replace('\n','')
conn.sudo('mkdir -p {0} {1}'.format(ivy_dir, dest_dir))
conn.put('{0}{1}'.format(templates_dir, ivy_settings), '/tmp/{}'.format(ivy_settings))
conn.sudo('cp -f /tmp/{1} {0}/{1}'.format(ivy_dir, ivy_settings))
proxy_string = conn.sudo('cat /etc/profile | grep http_proxy | cut -f2 -d"="').stdout.replace('\n','')
proxy_re = '(?P<proto>http.*)://(?P<host>[^:/ ]+):(?P<port>[0-9]*)'
proxy_find = re.search(proxy_re, proxy_string)
java_proxy = "export _JAVA_OPTIONS='-Dhttp.proxyHost={0} -Dhttp.proxyPort={1} \
-Dhttps.proxyHost={0} -Dhttps.proxyPort={1}'".format(proxy_find.group('host'), proxy_find.group('port'))
for java_pkg in requisites:
conn.sudo('rm -rf {0}'.format(ivy_cache_dir))
conn.sudo('mkdir -p {0}'.format(ivy_cache_dir))
group, artifact, version, override = java_pkg
print("Installing package (override: {3}): {0}:{1}:{2}".format(group, artifact, version, override))
conn.sudo('''bash -c "{8}; java -jar {0} -settings {1}/{2} -cache {3} -dependency {4} {5} {6} 2>&1 | tee /tmp/install_{5}.tmp; if ! grep -w -E \\"({7})\\" /tmp/install_{5}.tmp > /tmp/install_{5}.log; then echo \\"\\" > /tmp/install_{5}.log;fi" '''.format(ivy_jar, ivy_dir, ivy_settings, ivy_cache_dir, group, artifact, version, error_parser, java_proxy))
err = conn.sudo('cat /tmp/install_{0}.log'.format(artifact)).stdout.replace('"', "'").strip()
conn.sudo('find {0} -name "{1}*.jar" | head -n 1 | rev | cut -f1 -d "/" | rev | \
if ! grep -w -i {1} > /tmp/install_{1}.list; then echo "" > /tmp/install_{1}.list;fi'.format(ivy_cache_dir, artifact))
res = conn.sudo('cat /tmp/install_{0}.list'.format(artifact)).stdout.replace('\n','')
if res:
conn.sudo('cp -f $(find {0} -name "*.jar" | xargs) {1}'.format(ivy_cache_dir, dest_dir))
status.append({"group": "java", "name": "{0}:{1}".format(group, artifact), "version": version, "status": "installed"})
else:
status.append({"group": "java", "name": "{0}:{1}".format(group, artifact), "status": "installation_error", "error_message": err})
update_spark_jars()
conn.sudo('rm -rf /tmp/*{}*'.format(artifact))
return status
except Exception as err:
for java_pkg in requisites:
group, artifact, version, override = java_pkg
status.append({"group": "java", "name": "{0}:{1}".format(group, artifact), "status": "installation_error",
"error_message": err})
print("Failed to install {} packages".format(requisites))
return status
def get_available_r_pkgs():
try:
r_pkgs = dict()
conn.sudo('R -e \'write.table(available.packages(contriburl="https://cloud.r-project.org/src/contrib"), file="/tmp/r.csv", row.names=F, col.names=F, sep=",")\'')
conn.get("/tmp/r.csv", "r.csv")
with open('r.csv', 'r') as csvfile:
reader = csv.reader(csvfile, delimiter=',')
for row in reader:
r_pkgs[row[0]] = row[1]
return r_pkgs
except Exception as err:
print("Failed to install {} ".format(err))
sys.exit(1)
def ensure_toree_local_kernel(os_user, toree_link, scala_kernel_path, files_dir, scala_version, spark_version):
if not exists(conn,'/home/' + os_user + '/.ensure_dir/toree_local_kernel_ensured'):
try:
conn.sudo('pip install ' + toree_link + ' --no-cache-dir')
conn.sudo('ln -s /opt/spark/ /usr/local/spark')
conn.sudo('jupyter toree install')
conn.sudo('mv ' + scala_kernel_path + 'lib/* /tmp/')
conn.put(files_dir + 'toree-assembly-0.5.0.jar', '/tmp/toree-assembly-0.5.0.jar')
conn.sudo('mv /tmp/toree-assembly-0.5.0.jar ' + scala_kernel_path + 'lib/')
conn.sudo(
'sed -i "s|Apache Toree - Scala|Local Apache Toree - Scala (Scala-' + scala_version +
', Spark-' + spark_version + ')|g" ' + scala_kernel_path + 'kernel.json')
conn.sudo('touch /home/' + os_user + '/.ensure_dir/toree_local_kernel_ensured')
except:
sys.exit(1)
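# Installs ungit as a systemd service, sets placeholder global git identity, a pre-commit hook
# template and .gitignore defaults, and routes git SSH traffic through the edge-node proxy. On
# repeated calls only the root path and proxy settings are refreshed.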
def install_ungit(os_user, notebook_name, edge_ip):
if not exists(conn,'/home/{}/.ensure_dir/ungit_ensured'.format(os_user)):
try:
manage_npm_pkg('npm -g install ungit@{}'.format(os.environ['notebook_ungit_version']))
conn.put('/root/templates/ungit.service', '/tmp/ungit.service')
conn.sudo("sed -i 's|OS_USR|{}|' /tmp/ungit.service".format(os_user))
http_proxy = conn.run('''bash -l -c 'echo $http_proxy' ''').stdout.replace('\n','')
conn.sudo("sed -i 's|PROXY_HOST|{}|g' /tmp/ungit.service".format(http_proxy))
conn.sudo("sed -i 's|NOTEBOOK_NAME|{}|' /tmp/ungit.service".format(
notebook_name))
conn.sudo("mv -f /tmp/ungit.service /etc/systemd/system/ungit.service")
conn.run('git config --global user.name "Example User"')
conn.run('git config --global user.email "example@example.com"')
conn.run('mkdir -p ~/.git/templates/hooks')
conn.put('/root/scripts/git_pre_commit.py', '/home/{}/.git/templates/hooks/pre-commit'.format(os_user))
conn.sudo('chmod 755 ~/.git/templates/hooks/pre-commit')
conn.run('git config --global init.templatedir ~/.git/templates')
conn.run('touch ~/.gitignore')
conn.run('git config --global core.excludesfile ~/.gitignore')
conn.run('echo ".ipynb_checkpoints/" >> ~/.gitignore')
conn.run('echo "spark-warehouse/" >> ~/.gitignore')
conn.run('echo "metastore_db/" >> ~/.gitignore')
conn.run('echo "derby.log" >> ~/.gitignore')
conn.sudo('''bash -l -c 'echo -e "Host git.epam.com\n HostName git.epam.com\n ProxyCommand nc -X connect -x {}:3128 %h %p\n" > /home/{}/.ssh/config' '''.format(
edge_ip, os_user))
conn.sudo('''bash -l -c 'echo -e "Host github.com\n HostName github.com\n ProxyCommand nc -X connect -x {}:3128 %h %p" >> /home/{}/.ssh/config' '''.format(
edge_ip, os_user))
conn.sudo('''bash -l -c 'echo -e "Host gitlab.com\n HostName gitlab.com\n ProxyCommand nc -X connect -x {}:3128 %h %p" >> /home/{}/.ssh/config' '''.format(
edge_ip, os_user))
conn.sudo('systemctl daemon-reload')
conn.sudo('systemctl enable ungit.service')
conn.sudo('systemctl start ungit.service')
conn.sudo('touch /home/{}/.ensure_dir/ungit_ensured'.format(os_user))
except:
sys.exit(1)
else:
try:
conn.sudo("sed -i 's|--rootPath=/.*-ungit|--rootPath=/{}-ungit|' /etc/systemd/system/ungit.service".format(
notebook_name))
http_proxy = conn.run('''bash -l -c 'echo $http_proxy' ''').stdout.replace('\n','')
conn.sudo("sed -i 's|HTTPS_PROXY=.*3128|HTTPS_PROXY={}|g' /etc/systemd/system/ungit.service".format(http_proxy))
conn.sudo("sed -i 's|HTTP_PROXY=.*3128|HTTP_PROXY={}|g' /etc/systemd/system/ungit.service".format(http_proxy))
conn.sudo('systemctl daemon-reload')
conn.sudo('systemctl restart ungit.service')
except:
sys.exit(1)
conn.run('''bash -l -c 'git config --global http.proxy $http_proxy' ''')
conn.run('''bash -l -c 'git config --global https.proxy $https_proxy' ''')
def install_inactivity_checker(os_user, ip_address, rstudio=False):
if not exists(conn,'/home/{}/.ensure_dir/inactivity_ensured'.format(os_user)):
try:
if not exists(conn,'/opt/inactivity'):
conn.sudo('mkdir /opt/inactivity')
conn.put('/root/templates/inactive.service', '/tmp/inactive.service')
conn.sudo('cp /tmp/inactive.service /etc/systemd/system/inactive.service')
conn.put('/root/templates/inactive.timer', '/tmp/inactive.timer')
conn.sudo('cp /tmp/inactive.timer /etc/systemd/system/inactive.timer')
if rstudio:
conn.put('/root/templates/inactive_rs.sh', '/tmp/inactive.sh')
conn.sudo('cp /tmp/inactive.sh /opt/inactivity/inactive.sh')
else:
conn.put('/root/templates/inactive.sh', '/tmp/inactive.sh')
conn.sudo('cp /tmp/inactive.sh /opt/inactivity/inactive.sh')
conn.sudo("sed -i 's|IP_ADRESS|{}|g' /opt/inactivity/inactive.sh".format(ip_address))
conn.sudo("chmod 755 /opt/inactivity/inactive.sh")
conn.sudo("chown root:root /etc/systemd/system/inactive.service")
conn.sudo("chown root:root /etc/systemd/system/inactive.timer")
conn.sudo('''bash -l -c "date +%s > /opt/inactivity/local_inactivity" ''')
conn.sudo('systemctl daemon-reload')
conn.sudo('systemctl enable inactive.timer')
conn.sudo('systemctl start inactive.timer')
conn.sudo('touch /home/{}/.ensure_dir/inactivity_ensured'.format(os_user))
except Exception as err:
print('Failed to set up the inactivity check service!', str(err))
sys.exit(1)
def set_git_proxy(os_user, hostname, keyfile, proxy_host):
init_datalab_connection(hostname, os_user, keyfile)
conn.run('git config --global http.proxy {}'.format(proxy_host))
conn.run('git config --global https.proxy {}'.format(proxy_host))
conn.close()
def set_mongo_parameters(client, mongo_parameters):
for i in mongo_parameters:
client.datalabdb.settings.insert_one({"_id": i, "value": mongo_parameters[i]})
def install_r_packages(os_user):
if not exists(conn,'/home/' + os_user + '/.ensure_dir/r_packages_ensured'):
conn.sudo('R -e "install.packages(\'devtools\', repos = \'https://cloud.r-project.org\')"')
conn.sudo('R -e "install.packages(\'knitr\', repos = \'https://cloud.r-project.org\')"')
conn.sudo('R -e "install.packages(\'ggplot2\', repos = \'https://cloud.r-project.org\')"')
conn.sudo('R -e "install.packages(c(\'devtools\',\'mplot\', \'googleVis\'), '
'repos = \'https://cloud.r-project.org\'); require(devtools); install_github(\'ramnathv/rCharts\')"')
conn.sudo('R -e \'install.packages("versions", repos="https://cloud.r-project.org", dep=TRUE)\'')
conn.sudo('touch /home/' + os_user + '/.ensure_dir/r_packages_ensured')
def add_breeze_library_local(os_user):
if not exists(conn,'/home/' + os_user + '/.ensure_dir/breeze_local_ensured'):
try:
breeze_tmp_dir = '/tmp/breeze_tmp_local/'
jars_dir = '/opt/jars/'
conn.sudo('mkdir -p {}'.format(breeze_tmp_dir))
conn.sudo('wget https://repo1.maven.org/maven2/org/scalanlp/breeze_{0}/{1}/breeze_{0}-{1}.jar -O \
{2}breeze_{0}-{1}.jar'.format('2.11', '0.12', breeze_tmp_dir))
conn.sudo('wget https://repo1.maven.org/maven2/org/scalanlp/breeze-natives_{0}/{1}/breeze-natives_{0}-{1}.jar -O \
{2}breeze-natives_{0}-{1}.jar'.format('2.11', '0.12', breeze_tmp_dir))
conn.sudo('wget https://repo1.maven.org/maven2/org/scalanlp/breeze-viz_{0}/{1}/breeze-viz_{0}-{1}.jar -O \
{2}breeze-viz_{0}-{1}.jar'.format('2.11', '0.12', breeze_tmp_dir))
conn.sudo('wget https://repo1.maven.org/maven2/org/scalanlp/breeze-macros_{0}/{1}/breeze-macros_{0}-{1}.jar -O \
{2}breeze-macros_{0}-{1}.jar'.format('2.11', '0.12', breeze_tmp_dir))
conn.sudo('wget https://repo1.maven.org/maven2/org/scalanlp/breeze-parent_{0}/{1}/breeze-parent_{0}-{1}.jar -O \
{2}breeze-parent_{0}-{1}.jar'.format('2.11', '0.12', breeze_tmp_dir))
conn.sudo('wget https://repo1.maven.org/maven2/org/jfree/jfreechart/{0}/jfreechart-{0}.jar -O \
{1}jfreechart-{0}.jar'.format('1.0.19', breeze_tmp_dir))
conn.sudo('wget https://repo1.maven.org/maven2/org/jfree/jcommon/{0}/jcommon-{0}.jar -O \
{1}jcommon-{0}.jar'.format('1.0.24', breeze_tmp_dir))
conn.sudo('wget --no-check-certificate https://brunelvis.org/jar/spark-kernel-brunel-all-{0}.jar -O \
{1}spark-kernel-brunel-all-{0}.jar'.format('2.3', breeze_tmp_dir))
conn.sudo('mv {0}* {1}'.format(breeze_tmp_dir, jars_dir))
conn.sudo('touch /home/' + os_user + '/.ensure_dir/breeze_local_ensured')
except:
sys.exit(1)
def configure_data_engine_service_pip(hostname, os_user, keyfile, emr=False):
init_datalab_connection(hostname, os_user, keyfile)
#datalab.common_lib.manage_pkg('-y install', 'remote', 'python3-pip')
if not exists(conn,'/usr/bin/pip3') and conn.sudo("python3.9 -V 2>/dev/null | awk '{print $2}'").stdout:
conn.sudo('ln -s /usr/bin/pip-3.9 /usr/bin/pip3')
elif not exists(conn,'/usr/bin/pip3') and conn.sudo("python3.8 -V 2>/dev/null | awk '{print $2}'").stdout:
conn.sudo('ln -s /usr/bin/pip-3.8 /usr/bin/pip3')
elif not exists(conn,'/usr/bin/pip3') and conn.sudo("python3.7 -V 2>/dev/null | awk '{print $2}'").stdout:
conn.sudo('ln -s /usr/bin/pip-3.7 /usr/bin/pip3')
elif not exists(conn,'/usr/bin/pip3') and conn.sudo("python3.6 -V 2>/dev/null | awk '{print $2}'").stdout:
conn.sudo('ln -s /usr/bin/pip-3.6 /usr/bin/pip3')
elif not exists(conn,'/usr/bin/pip3') and conn.sudo("python3.5 -V 2>/dev/null | awk '{print $2}'").stdout:
conn.sudo('ln -s /usr/bin/pip-3.5 /usr/bin/pip3')
if emr:
conn.sudo('pip3 install -U pip=={}'.format(os.environ['conf_pip_version']))
conn.sudo('ln -s /usr/local/bin/pip3.7 /bin/pip3.7')
conn.sudo('''bash -c -l 'echo "export PATH=$PATH:/usr/local/bin" >> /etc/profile' ''')
conn.sudo('bash -c -l "source /etc/profile"')
conn.run('bash -c -l "source /etc/profile"')
conn.close()
def configure_data_engine_service_livy(hostname, os_user, keyfile):
init_datalab_connection(hostname, os_user, keyfile)
if exists(conn,'/usr/local/lib/livy'):
conn.sudo('rm -r /usr/local/lib/livy')
conn.sudo('wget -P /tmp/ --user={} --password={} '
'{}/repository/packages/livy.tar.gz --no-check-certificate'
.format(os.environ['conf_repository_user'],
os.environ['conf_repository_pass'], os.environ['conf_repository_address']))
conn.sudo('tar -xzvf /tmp/livy.tar.gz -C /usr/local/lib/')
conn.sudo('ln -s /usr/local/lib/incubator-livy /usr/local/lib/livy')
conn.put('/root/templates/dataengine-service_livy-env.sh', '/usr/local/lib/livy/conf/livy-env.sh')
conn.put('/root/templates/dataengine-service_livy.service', '/tmp/livy.service')
conn.sudo("sed -i 's|OS_USER|{}|' /tmp/livy.service".format(os_user))
conn.sudo('mv /tmp/livy.service /etc/systemd/system/livy.service')
conn.sudo('systemctl daemon-reload')
conn.sudo('systemctl enable livy.service')
conn.sudo('systemctl start livy.service')
conn.close()
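# Removes a remote cluster's Spark configuration from the RStudio user's .Rprofile and .Renviron
# and re-activates the most recently configured remaining cluster, if any.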
def remove_rstudio_dataengines_kernel(cluster_name, os_user):
try:
cluster_re = ['-{}"'.format(cluster_name),
'-{}-'.format(cluster_name),
'-{}/'.format(cluster_name)]
conn.get('/home/{}/.Rprofile'.format(os_user), 'Rprofile')
data = open('Rprofile').read()
conf = filter(None, data.split('\n'))
# Filter config from any math of cluster_name in line,
# separated by defined symbols to avoid partly matches
conf = [i for i in conf if not any(x in i for x in cluster_re)]
comment_all = lambda x: x if x.startswith('#master') else '#{}'.format(x)
uncomment = lambda x: x[1:] if not x.startswith('#master') else x
conf =[comment_all(i) for i in conf]
conf =[uncomment(i) for i in conf]
last_spark = max([conf.index(i) for i in conf if 'master=' in i] or [0])
active_cluster = conf[last_spark].split('"')[-2] if last_spark != 0 else None
conf = conf[:last_spark] + [conf[l][1:] for l in range(last_spark, len(conf)) if conf[l].startswith("#")] \
+ [conf[l] for l in range(last_spark, len(conf)) if not conf[l].startswith('#')]
with open('.Rprofile', 'w') as f:
for line in conf:
f.write('{}\n'.format(line))
conn.put('.Rprofile', '/home/{}/.Rprofile'.format(os_user))
conn.get('/home/{}/.Renviron'.format(os_user), 'Renviron')
data = open('Renviron').read()
conf = filter(None, data.split('\n'))
comment_all = lambda x: x if x.startswith('#') else '#{}'.format(x)
conf = [comment_all(i) for i in conf]
# Filter config from any math of cluster_name in line,
# separated by defined symbols to avoid partly matches
conf = [i for i in conf if not any(x in i for x in cluster_re)]
if active_cluster:
activate_cluster = lambda x: x[1:] if active_cluster in x else x
conf = [activate_cluster(i) for i in conf]
else:
last_spark = max([conf.index(i) for i in conf if 'SPARK_HOME' in i])
conf = conf[:last_spark] + [conf[l][1:] for l in range(last_spark, len(conf)) if conf[l].startswith("#")]
with open('.Renviron', 'w') as f:
for line in conf:
f.write('{}\n'.format(line))
conn.put('.Renviron', '/home/{}/.Renviron'.format(os_user))
if len(conf) == 1:
conn.sudo('rm -f /home/{}/.ensure_dir/rstudio_dataengine_ensured'.format(os_user))
conn.sudo('rm -f /home/{}/.ensure_dir/rstudio_dataengine-service_ensured'.format(os_user))
conn.sudo('''R -e "source('/home/{}/.Rprofile')"'''.format(os_user))
except:
sys.exit(1)
def restart_zeppelin(creds=False, os_user='', hostname='', keyfile=''):
if creds:
init_datalab_connection(hostname, os_user, keyfile)
conn.sudo("systemctl daemon-reload")
conn.sudo("systemctl restart zeppelin-notebook")
if creds:
conn.close()
def get_spark_memory(creds=False, os_user='', hostname='', keyfile=''):
if creds:
con = init_datalab_connection(hostname, os_user, keyfile)
mem = con.sudo('free -m | grep Mem | tr -s " " ":" | cut -f 2 -d ":"').stdout.replace('\n', '')
instance_memory = int(mem)
else:
mem = conn.sudo('free -m | grep Mem | tr -s " " ":" | cut -f 2 -d ":"').stdout.replace('\n','')
instance_memory = int(mem)
spark_memory = round(instance_memory*90/100)
return spark_memory
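# Collapses consecutive repetitions of `symbol` in `string` to a single occurrence and optionally
# strips a trailing one when symbol_cut is True.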
def replace_multi_symbols(string, symbol, symbol_cut=False):
try:
symbol_amount = 0
for i in range(len(string)):
if string[i] == symbol:
symbol_amount = symbol_amount + 1
while symbol_amount > 1:
string = string.replace(symbol + symbol, symbol)
symbol_amount = symbol_amount - 1
if symbol_cut and string[-1] == symbol:
string = string[:-1]
return string
except Exception as err:
logging.info("Error with replacing multi symbols: " + str(err) + "\n Traceback: " + traceback.format_exc())
append_result(str({"error": "Error with replacing multi symbols",
"error_message": str(err) + "\n Traceback: " + traceback.format_exc()}))
traceback.print_exc(file=sys.stdout)
def update_pyopenssl_lib(os_user):
if not exists(conn,'/home/{}/.ensure_dir/pyopenssl_updated'.format(os_user)):
try:
if exists(conn, '/usr/bin/pip3'):
conn.sudo('pip3 install -U pyopenssl')
conn.sudo('touch /home/{}/.ensure_dir/pyopenssl_updated'.format(os_user))
except:
sys.exit(1)
def find_cluster_kernels():
try:
de = [i for i in conn.sudo('''bash -l -c 'find /opt/ -maxdepth 1 -name "*-de-*" -type d | rev | cut -f 1 -d "/" | rev | xargs -r' ''').stdout.replace('\n', '').split(' ') if i != '']
des = [i for i in conn.sudo('''bash -l -c 'find /opt/ -maxdepth 2 -name "*-des-*" -type d | rev | cut -f 1,2 -d "/" | rev | xargs -r' ''').stdout.replace('\n', '').split(' ') if i != '']
return (de, des)
except Exception as err:
print('Failed to find cluster kernels.', str(err))
sys.exit(1)
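# Rewrites /opt/zeppelin/conf/interpreter.json to use either Livy interpreters (when multiple
# clusters are allowed) or plain Spark interpreters, optionally enabling R support, then restarts
# the zeppelin-notebook service; with interpreter_mode='remote' the config is edited directly on
# the local filesystem, otherwise it is fetched and pushed back over the active connection.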
def update_zeppelin_interpreters(multiple_clusters, r_enabled, interpreter_mode='remote'):
try:
interpreters_config = '/opt/zeppelin/conf/interpreter.json'
local_interpreters_config = '/tmp/interpreter.json'
if interpreter_mode != 'remote':
conn.get(local_interpreters_config, local_interpreters_config)
if multiple_clusters == 'true':
groups = [{"class": "org.apache.zeppelin.livy.LivySparkInterpreter", "name": "spark"},
{"class": "org.apache.zeppelin.livy.LivyPySparkInterpreter", "name": "pyspark"},
{"class": "org.apache.zeppelin.livy.LivyPySpark3Interpreter", "name": "pyspark3"},
{"class": "org.apache.zeppelin.livy.LivySparkSQLInterpreter", "name": "sql"}]
if r_enabled:
groups.append({"class": "org.apache.zeppelin.livy.LivySparkRInterpreter", "name": "sparkr"})
else:
groups = [{"class": "org.apache.zeppelin.spark.SparkInterpreter","name": "spark"},
{"class": "org.apache.zeppelin.spark.PySparkInterpreter", "name": "pyspark"},
{"class": "org.apache.zeppelin.spark.SparkSqlInterpreter", "name": "sql"}]
if r_enabled:
groups.append({"class": "org.apache.zeppelin.spark.SparkRInterpreter", "name": "r"})
r_conf = {"zeppelin.R.knitr": "true", "zeppelin.R.image.width": "100%", "zeppelin.R.cmd": "R",
"zeppelin.R.render.options": "out.format = 'html', comment = NA, echo = FALSE, results = 'asis', message = F, warning = F"}
if interpreter_mode != 'remote':
data = json.loads(open(local_interpreters_config).read())
else:
data = json.loads(open(interpreters_config).read())
for i in data['interpreterSettings'].keys():
if data['interpreterSettings'][i]['group'] == 'md':
continue
elif data['interpreterSettings'][i]['group'] == 'sh':
continue
if r_enabled == 'true':
data['interpreterSettings'][i]['properties'].update(r_conf)
data['interpreterSettings'][i]['interpreterGroup'] = groups
if interpreter_mode != 'remote':
with open(local_interpreters_config, 'w') as f:
f.write(json.dumps(data, indent=2))
conn.put(local_interpreters_config, local_interpreters_config)
conn.sudo('cp -f {0} {1}'.format(local_interpreters_config, interpreters_config))
conn.sudo('systemctl restart zeppelin-notebook')
else:
with open(interpreters_config, 'w') as f:
f.write(json.dumps(data, indent=2))
subprocess.run('sudo systemctl restart zeppelin-notebook', shell=True, check=True)
except Exception as err:
print('Failed to update Zeppelin interpreters', str(err))
sys.exit(1)
def update_hosts_file(os_user):
try:
if not exists(conn,'/home/{}/.ensure_dir/hosts_file_updated'.format(os_user)):
conn.sudo('sed -i "s/^127.0.0.1 localhost/127.0.0.1 localhost localhost.localdomain/g" /etc/hosts')
conn.sudo('touch /home/{}/.ensure_dir/hosts_file_updated'.format(os_user))
except Exception as err:
traceback.print_exc()
print('Failed to update hosts file', str(err))
sys.exit(1)
def ensure_docker_compose(os_user):
try:
configure_docker(os_user)
if not exists(conn,'/home/{}/.ensure_dir/docker_compose_ensured'.format(os_user)):
docker_compose_version = "1.24.1"
conn.sudo('curl -L https://github.com/docker/compose/releases/download/{}/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose'.format(docker_compose_version))
conn.sudo('chmod +x /usr/local/bin/docker-compose')
conn.sudo('touch /home/{}/.ensure_dir/docker_compose_ensured'.format(os_user))
conn.sudo('systemctl daemon-reload')
conn.sudo('systemctl restart docker')
return True
except:
return False
def configure_superset(os_user, keycloak_auth_server_url, keycloak_realm_name, keycloak_client_id, keycloak_client_secret, edge_instance_private_ip, edge_instance_public_ip, superset_name):
print('Configuring Superset')
try:
if not exists(conn,'/home/{}/incubator-superset'.format(os_user)):
conn.sudo('''bash -c 'cd /home/{} && wget https://github.com/apache/incubator-superset/archive/{}.tar.gz' '''.format(
os_user, os.environ['notebook_superset_version']))
conn.sudo('''bash -c 'cd /home/{} && tar -xzf {}.tar.gz' '''.format(os_user, os.environ['notebook_superset_version']))
conn.sudo('''bash -c 'cd /home/{} && ln -sf incubator-superset-{} incubator-superset' '''.format(os_user, os.environ['notebook_superset_version']))
if not exists(conn,'/tmp/superset-notebook_installed'):
conn.sudo('mkdir -p /opt/datalab/templates')
conn.local('cd /root/templates; tar -zcvf /tmp/templates.tar.gz *')
conn.put('/tmp/templates.tar.gz', '/tmp/templates.tar.gz')
conn.sudo('tar -zxvf /tmp/templates.tar.gz -C /opt/datalab/templates')
conn.sudo('sed -i \'s/OS_USER/{}/g\' /opt/datalab/templates/.env'.format(os_user))
proxy_string = '{}:3128'.format(edge_instance_private_ip)
conn.sudo('sed -i \'s|KEYCLOAK_AUTH_SERVER_URL|{}|g\' /opt/datalab/templates/id_provider.json'.format(
keycloak_auth_server_url))
conn.sudo('sed -i \'s/KEYCLOAK_REALM_NAME/{}/g\' /opt/datalab/templates/id_provider.json'.format(
keycloak_realm_name))
conn.sudo('sed -i \'s/CLIENT_ID/{}/g\' /opt/datalab/templates/id_provider.json'.format(keycloak_client_id))
conn.sudo('sed -i \'s/CLIENT_SECRET/{}/g\' /opt/datalab/templates/id_provider.json'.format(
keycloak_client_secret))
conn.sudo('sed -i \'s/PROXY_STRING/{}/g\' /opt/datalab/templates/docker-compose.yml'.format(proxy_string))
conn.sudo('sed -i \'s|KEYCLOAK_AUTH_SERVER_URL|{}|g\' /opt/datalab/templates/superset_config.py'.format(
keycloak_auth_server_url))
conn.sudo('sed -i \'s/KEYCLOAK_REALM_NAME/{}/g\' /opt/datalab/templates/superset_config.py'.format(
keycloak_realm_name))
conn.sudo('sed -i \'s/EDGE_IP/{}/g\' /opt/datalab/templates/superset_config.py'.format(edge_instance_public_ip))
conn.sudo('sed -i \'s/SUPERSET_NAME/{}/g\' /opt/datalab/templates/superset_config.py'.format(superset_name))
conn.sudo('cp -f /opt/datalab/templates/.env /home/{}/incubator-superset/contrib/docker/'.format(os_user))
conn.sudo('cp -f /opt/datalab/templates/docker-compose.yml /home/{}/incubator-superset/contrib/docker/'.format(
os_user))
conn.sudo('cp -f /opt/datalab/templates/id_provider.json /home/{}/incubator-superset/contrib/docker/'.format(
os_user))
conn.sudo(
'cp -f /opt/datalab/templates/requirements-extra.txt /home/{}/incubator-superset/contrib/docker/'.format(
os_user))
conn.sudo('cp -f /opt/datalab/templates/superset_config.py /home/{}/incubator-superset/contrib/docker/'.format(
os_user))
conn.sudo('cp -f /opt/datalab/templates/docker-init.sh /home/{}/incubator-superset/contrib/docker/'.format(
os_user))
conn.sudo('touch /tmp/superset-notebook_installed')
except Exception as err:
print("Failed configure superset: " + str(err))
sys.exit(1)
def manage_npm_pkg(command):
try:
npm_count = 0
installed = False
npm_registry = ['https://registry.npmjs.org/', 'https://registry.npmjs.com/']
while not installed:
if npm_count > 60:
print("NPM registry is not available, please try later")
sys.exit(1)
else:
try:
if npm_count % 2 == 0:
conn.sudo('npm config set registry {}'.format(npm_registry[0]))
else:
conn.sudo('npm config set registry {}'.format(npm_registry[1]))
conn.sudo('{}'.format(command))
installed = True
except:
npm_count += 1
time.sleep(50)
except:
sys.exit(1)
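# Opens (and stores in the module-level `conn`) a Fabric SSH connection to the target host,
# retrying up to 15 times before giving up; most other helpers in this module rely on that
# global connection.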
def init_datalab_connection(hostname, username, keyfile):
try:
global conn
attempt = 0
while attempt < 15:
print('connection attempt {}'.format(attempt))
conn = Connection(host = hostname, user = username, connect_kwargs={'key_filename': keyfile})
conn.config.run.echo = True
try:
conn.run('ls')
conn.config.run.echo = True
return conn
except:
attempt += 1
time.sleep(10)
except:
traceback.print_exc()
sys.exit(1)