#!/usr/bin/python3
# *****************************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# ******************************************************************************
import argparse
import os
import sys
import datalab.fab
from datalab.actions_lib import *
from datalab.fab import *
from datalab.notebook_lib import *
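
# Connection details and component versions arrive as CLI arguments; the
# notebook_* and conf_* values below are expected as environment variables
# exported by the caller. Both are read without fallbacks, so a missing
# variable fails fast with a KeyError.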
parser = argparse.ArgumentParser()
parser.add_argument('--hostname', type=str, default='')
parser.add_argument('--keyfile', type=str, default='')
parser.add_argument('--region', type=str, default='')
parser.add_argument('--spark_version', type=str, default='')
parser.add_argument('--hadoop_version', type=str, default='')
parser.add_argument('--os_user', type=str, default='')
parser.add_argument('--scala_version', type=str, default='')
parser.add_argument('--r_mirror', type=str, default='')
parser.add_argument('--master_ip', type=str, default='')
parser.add_argument('--node_type', type=str, default='')
args = parser.parse_args()
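# Illustrative invocation (script name and all values are examples only):
#   python3 configure_node.py --hostname 10.0.1.5 --keyfile /root/keys/key.pem \
#       --region us-east-1 --spark_version 3.0.1 --hadoop_version 3.2 \
#       --os_user datalab-user --scala_version 2.12.12 \
#       --master_ip 10.0.1.5 --node_type master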
spark_version = args.spark_version
hadoop_version = args.hadoop_version
tensorflow_version = os.environ['notebook_tensorflow_version']
nvidia_version = os.environ['notebook_nvidia_version']
theano_version = os.environ['notebook_theano_version']
keras_version = os.environ['notebook_keras_version']
caffe_version = os.environ['notebook_caffe_version']
caffe2_version = os.environ['notebook_caffe2_version']
cmake_version = os.environ['notebook_cmake_version']
cntk_version = os.environ['notebook_cntk_version']
mxnet_version = os.environ['notebook_mxnet_version']
python3_version = "3.4"
python_venv_version = os.environ['notebook_python_venv_version']
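# python_venv_version is a full interpreter version such as '3.8.13' (an
# illustrative value); its first three characters select the binary name,
# yielding e.g. /opt/python/python3.8.13/bin/python3.8.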
python_venv_path = '/opt/python/python{0}/bin/python{1}'.format(python_venv_version, python_venv_version[:3])
scala_link = "https://www.scala-lang.org/files/archive/"
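# In AWS China (cn-north-1) Spark is fetched from a Chinese mirror, presumably
# because archive.apache.org is not reliably reachable there; every other
# region downloads from the Apache archive directly.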
if args.region == 'cn-north-1':
    spark_link = "http://mirrors.hust.edu.cn/apache/spark/spark-" + spark_version + "/spark-" + spark_version + \
                 "-bin-hadoop" + hadoop_version + ".tgz"
else:
    spark_link = "https://archive.apache.org/dist/spark/spark-" + spark_version + "/spark-" + spark_version + \
                 "-bin-hadoop" + hadoop_version + ".tgz"
cuda_version = os.environ['notebook_cuda_version']
cuda_file_name = os.environ['notebook_cuda_file_name']
cudnn_version = os.environ['notebook_cudnn_version']
cudnn_file_name = os.environ['notebook_cudnn_file_name']
templates_dir = '/root/templates/'
files_dir = '/root/files/'
local_spark_path = '/opt/spark/'
jars_dir = '/opt/jars/'
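# pbdZMQ and caTools are pinned to environment-supplied versions; the other
# R packages install at whatever version the configured mirror provides.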
r_libs = ['R6', 'pbdZMQ={}'.format(os.environ['notebook_pbdzmq_version']), 'RCurl', 'reshape2',
          'caTools={}'.format(os.environ['notebook_catools_version']), 'rJava', 'ggplot2']
# The deeplearning image pins specific framework and CUDA versions,
# overriding the environment-supplied values above.
if os.environ['application'] == 'deeplearning':
    tensorflow_version = '1.4.0'
    cuda_version = '8.0'
    cuda_file_name = 'cuda_8.0.44_linux-run'
    cudnn_version = '6.0'
    cudnn_file_name = 'cudnn-8.0-linux-x64-v6.0.tgz'
def start_spark(os_user, master_ip, node):
    """Configure Spark standalone mode and start the master or slave service
    via systemd. Relies on the module-level connection `conn`. Idempotent:
    skipped when the marker file in ~/.ensure_dir already exists."""
    if not exists(conn, '/home/{0}/.ensure_dir/start_spark-{1}_ensured'.format(os_user, node)):
        if not exists(conn, '/opt/spark/conf/spark-env.sh'):
            conn.sudo('mv /opt/spark/conf/spark-env.sh.template /opt/spark/conf/spark-env.sh')
        conn.sudo('''echo "SPARK_MASTER_HOST='{}'" >> /opt/spark/conf/spark-env.sh'''.format(master_ip))
        # GPU images need the CUDA/cuDNN libraries on the executor library path
        if os.environ['application'] in ('tensor', 'tensor-rstudio'):
            conn.sudo('''echo "LD_LIBRARY_PATH=/opt/cudnn/lib64:/usr/local/cuda/lib64" >> /opt/spark/conf/spark-env.sh''')
        if os.environ['application'] == 'deeplearning':
            conn.sudo('''echo "LD_LIBRARY_PATH=/opt/cudnn/lib64:/usr/local/cuda/lib64:/usr/lib64/openmpi/lib" >> /opt/spark/conf/spark-env.sh''')
        if node == 'master':
            # The master must not start slaves over SSH: replace the
            # start-slaves.sh call in start-all.sh with a local start-slave.sh.
            conn.sudo("sed -i '/start-slaves.sh/d' /opt/spark/sbin/start-all.sh")
            conn.sudo('''echo '"${}/sbin"/start-slave.sh spark://{}:7077' >> /opt/spark/sbin/start-all.sh'''
                      .format('{SPARK_HOME}', master_ip))
            conn.put('/root/templates/spark-master.service', '/tmp/spark-master.service')
            conn.sudo('mv /tmp/spark-master.service /etc/systemd/system/spark-master.service')
            conn.sudo('systemctl daemon-reload')
            conn.sudo('systemctl enable spark-master.service')
            conn.sudo('systemctl start spark-master.service')
        if node == 'slave':
            # Point the slave unit at the master before installing it
            with open('/root/templates/spark-slave.service', 'r') as f:
                text = f.read()
            text = text.replace('MASTER', 'spark://{}:7077'.format(master_ip))
            with open('/root/templates/spark-slave.service', 'w') as f:
                f.write(text)
            conn.put('/root/templates/spark-slave.service', '/tmp/spark-slave.service')
            conn.sudo('mv /tmp/spark-slave.service /etc/systemd/system/spark-slave.service')
            conn.sudo('systemctl daemon-reload')
            conn.sudo('systemctl enable spark-slave.service')
            conn.sudo('systemctl start spark-slave.service')
        conn.sudo('touch /home/{0}/.ensure_dir/start_spark-{1}_ensured'.format(os_user, node))
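
# start_spark() is invoked at the bottom of the script: with args.hostname and
# node='master' on the master, with args.master_ip and node='slave' on workers.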
##############
# Run script #
##############
if __name__ == "__main__":
    print("Configure connections")
    # conn is a module-level Fabric connection, shared with start_spark()
    conn = datalab.fab.init_datalab_connection(args.hostname, args.os_user, args.keyfile)
    # PREPARE .ensure MARKER DIRECTORY
    print("Prepare .ensure directory")
    try:
        if not exists(conn, '/home/' + args.os_user + '/.ensure_dir'):
            conn.sudo('mkdir /home/' + args.os_user + '/.ensure_dir')
    except Exception:
        sys.exit(1)
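    # The ensure_* helpers below follow the same marker-file idempotency
    # pattern as start_spark(): work already recorded under ~/.ensure_dir
    # is skipped on re-runs.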
    # INSTALL LANGUAGES
    print("Install Java")
    ensure_jre_jdk(args.os_user)
    if os.environ['application'] in ('jupyter', 'zeppelin'):
        print("Install Scala")
        ensure_scala(scala_link, args.scala_version, args.os_user)
    if (os.environ['application'] in ('jupyter', 'zeppelin')
            and os.environ['notebook_r_enabled'] == 'true') \
            or os.environ['application'] in ('rstudio', 'tensor-rstudio'):
        print("Install R")
        ensure_r(args.os_user, r_libs, args.region, args.r_mirror)
    print("Install Python 3 modules")
    ensure_python3_libraries(args.os_user)
    if os.environ['application'] == 'zeppelin':
        print("Install specific Python 3 version")
        ensure_python3_specific_version(python3_version, args.os_user)
    # INSTALL PYTHON IN VIRTUALENV
    print("Configure Python Virtualenv")
    ensure_python_venv(python_venv_version)
    # INSTALL SPARK AND CLOUD STORAGE JARS FOR SPARK
    print("Install Spark")
    ensure_local_spark(args.os_user, spark_link, spark_version, hadoop_version, local_spark_path)
    print("Install storage jars")
    ensure_local_jars(args.os_user, jars_dir)
    print("Configure local Spark")
    configure_local_spark(jars_dir, templates_dir, '')
    # INSTALL TENSORFLOW AND OTHER DEEP LEARNING LIBRARIES
    if os.environ['application'] in ('tensor', 'tensor-rstudio', 'deeplearning'):
        print("Install TensorFlow")
        install_tensor(args.os_user, cuda_version, cuda_file_name,
                       cudnn_version, cudnn_file_name, tensorflow_version,
                       templates_dir, nvidia_version)
        print("Install Theano")
        install_theano(args.os_user, theano_version)
        print("Install Keras")
        install_keras(args.os_user, keras_version)
    # INSTALL DEEP LEARNING FRAMEWORKS
    if os.environ['application'] == 'deeplearning':
        print("Install Caffe2")
        install_caffe2(args.os_user, caffe2_version, cmake_version)
        # print("Install Torch")
        # install_torch(args.os_user)
        print("Install CNTK Python library")
        install_cntk(args.os_user, cntk_version)
        print("Install MXNet")
        install_mxnet(args.os_user, mxnet_version)
    # START SPARK CLUSTER
    if args.node_type == 'master':
        print("Starting Spark master")
        start_spark(args.os_user, args.hostname, node='master')
    elif args.node_type == 'slave':
        print("Starting Spark slave")
        start_spark(args.os_user, args.master_ip, node='slave')
    # INSTALL OPTIONAL PACKAGES
    if os.environ['application'] in ('jupyter', 'zeppelin', 'tensor', 'deeplearning'):
        print("Install additional Python packages")
        ensure_additional_python_libs(args.os_user)
        print("Install matplotlib")
        ensure_matplot(args.os_user)
    if os.environ['application'] == 'jupyter':
        print("Install SBT")
        ensure_sbt(args.os_user)
        print("Install Breeze")
        add_breeze_library_local(args.os_user)
    if os.environ['application'] == 'zeppelin' and os.environ['notebook_r_enabled'] == 'true':
        print("Install additional R packages")
        install_r_packages(args.os_user)
    # INSTALL LIVY
    if not exists(conn, '/home/{0}/.ensure_dir/livy_ensured'.format(args.os_user)):
        conn.sudo('wget -P /tmp/ --user={} --password={} '
                  '{}/repository/packages/livy.tar.gz --no-check-certificate'
                  .format(os.environ['conf_repository_user'],
                          os.environ['conf_repository_pass'],
                          os.environ['conf_repository_address']))
        conn.sudo('tar -xzvf /tmp/livy.tar.gz -C /tmp/')
        conn.sudo('mv /tmp/incubator-livy /opt/livy')
        conn.sudo('mkdir /var/log/livy')
        conn.put('/root/templates/livy-env.sh', '/tmp/livy-env.sh')
        # Point Livy at the virtualenv interpreter instead of the system python3
        conn.sudo("sed -i 's|=python3|={}|' /tmp/livy-env.sh".format(python_venv_path))
        conn.sudo('mv /tmp/livy-env.sh /opt/livy/conf/livy-env.sh')
        conn.sudo('chown -R -L {0}:{0} /opt/livy/'.format(args.os_user))
        conn.sudo('chown -R {0}:{0} /var/log/livy'.format(args.os_user))
        conn.put('/root/templates/livy.service', '/tmp/livy.service')
        conn.sudo("sed -i 's|OS_USER|{}|' /tmp/livy.service".format(args.os_user))
        conn.sudo('mv /tmp/livy.service /etc/systemd/system/livy.service')
        conn.sudo('systemctl daemon-reload')
        conn.sudo('systemctl enable livy.service')
        conn.sudo('systemctl start livy.service')
        conn.sudo('touch /home/{0}/.ensure_dir/livy_ensured'.format(args.os_user))
    conn.close()