blob: bc452fc3b1cee21349f849f21832f90077a42668 [file] [log] [blame]
#!/usr/bin/python
# *****************************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# ******************************************************************************
import boto3
from botocore.client import Config
from fabric.api import *
import argparse
import os
import sys
import time
from fabric.api import lcd
from fabric.contrib.files import exists
from fabvenv import virtualenv
from dlab.notebook_lib import *
from dlab.actions_lib import *
from dlab.fab import *
from dlab.common_lib import *
# Command-line interface. Every option is an optional string; the table keeps
# the (flag, default) pairs in the order they are registered on the parser.
_CLI_OPTIONS = (
    ('--bucket', ''),
    ('--cluster_name', ''),
    ('--dry_run', 'false'),
    ('--emr_version', ''),
    ('--spark_version', ''),
    ('--scala_version', ''),
    ('--hadoop_version', ''),
    ('--region', ''),
    ('--excluded_lines', ''),
    ('--project_name', ''),
    ('--os_user', ''),
    ('--edge_hostname', ''),
    ('--proxy_port', ''),
    ('--livy_version', ''),
    ('--multiple_clusters', ''),
    ('--pip_mirror', ''),
    ('--numpy_version', ''),
    ('--application', ''),
    ('--r_enabled', ''),
)

parser = argparse.ArgumentParser()
for _flag, _default in _CLI_OPTIONS:
    parser.add_argument(_flag, type=str, default=_default)

# Parsed once at import time; the rest of the script reads options from `args`.
args = parser.parse_args()
# Filesystem locations derived from the parsed command-line options.
# EMR jars live under the EMR version directory.
emr_dir = '/opt/{}/jars/'.format(args.emr_version)
# Per-user Jupyter kernels directory in the OS user's home.
kernels_dir = '/home/{}/.local/share/jupyter/kernels/'.format(args.os_user)
# Spark and YARN configuration directories are specific to one cluster.
spark_dir = '/opt/{}/{}/spark/'.format(args.emr_version, args.cluster_name)
yarn_dir = '/opt/{}/{}/conf/'.format(args.emr_version, args.cluster_name)
# Pick the S3 endpoint URL that matches the requested AWS region.
if args.region == 'us-east-1':
    # us-east-1 uses the global, region-less S3 host name.
    endpoint_url = 'https://s3.amazonaws.com'
elif args.region.startswith('cn-'):
    # All AWS China regions (cn-north-1, cn-northwest-1, ...) are served from
    # the separate amazonaws.com.cn domain, not only cn-north-1.
    endpoint_url = 'https://s3.{}.amazonaws.com.cn'.format(args.region)
else:
    # Every other region keeps the dash-style regional host name.
    endpoint_url = 'https://s3-{}.amazonaws.com'.format(args.region)
def install_remote_livy(args):
    """Download and unpack a Livy server for one cluster on this host.

    Stops the zeppelin-notebook service, fetches the livy-server zip archive
    for ``args.livy_version`` into /opt/<emr_version>/<cluster_name>/,
    unpacks it, renames the unpacked directory to ``livy/`` and hands
    ownership of the Livy directories (and /opt/zeppelin/) to ``args.os_user``.

    All steps are shell commands run through ``local`` (provided by the
    file's star imports; presumably Fabric's ``local`` -- confirm).

    :param args: parsed CLI namespace; reads ``os_user``, ``emr_version``,
                 ``cluster_name`` and ``livy_version``.
    """
    owner = args.os_user + ':' + args.os_user
    cluster_root = '/opt/' + args.emr_version + '/' + args.cluster_name + '/'
    archive_name = 'livy-server-' + args.livy_version + '.zip'
    archive_path = cluster_root + archive_name
    unpacked_dir = cluster_root + 'livy-server-' + args.livy_version + '/'
    livy_path = cluster_root + 'livy/'

    local('sudo chown ' + owner + ' -R /opt/zeppelin/')
    local('sudo service zeppelin-notebook stop')

    # Fetch the archive next to the cluster's other files, then unpack it there.
    local('sudo -i wget http://archive.cloudera.com/beta/livy/' + archive_name + ' -O ' + archive_path)
    local('sudo unzip ' + archive_path + ' -d ' + cluster_root)

    # Give the versioned directory a stable name.
    local('sudo mv ' + unpacked_dir + ' ' + livy_path)

    # livy_path already ends with '/', so the logs path contains '//';
    # kept as-is so the executed command is unchanged.
    local('sudo mkdir -p ' + livy_path + '/logs')
    local('sudo mkdir -p /var/run/livy')
    local('sudo chown ' + owner + ' -R /var/run/livy')
    local('sudo chown ' + owner + ' -R ' + livy_path)
if __name__ == "__main__":
    # Script entry point: either print the CLI help (dry run) or run the
    # configuration steps for the Zeppelin/EMR integration in order.
    #
    # NOTE(review): the indentation of this block was reconstructed from
    # flattened source. It assumes only jars() is conditional on prepare()
    # returning False and that every later step always runs -- confirm.
    if args.dry_run != 'true':
        prepare_result = prepare(emr_dir, yarn_dir)
        # Explicit comparison kept: only a value equal to False triggers jars().
        if prepare_result == False:
            jars(args, emr_dir)

        yarn(args, yarn_dir)
        install_emr_spark(args)
        spark_defaults(args)
        configuring_notebook(args.emr_version)

        # A remote Livy server is installed only in multiple-clusters mode.
        if args.multiple_clusters == 'true':
            install_remote_livy(args)

        installing_python(
            args.region, args.bucket, args.project_name, args.cluster_name,
            args.application, args.pip_mirror, args.numpy_version)
        configure_zeppelin_emr_interpreter(
            args.emr_version, args.cluster_name, args.region, spark_dir,
            args.os_user, yarn_dir, args.bucket, args.project_name,
            endpoint_url, args.multiple_clusters)
        update_zeppelin_interpreters(args.multiple_clusters, args.r_enabled)
    else:
        # Dry run: show the available options and change nothing.
        parser.print_help()