#!/usr/bin/python3
# *****************************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# ******************************************************************************
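# Updates spark-defaults.conf on a notebook node over SSH: stamps every
# spark-defaults.conf found under /opt, /etc and /usr/lib with an
# "# Updated by DATALAB" marker, and, when the spark_configurations
# environment variable is set, merges custom properties into the
# cluster's spark-defaults.conf.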
import argparse
import ast
import datalab.fab
import os
import sys
import time
from datalab.fab import *
from datalab.notebook_lib import *
from fabric import *

parser = argparse.ArgumentParser()
parser.add_argument('--hostname', type=str, default='')
parser.add_argument('--keyfile', type=str, default='')
parser.add_argument('--os_user', type=str, default='')
parser.add_argument('--spark_conf', type=str, default='')
parser.add_argument('--cluster_name', type=str, default='')
args = parser.parse_args()
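
# Example invocation (illustrative values):
#   python3 <this_script>.py --hostname 10.0.1.5 --keyfile /root/keys/key.pem \
#       --os_user datalab-user --cluster_name spark-cluster-1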


def update_spark_defaults_conf(spark_conf):
    try:
        timestamp = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.gmtime())
        configs = conn.sudo('find /opt/ /etc/ /usr/lib/ -name spark-defaults.conf -type f').stdout.split('\n')
        for conf in filter(None, configs):
            # Drop any stale marker line before stamping a fresh one.
            conn.sudo('''sed -i '/^# Updated/d' {0}'''.format(conf))
            # bash -c keeps the >> redirection under sudo rather than the SSH user.
            conn.sudo('''bash -c 'echo "# Updated by DATALAB at {0}" >> {1}' '''.format(timestamp, conf))
            # Append the caller-supplied properties; assumes spark_conf is a
            # comma-separated list of "property value" pairs.
            for prop in filter(None, spark_conf.split(',')):
                conn.sudo('''bash -c 'echo "{0}" >> {1}' '''.format(prop.strip(), conf))
    except Exception as err:
        print('Error: {0}'.format(err))
        sys.exit(1)


def add_custom_spark_properties(cluster_name):
    try:
        if os.path.exists('/opt/{0}'.format(cluster_name)):
            datalab_header = conn.sudo('cat /tmp/{0}/notebook_spark-defaults_local.conf | grep "^#"'.format(cluster_name)).stdout
            spark_configurations = ast.literal_eval(os.environ['spark_configurations'])
            # Desired overrides from the spark-defaults classification.
            custom_properties = dict()
            for config in spark_configurations:
                if config['Classification'] == 'spark-defaults':
                    custom_properties.update(config['Properties'])
            spark_defaults = conn.sudo('cat /opt/{0}/spark/conf/spark-defaults.conf'.format(cluster_name)).stdout
            new_spark_defaults = list()
            # Keep existing non-comment lines, overriding the value of any
            # property that also appears in the custom configuration.
            for param in filter(None, spark_defaults.split('\n')):
                name = param.split(' ')[0]
                if name.startswith('#'):
                    continue
                if name in custom_properties:
                    param = name + ' ' + custom_properties.pop(name)
                new_spark_defaults.append(param.rstrip())
            # Add custom properties that were not already present in the file.
            for name, value in custom_properties.items():
                new_spark_defaults.append('{0} {1}'.format(name, value))
            conn.sudo('''bash -c "echo '{0}' > /opt/{1}/spark/conf/spark-defaults.conf"'''.format(datalab_header, cluster_name))
            for prop in new_spark_defaults:
                conn.sudo('''bash -c 'echo "{0}" >> /opt/{1}/spark/conf/spark-defaults.conf' '''.format(prop, cluster_name))
            # Strip blank lines left over from the merge.
            conn.sudo(r'sed -i "/^\s*$/d" /opt/{0}/spark/conf/spark-defaults.conf'.format(cluster_name))
    except Exception as err:
        print('Error: {0}'.format(err))
        sys.exit(1)
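
# Expected shape of the spark_configurations environment variable: a Python
# literal (parsed with ast.literal_eval) such as
#   [{'Classification': 'spark-defaults',
#     'Properties': {'spark.executor.memory': '4g'}}]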


if __name__ == "__main__":
    print('Configure connections')
    # Module-level connection shared by the helper functions above.
    conn = datalab.fab.init_datalab_connection(args.hostname, args.os_user, args.keyfile)
    if args.spark_conf != '':
        update_spark_defaults_conf(args.spark_conf)
    update_spark_jars()
    if 'spark_configurations' in os.environ:
        add_custom_spark_properties(args.cluster_name)
    conn.close()