blob: 7ffe03291254463c61f9aa7dc7af75e7a44f05f2 [file] [log] [blame]
#!/usr/bin/env bash
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# exit in case of error or unbound var
set -euo pipefail
# get file directory to allow finding the file with the utils
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
source cluster.env
source "$SCRIPT_DIR/cluster_utils.sh"
if [ -n "$TARGET_SUBNET" ]; then
SUBNET=$TARGET_SUBNET
else
#Get the first available subnet in the default VPC of the configured region
SUBNET=$(aws ec2 describe-subnets --region $REGION \
--filter "Name=defaultForAz,Values=true" --query "Subnets[0].SubnetId" --output text)
fi
# generate the step definition into STEP variable
generate_step_definition
echo -e "\nLaunching EMR cluster via AWS CLI and adding a step to run $SYSTEMDS_PROGRAM with SystemDS"
CLUSTER_INFO=$(aws emr create-cluster \
--applications Name=AmazonCloudWatchAgent Name=Spark \
--ec2-attributes '{
"KeyName":"'${KEYPAIR_NAME}'",
"InstanceProfile":"EMR_EC2_DefaultRole",
'"$( [ -n "$SECURITY_GROUP_ID'" ] && echo '"AdditionalMasterSecurityGroups": ["'${SECURITY_GROUP_ID}'"],' )"'
"SubnetId": "'${SUBNET}'"
}'\
--service-role EMR_DefaultRole \
--enable-debugging \
--release-label $EMR_VERSION \
--log-uri $LOG_URI \
--name "SystemDS cluster" \
--instance-groups file://$INSTANCE_CONFIGS \
--configurations file://$SPARK_CONFIGS \
--scale-down-behavior TERMINATE_AT_TASK_COMPLETION \
--no-termination-protected \
$( [ -n "$STEP" ] && echo "--steps $STEP" ) \
$( [ "$AUTO_TERMINATION_TIME" = 0 ] && echo "--auto-terminate" ) \
$( [ "$AUTO_TERMINATION_TIME" -gt 0 ] && echo "--auto-termination-policy IdleTimeout=$AUTO_TERMINATION_TIME" ) \
--region $REGION)
CLUSTER_ID=$(echo $CLUSTER_INFO | jq .ClusterId | tr -d '"')
echo "Cluster successfully initialized with cluster ID: "${CLUSTER_ID}
set_config "CLUSTER_ID" $CLUSTER_ID
# Wait for cluster to start
echo -e "\nWaiting for cluster to enter running state..."
aws emr wait cluster-running --cluster-id $CLUSTER_ID --region $REGION
CLUSTER_URL=$(aws emr describe-cluster --cluster-id $CLUSTER_ID --region $REGION | jq .Cluster.MasterPublicDnsName | tr -d '"')
set_config "CLUSTER_URL" "$CLUSTER_URL"
echo "...launching process has finished and the cluster is not in state running."
if [ "$AUTO_TERMINATION_TIME" = 0 ]; then
echo -e "\nImmediate automatic termination was enabled so the cluster will terminate directly after the step completion"
elif [ "$AUTO_TERMINATION_TIME" -gt 0 ]; then
echo -e "\nDelayed automatic termination was enabled so the cluster will terminate $AUTO_TERMINATION_TIME
seconds after entering idle state"
else
echo -e "\nAutomatic termination was not enabled so you should manually terminate the cluster"
fi