#!/bin/bash
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Load the cluster settings (REGION, INSTANCES_TYPE, KEYPAIR_NAME, ...).
# The explicit ./ makes bash read the file in the working directory - the same
# file set_config rewrites - instead of searching PATH first. Abort when it
# cannot be loaded so that no AWS resources are created from empty settings.
source ./systemds_cluster.config || {
  echo "Cannot load ./systemds_cluster.config" >&2
  exit 1
}
#######################################
# Look up an EC2 instance type in aws_ec2_table.csv (read from the working
# directory) and derive the Spark executor sizing from the matching row.
# Globals:
#   SPARK_EXECUTOR_MEMORY (written) - 60% of the row's memory column times
#       1000, truncated to a whole number. Only 60% is used because of
#       yarn.scheduler.maximum-allocation-mb. Presumably the column is in GiB,
#       making the result MB - confirm against the CSV.
#   SPARK_EXECUTOR_CORES  (written) - the row's vCPUs column
# Arguments:
#   $1 - instance type, compared with the second CSV column
# Returns:
#   0 if a matching row was found, 1 otherwise (globals are left untouched
#   when nothing matches)
#######################################
get_memory_and_cores () {
  local _family name memory vCPUs _rest
  local scaled_memory
  local status=1

  # "|| [[ -n $name ]]" also processes a final row that lacks a trailing newline.
  while IFS="," read -r _family name memory vCPUs _rest || [[ -n "$name" ]]; do
    if [[ "$1" == "$name" ]]; then
      scaled_memory=$(bc <<< "${memory}*0.60*1000") || return 1
      # bc keeps the operands' decimal places (e.g. 9600.00); drop the fraction
      # so the value can be used as a whole-number memory size.
      SPARK_EXECUTOR_MEMORY=${scaled_memory%.*}
      SPARK_EXECUTOR_CORES=$vCPUs
      status=0
    fi
  done < aws_ec2_table.csv

  return "$status"
}
#######################################
# Overwrite the value of a key in systemds_cluster.config (working directory).
# On every line containing "<key>=" (optional spaces around '='), everything
# after the '=' and its trailing spaces is replaced by the new value.
# Arguments:
#   $1 - key; inserted into a sed basic regular expression as-is and not
#        anchored to the start of the line
#   $2 - new value, written literally
# Returns:
#   non-zero if sed could not rewrite the file
#######################################
function set_config(){
  local escaped_value
  # '\', '/' and '&' are special in a sed replacement; escape them so the
  # value ends up in the file exactly as given.
  escaped_value=$(printf '%s' "$2" | sed 's|[\\/&]|\\&|g')
  # "-i.bak" (suffix attached) is accepted by both GNU and BSD sed, whereas
  # the BSD-only form `-i ""` makes GNU sed treat "" as the script.
  sed -i.bak "s/\($1 *= *\).*/\1${escaped_value}/" systemds_cluster.config \
    && rm -f systemds_cluster.config.bak
}
# Derive the executor sizing from the instance type and persist the Spark
# settings in systemds_cluster.config.
# Both values are cleared first: the sourced config may already define them
# (set_config writes these keys), which would hide a failed lookup and end up
# appending "MB" to an old value.
SPARK_EXECUTOR_MEMORY=""
SPARK_EXECUTOR_CORES=""
get_memory_and_cores "$INSTANCES_TYPE"
if [[ -z "$SPARK_EXECUTOR_MEMORY" || -z "$SPARK_EXECUTOR_CORES" ]]; then
  echo "Could not determine memory/cores for instance type '${INSTANCES_TYPE}' from aws_ec2_table.csv" >&2
  exit 1
fi
# The bc result carries decimals (e.g. 9600.00); Spark memory sizes must be
# whole numbers, so strip the fraction before adding the unit.
SPARK_EXECUTOR_MEMORY="${SPARK_EXECUTOR_MEMORY%.*}MB"
set_config "SPARK_NUM_EXECUTORS" "$CORE_INSTANCES_COUNT"
set_config "SPARK_EXECUTOR_CORES" "$SPARK_EXECUTOR_CORES"
set_config "SPARK_EXECUTOR_MEMORY" "$SPARK_EXECUTOR_MEMORY"
set_config "SPARK_DRIVER_MEMORY" "1G"
#Create systemDS bucket
# Outside us-east-1, S3 rejects CreateBucket unless a LocationConstraint
# matching the region is supplied.
create_bucket_args=(--region "$REGION")
if [[ "$REGION" != "us-east-1" ]]; then
  create_bucket_args+=(--create-bucket-configuration "LocationConstraint=${REGION}")
fi
# Output and errors are discarded on purpose - presumably so that re-running
# the script with already existing buckets stays quiet.
aws s3api create-bucket --bucket system-ds-bucket "${create_bucket_args[@]}" &> /dev/null
aws s3api create-bucket --bucket system-ds-logs-bucket "${create_bucket_args[@]}" &> /dev/null
# Upload Jar and scripts to s3
# Everything is excluded first, then only DML scripts, config XML files and
# the SystemDS jar are re-included.
aws s3 sync "$SYSTEMDS_TARGET_DIRECTORY" s3://system-ds-bucket/ --exclude "*" --include "*.dml" --include "*config.xml" --include "*DS.jar*"
# Create keypair
# The private key is written to <KEYPAIR_NAME>.pem in the working directory;
# creation is skipped when that file already exists.
if [[ ! -f "${KEYPAIR_NAME}.pem" ]]; then
  # Key pairs are regional, so create the key in the region the cluster uses.
  if aws ec2 create-key-pair --key-name "$KEYPAIR_NAME" --region "$REGION" \
      --query "KeyMaterial" --output text > "${KEYPAIR_NAME}.pem"; then
    # Owner read-only: ssh refuses private keys readable by others, and the
    # file never needs to be executable.
    chmod 400 "${KEYPAIR_NAME}.pem"
    echo "${KEYPAIR_NAME}.pem private key created!"
  else
    # Do not leave an empty .pem behind: it would make later runs skip key
    # creation and break the ssh step.
    rm -f -- "${KEYPAIR_NAME}.pem"
    echo "Could not create key pair '${KEYPAIR_NAME}'; no ${KEYPAIR_NAME}.pem was written." >&2
  fi
fi
#Create the cluster
# The JSON arguments are assembled in variables with every expansion quoted,
# so each one reaches the AWS CLI as a single argument.
ec2_attributes='{"KeyName":"'"${KEYPAIR_NAME}"'",
  "InstanceProfile":"EMR_EC2_DefaultRole"}'
instance_groups='[{"InstanceCount":'"${MASTER_INSTANCES_COUNT}"',
  "InstanceGroupType":"MASTER",
  "InstanceType":"'"${INSTANCES_TYPE}"'",
  "Name":"Master Instance Group"},
  {"InstanceCount":'"${CORE_INSTANCES_COUNT}"',
  "InstanceGroupType":"CORE",
  "InstanceType":"'"${INSTANCES_TYPE}"'",
  "Name":"Core Instance Group"}]'
spark_configuration='[{"Classification":"spark","Properties":{"maximizeResourceAllocation": "true"}}]'

# The log URI names system-ds-logs-bucket, the log bucket this script creates.
CLUSTER_INFO=$(aws emr create-cluster \
  --applications Name=Ganglia Name=Spark \
  --ec2-attributes "$ec2_attributes" \
  --service-role EMR_DefaultRole \
  --enable-debugging \
  --release-label "$EMR_VERSION" \
  --log-uri "s3n://system-ds-logs-bucket/" \
  --name "SystemDS cluster" \
  --instance-groups "$instance_groups" \
  --configurations "$spark_configuration" \
  --scale-down-behavior TERMINATE_AT_TASK_COMPLETION \
  --region "$REGION")

# "// empty" yields an empty string when the response has no ClusterId
# (e.g. the create call failed), instead of the literal text "null".
CLUSTER_ID=$(printf '%s' "$CLUSTER_INFO" | jq -r '.ClusterId // empty')
if [[ -z "$CLUSTER_ID" ]]; then
  echo "Cluster creation failed: no ClusterId was returned." >&2
  exit 1
fi
echo "Cluster successfully initialized. Save your ClusterID: ${CLUSTER_ID}"
# Persist the id in systemds_cluster.config.
set_config "CLUSTER_ID" "$CLUSTER_ID"
#Add your ip to the security group
# Ask ipecho.net for this machine's public IP. -f turns HTTP errors into a
# failure, -sS hides the progress meter but still reports errors.
ip_address=$(curl -fsS ipecho.net/plain)
# Output and errors of both calls are discarded on purpose - presumably
# because the group or the rule may already exist.
aws ec2 create-security-group --group-name ElasticMapReduce-master --description "info" --region "$REGION" &> /dev/null
if [[ -n "$ip_address" ]]; then
  # Opens SSH (tcp/22) for the /24 network containing the detected address.
  aws ec2 authorize-security-group-ingress \
    --group-name ElasticMapReduce-master \
    --protocol tcp \
    --port 22 \
    --cidr "${ip_address}/24" \
    --region "$REGION" &> /dev/null
else
  # Without an address the CIDR would be the invalid value "/24".
  echo "Could not determine the public IP address; no SSH ingress rule was added." >&2
fi
# Wait for cluster to start
echo "Waiting for cluster running state"
# Stop here when the waiter fails, rather than attempting the ssh step and
# reporting a finished spin-up for a cluster that never came up.
if ! aws emr wait cluster-running --cluster-id "$CLUSTER_ID" --region "$REGION"; then
  echo "Cluster ${CLUSTER_ID} did not reach the running state." >&2
  exit 1
fi
echo "Cluster info:"
# Public DNS name of the master node; assigned and exported separately so the
# assignment does not mask the pipeline's exit status.
CLUSTER_URL=$(aws emr describe-cluster --cluster-id "$CLUSTER_ID" --region "$REGION" | jq -r .Cluster.MasterPublicDnsName)
export CLUSTER_URL
# On the master node, copy the SystemDS jar from the bucket into the ssh
# user's current directory.
aws emr ssh --cluster-id "$CLUSTER_ID" --region "$REGION" --key-pair-file "${KEYPAIR_NAME}.pem" --command 'aws s3 cp s3://system-ds-bucket/target . --recursive --exclude "*" --include "*DS.jar*"'
echo "Spinup finished."