blob: b6b736091d4d5be431c8da29cdb98360a1df2d29 [file] [log] [blame]
#!/bin/bash
# Make sure we are in the mesos-ec2 directory. The node list files and helper
# scripts used by the rest of this script are referenced by relative path, so
# continuing from any other directory would operate on the wrong files.
cd /root/mesos-ec2 || {
  echo "Cannot cd to /root/mesos-ec2" >&2
  exit 1
}

# Set hostname based on EC2 private DNS name, so that it is set correctly
# even if the instance is restarted with a different private DNS name
PRIVATE_DNS=$(wget -q -O - http://instance-data.ec2.internal/latest/meta-data/local-hostname)
if [[ -n "$PRIVATE_DNS" ]]; then
  hostname "$PRIVATE_DNS"
  echo "$PRIVATE_DNS" > /etc/hostname
else
  # The metadata lookup returned nothing: do not overwrite /etc/hostname with
  # an empty line, and fall back to the name the machine already has so that
  # HOSTNAME below is never empty.
  echo "Warning: could not fetch the EC2 private DNS name; keeping current hostname" >&2
  PRIVATE_DNS=$(hostname)
fi
export HOSTNAME=$PRIVATE_DNS # Fix the bash built-in hostname variable too

echo "Setting up Mesos master on $(hostname)..."
# Read command-line arguments
OS_NAME=$1
DOWNLOAD_METHOD=$2
BRANCH=$3
SWAP_MB=$4

# Node lists are read from files in the current directory:
#   masters - all masters (OTHER_MASTERS is the same list minus its first line)
#   slaves  - all slaves
#   zoo     - ZooKeeper servers, or text containing NONE when there are none
MASTERS_FILE="masters"
MASTERS=$(cat "$MASTERS_FILE")
OTHER_MASTERS=$(sed '1d' "$MASTERS_FILE")
SLAVES=$(cat slaves)
ZOOS=$(cat zoo)

# Count entries by splitting the lists into words instead of counting newline
# characters: a final line without a trailing newline is still counted, and
# blank lines are not.
master_nodes=($MASTERS)
NUM_MASTERS=${#master_nodes[@]}

if [[ $ZOOS = *NONE* ]]; then
  NUM_ZOOS=0
  ZOOS=""
else
  zoo_nodes=($ZOOS)
  NUM_ZOOS=${#zoo_nodes[@]}
fi
# Helper scripts that get used for/while running Mesos. SCRIPTS holds the
# names separated by newlines (command substitution drops the final newline).
script_names=(
  copy-dir
  create-swap
  mesos-daemon
  redeploy-mesos
  setup-slave
  ssh-no-keychecking
  start-hypertable
  start-mesos
  stop-hypertable
  stop-mesos
)
SCRIPTS=$(printf '%s\n' "${script_names[@]}")

# Root directories of the ephemeral and persistent HDFS installs.
EPHEMERAL_HDFS="/root/ephemeral-hdfs"
PERSISTENT_HDFS="/root/persistent-hdfs"

#TODO(*): update config scripts to have conditionals for handling different
# platforms
JAVA_HOME="/usr/lib/jvm/java-1.6.0-openjdk.x86_64"

# Options given to every ssh/scp call in this script: no host key prompt and
# a 5 second connect timeout.
SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5"
# Refuse to run unless standard input is a terminal. The file-descriptor test
# is used instead of comparing the text printed by `tty`, so the check does
# not depend on the exact wording of that message.
if ! [[ -t 0 ]]; then
  echo "Expecting a tty or pty! (use the ssh -t option)." >&2
  exit 1
fi
# Make every helper script listed in SCRIPTS executable for its owner.
echo "Setting executable permissions on scripts..."
for script in $SCRIPTS; do
  chmod u+x "$script"
done

# Run the same node setup on this machine that the other nodes get later.
# SWAP_MB is left unquoted on purpose: when it is empty no argument is passed.
echo "Running setup-slave on master to mount filesystems, etc..."
./setup-slave $SWAP_MB
# Open one throw-away ssh session (remote command: `echo -n`, which prints
# nothing) to every master, then to localhost and to this machine's own
# hostname. Sessions run in the background; `wait` blocks until all are done.
echo "SSH'ing to master machine(s) to approve key(s)..."
for m in $MASTERS; do
  echo $m
  ssh $SSH_OPTS $m echo -n &
  sleep 0.3
done
for self in localhost $(hostname); do
  ssh $SSH_OPTS $self echo -n &
done
wait
# Connect to every ZooKeeper server once and write its id (1, 2, 3, ... in
# list order) to /tmp/zookeeper/myid on that server.
if [[ $NUM_ZOOS != 0 ]]; then
  echo "SSH'ing to ZooKeeper server(s) to approve keys..."
  zid=1
  # The server list is held in ZOOS; this script never assigns a variable
  # named ZOO, so iterating over that would do nothing.
  for zoo in $ZOOS; do
    echo "$zoo"
    ssh $SSH_OPTS "$zoo" "echo -n; mkdir -p /tmp/zookeeper; echo $zid > /tmp/zookeeper/myid" &
    zid=$((zid + 1))
    sleep 0.3
  done
  # Let the background sessions finish so every myid is written before the
  # script moves on.
  wait
fi
# Try to SSH to each cluster node to approve their key. Since some nodes may
# be slow in starting, we retry failed nodes up to 3 times (4 attempts in
# total), sleeping 15 seconds between rounds.
# The ZooKeeper list is held in ZOOS (this script never assigns ZOO).
TODO="$SLAVES $ZOOS $OTHER_MASTERS" # List of nodes to try (initially all)
TRIES="0"                           # Number of rounds we've tried so far
echo "SSH'ing to other cluster nodes to approve keys..."
while [ "e$TODO" != "e" ] && [ $TRIES -lt 4 ]; do
  NEW_TODO= # Nodes that failed in this round
  for slave in $TODO; do
    echo "$slave"
    if ! ssh $SSH_OPTS "$slave" echo -n; then
      NEW_TODO="$NEW_TODO $slave"
    fi
  done
  TRIES=$((TRIES + 1))
  if [ "e$NEW_TODO" != "e" ] && [ $TRIES -lt 4 ]; then
    sleep 15
    TODO="$NEW_TODO"
    echo "Re-attempting SSH to cluster nodes to approve keys..."
  else
    break
  fi
done
# Copy /root/mesos-ec2 and root's private ssh key to every other node
# (slaves, ZooKeeper servers, remaining masters). Transfers run in the
# background, started 0.3 seconds apart; `wait` blocks until all have ended.
# The ZooKeeper list is held in ZOOS (this script never assigns ZOO).
echo "RSYNC'ing /root/mesos-ec2 to other cluster nodes..."
for node in $SLAVES $ZOOS $OTHER_MASTERS; do
  echo "$node"
  rsync -e "ssh $SSH_OPTS" -az /root/mesos-ec2 "$node:/root" &
  scp $SSH_OPTS ~/.ssh/id_rsa "$node:.ssh" &
  sleep 0.3
done
wait
# Run mesos-ec2/setup-slave (with the requested swap size) on every other
# node in parallel, then wait for all of them to finish.
# The ZooKeeper list is held in ZOOS (this script never assigns ZOO).
echo "Running slave setup script on other cluster nodes..."
for node in $SLAVES $ZOOS $OTHER_MASTERS; do
  echo "$node"
  ssh -t $SSH_OPTS "root@$node" "mesos-ec2/setup-slave $SWAP_MB" &
  sleep 0.3
done
wait
# Push the conf directories of both HDFS installs to every other node in
# parallel, then wait for all transfers to finish.
# The ZooKeeper list is held in ZOOS (this script never assigns ZOO).
echo "RSYNC'ing HDFS config files to other cluster nodes..."
for node in $SLAVES $ZOOS $OTHER_MASTERS; do
  echo "$node"
  rsync -e "ssh $SSH_OPTS" -az "$EPHEMERAL_HDFS/conf" "$node:$EPHEMERAL_HDFS" &
  rsync -e "ssh $SSH_OPTS" -az "$PERSISTENT_HDFS/conf" "$node:$PERSISTENT_HDFS" &
  sleep 0.3
done
wait
# Optionally fetch the Mesos sources into /root/mesos. DOWNLOADED is set to 1
# only after a successful clone; the build step checks it.
DOWNLOADED=0
if [[ "$DOWNLOAD_METHOD" == "git" ]]; then
  # Point git's ssh command at a script that uses -o StrictHostKeyChecking=no
  # so it does not ask to accept keys
  export GIT_SSH=/root/mesos-ec2/ssh-no-keychecking
  REPOSITORY=git://github.com/mesos/mesos.git
  echo "Checking out Mesos from $REPOSITORY"
  rm -rf /root/mesos /root/mesos.tgz
  # The old tree has just been removed, so without a successful clone there
  # is nothing to build or deploy: stop here instead of running the checkout
  # and build in the wrong directory.
  if ! git clone "$REPOSITORY" /root/mesos; then
    echo "Failed to clone $REPOSITORY" >&2
    exit 1
  fi
  # Subshell keeps the working directory of this script unchanged. A failing
  # checkout stays non-fatal, as before (NOTE(review): presumably because the
  # requested branch may already exist locally after the clone - confirm).
  (cd /root/mesos && git checkout -b "$BRANCH" --track "origin/$BRANCH")
  DOWNLOADED=1
fi
# Build Mesos if we downloaded it, followed by Spark (only when /root/spark
# exists) and the Hadoop framework that lives inside the Mesos tree.
if [[ "$DOWNLOADED" = 1 ]]; then
  echo "Building Mesos..."
  pushd /root/mesos &> /dev/null
  ./configure.ubuntu-lucid-64
  make clean
  make
  popd &> /dev/null

  if [[ -d /root/spark ]]; then
    echo "Building Spark..."
    pushd /root/spark &> /dev/null
    # MESOS_HOME is set for this make invocation only.
    MESOS_HOME=/root/mesos make all native
    popd &> /dev/null
  fi

  echo "Building Hadoop framework..."
  pushd /root/mesos/frameworks/hadoop-0.20.2 &> /dev/null
  ant
  ant examples
  popd &> /dev/null
fi
# Install the config files shipped in this directory into the frameworks
# inside the Mesos tree.
echo "Setting up Hadoop framework config files..."
cp hadoop-framework-conf/* /root/mesos/frameworks/hadoop-0.20.2/conf

echo "Setting up haproxy+apache framework config files..."
cp haproxy+apache/* /root/mesos/frameworks/haproxy+apache

echo "Setting up Spark config files..."
# TODO: This currently overwrites whatever the user wrote there; on
# the other hand, we also don't want to leave an old file created by
# us because it would have the wrong hostname for HDFS etc
spark_conf=/root/spark/conf
mkdir -p $spark_conf
# java-opts points Spark at HDFS on this host, port 9000.
printf '%s\n' "-Dspark.dfs=hdfs://$HOSTNAME:9000" > $spark_conf/java-opts
chmod u+x $spark_conf/spark-env.sh

echo "Deploying Spark config files..."
/root/mesos-ec2/copy-dir $spark_conf

echo "Redeploying /root/mesos..."
./redeploy-mesos
echo "Setting up NFS..."
# /nfs is a symlink to /mnt/nfs. `-e` follows symlinks, so this branch also
# runs when /nfs is a dangling link; rm then removes it before it is
# recreated.
if [ ! -e /nfs ]; then
  mkdir -p /mnt/nfs
  rm -fr /nfs
  ln -s /mnt/nfs /nfs
fi

# Add the read-only export for /nfs only if /etc/exports has no /nfs entry
# yet. -q keeps the matching line out of the setup output, and any whitespace
# (space or tab) after the path counts as an existing entry.
if ! grep -q -e '^/nfs[[:space:]]' /etc/exports; then
  echo "/nfs 10.0.0.0/8(ro,async,no_subtree_check)" >> /etc/exports
fi

/sbin/service portmap start
/sbin/service nfs start

# Unexport and re-export everything in /etc/exports because, if we are
# restarting a stopped EC2 instance, we might have had an entry for /nfs in
# /etc/exports before we created /mnt/nfs.
exportfs -ua
exportfs -a
# On every slave, in parallel: create /nfs, start portmap and nfs, and mount
# this host's /nfs export there. The remote command is the same for all
# slaves, so it is built once.
echo "Mounting NFS on slaves..."
nfs_mount_cmd="mkdir -p /nfs; service portmap start; service nfs start; mount $HOSTNAME:/nfs /nfs"
for s in $SLAVES; do
  echo $s
  ssh -t $SSH_OPTS root@$s "$nfs_mount_cmd" &
  sleep 0.3
done
wait
# The ephemeral HDFS namenode is formatted on every run.
echo "Formatting ephemeral HDFS namenode..."
"$EPHEMERAL_HDFS"/bin/hadoop namenode -format

echo "Starting ephemeral HDFS..."
"$EPHEMERAL_HDFS"/bin/start-dfs.sh

# The persistent HDFS namenode is formatted only when its name directory does
# not exist yet.
[[ -e /vol/persistent-hdfs/dfs/name ]] || {
  echo "Formatting persistent HDFS namenode..."
  "$PERSISTENT_HDFS"/bin/hadoop namenode -format
}

echo "Starting persistent HDFS..."
"$PERSISTENT_HDFS"/bin/start-dfs.sh

sleep 1
# Start a ZooKeeper server on every node in ZOOS (in parallel), wait for the
# ssh sessions to return, then pause 5 seconds before going on.
if [[ $NUM_ZOOS != 0 ]]; then
  echo "Starting ZooKeeper quorum..."
  # The glob is expanded by the remote shell; stdin/stdout of the remote
  # command are redirected to /dev/null.
  zk_start_cmd="/root/mesos/third_party/zookeeper-*/bin/zkServer.sh start </dev/null >/dev/null"
  for zk in $ZOOS; do
    ssh $SSH_OPTS $zk "$zk_start_cmd" &
    sleep 0.1
  done
  wait
  sleep 5
fi

# Restart Mesos: stop whatever is running, pause 2 seconds, start again.
echo "Stopping any existing Mesos cluster..."
./stop-mesos
sleep 2

echo "Starting Mesos cluster..."
./start-mesos