#!/bin/bash

# Make sure we are in the mesos-ec2 directory. Everything below uses paths
# relative to it (masters, slaves, zoo, ./setup-slave, ...), so stop here
# rather than continue from the wrong directory.
cd /root/mesos-ec2 || {
  echo "Cannot cd to /root/mesos-ec2" >&2
  exit 1
}

# Set hostname based on EC2 private DNS name, so that it is set correctly
# even if the instance is restarted with a different private DNS name
PRIVATE_DNS=$(wget -q -O - http://instance-data.ec2.internal/latest/meta-data/local-hostname)
if [[ -n "$PRIVATE_DNS" ]]; then
  hostname "$PRIVATE_DNS"
  echo "$PRIVATE_DNS" > /etc/hostname
else
  # The metadata query returned nothing. Keep the current hostname instead of
  # blanking /etc/hostname and exporting an empty HOSTNAME.
  echo "Warning: could not fetch local-hostname from EC2 metadata;" \
    "keeping current hostname" >&2
  PRIVATE_DNS=$(hostname)
fi
export HOSTNAME=$PRIVATE_DNS # Fix the bash built-in hostname variable too

echo "Setting up Mesos master on $(hostname)..."
| |
# Positional parameters, in order: OS name, download method, branch.
OS_NAME="$1"
DOWNLOAD_METHOD="$2"
BRANCH="$3"

# Cluster membership lists, read from files in the current directory.
MASTERS_FILE="masters"
MASTERS=$(cat "$MASTERS_FILE")
NUM_MASTERS=$(cat "$MASTERS_FILE" | wc -l)
OTHER_MASTERS=$(sed '1d' "$MASTERS_FILE") # all lines except the first
SLAVES=$(cat slaves)
ZOOS=$(cat zoo)

# A zoo file containing the text NONE means there are no ZooKeeper servers.
case "$ZOOS" in
  *NONE*)
    NUM_ZOOS=0
    ZOOS=""
    ;;
  *)
    NUM_ZOOS=$(cat zoo | wc -l)
    ;;
esac
| |
# Scripts that get used for/while running Mesos.
SCRIPTS="copy-dir
         mesos-daemon
         redeploy-mesos
         setup-slave
         ssh-no-keychecking
         start-mesos
         stop-mesos"

HADOOP_HOME=/root/hadoop-0.20.2

# TODO(*): update config scripts to have conditionals for handling different
# platforms
JAVA_HOME=/usr/lib/jvm/java-6-openjdk
#JAVA_HOME=/usr/lib/jvm/java-6-sun #works for karmic, this is lucid

# Options passed to every ssh/scp invocation in this script.
SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5"

# Require a terminal on standard input. Testing the file descriptor directly
# avoids comparing against the text printed by `tty`, which can be localized.
if [[ ! -t 0 ]]; then
  echo "Expecting a tty or pty! (use the ssh -t option)."
  exit 1
fi
| |
echo "Setting executable permissions on scripts..."
# SCRIPTS is a whitespace-separated list, so it is expanded unquoted on
# purpose to get one argument per script name.
# shellcheck disable=SC2086
chmod u+x $SCRIPTS

echo "Running setup-slave on master to mount filesystems, etc..."
./setup-slave
| |
echo "SSH'ing to master machine(s) to approve key(s)..."
# Run a no-op command over ssh on every master, in the background and
# staggered by 0.3 s, so each host key gets approved.
for master_host in $MASTERS; do
  echo "$master_host"
  ssh $SSH_OPTS $master_host echo -n &
  sleep 0.3
done
# Same for this machine, under both localhost and its own hostname.
for self_name in localhost $(hostname); do
  ssh $SSH_OPTS $self_name echo -n &
done
# Block until all background ssh sessions have finished.
wait
| |
if [[ $NUM_ZOOS != 0 ]]; then
  echo "SSH'ing to ZooKeeper server(s) to approve keys..."
  # Each server gets a sequential id, starting at 1, written to
  # /tmp/zookeeper/myid on that server.
  zid=1
  # Iterate over ZOOS (the list read from the zoo file); the previous loop
  # used the undefined name ZOO and therefore never ran.
  for zoo in $ZOOS; do
    echo "$zoo"
    ssh $SSH_OPTS "$zoo" \
      "echo -n; mkdir -p /tmp/zookeeper; echo $zid > /tmp/zookeeper/myid" &
    zid=$((zid + 1))
    sleep 0.3
  done
  # Wait for the background sessions so every myid file is written before
  # the script moves on, as is done after the master loop.
  wait
fi
| |
# Try to SSH to each cluster node to approve their key. Since some nodes may
# be slow in starting, we retry failed nodes up to 3 times (4 attempts in
# total), sleeping 15 seconds between rounds.
# ZOOS is the list read from the zoo file; the undefined name ZOO was used
# here before, which silently left the ZooKeeper nodes out.
TODO="$SLAVES $ZOOS $OTHER_MASTERS" # List of nodes to try (initially all)
TRIES=0                             # Number of rounds attempted so far
echo "SSH'ing to other cluster nodes to approve keys..."
# Strip whitespace before testing so a list made only of separators counts
# as empty.
while [[ -n "${TODO//[[:space:]]/}" ]] && ((TRIES < 4)); do
  NEW_TODO=""
  for node in $TODO; do
    echo "$node"
    # Nodes whose ssh fails are queued for the next round.
    if ! ssh $SSH_OPTS "$node" echo -n; then
      NEW_TODO="$NEW_TODO $node"
    fi
  done
  TRIES=$((TRIES + 1))
  # Stop when every node answered or the attempt budget is used up.
  if [[ -z "$NEW_TODO" ]] || ((TRIES >= 4)); then
    break
  fi
  sleep 15
  TODO="$NEW_TODO"
  echo "Re-attempting SSH to cluster nodes to approve keys..."
done
| |
echo "RSYNC'ing /root/mesos-ec2 to other cluster nodes..."
# Copy this directory and the SSH private key to every other node, in the
# background and staggered by 0.3 s. ZOOS is the list read from the zoo file;
# the undefined name ZOO was used here before, which skipped those nodes.
for node in $SLAVES $ZOOS $OTHER_MASTERS; do
  echo "$node"
  rsync -e "ssh $SSH_OPTS" -az /root/mesos-ec2 "$node:/root" &
  scp $SSH_OPTS ~/.ssh/id_rsa "$node:.ssh" &
  sleep 0.3
done
# Block until every transfer has finished.
wait
| |
echo "Running slave setup script on other cluster nodes..."
# Run mesos-ec2/setup-slave on every other node in parallel. ZOOS is the list
# read from the zoo file; the undefined name ZOO was used here before, which
# skipped those nodes.
for node in $SLAVES $ZOOS $OTHER_MASTERS; do
  echo "$node"
  ssh -t $SSH_OPTS "root@$node" "mesos-ec2/setup-slave" &
  sleep 0.3
done
# Block until every remote setup has finished.
wait
| |
echo "RSYNC'ing HDFS config files to other cluster nodes..."
# Push $HADOOP_HOME/conf to the same path on every other node. ZOOS is the
# list read from the zoo file; the undefined name ZOO was used here before,
# which skipped those nodes.
for node in $SLAVES $ZOOS $OTHER_MASTERS; do
  echo "$node"
  rsync -e "ssh $SSH_OPTS" -az "$HADOOP_HOME/conf" "$node:$HADOOP_HOME" &
  sleep 0.3
done
# Block until every transfer has finished.
wait
| |
# Set to 1 only after a checkout has actually succeeded; the build step
# keys off this flag.
DOWNLOADED=0

if [[ "$DOWNLOAD_METHOD" == "git" ]]; then
  # Point git at the ssh wrapper script shipped with mesos-ec2 so that it
  # does not ask to accept host keys.
  export GIT_SSH=/root/mesos-ec2/ssh-no-keychecking
  REPOSITORY=git://github.com/mesos/mesos.git
  echo "Checking out Mesos from $REPOSITORY"
  # Only delete and clone once we really are in /root; otherwise the rm -rf
  # below would act on whatever directory we happen to be in.
  if pushd /root > /dev/null 2>&1; then
    rm -rf mesos mesos.tgz
    if git clone "$REPOSITORY" mesos && pushd mesos > /dev/null 2>&1; then
      git checkout -b "$BRANCH" --track "origin/$BRANCH"
      popd > /dev/null 2>&1
      DOWNLOADED=1
    else
      # Leave DOWNLOADED at 0 so the build step is skipped.
      echo "Failed to clone $REPOSITORY; skipping Mesos build" >&2
    fi
    popd > /dev/null 2>&1
  else
    echo "Cannot cd to /root; skipping Mesos checkout" >&2
  fi
fi
| |
# Build Mesos if we downloaded it. Each build runs only after its pushd
# succeeded, so make/ant never run in an unrelated directory.
if [[ "$DOWNLOADED" == "1" ]]; then
  echo "Building Mesos..."
  if pushd /root/mesos > /dev/null 2>&1; then
    ./configure.ubuntu-lucid-64
    make clean
    make
    popd > /dev/null 2>&1
  else
    echo "Cannot cd to /root/mesos; skipping Mesos build" >&2
  fi

  # Spark is built only when a checkout is present in /root/spark.
  if [ -d /root/spark ]; then
    echo "Building Spark..."
    if pushd /root/spark > /dev/null 2>&1; then
      MESOS_HOME=/root/mesos make all native
      popd > /dev/null 2>&1
    else
      echo "Cannot cd to /root/spark; skipping Spark build" >&2
    fi
  fi

  echo "Building Hadoop framework..."
  if pushd /root/mesos/frameworks/hadoop-0.20.2 > /dev/null 2>&1; then
    ant
    ant examples
    popd > /dev/null 2>&1
  else
    echo "Cannot cd to /root/mesos/frameworks/hadoop-0.20.2;" \
      "skipping Hadoop framework build" >&2
  fi
fi
| |
# Copy the framework configuration kept in this directory into the Mesos tree.
echo "Setting up Hadoop framework config files..."
cp hadoop-framework-conf/* /root/mesos/frameworks/hadoop-0.20.2/conf

echo "Setting up haproxy+apache framework config files..."
cp haproxy+apache/* /root/mesos/frameworks/haproxy+apache

echo "Setting up Spark config files..."
# TODO: This currently overwrites whatever the user wrote there; on
# the other hand, we also don't want to leave an old file created by
# us because it would have the wrong hostname for HDFS etc
mkdir -p /root/spark/conf
printf '%s\n' \
  "-Dspark.dfs=hdfs://$HOSTNAME:9000 -Dspark.repl.classdir=/nfs" \
  > /root/spark/conf/java-opts

echo "Redeploying /root/mesos..."
./redeploy-mesos
| |
echo "Setting up NFS..."
# When /nfs does not resolve to an existing path, (re)create it as a
# symlink to /mnt/nfs.
[ -e /nfs ] || {
  mkdir -p /mnt/nfs
  rm -fr /nfs
  ln -s /mnt/nfs /nfs
}
# Add the export entry only when /etc/exports has no line starting "/nfs ".
grep -e '^/nfs ' /etc/exports ||
  echo "/nfs    10.0.0.0/8(ro,async,no_subtree_check)" >> /etc/exports
# Unexport and re-export everything in /etc/exports because, if we are
# restarting a stopped EC2 instance, we might have had an entry for /nfs in
# /etc/exports before we created /mnt/nfs.
exportfs -ua
exportfs -a
| |
echo "Mounting NFS on slaves..."
# On every slave, create /nfs and mount this host's /nfs export there. The
# sessions run in the background, staggered by 0.3 s.
for slave_host in $SLAVES; do
  echo "$slave_host"
  ssh -t $SSH_OPTS root@$slave_host \
    "mkdir -p /nfs; mount $HOSTNAME:/nfs /nfs" &
  sleep 0.3
done
# Block until every mount command has returned.
wait
| |
# Format the namenode, then start HDFS, using the Hadoop install under
# $HADOOP_HOME.
echo "Formatting HDFS namenode..."
"$HADOOP_HOME"/bin/hadoop namenode -format

echo "Starting HDFS..."
"$HADOOP_HOME"/bin/start-dfs.sh

# Short pause before the next stage.
sleep 1
| |
# Start the ZooKeeper server on every host in ZOOS, when there are any.
if [[ $NUM_ZOOS != 0 ]]; then
  echo "Starting ZooKeeper quorum..."
  for zk_host in $ZOOS; do
    # stdin/stdout of the remote command are redirected to /dev/null.
    ssh $SSH_OPTS $zk_host \
      "/root/mesos/third_party/zookeeper-*/bin/zkServer.sh start </dev/null >/dev/null" &
    sleep 0.1
  done
  # Wait for all ssh sessions to return, then pause 5 s before continuing.
  wait
  sleep 5
fi
| |
# Stop any Mesos cluster that is already running, pause, then start a new one.
printf '%s\n' "Stopping any existing Mesos cluster..."
./stop-mesos
sleep 2

printf '%s\n' "Starting Mesos cluster..."
./start-mesos