blob: 4c67c02a094aee82e69ed7f3b85c45111de09ec5 [file] [log] [blame]
#!/bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Create a Hadoop AMI. Runs on the EC2 instance.
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh
# Remove environment script since it contains sensitive information
rm -f "$bin"/hadoop-ec2-env.sh
# Install Java
echo "Downloading and installing java binary."
cd /usr/local
wget -nv -O java.bin $JAVA_BINARY_URL
sh java.bin
rm -f java.bin
# Install tools
echo "Installing rpms."
yum -y install rsync lynx screen ganglia-gmetad ganglia-gmond ganglia-web httpd php
yum -y clean all
# Install Hadoop
echo "Installing Hadoop $HADOOP_VERSION."
cd /usr/local
wget -nv http://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
[ ! -f hadoop-$HADOOP_VERSION.tar.gz ] && wget -nv http://www.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
tar xzf hadoop-$HADOOP_VERSION.tar.gz
rm -f hadoop-$HADOOP_VERSION.tar.gz
# Configure Hadoop
sed -i -e "s|# export JAVA_HOME=.*|export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}|" \
-e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/mnt/hadoop/logs|' \
-e 's|# export HADOOP_SLAVE_SLEEP=.*|export HADOOP_SLAVE_SLEEP=1|' \
-e 's|# export HADOOP_OPTS=.*|export HADOOP_OPTS=-server|' \
/usr/local/hadoop-$HADOOP_VERSION/conf/hadoop-env.sh
# Run user data as script on instance startup
chmod +x /etc/init.d/ec2-run-user-data
echo "/etc/init.d/ec2-run-user-data" >> /etc/rc.d/rc.local
# Setup root user bash environment
echo "export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}" >> /root/.bash_profile
echo "export HADOOP_HOME=/usr/local/hadoop-${HADOOP_VERSION}" >> /root/.bash_profile
echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> /root/.bash_profile
# Configure networking.
# Delete SSH authorized_keys since it includes the key it was launched with. (Note that it is re-populated when an instance starts.)
rm -f /root/.ssh/authorized_keys
# Ensure logging in to new hosts is seamless.
echo ' StrictHostKeyChecking no' >> /etc/ssh/ssh_config
# Bundle and upload image
cd ~root
# Don't need to delete .bash_history since it isn't written until exit.
df -h
ec2-bundle-vol -d /mnt -k /mnt/pk*.pem -c /mnt/cert*.pem -u $AWS_ACCOUNT_ID -s 3072 -p hadoop-$HADOOP_VERSION-$ARCH -r $ARCH
ec2-upload-bundle -b $S3_BUCKET -m /mnt/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml -a $AWS_ACCESS_KEY_ID -s $AWS_SECRET_ACCESS_KEY
# End
echo Done