blob: add852d2989dc0f87cb3e0f04b32ffc80998c2b3 [file]
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
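# This script tests the Hadoop cloud scripts by running through a minimal
# sequence of steps: create EBS storage, start a persistent (EBS) cluster,
# run a job, shut down the cluster, relaunch it to check that the job output
# persisted, then shut it down again and delete the cluster and its storage.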
#
# Example usage:
# HADOOP_HOME=~/dev/hadoop-0.20.1/ ./persistent-cluster.sh
#
#######################################
# Block until no storage volume of $CLUSTER is reported as attached.
#
# Polls `$HADOOP_CLOUD_SCRIPT list-storage` and looks for the word
# "attached" in the sixth whitespace-separated column of its output. The
# check is made before sleeping, so the function returns immediately when
# nothing is attached; otherwise it sleeps 5 seconds between polls. There is
# no timeout.
#
# Command tracing (set -x) is suspended while polling to keep the log
# readable and is re-enabled afterwards only if it was on at entry. The
# errexit setting (set -e) is left untouched.
#
# Globals:   HADOOP_CLOUD_SCRIPT, CONFIG_DIR, CLUSTER (read)
# Arguments: none
# Returns:   0 once no volume is reported as attached
#######################################
wait_for_volume_detachment() {
  local attached
  local xtrace_was_on=false

  # Remember whether tracing was active so it can be restored on the way out.
  case $- in
    *x*) xtrace_was_on=true ;;
  esac
  set +x

  while true; do
    # grep exits non-zero when nothing matches; `|| true` keeps that expected
    # outcome from aborting the caller when errexit is enabled.
    # NOTE(review): a failing list-storage call also yields empty output and is
    # therefore treated as "nothing attached" (same as the previous behavior).
    attached=$("$HADOOP_CLOUD_SCRIPT" list-storage --config-dir="$CONFIG_DIR" \
      "$CLUSTER" | awk '{print $6}' | grep 'attached' || true)
    if [[ -z "$attached" ]]; then
      break
    fi
    sleep 5
  done

  if [[ "$xtrace_was_on" == true ]]; then
    set -x
  fi
}
# Abort on the first failing command and trace every command as it runs.
set -e
set -x

# Absolute path of the directory containing this script. `&&` makes sure a
# failed cd does not silently fall back to the current directory.
bin=$(cd "$(dirname "$0")" && pwd)

# Settings; each one can be overridden from the environment.
WORKSPACE=${WORKSPACE:-$(pwd)}
CONFIG_DIR=${CONFIG_DIR:-$WORKSPACE/.hadoop-cloud}
CLUSTER=${CLUSTER:-hadoop-cloud-ebs-$USER-test-cluster}
IMAGE_ID=${IMAGE_ID:-ami-6159bf08} # default to Fedora 32-bit AMI
AVAILABILITY_ZONE=${AVAILABILITY_ZONE:-us-east-1c}
KEY_NAME=${KEY_NAME:-$USER}
AUTO_SHUTDOWN=${AUTO_SHUTDOWN:-15}
LOCAL_HADOOP_VERSION=${LOCAL_HADOOP_VERSION:-0.20.1}
HADOOP_HOME=${HADOOP_HOME:-$WORKSPACE/hadoop-$LOCAL_HADOOP_VERSION}
HADOOP_CLOUD_HOME=${HADOOP_CLOUD_HOME:-$bin/../py}
HADOOP_CLOUD_PROVIDER=${HADOOP_CLOUD_PROVIDER:-ec2}
# The "~" is inside double quotes, so it is passed on literally rather than
# being expanded here.
SSH_OPTIONS=${SSH_OPTIONS:-"-i ~/.$HADOOP_CLOUD_PROVIDER/id_rsa-$KEY_NAME \
-o StrictHostKeyChecking=no"}
HADOOP_CLOUD_SCRIPT=$HADOOP_CLOUD_HOME/hadoop-$HADOOP_CLOUD_PROVIDER
export HADOOP_CONF_DIR=$CONFIG_DIR/$CLUSTER

# PID of the currently running proxy, if any. It is emptied after every
# explicit kill so that the EXIT trap only acts when a step failed while a
# proxy was still running (otherwise the proxy process would be leaked).
HADOOP_CLOUD_PROXY_PID=
trap 'if [[ -n "$HADOOP_CLOUD_PROXY_PID" ]]; then
  kill "$HADOOP_CLOUD_PROXY_PID" 2>/dev/null || true
fi' EXIT

# Install Hadoop locally
if [[ ! -d "$HADOOP_HOME" ]]; then
  wget "http://archive.apache.org/dist/hadoop/core/hadoop-${LOCAL_HADOOP_VERSION}/hadoop-${LOCAL_HADOOP_VERSION}.tar.gz"
  tar zxf "hadoop-${LOCAL_HADOOP_VERSION}.tar.gz" -C "$WORKSPACE"
  rm "hadoop-${LOCAL_HADOOP_VERSION}.tar.gz"
fi

# Create storage (one "nn" and one "dn" volume set from the same spec)
"$HADOOP_CLOUD_SCRIPT" create-storage --config-dir="$CONFIG_DIR" \
  --availability-zone="$AVAILABILITY_ZONE" "$CLUSTER" nn 1 \
  "$bin/ebs-storage-spec.json"
"$HADOOP_CLOUD_SCRIPT" create-storage --config-dir="$CONFIG_DIR" \
  --availability-zone="$AVAILABILITY_ZONE" "$CLUSTER" dn 1 \
  "$bin/ebs-storage-spec.json"

# Launch a cluster
# CLIENT_CIDRS and ENVS are deliberately left unquoted: they are optional and
# may each expand to zero or several command-line options.
# shellcheck disable=SC2086
"$HADOOP_CLOUD_SCRIPT" launch-cluster --config-dir="$CONFIG_DIR" \
  --image-id="$IMAGE_ID" --key-name="$KEY_NAME" \
  --auto-shutdown="$AUTO_SHUTDOWN" \
  --availability-zone="$AVAILABILITY_ZONE" $CLIENT_CIDRS $ENVS "$CLUSTER" 1

# Run a proxy and save its pid in HADOOP_CLOUD_PROXY_PID
# NOTE(review): eval executes whatever the proxy command prints; this is only
# acceptable because the output comes from our own cloud script.
eval "$("$HADOOP_CLOUD_SCRIPT" proxy --config-dir="$CONFIG_DIR" \
  --ssh-options="$SSH_OPTIONS" "$CLUSTER")"

# Run a job and check it works
"$HADOOP_HOME/bin/hadoop" fs -mkdir input
"$HADOOP_HOME/bin/hadoop" fs -put "$HADOOP_HOME/LICENSE.txt" input
# The glob stays outside the quotes so it can match the versioned jar name.
"$HADOOP_HOME/bin/hadoop" jar "$HADOOP_HOME"/hadoop-*-examples.jar grep \
  input output Apache
# following returns a non-zero exit code if no match
"$HADOOP_HOME/bin/hadoop" fs -cat 'output/part-00000' | grep Apache

# Shut down the cluster
kill "$HADOOP_CLOUD_PROXY_PID"
HADOOP_CLOUD_PROXY_PID=
"$HADOOP_CLOUD_SCRIPT" terminate-cluster --config-dir="$CONFIG_DIR" --force \
  "$CLUSTER"
sleep 5 # wait for termination to take effect

# Relaunch the cluster
# shellcheck disable=SC2086
"$HADOOP_CLOUD_SCRIPT" launch-cluster --config-dir="$CONFIG_DIR" \
  --image-id="$IMAGE_ID" --key-name="$KEY_NAME" \
  --auto-shutdown="$AUTO_SHUTDOWN" \
  --availability-zone="$AVAILABILITY_ZONE" $CLIENT_CIDRS $ENVS "$CLUSTER" 1

# Run a proxy and save its pid in HADOOP_CLOUD_PROXY_PID
eval "$("$HADOOP_CLOUD_SCRIPT" proxy --config-dir="$CONFIG_DIR" \
  --ssh-options="$SSH_OPTIONS" "$CLUSTER")"

# Check output is still there
"$HADOOP_HOME/bin/hadoop" fs -cat 'output/part-00000' | grep Apache

# Shut down the cluster
kill "$HADOOP_CLOUD_PROXY_PID"
HADOOP_CLOUD_PROXY_PID=
"$HADOOP_CLOUD_SCRIPT" terminate-cluster --config-dir="$CONFIG_DIR" --force \
  "$CLUSTER"
sleep 5 # wait for termination to take effect

# Cleanup: storage can only be deleted once its volumes are detached.
"$HADOOP_CLOUD_SCRIPT" delete-cluster --config-dir="$CONFIG_DIR" "$CLUSTER"
wait_for_volume_detachment
"$HADOOP_CLOUD_SCRIPT" delete-storage --config-dir="$CONFIG_DIR" --force \
  "$CLUSTER"