blob: 1ee132a0edba80ba79c0d8994652e9bdf06e8d46 [file] [log] [blame]
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Resolve the absolute directory containing this script; it is used below
# to locate libexec/hadoop-config.sh relative to the script.
bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

# HADOOP_HOME is deprecated: warn whenever it is set to a non-empty value.
if [ -n "$HADOOP_HOME" ]; then
  printf '%s\n\n' 'Warning: $HADOOP_HOME is deprecated.'
fi

# Load the shared Hadoop shell configuration.
source "${bin}/../libexec/hadoop-config.sh"
#######################################
# Print the command-line help text and terminate the script.
# Arguments: none ($0 is interpolated into the help text)
# Outputs:   help text on stdout, framed by one blank line before and after
# Returns:   never returns; exits the shell with status 1
#######################################
usage() {
  cat <<EOF

usage: $0 <parameters>
Optional parameters:
--auto Setup path and configuration automatically
--default Setup configuration as default
--conf-dir=/etc/hadoop Set configuration directory
--datanode-dir=/var/lib/hadoop/hdfs/datanode Set datanode directory
-h Display this message
--jobtracker-url=hostname:9001 Set jobtracker url
--log-dir=/var/log/hadoop Set log directory
--pid-dir=/var/run/hadoop Set pid directory
--hdfs-dir=/var/lib/hadoop/hdfs Set hdfs directory
--mapred-dir=/var/lib/hadoop/mapred Set mapreduce directory
--namenode-dir=/var/lib/hadoop/hdfs/namenode Set namenode directory
--namenode-url=hdfs://hostname:9000/ Set namenode url
--replication=3 Set replication factor
--taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler Set task scheduler

EOF
  exit 1
}
#######################################
# Expand ${NAME} placeholders in a template using current shell variables.
#
# Each input line is scanned for ${NAME} tokens (NAME being a valid shell
# identifier). Every token is replaced by the value of the shell variable
# NAME; unset variables expand to the empty string. Values that themselves
# contain other placeholders are expanded in turn. Lines are otherwise
# copied verbatim: leading/trailing whitespace, backslashes and glob
# characters are preserved.
#
# Arguments:
#   $1 - path of the template file to read
#   $2 - path of the output file; lines are APPENDED, so callers must
#        remove a stale file beforehand if they want a fresh result
# Outputs:   a warning on stderr if a value contains its own placeholder
# Returns:   status of the read loop (non-zero if $1 cannot be opened)
#######################################
template_generator() {
  local placeholder_re='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
  local line placeholder name value

  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    while [[ "$line" =~ $placeholder_re ]]; do
      placeholder=${BASH_REMATCH[1]}
      # Strip the leading '${' and the trailing '}' to get the variable name.
      name=${placeholder:2:${#placeholder}-3}
      # Indirect expansion instead of eval: no code is ever re-evaluated.
      value=${!name}
      if [[ "$value" == *"$placeholder"* ]]; then
        # Substituting would re-introduce the same token forever.
        echo "template_generator: \${${name}} expands to itself; left unexpanded" >&2
        break
      fi
      # Both sides are quoted: the token is matched literally, and '&' in
      # the value is not treated as "the matched text" (bash >= 5.2).
      line=${line//"$placeholder"/"$value"}
    done
    printf '%s\n' "$line" >> "$2"
  done < "$1"
}
# Normalise the command line with getopt(1). Long options (-l) require the
# enhanced util-linux implementation. The only short option is -h; options
# whose name ends in ':' take a mandatory argument. The script name is
# quoted so that a path containing whitespace is passed to -n intact.
# On a parse error getopt reports the problem itself and usage is shown.
if ! OPTS=$(getopt \
  -n "$0" \
  -o 'h' \
  -l 'auto' \
  -l 'conf-dir:' \
  -l 'default' \
  -l 'hdfs-dir:' \
  -l 'namenode-dir:' \
  -l 'datanode-dir:' \
  -l 'mapred-dir:' \
  -l 'namenode-url:' \
  -l 'jobtracker-url:' \
  -l 'log-dir:' \
  -l 'pid-dir:' \
  -l 'replication:' \
  -l 'taskscheduler:' \
  -- "$@"); then
  usage
fi
# HADOOP_LOG_DIR must come only from --log-dir (or the built-in default),
# never from the caller's environment.
unset HADOOP_LOG_DIR

# Re-install the getopt-normalised words as the positional parameters.
eval set -- "${OPTS}"

# Consume options until the "--" terminator. Every option except -h marks
# the run as automated (no interactive prompts); value options store their
# argument in the matching HADOOP_* variable.
while :; do
  case "$1" in
    --auto)           AUTOSETUP=1; AUTOMATED=1; shift ;;
    --default)        AUTOMATED=1; shift ;;
    -h)               usage ;;
    --conf-dir)       HADOOP_CONF_DIR=$2; AUTOMATED=1; shift 2 ;;
    --hdfs-dir)       HADOOP_HDFS_DIR=$2; AUTOMATED=1; shift 2 ;;
    --namenode-dir)   HADOOP_NN_DIR=$2; AUTOMATED=1; shift 2 ;;
    --datanode-dir)   HADOOP_DN_DIR=$2; AUTOMATED=1; shift 2 ;;
    --mapred-dir)     HADOOP_MAPRED_DIR=$2; AUTOMATED=1; shift 2 ;;
    --namenode-url)   HADOOP_NN_HOST=$2; AUTOMATED=1; shift 2 ;;
    --jobtracker-url) HADOOP_JT_HOST=$2; AUTOMATED=1; shift 2 ;;
    --log-dir)        HADOOP_LOG_DIR=$2; AUTOMATED=1; shift 2 ;;
    --pid-dir)        HADOOP_PID_DIR=$2; AUTOMATED=1; shift 2 ;;
    --replication)    HADOOP_REPLICATION=$2; AUTOMATED=1; shift 2 ;;
    --taskscheduler)  HADOOP_TASK_SCHEDULER=$2; AUTOMATED=1; shift 2 ;;
    --)
      # End of options.
      shift
      break
      ;;
    *)
      echo "Unknown option: $1"
      usage
      # usage already exits; this is only a safeguard.
      exit 1
      ;;
  esac
done
# Fill in a default for every setting that is still unset or empty after
# option parsing. A value supplied on the command line (or, except for
# HADOOP_LOG_DIR which is unset earlier, inherited from the environment)
# takes precedence over the default.
AUTOSETUP=${AUTOSETUP:-1}
JAVA_HOME=${JAVA_HOME:-/usr/java/default}
HADOOP_NN_HOST=${HADOOP_NN_HOST:-hdfs://$(hostname):9000/}
HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
HADOOP_JT_HOST=${HADOOP_JT_HOST:-$(hostname):9001}
HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-/var/log/hadoop}
# Default matches the value documented for --pid-dir in the usage text.
HADOOP_PID_DIR=${HADOOP_PID_DIR:-/var/run/hadoop}
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
# Must read HADOOP_REPLICATION itself, otherwise --replication is ignored.
HADOOP_REPLICATION=${HADOOP_REPLICATION:-3}
HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
# Interactive mode: runs only when no command-line option marked the run
# as automated. Each prompt shows the current value in parentheses; an
# empty answer keeps that value. After a summary the user must confirm
# with "y" (or "Y"); anything else aborts with exit status 1.
if [ "${AUTOMATED}" != "1" ]; then
  echo "Setup Hadoop Configuration"
  echo
  # read -r keeps backslashes in the typed answers literal.
  printf '%s' "Where would you like to put config directory? (${HADOOP_CONF_DIR}) "
  read -r USER_HADOOP_CONF_DIR
  printf '%s' "Where would you like to put log directory? (${HADOOP_LOG_DIR}) "
  read -r USER_HADOOP_LOG_DIR
  printf '%s' "Where would you like to put pid directory? (${HADOOP_PID_DIR}) "
  read -r USER_HADOOP_PID_DIR
  printf '%s' "What is the url of the namenode? (${HADOOP_NN_HOST}) "
  read -r USER_HADOOP_NN_HOST
  printf '%s' "Where would you like to put namenode data directory? (${HADOOP_NN_DIR}) "
  read -r USER_HADOOP_NN_DIR
  printf '%s' "Where would you like to put datanode data directory? (${HADOOP_DN_DIR}) "
  read -r USER_HADOOP_DN_DIR
  printf '%s' "What is the url of the jobtracker? (${HADOOP_JT_HOST}) "
  read -r USER_HADOOP_JT_HOST
  printf '%s' "Where would you like to put jobtracker/tasktracker data directory? (${HADOOP_MAPRED_DIR}) "
  read -r USER_HADOOP_MAPRED_DIR
  printf '%s' "Where is JAVA_HOME directory? (${JAVA_HOME}) "
  read -r USER_JAVA_HOME
  printf '%s' "Would you like to create directories/copy conf files to localhost? (Y/n) "
  read -r USER_AUTOSETUP
  echo

  # Non-empty answers override the current values.
  JAVA_HOME=${USER_JAVA_HOME:-$JAVA_HOME}
  HADOOP_NN_HOST=${USER_HADOOP_NN_HOST:-$HADOOP_NN_HOST}
  HADOOP_NN_DIR=${USER_HADOOP_NN_DIR:-$HADOOP_NN_DIR}
  HADOOP_DN_DIR=${USER_HADOOP_DN_DIR:-$HADOOP_DN_DIR}
  HADOOP_JT_HOST=${USER_HADOOP_JT_HOST:-$HADOOP_JT_HOST}
  # No prompt above reads USER_HADOOP_HDFS_DIR; this only has an effect if
  # the variable was already set before the prompts ran.
  HADOOP_HDFS_DIR=${USER_HADOOP_HDFS_DIR:-$HADOOP_HDFS_DIR}
  HADOOP_MAPRED_DIR=${USER_HADOOP_MAPRED_DIR:-$HADOOP_MAPRED_DIR}
  HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
  HADOOP_LOG_DIR=${USER_HADOOP_LOG_DIR:-$HADOOP_LOG_DIR}
  HADOOP_PID_DIR=${USER_HADOOP_PID_DIR:-$HADOOP_PID_DIR}
  HADOOP_CONF_DIR=${USER_HADOOP_CONF_DIR:-$HADOOP_CONF_DIR}
  AUTOSETUP=${USER_AUTOSETUP:-y}
  # The prompt advertises "Y"; normalise affirmative answers to "y" so an
  # uppercase reply is not treated as a refusal later on.
  case "${AUTOSETUP}" in
    [yY] | [yY][eE][sS]) AUTOSETUP=y ;;
  esac

  echo "Review your choices:"
  echo
  echo "Config directory : ${HADOOP_CONF_DIR}"
  echo "Log directory : ${HADOOP_LOG_DIR}"
  echo "PID directory : ${HADOOP_PID_DIR}"
  echo "Namenode url : ${HADOOP_NN_HOST}"
  echo "Namenode directory : ${HADOOP_NN_DIR}"
  echo "Datanode directory : ${HADOOP_DN_DIR}"
  echo "Jobtracker url : ${HADOOP_JT_HOST}"
  echo "Mapreduce directory : ${HADOOP_MAPRED_DIR}"
  echo "Task scheduler : ${HADOOP_TASK_SCHEDULER}"
  echo "JAVA_HOME directory : ${JAVA_HOME}"
  echo "Create dirs/copy conf files : ${AUTOSETUP}"
  echo
  printf '%s' "Proceed with generate configuration? (y/N) "
  read -r CONFIRM
  case "${CONFIRM}" in
    y | Y) ;;
    *)
      echo "User aborted setup, exiting..."
      exit 1
      ;;
  esac
fi
# Render each configuration template from
# ${HADOOP_PREFIX}/share/hadoop/templates/conf into the current directory.
# template_generator appends to its output file, so any stale copy is
# removed first. Paths are quoted so a HADOOP_PREFIX containing whitespace
# still resolves to the right template.
for conf_file in core-site.xml hdfs-site.xml mapred-site.xml hadoop-env.sh; do
  rm -f -- "${conf_file}"
  template_generator \
    "${HADOOP_PREFIX}/share/hadoop/templates/conf/${conf_file}" \
    "${conf_file}"
done

# hadoop-env.sh is owned by root:hadoop with mode 755.
chown root:hadoop hadoop-env.sh
chmod 755 hadoop-env.sh
# Either install the generated configuration on this host, or tell the
# user which files to copy by hand. Installation is chosen when AUTOSETUP
# is "1" or an affirmative answer (y/Y/yes, case-insensitive).
if [[ "${AUTOSETUP}" == "1" || "${AUTOSETUP}" == [yY] || "${AUTOSETUP}" == [yY][eE][sS] ]]; then
  # Create every data, configuration, log and pid directory. All paths are
  # quoted so that directories containing whitespace are handled intact.
  mkdir -p -- \
    "${HADOOP_HDFS_DIR}" \
    "${HADOOP_NN_DIR}" \
    "${HADOOP_DN_DIR}" \
    "${HADOOP_MAPRED_DIR}" \
    "${HADOOP_CONF_DIR}" \
    "${HADOOP_LOG_DIR}" \
    "${HADOOP_LOG_DIR}/hdfs" \
    "${HADOOP_LOG_DIR}/mapred" \
    "${HADOOP_PID_DIR}"

  # HDFS directories belong to hdfs, the MapReduce directory to mapred.
  chown hdfs:hadoop "${HADOOP_HDFS_DIR}"
  chown hdfs:hadoop "${HADOOP_NN_DIR}"
  chown hdfs:hadoop "${HADOOP_DN_DIR}"
  chown mapred:hadoop "${HADOOP_MAPRED_DIR}"

  # Log and pid directories are made group-writable (mode 775).
  chown root:hadoop "${HADOOP_LOG_DIR}"
  chmod 775 "${HADOOP_LOG_DIR}"
  chmod 775 "${HADOOP_PID_DIR}"
  chown hdfs:hadoop "${HADOOP_LOG_DIR}/hdfs"
  chown mapred:hadoop "${HADOOP_LOG_DIR}/mapred"

  # Install every XML file from the current directory plus hadoop-env.sh.
  cp -f -- ./*.xml "${HADOOP_CONF_DIR}"
  cp -f -- hadoop-env.sh "${HADOOP_CONF_DIR}"
  echo "Configuration setup is completed."

  # Literal substring test: does the namenode URL mention this host?
  if [[ "${HADOOP_NN_HOST}" == *"$(hostname)"* ]]; then
    echo "Proceed to run hadoop-setup-hdfs.sh on namenode."
  fi
else
  echo
  echo "Configuration file has been generated, please copy:"
  echo
  echo "core-site.xml"
  echo "hdfs-site.xml"
  echo "mapred-site.xml"
  echo "hadoop-env.sh"
  echo
  echo " to ${HADOOP_CONF_DIR} on all nodes, and proceed to run hadoop-setup-hdfs.sh on namenode."
fi