| #!/usr/bin/env bash |
| |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
# Resolve the directory that contains this script, then derive the Hadoop
# installation root (one level up).  A pre-exported HADOOP_PREFIX wins over
# the derived default.
bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

this="${BASH_SOURCE-$0}"
HADOOP_DEFAULT_PREFIX=$(dirname "$this")/..
HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
export HADOOP_PREFIX
| |
# Print the full option reference to stdout and terminate with status 1.
# Used both for -h and for any command line parsing failure.
usage() {
  cat <<EOF

usage: $0 <parameters>

Optional parameters:
--auto Setup path and configuration automatically
--default Setup configuration as default
--conf-dir=/etc/hadoop Set configuration directory
--datanode-dir=/var/lib/hadoop/hdfs/datanode Set datanode directory
--group=hadoop Set Hadoop group name
-h Display this message
--hdfs-user=hdfs Set HDFS user
--jobtracker-host=hostname Set jobtracker host
--namenode-host=hostname Set namenode host
--secondarynamenode-host=hostname Set secondary namenode host
--kerberos-realm=KERBEROS.EXAMPLE.COM Set Kerberos realm
--kinit-location=/usr/kerberos/bin/kinit Set kinit location
--keytab-dir=/etc/security/keytabs Set keytab directory
--log-dir=/var/log/hadoop Set log directory
--pid-dir=/var/run/hadoop Set pid directory
--hdfs-dir=/var/lib/hadoop/hdfs Set HDFS directory
--hdfs-user-keytab=/home/hdfs/hdfs.keytab Set HDFS user key tab
--mapred-dir=/var/lib/hadoop/mapred Set mapreduce directory
--mapreduce-user=mr Set mapreduce user
--mapreduce-user-keytab=/home/mr/hdfs.keytab Set mapreduce user key tab
--namenode-dir=/var/lib/hadoop/hdfs/namenode Set namenode directory
--replication=3 Set replication factor
--taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler Set task scheduler
--datanodes=hostname1,hostname2,... SET the datanodes
--tasktrackers=hostname1,hostname2,... SET the tasktrackers
--dfs-webhdfs-enabled=false|true Enable webhdfs
--dfs-support-append=false|true Enable append
--hadoop-proxy-users='user1:groups:hosts;user2:groups:hosts' Setup proxy users for hadoop
--hbase-user=hbase User which hbase is running as. Defaults to hbase
--mapreduce-cluster-mapmemory-mb=memory Virtual memory of a map slot for the MR framework. Defaults to -1
--mapreduce-cluster-reducememory-mb=memory Virtual memory, of a reduce slot for the MR framework. Defaults to -1
--mapreduce-jobtracker-maxmapmemory-mb=memory Maximum virtual memory of a single map task. Defaults to -1
This value should be set to (mapreduce.cluster.mapmemory.mb * mapreduce.tasktracker.map.tasks.maximum)
--mapreduce-jobtracker-maxreducememory-mb=memory Maximum virtual memory of a single reduce task. Defaults to -1
This value should be set to (mapreduce.cluster.reducememory.mb * mapreduce.tasktracker.reduce.tasks.maximum)
--mapreduce-map-memory-mb=memory Virtual memory of a single map slot for a job. Defaults to -1
This value should be <= mapred.cluster.max.map.memory.mb
--mapreduce-reduce-memory-mb=memory Virtual memory, of a single reduce slot for a job. Defaults to -1
This value should be <= mapred.cluster.max.reduce.memory.mb
--dfs-datanode-dir-perm=700 Set the permission for the datanode data directories. Defaults to 700
--dfs-block-local-path-access-user=user User for which you want to enable shortcircuit read.
--dfs-client-read-shortcircuit=true/false Enable shortcircuit read for the client. Will default to true if the shortcircuit user is set.
--dfs-client-read-shortcircuit-skip-checksum=false/true Disable checking of checksum when shortcircuit read is taking place. Defaults to false.

EOF
  exit 1
}
| |
#########################################
# Check that a path and every one of its ancestor directories are owned
# by root, walking upward with dirname until the filesystem root.
# Arguments: $1 - path to check
# Returns:   0 if every component is owned by uid 0, 1 otherwise
#########################################
check_permission() {
  local target=$1
  local owner parent
  while [ "$target" != "/" ]; do
    # stat's owner-format flag differs: BSD/macOS uses -f, GNU coreutils -c.
    if [ "$(uname)" = "Darwin" ]; then
      owner=$(stat -f %u "$target")
    else
      owner=$(stat -c %u "$target")
    fi
    if [ "$owner" != "0" ]; then
      return 1
    fi
    parent=$(dirname "$target")
    # BUG FIX: for a relative path dirname bottoms out at "." (never "/"),
    # which made the original loop spin forever; stop once the path no
    # longer shrinks.
    if [ "$parent" = "$target" ]; then
      break
    fi
    target=$parent
  done
  return 0
}
| |
#########################################
# Render a template file, expanding every ${VAR} reference from the
# current environment, and write the result to the destination file.
# Any pre-existing destination is preserved as "<dest>.bak".
# Arguments: $1 - template file, $2 - destination file
#########################################
template_generator() {
  REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
  local template=$1
  local dest=$2
  if [ -e "$dest" ]; then
    mv -f "$dest" "${dest}.bak"
  fi
  # BUG FIX: the original "read line"/"echo $line" pair stripped leading
  # whitespace, ate backslashes and re-split/globbed each line, corrupting
  # the generated configuration files.  IFS= read -r plus a quoted printf
  # reproduces every template line byte-for-byte (after substitution).
  # The "|| [ -n "$line" ]" keeps a final line that lacks a newline.
  while IFS='' read -r line || [ -n "$line" ]; do
    while [[ "$line" =~ $REGEX ]] ; do
      LHS=${BASH_REMATCH[1]}
      # SECURITY NOTE: eval expands the ${VAR} reference; templates are
      # trusted files shipped with the distribution, not user input.
      RHS="$(eval echo "\"$LHS\"")"
      line=${line//$LHS/$RHS}
    done
    printf '%s\n' "$line" >> "$dest"
  done < "$template"
}
| |
| ######################################### |
| # Function to modify a value of a field in an xml file |
| # Params: $1 is the file with full path; $2 is the property, $3 is the new value |
| ######################################### |
| function addPropertyToXMLConf |
| { |
| #read the file name with full path |
| local file=$1 |
| #get the property name |
| local property=$2 |
| #get what value should be set for that |
| local propValue=$3 |
| #get the description |
| local desc=$4 |
| #get the value for the final tag |
| local finalVal=$5 |
| |
| #create the property text, make sure the / are escaped |
| propText="<property>\n<name>$property<\/name>\n<value>$propValue<\/value>\n" |
| #if description is not empty add it |
| if [ ! -z $desc ] |
| then |
| propText="${propText}<description>$desc<\/description>\n" |
| fi |
| |
| #if final is not empty add it |
| if [ ! -z $finalVal ] |
| then |
| propText="${propText}final>$finalVal<\/final>\n" |
| fi |
| |
| #add the ending tag |
| propText="${propText}<\/property>\n" |
| |
| #add the property to the file |
| endText="<\/configuration>" |
| #add the text using sed at the end of the file |
| sed -i "s|$endText|$propText$endText|" $file |
| } |
| |
| ########################################## |
| # Function to setup up the short circuit read settings |
| ######################################### |
| function setupShortCircuitRead |
| { |
| local conf_file="${HADOOP_CONF_DIR}/hdfs-site.xml" |
| #if the shortcircuit user is not set then return |
| if [ -z $DFS_BLOCK_LOCAL_PATH_ACCESS_USER ] |
| then |
| return |
| fi |
| |
| #set the defaults if values not present |
| DFS_CLIENT_READ_SHORTCIRCUIT=${DFS_CLIENT_READ_SHORTCIRCUIT:-false} |
| DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=${DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM:-false} |
| |
| #add the user to the conf file |
| addPropertyToXMLConf "$conf_file" "dfs.block.local-path-access.user" "$DFS_BLOCK_LOCAL_PATH_ACCESS_USER" |
| addPropertyToXMLConf "$conf_file" "dfs.client.read.shortcircuit" "$DFS_CLIENT_READ_SHORTCIRCUIT" |
| addPropertyToXMLConf "$conf_file" "dfs.client.read.shortcircuit.skip.checksum" "$DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM" |
| } |
| |
| ########################################## |
| # Function to setup up the proxy user settings |
| ######################################### |
| function setupProxyUsers |
| { |
| local conf_file="${HADOOP_CONF_DIR}/core-site.xml" |
| #if hadoop proxy users are sent, setup hadoop proxy |
| if [ ! -z $HADOOP_PROXY_USERS ] |
| then |
| oldIFS=$IFS |
| IFS=';' |
| #process each proxy config |
| for proxy in $HADOOP_PROXY_USERS |
| do |
| #get the user, group and hosts information for each proxy |
| IFS=':' |
| arr=($proxy) |
| user="${arr[0]}" |
| groups="${arr[1]}" |
| hosts="${arr[2]}" |
| #determine the property names and values |
| proxy_groups_property="hadoop.proxyuser.${user}.groups" |
| proxy_groups_val="$groups" |
| addPropertyToXMLConf "$conf_file" "$proxy_groups_property" "$proxy_groups_val" |
| proxy_hosts_property="hadoop.proxyuser.${user}.hosts" |
| proxy_hosts_val="$hosts" |
| addPropertyToXMLConf "$conf_file" "$proxy_hosts_property" "$proxy_hosts_val" |
| IFS=';' |
| done |
| IFS=$oldIFS |
| fi |
| } |
| |
#########################################
# Parse the command line with getopt(1).  Every long option listed in
# usage() appears below; options that take a value end with ':'.  The
# normalized output is later re-installed via 'eval set -- "${OPTS}"'.
# NOTE(review): $0 after -n is unquoted; harmless unless the script path
# contains whitespace — candidate for quoting.
#########################################
OPTS=$(getopt \
-n $0 \
-o '' \
-l 'auto' \
-l 'java-home:' \
-l 'conf-dir:' \
-l 'default' \
-l 'group:' \
-l 'hdfs-dir:' \
-l 'namenode-dir:' \
-l 'datanode-dir:' \
-l 'mapred-dir:' \
-l 'namenode-host:' \
-l 'secondarynamenode-host:' \
-l 'jobtracker-host:' \
-l 'log-dir:' \
-l 'pid-dir:' \
-l 'replication:' \
-l 'taskscheduler:' \
-l 'hdfs-user:' \
-l 'hdfs-user-keytab:' \
-l 'mapreduce-user:' \
-l 'mapreduce-user-keytab:' \
-l 'keytab-dir:' \
-l 'kerberos-realm:' \
-l 'kinit-location:' \
-l 'datanodes:' \
-l 'tasktrackers:' \
-l 'dfs-webhdfs-enabled:' \
-l 'hadoop-proxy-users:' \
-l 'dfs-support-append:' \
-l 'hbase-user:' \
-l 'mapreduce-cluster-mapmemory-mb:' \
-l 'mapreduce-cluster-reducememory-mb:' \
-l 'mapreduce-jobtracker-maxmapmemory-mb:' \
-l 'mapreduce-jobtracker-maxreducememory-mb:' \
-l 'mapreduce-map-memory-mb:' \
-l 'mapreduce-reduce-memory-mb:' \
-l 'dfs-datanode-dir-perm:' \
-l 'dfs-block-local-path-access-user:' \
-l 'dfs-client-read-shortcircuit:' \
-l 'dfs-client-read-shortcircuit-skip-checksum:' \
-o 'h' \
-- "$@")

# getopt itself prints a diagnostic for an unrecognized option; on parse
# failure show the option reference and exit (usage exits with status 1).
if [ $? != 0 ] ; then
usage
fi
| |
# Make sure the HADOOP_LOG_DIR is not picked up from user environment.
unset HADOOP_LOG_DIR

# Re-install getopt's normalized output as the positional parameters, then
# consume the options one by one.  Every option sets its target variable
# and flips AUTOMATED=1 so the interactive prompts below are skipped.
eval set -- "${OPTS}"
while true ; do
case "$1" in
--auto)
AUTOSETUP=1
AUTOMATED=1
shift
;;
--java-home)
JAVA_HOME=$2; shift 2
AUTOMATED=1
;;
--conf-dir)
HADOOP_CONF_DIR=$2; shift 2
AUTOMATED=1
;;
--default)
AUTOMATED=1; shift
;;
--group)
HADOOP_GROUP=$2; shift 2
AUTOMATED=1
;;
-h)
usage
;;
--hdfs-dir)
HADOOP_HDFS_DIR=$2; shift 2
AUTOMATED=1
;;
--namenode-dir)
HADOOP_NN_DIR=$2; shift 2
AUTOMATED=1
;;
--datanode-dir)
HADOOP_DN_DIR=$2; shift 2
AUTOMATED=1
;;
--mapred-dir)
HADOOP_MAPRED_DIR=$2; shift 2
AUTOMATED=1
;;
--namenode-host)
HADOOP_NN_HOST=$2; shift 2
AUTOMATED=1
;;
--secondarynamenode-host)
HADOOP_SNN_HOST=$2; shift 2
AUTOMATED=1
;;
--jobtracker-host)
HADOOP_JT_HOST=$2; shift 2
AUTOMATED=1
;;
--log-dir)
HADOOP_LOG_DIR=$2; shift 2
AUTOMATED=1
;;
--pid-dir)
HADOOP_PID_DIR=$2; shift 2
AUTOMATED=1
;;
--replication)
HADOOP_REPLICATION=$2; shift 2
AUTOMATED=1
;;
--taskscheduler)
HADOOP_TASK_SCHEDULER=$2; shift 2
AUTOMATED=1
;;
--hdfs-user)
HADOOP_HDFS_USER=$2; shift 2
AUTOMATED=1
;;
--mapreduce-user)
HADOOP_MR_USER=$2; shift 2
AUTOMATED=1
;;
--keytab-dir)
KEYTAB_DIR=$2; shift 2
AUTOMATED=1
;;
--hdfs-user-keytab)
HDFS_KEYTAB=$2; shift 2
AUTOMATED=1
;;
--mapreduce-user-keytab)
MR_KEYTAB=$2; shift 2
AUTOMATED=1
;;
# Supplying a realm also switches the whole setup into kerberos mode
# (see the SECURITY_TYPE branch after the defaults below).
--kerberos-realm)
KERBEROS_REALM=$2; shift 2
SECURITY_TYPE="kerberos"
AUTOMATED=1
;;
--kinit-location)
KINIT=$2; shift 2
AUTOMATED=1
;;
# Host lists arrive comma-separated; convert to space-separated so they
# can be iterated with a plain for loop later.
--datanodes)
DATANODES=$2; shift 2
AUTOMATED=1
DATANODES=$(echo $DATANODES | tr ',' ' ')
;;
--tasktrackers)
TASKTRACKERS=$2; shift 2
AUTOMATED=1
TASKTRACKERS=$(echo $TASKTRACKERS | tr ',' ' ')
;;
--dfs-webhdfs-enabled)
DFS_WEBHDFS_ENABLED=$2; shift 2
AUTOMATED=1
;;
--hadoop-proxy-users)
HADOOP_PROXY_USERS=$2; shift 2
AUTOMATED=1
;;
--dfs-support-append)
DFS_SUPPORT_APPEND=$2; shift 2
AUTOMATED=1
;;
--hbase-user)
HBASE_USER=$2; shift 2
AUTOMATED=1
;;
--mapreduce-cluster-mapmemory-mb)
MAPREDUCE_CLUSTER_MAPMEMORY_MB=$2; shift 2
AUTOMATED=1
;;
--mapreduce-cluster-reducememory-mb)
MAPREDUCE_CLUSTER_REDUCEMEMORY_MB=$2; shift 2
AUTOMATED=1
;;
--mapreduce-jobtracker-maxmapmemory-mb)
MAPREDUCE_JOBTRACKER_MAXMAPMEMORY_MB=$2; shift 2
AUTOMATED=1
;;
--mapreduce-jobtracker-maxreducememory-mb)
MAPREDUCE_JOBTRACKER_MAXREDUCEMEMORY_MB=$2; shift 2
AUTOMATED=1
;;
--mapreduce-map-memory-mb)
MAPREDUCE_MAP_MEMORY_MB=$2; shift 2
AUTOMATED=1
;;
--mapreduce-reduce-memory-mb)
MAPREDUCE_REDUCE_MEMORY_MB=$2; shift 2
AUTOMATED=1
;;
--dfs-datanode-dir-perm)
DFS_DATANODE_DIR_PERM=$2; shift 2
AUTOMATED=1
;;
--dfs-block-local-path-access-user)
DFS_BLOCK_LOCAL_PATH_ACCESS_USER=$2; shift 2
AUTOMATED=1
;;
--dfs-client-read-shortcircuit)
DFS_CLIENT_READ_SHORTCIRCUIT=$2; shift 2
AUTOMATED=1
;;
--dfs-client-read-shortcircuit-skip-checksum)
DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=$2; shift 2
AUTOMATED=1
;;
# '--' marks the end of the options emitted by getopt.
--)
shift ; break
;;
# NOTE(review): getopt already rejects unknown options before this loop
# runs, so this arm is effectively unreachable; the 'exit 1' is also
# redundant because usage() itself exits with status 1.
*)
echo "Unknown option: $1"
usage
exit 1
;;
esac
done
| |
#########################################
# Apply defaults for every setting that was not supplied on the command
# line or inherited from the environment.
#########################################
AUTOSETUP=${AUTOSETUP:-1}
JAVA_HOME=${JAVA_HOME:-/usr/java/default}
HADOOP_GROUP=${HADOOP_GROUP:-hadoop}
HADOOP_NN_HOST=${HADOOP_NN_HOST:-$(hostname)}
HADOOP_SNN_HOST=${HADOOP_SNN_HOST:-$(hostname)}
HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
HADOOP_JT_HOST=${HADOOP_JT_HOST:-$(hostname)}
HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-/var/log/hadoop}
# BUG FIX: default now matches the documented one in usage()
# (--pid-dir=/var/run/hadoop); it previously fell back to /var/log/hadoop.
HADOOP_PID_DIR=${HADOOP_PID_DIR:-/var/run/hadoop}
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
# BUG FIX: the fallback read the misspelled HADOOP_RELICATION, which
# unconditionally clobbered any value supplied via --replication.
HADOOP_REPLICATION=${HADOOP_REPLICATION:-3}
HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
HADOOP_HDFS_USER=${HADOOP_HDFS_USER:-hdfs}
HADOOP_MR_USER=${HADOOP_MR_USER:-mr}
DFS_WEBHDFS_ENABLED=${DFS_WEBHDFS_ENABLED:-false}
DFS_SUPPORT_APPEND=${DFS_SUPPORT_APPEND:-false}
HBASE_USER=${HBASE_USER:-hbase}
MAPREDUCE_CLUSTER_MAPMEMORY_MB=${MAPREDUCE_CLUSTER_MAPMEMORY_MB:--1}
MAPREDUCE_CLUSTER_REDUCEMEMORY_MB=${MAPREDUCE_CLUSTER_REDUCEMEMORY_MB:--1}
MAPREDUCE_JOBTRACKER_MAXMAPMEMORY_MB=${MAPREDUCE_JOBTRACKER_MAXMAPMEMORY_MB:--1}
MAPREDUCE_JOBTRACKER_MAXREDUCEMEMORY_MB=${MAPREDUCE_JOBTRACKER_MAXREDUCEMEMORY_MB:--1}
MAPREDUCE_MAP_MEMORY_MB=${MAPREDUCE_MAP_MEMORY_MB:--1}
MAPREDUCE_REDUCE_MEMORY_MB=${MAPREDUCE_REDUCE_MEMORY_MB:--1}
KEYTAB_DIR=${KEYTAB_DIR:-/etc/security/keytabs}
HDFS_KEYTAB=${HDFS_KEYTAB:-/home/hdfs/hdfs.keytab}
MR_KEYTAB=${MR_KEYTAB:-/home/mr/mr.keytab}
KERBEROS_REALM=${KERBEROS_REALM:-KERBEROS.EXAMPLE.COM}
SECURITY_TYPE=${SECURITY_TYPE:-simple}
KINIT=${KINIT:-/usr/kerberos/bin/kinit}
# default the datanode data dir permission to 700
DFS_DATANODE_DIR_PERM=${DFS_DATANODE_DIR_PERM:-700}
# Kerberos deployments use the LinuxTaskController and the privileged
# (<1024) datanode ports; simple security keeps the stock defaults.
# (Duplicate re-defaulting of DFS_WEBHDFS_ENABLED/DFS_SUPPORT_APPEND that
# appeared here was a no-op and has been removed.)
if [ "${SECURITY_TYPE}" = "kerberos" ]; then
  TASK_CONTROLLER="org.apache.hadoop.mapred.LinuxTaskController"
  HADOOP_DN_ADDR="0.0.0.0:1019"
  HADOOP_DN_HTTP_ADDR="0.0.0.0:1022"
  SECURITY="true"
  HADOOP_SECURE_DN_USER=${HADOOP_HDFS_USER}
else
  TASK_CONTROLLER="org.apache.hadoop.mapred.DefaultTaskController"
  HADOOP_DN_ADDR="0.0.0.0:50010"
  HADOOP_DN_HTTP_ADDR="0.0.0.0:50075"
  SECURITY="false"
  HADOOP_SECURE_DN_USER=""
fi
| |
# Unset per-daemon JVM option env vars so stale user environment cannot
# leak into the generated hadoop-env.sh.
unset HADOOP_CLIENT_OPTS HADOOP_NAMENODE_OPTS HADOOP_JOBTRACKER_OPTS HADOOP_TASKTRACKER_OPTS HADOOP_DATANODE_OPTS HADOOP_SECONDARYNAMENODE_OPTS HADOOP_JAVA_PLATFORM_OPTS

# Interactive mode: when no command line option marked the run as
# automated, prompt for each major setting.  An empty answer keeps the
# default shown in parentheses.  read -r keeps backslashes literal.
if [ "${AUTOMATED}" != "1" ]; then
  echo "Setup Hadoop Configuration"
  echo
  echo -n "Where would you like to put config directory? (${HADOOP_CONF_DIR}) "
  read -r USER_HADOOP_CONF_DIR
  echo -n "Where would you like to put log directory? (${HADOOP_LOG_DIR}) "
  read -r USER_HADOOP_LOG_DIR
  echo -n "Where would you like to put pid directory? (${HADOOP_PID_DIR}) "
  read -r USER_HADOOP_PID_DIR
  echo -n "What is the host of the namenode? (${HADOOP_NN_HOST}) "
  read -r USER_HADOOP_NN_HOST
  echo -n "Where would you like to put namenode data directory? (${HADOOP_NN_DIR}) "
  read -r USER_HADOOP_NN_DIR
  echo -n "Where would you like to put datanode data directory? (${HADOOP_DN_DIR}) "
  read -r USER_HADOOP_DN_DIR
  echo -n "What is the host of the jobtracker? (${HADOOP_JT_HOST}) "
  read -r USER_HADOOP_JT_HOST
  echo -n "Where would you like to put jobtracker/tasktracker data directory? (${HADOOP_MAPRED_DIR}) "
  read -r USER_HADOOP_MAPRED_DIR
  echo -n "Where is JAVA_HOME directory? (${JAVA_HOME}) "
  read -r USER_JAVA_HOME
  echo -n "Would you like to create directories/copy conf files to localhost? (Y/n) "
  read -r USER_AUTOSETUP
  echo
  # BUG FIX: the answer is read into USER_JAVA_HOME, but the misspelled
  # (never-set) USER_USER_JAVA_HOME was consulted here, so the user's
  # JAVA_HOME answer was silently ignored.
  JAVA_HOME=${USER_JAVA_HOME:-$JAVA_HOME}
  HADOOP_NN_HOST=${USER_HADOOP_NN_HOST:-$HADOOP_NN_HOST}
  HADOOP_NN_DIR=${USER_HADOOP_NN_DIR:-$HADOOP_NN_DIR}
  HADOOP_DN_DIR=${USER_HADOOP_DN_DIR:-$HADOOP_DN_DIR}
  HADOOP_JT_HOST=${USER_HADOOP_JT_HOST:-$HADOOP_JT_HOST}
  HADOOP_HDFS_DIR=${USER_HADOOP_HDFS_DIR:-$HADOOP_HDFS_DIR}
  HADOOP_MAPRED_DIR=${USER_HADOOP_MAPRED_DIR:-$HADOOP_MAPRED_DIR}
  HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
  HADOOP_LOG_DIR=${USER_HADOOP_LOG_DIR:-$HADOOP_LOG_DIR}
  HADOOP_PID_DIR=${USER_HADOOP_PID_DIR:-$HADOOP_PID_DIR}
  HADOOP_CONF_DIR=${USER_HADOOP_CONF_DIR:-$HADOOP_CONF_DIR}
  AUTOSETUP=${USER_AUTOSETUP:-y}
  echo "Review your choices:"
  echo
  echo "Config directory : ${HADOOP_CONF_DIR}"
  echo "Log directory : ${HADOOP_LOG_DIR}"
  echo "PID directory : ${HADOOP_PID_DIR}"
  echo "Namenode host : ${HADOOP_NN_HOST}"
  echo "Namenode directory : ${HADOOP_NN_DIR}"
  echo "Datanode directory : ${HADOOP_DN_DIR}"
  echo "Jobtracker host : ${HADOOP_JT_HOST}"
  echo "Mapreduce directory : ${HADOOP_MAPRED_DIR}"
  echo "Task scheduler : ${HADOOP_TASK_SCHEDULER}"
  echo "JAVA_HOME directory : ${JAVA_HOME}"
  echo "Create dirs/copy conf files : ${AUTOSETUP}"
  echo
  echo -n "Proceed with generate configuration? (y/N) "
  read -r CONFIRM
  # Anything other than an explicit "y" aborts before touching the system.
  if [ "${CONFIRM}" != "y" ]; then
    echo "User aborted setup, exiting..."
    exit 1
  fi
fi
| |
#########################################
# Final phase.  --auto (or an interactive "y") performs a full local
# setup: create/chown/chmod the data, log and pid directories, render all
# conf files from the bundled templates and write the slaves/include
# files.  Otherwise (else branch) only generate the conf files so the
# operator can distribute them to the cluster nodes manually.
# CAUTION: this section performs system-wide chown/chmod/rm operations
# and is expected to run as root.
#########################################
if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then
# Lock down service keytabs.  The globs cover jt/tt.service.keytab and
# dn/nn/sn.service.keytab respectively.
if [ -d ${KEYTAB_DIR} ]; then
chmod 700 ${KEYTAB_DIR}/*
chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${KEYTAB_DIR}/[jt]t.service.keytab
chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${KEYTAB_DIR}/[dns]n.service.keytab
fi
chmod 755 -R ${HADOOP_PREFIX}/sbin/*hadoop*
chmod 755 -R ${HADOOP_PREFIX}/bin/hadoop

HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-${HADOOP_PREFIX}/libexec}
chmod 755 -R ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh

mkdir -p /home/${HADOOP_MR_USER}
chown ${HADOOP_MR_USER}:${HADOOP_GROUP} /home/${HADOOP_MR_USER}
# The *_DIR settings may be comma-separated lists; expand the commas to
# spaces so mkdir/chown receive them as multiple arguments.
HDFS_DIR=`echo ${HADOOP_HDFS_DIR} | sed -e 's/,/ /g'`
mkdir -p ${HDFS_DIR}
# CAUTION: wipes any pre-existing namenode metadata directory.
if [ -e ${HADOOP_NN_DIR} ]; then
rm -rf ${HADOOP_NN_DIR}
fi
DATANODE_DIR=`echo ${HADOOP_DN_DIR} | sed -e 's/,/ /g'`
mkdir -p ${DATANODE_DIR}
MAPRED_DIR=`echo ${HADOOP_MAPRED_DIR} | sed -e 's/,/ /g'`
mkdir -p ${MAPRED_DIR}
mkdir -p ${HADOOP_CONF_DIR}
# Refuse to continue unless every path component of the conf dir is owned
# by root (see check_permission above).
check_permission ${HADOOP_CONF_DIR}
if [ $? == 1 ]; then
echo "Full path to ${HADOOP_CONF_DIR} should be owned by root."
exit 1
fi

mkdir -p ${HADOOP_LOG_DIR}
#create the log sub dir for diff users
mkdir -p ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
mkdir -p ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}

mkdir -p ${HADOOP_PID_DIR}
chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HDFS_DIR}
chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${DATANODE_DIR}
chmod 700 -R ${DATANODE_DIR}
chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${MAPRED_DIR}
chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}
chmod 775 ${HADOOP_LOG_DIR}
chmod 775 ${HADOOP_PID_DIR}
chown root:${HADOOP_GROUP} ${HADOOP_PID_DIR}

#change the permission and the owner
chmod 755 ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
chmod 755 ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}
chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}

# Render every configuration file from its bundled template, expanding
# ${VAR} references against the variables computed above.
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-policy.xml ${HADOOP_CONF_DIR}/hadoop-policy.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties

#setup up the proxy users
setupProxyUsers

#setup short circuit read
setupShortCircuitRead

#set the owner of the hadoop dir to root
chown root ${HADOOP_PREFIX}
chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh

#set taskcontroller
chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/taskcontroller.cfg
chmod 400 ${HADOOP_CONF_DIR}/taskcontroller.cfg
chown root:${HADOOP_GROUP} ${HADOOP_PREFIX}/bin/task-controller
# 6050 = setuid+setgid bits with group read/execute only, so the
# root-owned task-controller binary can only be run via the hadoop group.
chmod 6050 ${HADOOP_PREFIX}/bin/task-controller


#generate the slaves file and include and exclude files for hdfs and mapred
echo '' > ${HADOOP_CONF_DIR}/slaves
echo '' > ${HADOOP_CONF_DIR}/dfs.include
echo '' > ${HADOOP_CONF_DIR}/dfs.exclude
echo '' > ${HADOOP_CONF_DIR}/mapred.include
echo '' > ${HADOOP_CONF_DIR}/mapred.exclude
for dn in $DATANODES
do
echo $dn >> ${HADOOP_CONF_DIR}/slaves
echo $dn >> ${HADOOP_CONF_DIR}/dfs.include
done
for tt in $TASKTRACKERS
do
echo $tt >> ${HADOOP_CONF_DIR}/mapred.include
done

echo "Configuration setup is completed."
# NOTE: the quoted right-hand side of =~ makes this a literal substring
# match against `hostname`, not a regular-expression match.
if [[ "$HADOOP_NN_HOST" =~ "`hostname`" ]]; then
echo "Proceed to run hadoop-setup-hdfs.sh on namenode."
fi
else
# Generate-only mode: render the conf files locally; the operator copies
# them to ${HADOOP_CONF_DIR} on every node.
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-policy.xml ${HADOOP_CONF_DIR}/hadoop-policy.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties
# NOTE(review): hadoop-metrics2.properties is rendered twice in this
# branch (also a few lines above).  Harmless — the second run backs up the
# first into .bak and regenerates identical content — but redundant;
# candidate for cleanup.
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties

#setup up the proxy users
setupProxyUsers

#setup short circuit read
setupShortCircuitRead

chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh
#set taskcontroller
chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/taskcontroller.cfg
chmod 400 ${HADOOP_CONF_DIR}/taskcontroller.cfg
chown root:${HADOOP_GROUP} ${HADOOP_PREFIX}/bin/task-controller
chmod 6050 ${HADOOP_PREFIX}/bin/task-controller

#generate the slaves file and include and exclude files for hdfs and mapred
echo '' > ${HADOOP_CONF_DIR}/slaves
echo '' > ${HADOOP_CONF_DIR}/dfs.include
echo '' > ${HADOOP_CONF_DIR}/dfs.exclude
echo '' > ${HADOOP_CONF_DIR}/mapred.include
echo '' > ${HADOOP_CONF_DIR}/mapred.exclude
for dn in $DATANODES
do
echo $dn >> ${HADOOP_CONF_DIR}/slaves
echo $dn >> ${HADOOP_CONF_DIR}/dfs.include
done
for tt in $TASKTRACKERS
do
echo $tt >> ${HADOOP_CONF_DIR}/mapred.include
done

echo
echo "Configuration file has been generated in:"
echo
echo "${HADOOP_CONF_DIR}/core-site.xml"
echo "${HADOOP_CONF_DIR}/hdfs-site.xml"
echo "${HADOOP_CONF_DIR}/mapred-site.xml"
echo "${HADOOP_CONF_DIR}/hadoop-env.sh"
echo "${HADOOP_CONF_DIR}/hadoop-policy.xml"
echo "${HADOOP_CONF_DIR}/commons-logging.properties"
echo "${HADOOP_CONF_DIR}/taskcontroller.cfg"
echo "${HADOOP_CONF_DIR}/capacity-scheduler.xml"
echo "${HADOOP_CONF_DIR}/log4j.properties"
echo "${HADOOP_CONF_DIR}/hadoop-metrics2.properties"
echo
echo " to ${HADOOP_CONF_DIR} on all nodes, and proceed to run hadoop-setup-hdfs.sh on namenode."
fi