| <?xml version="1.0"?> |
| <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
| <!-- |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| --> |
| |
| <configuration> |
| <!-- Base directory for Hadoop logs. The hadoop-env.sh template in this file appends /$USER to it for HADOOP_LOG_DIR and for the JVM GC and error log paths, and /$HADOOP_SECURE_DN_USER for HADOOP_SECURE_DN_LOG_DIR. --> |
| <property> |
| <name>hdfs_log_dir_prefix</name> |
| <value>/var/log/hadoop</value> |
| <description>Hadoop Log Dir Prefix</description> |
| </property> |
| <!-- Base directory for PID files. The hadoop-env.sh template in this file appends /$USER for HADOOP_PID_DIR and /$HADOOP_SECURE_DN_USER for HADOOP_SECURE_DN_PID_DIR. --> |
| <property> |
| <name>hadoop_pid_dir_prefix</name> |
| <value>/var/run/hadoop</value> |
| <description>Hadoop PID Dir Prefix</description> |
| </property> |
| <!-- Root logger setting. NOTE(review): this value is not referenced by the hadoop-env.sh template in this file; presumably it is consumed elsewhere (confirm before changing or removing it). --> |
| <property> |
| <name>hadoop_root_logger</name> |
| <value>INFO,RFA</value> |
| <description>Hadoop Root Logger</description> |
| </property> |
| <!-- Generic Hadoop heap size. The hadoop-env.sh template in this file exports it as HADOOP_HEAPSIZE (commented there as being in MB) and appends an 'm' suffix itself when building -Xmx for the balancer and client options, so the value here must be a bare number. --> |
| <property> |
| <name>hadoop_heapsize</name> |
| <value>1024</value> |
| <description>Hadoop maximum Java heap size</description> |
| </property> |
| <!-- NameNode heap size. The hadoop-env.sh template in this file substitutes it directly after -Xms and -Xmx in the NameNode and SecondaryNameNode options. NOTE(review): the template adds no unit suffix for this value and the default below is a bare number; confirm that a unit (e.g. 'm') is attached before the template is rendered. --> |
| <property> |
| <name>namenode_heapsize</name> |
| <value>1024</value> |
| <description>NameNode Java heap size</description> |
| </property> |
| <!-- Substituted after -XX:NewSize= in the NameNode and SecondaryNameNode options of the hadoop-env.sh template in this file. NOTE(review): the template adds no unit suffix; confirm that a unit is attached before rendering. --> |
| <property> |
| <name>namenode_opt_newsize</name> |
| <value>200</value> |
| <description>NameNode new generation size</description> |
| </property> |
| <!-- Substituted after -XX:MaxNewSize= in the NameNode and SecondaryNameNode options of the hadoop-env.sh template in this file. NOTE(review): the template adds no unit suffix; confirm that a unit is attached before rendering. --> |
| <property> |
| <name>namenode_opt_maxnewsize</name> |
| <value>200</value> |
| <description>NameNode maximum new generation size</description> |
| </property> |
| <!-- Substituted after -XX:PermSize= in the NameNode and SecondaryNameNode options of the hadoop-env.sh template in this file. NOTE(review): the template adds no unit suffix; confirm that a unit is attached before rendering. --> |
| <property> |
| <name>namenode_opt_permsize</name> |
| <value>128</value> |
| <description>NameNode permanent generation size</description> |
| </property> |
| <!-- Substituted after -XX:MaxPermSize= in the NameNode and SecondaryNameNode options of the hadoop-env.sh template in this file. NOTE(review): the template adds no unit suffix; confirm that a unit is attached before rendering. --> |
| <property> |
| <name>namenode_opt_maxpermsize</name> |
| <value>256</value> |
| <description>NameNode maximum permanent generation size</description> |
| </property> |
| <!-- DataNode heap size. The hadoop-env.sh template in this file substitutes it directly after -Xmx in HADOOP_DATANODE_OPTS. NOTE(review): the template adds no unit suffix for this value and the default below is a bare number; confirm that a unit (e.g. 'm') is attached before rendering. --> |
| <property> |
| <name>dtnode_heapsize</name> |
| <value>1024</value> |
| <description>DataNode maximum Java heap size</description> |
| </property> |
| <!-- Group name, tagged with property-type GROUP. NOTE(review): not referenced by the hadoop-env.sh template in this file; presumably consumed elsewhere (confirm). --> |
| <property> |
| <name>proxyuser_group</name> |
| <value>users</value> |
| <property-type>GROUP</property-type> |
| <description>Proxy user group.</description> |
| </property> |
| <!-- Account name for HDFS, tagged with property-type USER. NOTE(review): not referenced by the hadoop-env.sh template in this file; presumably consumed elsewhere (confirm). --> |
| <property> |
| <name>hdfs_user</name> |
| <value>hdfs</value> |
| <property-type>USER</property-type> |
| <description>User to run HDFS as</description> |
| </property> |
| |
| <!-- hadoop-env.sh --> |
| <!-- Jinja template that is rendered into hadoop-env.sh. Double-brace placeholders are filled in at render time. The template body is XML character data, so the shell code in it must not contain a literal ampersand or less-than sign unless it is escaped. --> |
| <property> |
| <name>content</name> |
| <description>This is the jinja template for hadoop-env.sh file</description> |
| <value> |
| # Set Hadoop-specific environment variables here. |
| |
| # The only required environment variable is JAVA_HOME. All others are |
| # optional. When running a distributed configuration it is best to |
| # set JAVA_HOME in this file, so that it is correctly defined on |
| # remote nodes. |
| |
| # The java implementation to use. Required. |
| export JAVA_HOME={{java_home}} |
| export HADOOP_HOME_WARN_SUPPRESS=1 |
| |
| # Hadoop home directory |
| export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop} |
| |
| # Hadoop Configuration Directory |
| #TODO: if env var set that can cause problems |
| export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-{{hadoop_conf_dir}}} |
| |
| {# this is different for HDP1 #} |
| # Path to jsvc required by secure HDP 2.0 datanode |
| export JSVC_HOME={{jsvc_path}} |
| |
| |
| # The maximum amount of heap to use, in MB. Default is 1000. |
| export HADOOP_HEAPSIZE="{{hadoop_heapsize}}" |
| |
| export HADOOP_NAMENODE_INIT_HEAPSIZE="-Xms{{namenode_heapsize}}" |
| |
| # Extra Java runtime options. Empty by default. |
| export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true ${HADOOP_OPTS}" |
| |
| # Command specific options appended to HADOOP_OPTS when specified |
| export HADOOP_NAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{namenode_opt_newsize}} -XX:MaxNewSize={{namenode_opt_maxnewsize}} -XX:PermSize={{namenode_opt_permsize}} -XX:MaxPermSize={{namenode_opt_maxpermsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{namenode_heapsize}} -Xmx{{namenode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_NAMENODE_OPTS}" |
| HADOOP_JOBTRACKER_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{jtnode_opt_newsize}} -XX:MaxNewSize={{jtnode_opt_maxnewsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xmx{{jtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dmapred.audit.logger=INFO,MRAUDIT -Dhadoop.mapreduce.jobsummary.logger=INFO,JSA ${HADOOP_JOBTRACKER_OPTS}" |
| |
| HADOOP_TASKTRACKER_OPTS="-server -Xmx{{ttnode_heapsize}} -Dhadoop.security.logger=ERROR,console -Dmapred.audit.logger=ERROR,console ${HADOOP_TASKTRACKER_OPTS}" |
| HADOOP_DATANODE_OPTS="-Xmx{{dtnode_heapsize}} -Dhadoop.security.logger=ERROR,DRFAS ${HADOOP_DATANODE_OPTS}" |
| HADOOP_BALANCER_OPTS="-server -Xmx{{hadoop_heapsize}}m ${HADOOP_BALANCER_OPTS}" |
| |
| export HADOOP_SECONDARYNAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{namenode_opt_newsize}} -XX:MaxNewSize={{namenode_opt_maxnewsize}} -XX:PermSize={{namenode_opt_permsize}} -XX:MaxPermSize={{namenode_opt_maxpermsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps ${HADOOP_NAMENODE_INIT_HEAPSIZE} -Xmx{{namenode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_SECONDARYNAMENODE_OPTS}" |
| |
| # The following applies to multiple commands (fs, dfs, fsck, distcp etc) |
| export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m $HADOOP_CLIENT_OPTS" |
| # On secure datanodes, user to run the datanode as after dropping privileges |
| export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER:-{{hadoop_secure_dn_user}}} |
| |
| # Extra ssh options. Empty by default. |
| export HADOOP_SSH_OPTS="-o ConnectTimeout=5 -o SendEnv=HADOOP_CONF_DIR" |
| |
| # Where log files are stored. $HADOOP_HOME/logs by default. |
| export HADOOP_LOG_DIR={{hdfs_log_dir_prefix}}/$USER |
| |
| # History server logs |
| export HADOOP_MAPRED_LOG_DIR={{mapred_log_dir_prefix}}/$USER |
| |
| # Where log files are stored in the secure data environment. |
| export HADOOP_SECURE_DN_LOG_DIR={{hdfs_log_dir_prefix}}/$HADOOP_SECURE_DN_USER |
| |
| # File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default. |
| # export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves |
| |
| # host:path where hadoop code should be rsync'd from. Unset by default. |
| # export HADOOP_MASTER=master:/home/$USER/src/hadoop |
| |
| # Seconds to sleep between slave commands. Unset by default. This |
| # can be useful in large clusters, where, e.g., slave rsyncs can |
| # otherwise arrive faster than the master can service them. |
| # export HADOOP_SLAVE_SLEEP=0.1 |
| |
| # The directory where pid files are stored. /tmp by default. |
| export HADOOP_PID_DIR={{hadoop_pid_dir_prefix}}/$USER |
| export HADOOP_SECURE_DN_PID_DIR={{hadoop_pid_dir_prefix}}/$HADOOP_SECURE_DN_USER |
| |
| # History server pid |
| export HADOOP_MAPRED_PID_DIR={{mapred_pid_dir_prefix}}/$USER |
| |
| YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY" |
| |
| # A string representing this instance of hadoop. $USER by default. |
| export HADOOP_IDENT_STRING=$USER |
| |
| # The scheduling priority for daemon processes. See 'man nice'. |
| |
| # export HADOOP_NICENESS=10 |
| |
| # Use libraries from standard classpath |
| JAVA_JDBC_LIBS="" |
| # Add libraries required by the mysql connector, then by the oracle connector. |
| # Shell globs are used instead of parsing ls output: no extra process is |
| # started and names containing whitespace stay intact. A pattern that matches |
| # nothing is left as a literal string and is skipped by the existence check. |
| for jarFile in /usr/share/java/*mysql* /usr/share/java/*ojdbc* |
| do |
| [ -e "$jarFile" ] || continue |
| JAVA_JDBC_LIBS="${JAVA_JDBC_LIBS}:${jarFile}" |
| done |
| #Add libraries required by nodemanager |
| MAPREDUCE_LIBS={{mapreduce_libs_path}} |
| HADOOP_CLASSPATH="${HADOOP_CLASSPATH}${JAVA_JDBC_LIBS}:${MAPREDUCE_LIBS}" |
| # Drop the leading ':' that results when HADOOP_CLASSPATH started out empty, |
| # so the classpath does not begin with an empty entry. |
| export HADOOP_CLASSPATH="${HADOOP_CLASSPATH#:}" |
| |
| if [ -d "/usr/lib/tez" ]; then |
| export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/usr/lib/tez/*:/usr/lib/tez/lib/*:/etc/tez/conf |
| fi |
| |
| # Setting path to hdfs command line |
| export HADOOP_LIBEXEC_DIR={{hadoop_libexec_dir}} |
| |
| #Mostly required for hadoop 2.0 |
| # Prepend the existing JAVA_LIBRARY_PATH and its ':' separator only when it is |
| # non-empty, so the result never starts with an empty entry. |
| export JAVA_LIBRARY_PATH="${JAVA_LIBRARY_PATH:+${JAVA_LIBRARY_PATH}:}/usr/lib/hadoop/lib/native/Linux-amd64-64" |
| </value> |
| </property> |
| |
| </configuration> |