| #!/bin/bash |
| |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
# Propagate JAVA_HOME and JVM options into hadoop-env.sh so Hadoop daemons
# inherit them. The --add-opens flag is needed on JDK 9+ to let Hadoop use
# java.nio internals. Grouped so the target file is opened only once.
{
  echo "export JAVA_HOME=${JAVA_HOME}"
  echo "export HADOOP_OPTS=\"${HADOOP_OPTS} --add-opens=java.base/java.nio=ALL-UNNAMED \""
} >> "${HADOOP_HOME}/etc/hadoop/hadoop-env.sh"

# Disable SSH host-key checking for intra-container SSH — these are
# throwaway test containers, so TOFU prompts would only block automation.
cat <<EOF > /etc/ssh/ssh_config
Host *
   StrictHostKeyChecking no
   UserKnownHostsFile=/dev/null
EOF
| |
# In Kerberized mode, block until the KDC side-car has generated all keytabs
# this container needs (service principals plus test users).
if [[ "${KERBEROS_ENABLED}" == "true" ]]; then
  "${RANGER_SCRIPTS}/wait_for_keytab.sh" hive.keytab
  "${RANGER_SCRIPTS}/wait_for_keytab.sh" hdfs.keytab
  "${RANGER_SCRIPTS}/wait_for_keytab.sh" HTTP.keytab
  "${RANGER_SCRIPTS}/wait_for_testusers_keytab.sh"
fi
| |
# Install the prepared hive-site.xml for both HiveServer2 roles; Hive reads
# hiveserver2-site.xml as an overlay, so both copies use the same source.
cp "${RANGER_SCRIPTS}/hive-site.xml" "${HIVE_HOME}/conf/hive-site.xml"
cp "${RANGER_SCRIPTS}/hive-site.xml" "${HIVE_HOME}/conf/hiveserver2-site.xml"

# Configure Tez
mkdir -p "${TEZ_HOME}/conf"

# Ensure the Hadoop configuration directory exists before writing *-site.xml files
mkdir -p "${HADOOP_HOME}/etc/hadoop"

cp "${RANGER_SCRIPTS}/core-site.xml" "${HADOOP_HOME}/etc/hadoop/core-site.xml"
| |
| # Create mapred-site.xml for YARN integration |
| cat <<EOF > ${HADOOP_HOME}/etc/hadoop/mapred-site.xml |
| <configuration> |
| <property> |
| <name>mapreduce.framework.name</name> |
| <value>yarn</value> |
| </property> |
| <property> |
| <name>mapreduce.application.classpath</name> |
| <value>\$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:\$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value> |
| </property> |
| <property> |
| <name>yarn.app.mapreduce.am.env</name> |
| <value>HADOOP_MAPRED_HOME=/opt/hadoop</value> |
| </property> |
| <property> |
| <name>mapreduce.map.env</name> |
| <value>HADOOP_MAPRED_HOME=/opt/hadoop</value> |
| </property> |
| <property> |
| <name>mapreduce.reduce.env</name> |
| <value>HADOOP_MAPRED_HOME=/opt/hadoop</value> |
| </property> |
| </configuration> |
| EOF |
| |
| # Create yarn-site.xml for YARN ResourceManager connection |
| cat <<EOF > ${HADOOP_HOME}/etc/hadoop/yarn-site.xml |
| <configuration> |
| <property> |
| <name>yarn.resourcemanager.hostname</name> |
| <value>ranger-hadoop</value> |
| </property> |
| <property> |
| <name>yarn.resourcemanager.address</name> |
| <value>ranger-hadoop:8032</value> |
| </property> |
| <property> |
| <name>yarn.resourcemanager.principal</name> |
| <value>rm/ranger-hadoop.rangernw@EXAMPLE.COM</value> |
| </property> |
| </configuration> |
| EOF |
| |
| # Fix tez-site.xml to use absolute HDFS path (critical for Tez to find libraries) |
| cat <<EOF > ${TEZ_HOME}/conf/tez-site.xml |
| <?xml version="1.0" encoding="UTF-8"?> |
| <configuration> |
| <property> |
| <name>tez.lib.uris</name> |
| <value>hdfs://ranger-hadoop:9000/apps/tez/apache-tez-${TEZ_VERSION}-bin.tar.gz</value> |
| <description>Comma-delimited list of the location of the Tez libraries which will be localized for DAGs.</description> |
| </property> |
| <property> |
| <name>tez.use.cluster.hadoop-libs</name> |
| <value>true</value> |
| <description>Use Hadoop libraries provided by cluster instead of those packaged with Tez</description> |
| </property> |
| <property> |
| <name>tez.am.resource.memory.mb</name> |
| <value>1024</value> |
| <description>The amount of memory to be used by the AppMaster</description> |
| </property> |
| <property> |
| <name>tez.am.java.opts</name> |
| <value>-Xmx768m</value> |
| <description>Java opts for the Tez AppMaster process</description> |
| </property> |
| <property> |
| <name>tez.task.resource.memory.mb</name> |
| <value>1024</value> |
| <description>The amount of memory to be used by tasks</description> |
| </property> |
| <property> |
| <name>tez.task.launch.cmd-opts</name> |
| <value>-Xmx768m</value> |
| <description>Java opts for tasks</description> |
| </property> |
| <property> |
| <name>tez.staging-dir</name> |
| <value>/tmp/hive</value> |
| <description>The staging directory for Tez applications in HDFS.</description> |
| </property> |
| </configuration> |
| EOF |
| |
#######################################
# Recreate the Tez binary tarball under /opt if it is missing (the original
# tarball is removed during the Docker image build to save space).
# Globals:   TEZ_VERSION (read)
# Outputs:   /opt/apache-tez-${TEZ_VERSION}-bin.tar.gz
# Returns:   0 if the tarball exists or was created; tar's status on failure
#######################################
rebuild_tez_tarball() {
  local tarball="/opt/apache-tez-${TEZ_VERSION}-bin.tar.gz"
  if [ ! -f "$tarball" ]; then
    echo "Recreating Tez tarball for HDFS upload..."
    tar -C /opt -czf "$tarball" "apache-tez-${TEZ_VERSION}-bin/" || return
  fi
}
| |
#######################################
# Create the HDFS directories Hive/Tez need and upload the Tez tarball.
# Globals:   HADOOP_HOME, TEZ_VERSION (read)
# Arguments: $1 - OS user to run the hdfs commands as (must be able to
#                 write to HDFS, e.g. 'hdfs')
# Outputs:   HDFS paths /apps/tez, /user/hive, /tmp/hive, /user/root
#######################################
create_hdfs_directories_and_files() {
  local exec_user=$1

  # Tez libraries: world-readable so any YARN app can localize them.
  su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /apps/tez" "$exec_user"
  su -c "${HADOOP_HOME}/bin/hdfs dfs -put -f /opt/apache-tez-${TEZ_VERSION}-bin.tar.gz /apps/tez/" "$exec_user"
  su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod -R 755 /apps/tez" "$exec_user"

  # Hive warehouse user directory: group-writable only.
  su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /user/hive" "$exec_user"
  su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod -R 770 /user/hive" "$exec_user"

  # Tez staging directory (matches tez.staging-dir in tez-site.xml).
  su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /tmp/hive" "$exec_user"
  su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod -R 770 /tmp/hive" "$exec_user"

  # Home directory for root so YARN jobs submitted as root can stage files.
  su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /user/root" "$exec_user"
  su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod 770 /user/root" "$exec_user"
}
| |
| # Copy Tez JARs to Hive lib directory |
| cp ${TEZ_HOME}/lib/tez-*.jar ${HIVE_HOME}/lib/ |
| cp ${TEZ_HOME}/tez-*.jar ${HIVE_HOME}/lib/ |
| |
| # Copy all Hadoop configurations to Hive conf directory so Hive can find them |
| cp ${HADOOP_HOME}/etc/hadoop/core-site.xml ${HIVE_HOME}/conf/ |
| cp ${HADOOP_HOME}/etc/hadoop/mapred-site.xml ${HIVE_HOME}/conf/ |
| cp ${HADOOP_HOME}/etc/hadoop/yarn-site.xml ${HIVE_HOME}/conf/ |
| cp ${TEZ_HOME}/conf/tez-site.xml ${HIVE_HOME}/conf/ |
| |
| # Upload Tez libraries to HDFS |
| if [ "${KERBEROS_ENABLED}" == "true" ]; then |
| echo "Kerberos enabled - authenticating as hdfs user..." |
| su -c "kinit -kt /etc/keytabs/hdfs.keytab hdfs/\`hostname -f\`@EXAMPLE.COM" hdfs |
| rc=$? |
| if [ $rc -ne 0 ]; then |
| echo "ERROR: kinit failed for hdfs principal (exit code=$rc)" >&2 |
| exit $rc |
| fi |
| |
| echo "kinit successful, proceeding operations as hive user" |
| |
| # Recreate Tez tarball if it doesn't exist |
| rebuild_tez_tarball |
| |
| # Create hdfs directories and files for hive and tez |
| create_hdfs_directories_and_files 'hdfs' |
| |
| su -c "kdestroy" hdfs |
| else |
| # Non-Kerberos mode - use hdfs user |
| su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /apps/tez" hdfs |
| |
| # Recreate Tez tarball if it doesn't exist (it gets removed during Docker build) |
| rebuild_tez_tarball |
| |
| # Create hdfs directories and files for hive and tez |
| create_hdfs_directories_and_files 'hdfs' |
| fi |
| |
| # Initialize Hive schema |
| su -c "${HIVE_HOME}/bin/schematool -dbType ${RANGER_DB_TYPE} -initSchema" hive |
| |
| mkdir -p /opt/hive/logs |
| chown -R hive:hadoop /opt/hive/ |
| chmod g+w /opt/hive/logs |
| |
| cd ${RANGER_HOME}/ranger-hive-plugin |
| ./enable-hive-plugin.sh |