#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Create the test environment needed by Impala. Includes generation of the
# Hadoop config files: core-site.xml, hbase-site.xml, hive-site.xml as well
# as creation of the Hive metastore.
# Fail fast: exit on errors (-e), on unset variables (-u), and if any stage
# of a pipeline fails (pipefail).
set -euo pipefail

# Quoted so a checkout path containing spaces does not break the 'source'.
. "$IMPALA_HOME/bin/report_build_error.sh"
setup_report_build_error
# Perform search-replace on $1, output to $2.
# Search $1 ($GCIN) for strings that look like "${FOO}". If FOO is defined in
# the environment then replace "${FOO}" with the environment value. Also
# remove or leave special kerberos settings as desired. Sanity check at end.
#
# NOTE: for Hadoop-style XML configuration files (foo-site.xml) prefer using
# bin/generate_xml_config.py instead of this method. This method is useful for
# ini-style or other configuration formats.
#
# TODO(todd): convert remaining 'foo-site.xml' files to use the preferred
# mechanism.
#
# TODO(todd): consider a better Python-based templating system for the other
# configuration files as well.
function generate_config {
GCIN="$1"
GCOUT="$2"
perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
"${GCIN}" > "${GCOUT}.tmp"
if [[ "${IMPALA_KERBERIZE}" != "true" ]]; then
sed '/<!-- BEGIN Kerberos/,/END Kerberos settings -->/d' \
"${GCOUT}.tmp" > "${GCOUT}"
else
cp "${GCOUT}.tmp" "${GCOUT}"
fi
rm -f "${GCOUT}.tmp"
# Check for anything that might have been missed.
# Assumes that environment variables will be ALL CAPS...
if grep '\${[A-Z_]*}' "${GCOUT}"; then
echo "Found undefined variables in ${GCOUT}, aborting"
exit 1
fi
echo "Generated `pwd`/${GCOUT}"
}
CREATE_METASTORE=0
CREATE_RANGER_POLICY_DB=0
UPGRADE_METASTORE_DB=0

# Parse command line options. "$@" keeps each argument as one word; the
# original $* would re-split and glob-expand arguments.
for ARG in "$@"
do
  case "$ARG" in
    -create_metastore)
      CREATE_METASTORE=1
      ;;
    -create_ranger_policy_db)
      CREATE_RANGER_POLICY_DB=1
      ;;
    -upgrade_metastore_db)
      UPGRADE_METASTORE_DB=1
      ;;
    -help|*)
      # Any unrecognized argument (including -help) prints usage and aborts.
      echo "[-create_metastore] : If true, creates a new metastore."
      echo "[-create_ranger_policy_db] : If true, creates a new Ranger policy db."
      echo "[-upgrade_metastore_db] : If true, upgrades the schema of HMS db."
      exit 1
      ;;
  esac
done
# If this isn't sourced, bad things will always happen.
# Default to empty when unset so the user sees this friendly error rather
# than an "unbound variable" failure from 'set -u'.
if [ "${IMPALA_CONFIG_SOURCED:-}" != "1" ]; then
  echo "You must source bin/impala-config.sh"
  exit 1
fi
# Create (or re-create) the local test mini-cluster before generating the
# configs that reference it.
${CLUSTER_DIR}/admin create_cluster

if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
  # Sanity check: the admin script must report the cluster as kerberized,
  # otherwise the kerberos-specific configuration below would be wrong.
  if ! ${CLUSTER_DIR}/admin is_kerberized; then
    echo "Kerberized cluster not created, even though told to."
    exit 1
  fi

  # Set some more environment variables by sourcing the mini-KDC env file.
  # NOTE(review): MINIKDC_ENV is presumably exported by impala-config.sh /
  # the minikdc setup — confirm it is always set when IMPALA_KERBERIZE=true.
  . ${MINIKDC_ENV}

  # For hive-site.xml further down: select HiveServer2 authentication mode.
  export HIVE_S2_AUTH=KERBEROS
else
  export HIVE_S2_AUTH=NONE
fi
# Resolve the invoking user; templates reference ${CURRENT_USER}.
export CURRENT_USER=$(whoami)

CONFIG_DIR=${IMPALA_HOME}/fe/src/test/resources
RANGER_TEST_CONF_DIR="${IMPALA_HOME}/testdata/cluster/ranger"

echo "Config dir: ${CONFIG_DIR}"
echo "Current user: ${CURRENT_USER}"
echo "Metastore DB: ${METASTORE_DB}"
echo "Ranger DB : ${RANGER_POLICY_DB}"

# Generate a hive-site.xml for HIVE_VARIANT $1, writing it to $2 under
# ${CONFIG_DIR} and symlinking it as $3/hive-site.xml so each variant gets
# its own one-file conf directory.
function generate_hive_variant {
  local variant="$1"
  local out_file="$2"
  local conf_dir="$3"
  export HIVE_VARIANT="${variant}"
  "$IMPALA_HOME/bin/generate_xml_config.py" hive-site.xml.py "${out_file}"
  mkdir -p "${conf_dir}"
  rm -f "${conf_dir}/hive-site.xml"
  ln -s "${CONFIG_DIR}/${out_file}" "${conf_dir}/hive-site.xml"
}

pushd "${CONFIG_DIR}"

# Cleanup any existing files.
rm -f {core,hdfs,hbase,hive,ozone,yarn,mapred}-site.xml
rm -f authz-provider.ini

# Generate hive configs first so that schemaTool can be used to init the
# metastore schema if needed. The default (no HIVE_VARIANT) config first:
"$IMPALA_HOME/bin/generate_xml_config.py" hive-site.xml.py hive-site.xml

# Variant configs, each in its own conf directory.
generate_hive_variant changed_external_dir hive-site_ext.xml hive-site-ext
generate_hive_variant without_hms_config hive-site_without_hms.xml \
    hive-site-without-hms
generate_hive_variant events_cleanup hive-site_events_cleanup.xml \
    hive-site-events-cleanup
generate_hive_variant housekeeping_on hive-site_housekeeping_on.xml \
    hive-site-housekeeping-on

HIVE_RANGER_CONF_DIR=hive-site-ranger-auth
# Remove the whole directory first so extra links from previous runs
# (security/audit/log4j files added below) do not go stale.
rm -rf "$HIVE_RANGER_CONF_DIR"
generate_hive_variant ranger_auth hive-site_ranger_auth.xml \
    "$HIVE_RANGER_CONF_DIR"

# Cleanup pycache if created by generate_xml_config.py.
rm -rf __pycache__

# Link some necessary config files for Hive. Note hive-log4j2.properties is
# generated just below; the symlink dangles only momentarily.
for f in ranger-hive-security.xml ranger-hive-audit.xml log4j.properties \
    hive-log4j2.properties; do
  ln -s "${CONFIG_DIR}/$f" "$HIVE_RANGER_CONF_DIR/$f"
done

generate_config hive-log4j2.properties.template hive-log4j2.properties
if [ $CREATE_METASTORE -eq 1 ]; then
  echo "Creating postgresql database for Hive metastore"
  # dropdb fails if the DB does not exist yet; that is fine on a fresh setup.
  dropdb -U hiveuser ${METASTORE_DB} || true
  createdb -U hiveuser ${METASTORE_DB}

  # Use schematool to initialize the metastore db schema. It detects the Hive
  # version and invokes the appropriate scripts.
  # BUGFIX: was 'CLASSPATH={$CLASSPATH}:...' — the literal braces corrupted
  # the first classpath entry; '${CLASSPATH}' propagates the inherited
  # classpath correctly.
  CLASSPATH=${CLASSPATH}:${CONFIG_DIR} ${HIVE_HOME}/bin/schematool -initSchema -dbType \
      postgres 1>${IMPALA_CLUSTER_LOGS_DIR}/schematool.log 2>&1

  # TODO: We probably don't need to do this anymore
  # Increase the size limit of PARAM_VALUE from SERDE_PARAMS table to be able
  # to create HBase tables with large number of columns.
  echo "alter table \"SERDE_PARAMS\" alter column \"PARAM_VALUE\" type character varying" \
    | psql -q -U hiveuser -d ${METASTORE_DB}
fi

if [ $UPGRADE_METASTORE_DB -eq 1 ]; then
  echo "Upgrading the schema of metastore db ${METASTORE_DB}. Check \
${IMPALA_CLUSTER_LOGS_DIR}/schematool.log for details."
  # Same CLASSPATH brace fix as above.
  CLASSPATH=${CLASSPATH}:${CONFIG_DIR} ${HIVE_HOME}/bin/schematool -upgradeSchema \
      -dbType postgres 1>${IMPALA_CLUSTER_LOGS_DIR}/schematool.log 2>&1
fi

if [ $CREATE_RANGER_POLICY_DB -eq 1 ]; then
  echo "Creating Ranger Policy Server DB"
  dropdb -U hiveuser "${RANGER_POLICY_DB}" 2> /dev/null || true
  createdb -U hiveuser "${RANGER_POLICY_DB}"
  pushd "${RANGER_HOME}"
  generate_config "${RANGER_TEST_CONF_DIR}/install.properties.template" install.properties
  python ./db_setup.py
  popd
fi
echo "Copying common conf files from local cluster:"
CLUSTER_HADOOP_CONF_DIR=$(${CLUSTER_DIR}/admin get_hadoop_client_conf_dir)
for file in core-site.xml hdfs-site.xml ozone-site.xml yarn-site.xml ; do
  echo "... $file"
  # These need to be copied instead of symlinked so that they can be accessed
  # when the directory is bind-mounted into /opt/impala/conf in docker
  # containers. Paths quoted against word-splitting.
  cp "${CLUSTER_HADOOP_CONF_DIR}/$file" .
done

if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
  # KERBEROS TODO: Without this, the yarn daemons can see these
  # files, but mapreduce jobs *cannot* see these files. This seems
  # strange, but making these symlinks also results in data loading
  # failures in the non-kerberized case. Without these, mapreduce
  # jobs die in a kerberized cluster because they can't find their
  # kerberos principals. Obviously this has to be sorted out before
  # a kerberized cluster can load data.
  echo "Linking yarn and mapred from local cluster"
  ln -s "${CLUSTER_HADOOP_CONF_DIR}/mapred-site.xml"
fi

generate_config log4j.properties.template log4j.properties
generate_config hbase-site.xml.template hbase-site.xml

if [[ "${IMPALA_KERBERIZE}" = "true" ]]; then
  generate_config hbase-jaas-server.conf.template hbase-jaas-server.conf
  generate_config hbase-jaas-client.conf.template hbase-jaas-client.conf
fi
# Back to the directory we were in before entering ${CONFIG_DIR}.
popd

# Layout of the Ranger admin server installation configured below.
RANGER_SERVER_CONF_DIR="${RANGER_HOME}/ews/webapp/WEB-INF/classes/conf"
RANGER_SERVER_CONFDIST_DIR="${RANGER_HOME}/ews/webapp/WEB-INF/classes/conf.dist"
RANGER_SERVER_LIB_DIR="${RANGER_HOME}/ews/webapp/WEB-INF/lib"
RANGER_ADMIN_LOGBACK_CONF_FILE="${RANGER_SERVER_CONFDIST_DIR}/logback.xml"
RANGER_ADMIN_LOG4J2_CONF_FILE="${RANGER_HOME}/ews/webapp/WEB-INF/log4j2.properties"
RANGER_LOG_DIR="${IMPALA_CLUSTER_LOGS_DIR}/ranger"

if [[ ! -d "${RANGER_SERVER_CONF_DIR}" ]]; then
  mkdir -p "${RANGER_SERVER_CONF_DIR}"
fi

# Install the static (non-templated) Ranger admin config files.
cp -f "${RANGER_TEST_CONF_DIR}/java_home.sh" "${RANGER_SERVER_CONF_DIR}"
cp -f "${RANGER_TEST_CONF_DIR}/ranger-admin-env-logdir.sh" "${RANGER_SERVER_CONF_DIR}"
cp -f "${RANGER_TEST_CONF_DIR}/ranger-admin-env-piddir.sh" "${RANGER_SERVER_CONF_DIR}"
cp -f "${RANGER_SERVER_CONFDIST_DIR}/security-applicationContext.xml" \
  "${RANGER_SERVER_CONF_DIR}"

# For Apache Ranger, we need logback.xml under ${RANGER_SERVER_CONF_DIR} so that the log
# files like ranger-admin-$(hostname)-$(whoami).log could be created under
# ${RANGER_LOG_DIR}.
if [[ -f ${RANGER_ADMIN_LOGBACK_CONF_FILE} ]]; then
  cp -f ${RANGER_ADMIN_LOGBACK_CONF_FILE} ${RANGER_SERVER_CONF_DIR}
fi

# For CDP Ranger, we change the value of the property 'log.dir' in the corresponding
# log4j2.properties so that the log files like ranger-admin-server.log could be created
# under ${RANGER_LOG_DIR}.
if [[ -f ${RANGER_ADMIN_LOG4J2_CONF_FILE} ]]; then
  # Use vertical bar instead of slash as the separator to prevent the slash(es) in
  # ${RANGER_LOG_DIR} from interfering with the parsing of sed.
  sed -i "s|property\.log\.dir=.*|property.log.dir=${RANGER_LOG_DIR}|g" \
    ${RANGER_ADMIN_LOG4J2_CONF_FILE}
fi

# Prepend the following 5 URL's to the line starting with "<intercept-url pattern="/**"".
# Before the end-to-end tests could be performed in a Kerberized environment
# automatically, we need to allow the requests for the following links so that the
# statements like CREATE/DROP ROLE <role_name>,
# GRANT/REVOKE ROLE <role_name> TO/FROM GROUP <group_name>, and SHOW ROLES could work in a
# non-Kerberized environment. It is better to add the allowed links using sed than to use
# a hardcoded configuration file consisting of those links since some other configurations
# could change after CDP_BUILD_NUMBER is bumped up, e.g., the version of jquery.
# NOTE: the inserted lines below are literal text of the sed 'i' command and
# must stay flush-left so no stray whitespace ends up in the XML.
sed -i '/<intercept-url pattern="\/\*\*"/i \
<intercept-url pattern="/service/public/v2/api/roles/*" access="permitAll"/> \
<intercept-url pattern="/service/public/v2/api/roles/name/*" access="permitAll"/> \
<intercept-url pattern="/service/public/v2/api/roles/grant/*" access="permitAll"/> \
<intercept-url pattern="/service/public/v2/api/roles/revoke/*" access="permitAll"/> \
<intercept-url pattern="/service/public/v2/api/roles/names/*" access="permitAll"/>' \
"${RANGER_SERVER_CONF_DIR}/security-applicationContext.xml"

# Ranger needs the Postgres JDBC driver on its classpath; fall back to Maven
# Central when the FE build has not provided one.
if [[ -f "${POSTGRES_JDBC_DRIVER}" ]]; then
  cp -f "${POSTGRES_JDBC_DRIVER}" "${RANGER_SERVER_LIB_DIR}"
else
  # IMPALA-8261: Running this script should not fail when FE has not been built.
  MAVEN_URL="https://repo.maven.apache.org/maven2/org/postgresql/postgresql"
  JDBC_JAR="postgresql-${IMPALA_POSTGRES_JDBC_DRIVER_VERSION}.jar"
  wget -P "${RANGER_SERVER_LIB_DIR}" \
    "${MAVEN_URL}/${IMPALA_POSTGRES_JDBC_DRIVER_VERSION}/${JDBC_JAR}"
fi

# Generate the templated Ranger admin configs in place.
pushd "${RANGER_SERVER_CONF_DIR}"
generate_config "${RANGER_TEST_CONF_DIR}/ranger-admin-default-site.xml.template" \
  ranger-admin-default-site.xml
generate_config "${RANGER_TEST_CONF_DIR}/ranger-admin-site.xml.template" \
  ranger-admin-site.xml
popd

echo "Completed config generation"
# Creates a symlink in TARGET_DIR ($2) to every immediate subdirectory of
# SOURCE_DIR ($1). Prints a notice (and succeeds) when SOURCE_DIR is missing.
function symlink_subdirs {
  # 'local' keeps these names out of the script's global scope; quoting
  # protects paths containing spaces.
  local SOURCE_DIR=$1
  local TARGET_DIR=$2
  if [ -d "${SOURCE_DIR}" ]; then
    # -f replaces any stale links left over from a previous run.
    find "${SOURCE_DIR}/" -maxdepth 1 -mindepth 1 -type d \
        -exec ln -f -s {} "${TARGET_DIR}" \;
  else
    echo "No auxiliary tests found at: ${SOURCE_DIR}"
  fi
}
# The Impala test framework supports running additional tests outside of the
# main repo. This is an optional feature that can be enabled by setting the
# IMPALA_AUX_* environment variables to valid locations. They default to
# empty here so an unset variable yields the "No auxiliary tests" notice
# rather than an 'set -u' unbound-variable failure.
echo "Searching for auxiliary tests, workloads, and datasets (if any exist)."
symlink_subdirs "${IMPALA_AUX_WORKLOAD_DIR:-}" "${IMPALA_WORKLOAD_DIR}"
symlink_subdirs "${IMPALA_AUX_DATASET_DIR:-}" "${IMPALA_DATASET_DIR}"
if [ -d "${IMPALA_AUX_TEST_HOME:-}/tests/functional" ]; then
  symlink_subdirs "${IMPALA_AUX_TEST_HOME}/tests/functional" "${IMPALA_HOME}/tests"
else
  # For compatibility with older auxiliary tests, which aren't in the
  # functional subdirectory.
  symlink_subdirs "${IMPALA_AUX_TEST_HOME:-}/tests" "${IMPALA_HOME}/tests"
fi