blob: 6e799fb66bf526c2e15599d063dfee6004a1b815 [file] [log] [blame]
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Create the test environment needed by Impala. Includes generation of the
# Hadoop config files: core-site.xml, hbase-site.xml, hive-site.xml as well
# as creation of the Hive metastore.
set -euo pipefail
. $IMPALA_HOME/bin/report_build_error.sh
setup_report_build_error
CREATE_METASTORE=0
CREATE_SENTRY_POLICY_DB=0
: ${IMPALA_KERBERIZE=}
# parse command line options
for ARG in $*
do
case "$ARG" in
-create_metastore)
CREATE_METASTORE=1
;;
-create_sentry_policy_db)
CREATE_SENTRY_POLICY_DB=1
;;
-k|-kerberize|-kerberos|-kerb)
# This could also come in through the environment...
export IMPALA_KERBERIZE=1
;;
-help|*)
echo "[-create_metastore] : If true, creates a new metastore."
echo "[-create_sentry_policy_db] : If true, creates a new sentry policy db."
echo "[-kerberize] : Enable kerberos on the cluster"
exit 1
;;
esac
done
# If this isn't sourced, bad things will always happen
if [ "${IMPALA_CONFIG_SOURCED}" != "1" ]; then
echo "You must source bin/impala-config.sh"
exit 1
fi
${CLUSTER_DIR}/admin create_cluster
if [ ! -z "${IMPALA_KERBERIZE}" ]; then
# Sanity check...
if ! ${CLUSTER_DIR}/admin is_kerberized; then
echo "Kerberized cluster not created, even though told to."
exit 1
fi
# Set some more environment variables.
. ${MINIKDC_ENV}
# For hive-site.xml further down...
export HIVE_S2_AUTH=KERBEROS
else
export HIVE_S2_AUTH=NONE
fi
export CURRENT_USER=`whoami`
CONFIG_DIR=${IMPALA_HOME}/fe/src/test/resources
echo "Config dir: ${CONFIG_DIR}"
echo "Current user: ${CURRENT_USER}"
echo "Metastore DB: ${METASTORE_DB}"
pushd ${CONFIG_DIR}
# Cleanup any existing files
rm -f {core,hdfs,hbase,hive,yarn,mapred}-site.xml
rm -f authz-provider.ini
if [ $CREATE_METASTORE -eq 1 ]; then
echo "Creating postgresql database for Hive metastore"
dropdb -U hiveuser ${METASTORE_DB} || true
createdb -U hiveuser ${METASTORE_DB}
# Hive schema SQL scripts include other scripts using \i, which expects absolute paths.
# Switch to the scripts directory to make this work.
pushd ${HIVE_HOME}/scripts/metastore/upgrade/postgres
psql -q -U hiveuser -d ${METASTORE_DB} -f hive-schema-2.1.1.postgres.sql
popd
# Increase the size limit of PARAM_VALUE from SERDE_PARAMS table to be able to create
# HBase tables with large number of columns.
echo "alter table \"SERDE_PARAMS\" alter column \"PARAM_VALUE\" type character varying" \
| psql -q -U hiveuser -d ${METASTORE_DB}
fi
if [ $CREATE_SENTRY_POLICY_DB -eq 1 ]; then
echo "Creating Sentry Policy Server DB"
dropdb -U hiveuser $SENTRY_POLICY_DB 2> /dev/null || true
createdb -U hiveuser $SENTRY_POLICY_DB
fi
# Perform search-replace on $1, output to $2.
# Search $1 ($GCIN) for strings that look like "${FOO}". If FOO is defined in
# the environment then replace "${FOO}" with the environment value. Also
# remove or leave special kerberos settings as desired. Sanity check at end.
function generate_config {
GCIN="$1"
GCOUT="$2"
perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
"${GCIN}" > "${GCOUT}.tmp"
if [ "${IMPALA_KERBERIZE}" = "" ]; then
sed '/<!-- BEGIN Kerberos/,/END Kerberos settings -->/d' \
"${GCOUT}.tmp" > "${GCOUT}"
else
cp "${GCOUT}.tmp" "${GCOUT}"
fi
rm -f "${GCOUT}.tmp"
# Check for anything that might have been missed.
# Assumes that environment variables will be ALL CAPS...
if grep '\${[A-Z_]*}' "${GCOUT}"; then
echo "Found undefined variables in ${GCOUT}, aborting"
exit 1
fi
echo "Generated `pwd`/${GCOUT}"
}
echo "Linking core-site.xml from local cluster"
CLUSTER_HADOOP_CONF_DIR=$(${CLUSTER_DIR}/admin get_hadoop_client_conf_dir)
ln -s ${CLUSTER_HADOOP_CONF_DIR}/core-site.xml
echo "Linking hdfs-site.xml from local cluster"
ln -s ${CLUSTER_HADOOP_CONF_DIR}/hdfs-site.xml
if ${CLUSTER_DIR}/admin is_kerberized; then
# KERBEROS TODO: Without this, the yarn daemons can see these
# files, but mapreduce jobs *cannot* see these files. This seems
# strange, but making these symlinks also results in data loading
# failures in the non-kerberized case. Without these, mapreduce
# jobs die in a kerberized cluster because they can't find their
# kerberos principals. Obviously this has to be sorted out before
# a kerberized cluster can load data.
echo "Linking yarn and mapred from local cluster"
ln -s ${CLUSTER_HADOOP_CONF_DIR}/yarn-site.xml
ln -s ${CLUSTER_HADOOP_CONF_DIR}/mapred-site.xml
fi
generate_config postgresql-hive-site.xml.template hive-site.xml
generate_config log4j.properties.template log4j.properties
generate_config hive-log4j2.properties.template hive-log4j2.properties
generate_config hbase-site.xml.template hbase-site.xml
generate_config authz-policy.ini.template authz-policy.ini
generate_config sentry-site.xml.template sentry-site.xml
if [ ! -z "${IMPALA_KERBERIZE}" ]; then
generate_config hbase-jaas-server.conf.template hbase-jaas-server.conf
generate_config hbase-jaas-client.conf.template hbase-jaas-client.conf
fi
popd
echo "Completed config generation"
# Creates a symlink in TARGET_DIR to all subdirectories under SOURCE_DIR
function symlink_subdirs {
SOURCE_DIR=$1
TARGET_DIR=$2
if [ -d "${SOURCE_DIR}" ]; then
find ${SOURCE_DIR}/ -maxdepth 1 -mindepth 1 -type d -exec ln -f -s {} ${TARGET_DIR} \;
else
echo "No auxiliary tests found at: ${SOURCE_DIR}"
fi
}
# The Impala test framework support running additional tests outside of the main repo.
# This is an optional feature that can be enabled by setting the IMPALA_AUX_* environment
# variables to valid locations.
echo "Searching for auxiliary tests, workloads, and datasets (if any exist)."
symlink_subdirs ${IMPALA_AUX_WORKLOAD_DIR} ${IMPALA_WORKLOAD_DIR}
symlink_subdirs ${IMPALA_AUX_DATASET_DIR} ${IMPALA_DATASET_DIR}
if [ -d ${IMPALA_AUX_TEST_HOME}/tests/functional ]; then
symlink_subdirs ${IMPALA_AUX_TEST_HOME}/tests/functional ${IMPALA_HOME}/tests
else
# For compatibility with older auxiliary tests, which aren't in the
# functional subdirectory.
symlink_subdirs ${IMPALA_AUX_TEST_HOME}/tests ${IMPALA_HOME}/tests
fi