#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Load the shared build-error reporting helpers and activate them.
# setup_report_build_error is expected to be defined by the sourced file.
# The path is quoted so that an IMPALA_HOME containing whitespace or glob
# characters is still sourced as one path.
. "${IMPALA_HOME}/bin/report_build_error.sh"
setup_report_build_error
# --- Paths derived from the Impala environment ---
# Directory holding the helper scripts (kill-*/wait-for-*) used further down.
CLUSTER_BIN="${IMPALA_HOME}/testdata/bin"
# Directory that receives the stdout/stderr of the Hive services.
LOGDIR="${IMPALA_CLUSTER_LOGS_DIR}/hive"

# --- Ports ---
HIVE_SERVER_PORT=10000
HIVE_METASTORE_PORT=9083
# Exported: HiveServer2 is started on the port given by this environment variable.
HIVE_SERVER2_THRIFT_PORT=11050
export HIVE_SERVER2_THRIFT_PORT

# --- Transports handed to the wait-for-* scripts ---
# HIVES2_TRANSPORT is switched to "kerberos" later when the cluster is kerberized.
HIVES2_TRANSPORT="plain_sasl"
METASTORE_TRANSPORT="buffered"

# --- Flags ---
# Set to 1 by the -only_metastore command line switch.
ONLY_METASTORE=0
# Ask the cluster admin script whether this cluster is kerberized. Only its exit
# status is used; as an `if` condition a non-zero status does not trip `set -e`.
# All paths are quoted so that directories containing whitespace keep working.
if "${CLUSTER_DIR}/admin" is_kerberized; then
  # Making a kerberized cluster... load the MiniKDC environment file and make
  # the HiveServer2 wait script use the kerberos transport.
  . "${MINIKDC_ENV}"
  HIVES2_TRANSPORT="kerberos"
  # The metastore isn't kerberized yet:
  # METASTORE_TRANSPORT="kerberos"
fi
# Create the log directory (and any missing parents) before the services
# started below redirect their output into it.
mkdir -p -- "${LOGDIR}"
# Command line handling. The only accepted switch is -only_metastore; every
# other argument (including -help and -h) prints the usage text and exits with
# status 1. Arguments are consumed one at a time with shift, and the loop stops
# as soon as the remaining arguments join to an empty string.
while [ -n "$*" ]; do
  if [[ "$1" == "-only_metastore" ]]; then
    ONLY_METASTORE=1
  else
    echo "run-hive-server.sh : Starts the hive server and the metastore."
    echo "[-only_metastore] : Only starts the hive metastore."
    exit 1
  fi
  shift
done
# TODO: We should have a retry loop for every service we start.
# Kill for a clean start. Everything the kill script prints is discarded.
# The path is quoted so a CLUSTER_BIN containing whitespace is still one command.
"${CLUSTER_BIN}/kill-hive-server.sh" &> /dev/null
# JVM debug options exported under HIVE_METASTORE_HADOOP_OPTS: a JDWP agent on
# address 30010 that does not suspend startup (server=y, suspend=n).
# The value is assembled piecewise instead of with a backslash continuation
# inside the string, so re-indenting these lines can never inject whitespace
# into the option.
HIVE_METASTORE_HADOOP_OPTS="-Xdebug"
HIVE_METASTORE_HADOOP_OPTS+=" -Xrunjdwp:transport=dt_socket,server=y"
HIVE_METASTORE_HADOOP_OPTS+=",suspend=n,address=30010"
export HIVE_METASTORE_HADOOP_OPTS
# If this is CDP Hive we need to manually add the Sentry Hive binding jars to the
# classpath since the Hive metastore scripts do not do so. This is currently
# needed so that all the tests, including the Sentry tests, can run.
# TODO: This can be removed when we move to Ranger completely
#
# SENTRY_HOME is read with an empty default: when it is unset this section is
# skipped instead of aborting the whole script under `set -u`.
if [[ "${USE_CDP_HIVE}" = "true" && -n "${SENTRY_HOME:-}" ]]; then
  for f in "${SENTRY_HOME}"/lib/sentry-binding-hive*.jar; do
    # When no jar matches, the glob is left as the literal pattern; never put
    # that on the classpath.
    [[ -e "${f}" ]] || continue
    FILE_NAME=${f##*/}
    # Exclude all the hive jars from being included in the classpath since
    # Sentry depends on Hive 2.1.1. With the glob above (sentry-binding-hive*)
    # this never triggers; it is kept as a guard in case the glob is widened.
    if [[ "${FILE_NAME}" != hive* ]]; then
      # ${VAR:+...} avoids a leading ':' (an empty classpath entry) when
      # HADOOP_CLASSPATH is empty, and tolerates it being unset.
      export HADOOP_CLASSPATH="${HADOOP_CLASSPATH:+${HADOOP_CLASSPATH}:}${f}"
    fi
  done
fi
# For Hive 3, we use Tez for execution. We have to add it to the classpath.
# NOTE: it would seem like this would only be necessary on the HS2 classpath,
# but compactions are initiated from the HMS in Hive 3. This may change at
# some point in the future, in which case we can add this to only the
# HS2 classpath.
#
# USE_CDP_HIVE is compared as a string rather than executed as a command:
# running its value would treat an empty value as success (an empty command
# has exit status 0) and would execute whatever else the variable contains.
if [[ "${USE_CDP_HIVE}" = "true" ]]; then
  # The trailing /* stays literal (no filename expansion happens in an
  # assignment); presumably it is meant as a Java classpath wildcard.
  # ${VAR:+...} avoids a leading ':' when HADOOP_CLASSPATH is empty or unset.
  export HADOOP_CLASSPATH="${HADOOP_CLASSPATH:+${HADOOP_CLASSPATH}:}${TEZ_HOME}/*"
  # This is a little hacky, but Tez bundles a bunch of junk into lib/, such
  # as extra copies of the hadoop libraries, etc, and we want to avoid conflicts.
  # So, we'll be a bit choosy about what we add to the classpath here.
  for jar in "${TEZ_HOME}"/lib/*; do
    case "${jar##*/}" in
      commons-*|RoaringBitmap*)
        export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${jar}"
        ;;
    esac
  done
fi
# Add kudu-hive.jar to the Hive Metastore classpath, so that Kudu's HMS
# plugin can be loaded.
for file in "${IMPALA_KUDU_JAVA_HOME}"/*kudu-hive*jar; do
  # When nothing matches, the glob is left as the literal pattern; skip it
  # rather than appending a bogus entry to the classpath.
  [[ -e "${file}" ]] || continue
  # ${VAR:+...} avoids a leading ':' when HADOOP_CLASSPATH is empty or unset.
  export HADOOP_CLASSPATH="${HADOOP_CLASSPATH:+${HADOOP_CLASSPATH}:}${file}"
done
# Default to skip validation on Kudu tables if KUDU_SKIP_HMS_PLUGIN_VALIDATION
# is unset or empty.
export KUDU_SKIP_HMS_PLUGIN_VALIDATION="${KUDU_SKIP_HMS_PLUGIN_VALIDATION:-1}"
# Starts a Hive Metastore Server on the specified port. It runs in the
# background with stdout and stderr captured in ${LOGDIR}/hive-metastore.out.
# To debug log4j2 loading issues, add to HADOOP_CLIENT_OPTS:
# -Dorg.apache.logging.log4j.simplelog.StatusLogger.level=TRACE
# Redirect targets and script paths are quoted: an unquoted redirect target
# containing whitespace fails with "ambiguous redirect".
HADOOP_CLIENT_OPTS="-Xmx2024m -Dhive.log.file=hive-metastore.log" hive \
  --service metastore -p "${HIVE_METASTORE_PORT}" \
  > "${LOGDIR}/hive-metastore.out" 2>&1 &
# Wait for the Metastore to come up because HiveServer2 relies on it being live.
"${CLUSTER_BIN}/wait-for-metastore.py" --transport="${METASTORE_TRANSPORT}"
if [ "${ONLY_METASTORE}" -eq 0 ]; then
  # Starts a HiveServer2 instance on the port specified by the
  # HIVE_SERVER2_THRIFT_PORT environment variable. The JVM heap (-Xmx) should be
  # at least 2048 MB to avoid OOM when loading ORC tables like widerow.
  HADOOP_CLIENT_OPTS="-Xmx2048m -Dhive.log.file=hive-server2.log" hive \
    --service hiveserver2 > "${LOGDIR}/hive-server2.out" 2>&1 &
  # Wait for the HiveServer2 service to come up because callers of this script
  # may rely on it being available.
  "${CLUSTER_BIN}/wait-for-hiveserver2.py" --transport="${HIVES2_TRANSPORT}"
fi