testdata/bin/run-hive-server.sh - impala - Git at Google

 #!/bin/bash
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 # Abort on any failing command, unset variable, or failing pipeline stage.
 set -euo pipefail
 # Source report_build_error.sh and call setup_report_build_error, which that file
 # is expected to define. The path is quoted so that an IMPALA_HOME containing
 # whitespace is not word-split.
 . "${IMPALA_HOME}/bin/report_build_error.sh"
 setup_report_build_error

 # Ports, log directory and transports used when starting and probing the services.
 # NOTE(review): HIVE_SERVER_PORT is not referenced anywhere else in the visible
 # script and is not exported - confirm whether it is still needed.
 HIVE_SERVER_PORT=10000
 export HIVE_SERVER2_THRIFT_PORT=11050
 HIVE_METASTORE_PORT=9083
 LOGDIR="${IMPALA_CLUSTER_LOGS_DIR}/hive"
 HIVES2_TRANSPORT="plain_sasl"
 METASTORE_TRANSPORT="buffered"
 # Set to 1 by the -only_metastore flag; 0 means HiveServer2 is started as well.
 ONLY_METASTORE=0

 CLUSTER_BIN="${IMPALA_HOME}/testdata/bin"

 if "${CLUSTER_DIR}/admin" is_kerberized; then
     # Making a kerberized cluster... set some more environment variables.
     . "${MINIKDC_ENV}"

     HIVES2_TRANSPORT="kerberos"
     # The metastore isn't kerberized yet:
     # METASTORE_TRANSPORT="kerberos"
 fi

 # Make sure the log directory exists before anything redirects output into it.
 mkdir -p "${LOGDIR}"

 # Consume the command-line arguments one at a time. -only_metastore is the only
 # accepted flag; -help, -h and every other argument print the usage text and
 # terminate the script with status 1.
 until [ -z "$*" ]; do
   if [[ "$1" == "-only_metastore" ]]; then
     ONLY_METASTORE=1
   else
     echo "run-hive-server.sh : Starts the hive server and the metastore."
     echo "[-only_metastore] : Only starts the hive metastore."
     exit 1
   fi
   shift
 done

 # TODO: We should have a retry loop for every service we start.
 # Kill for a clean start.
 "${CLUSTER_BIN}/kill-hive-server.sh" &> /dev/null

 # JVM remote-debug (JDWP) options exported as HIVE_METASTORE_HADOOP_OPTS; the agent
 # is configured as a non-suspending server on address 30010.
 # The value is assembled piecewise instead of using a backslash-newline inside the
 # double quotes: such a continuation keeps the leading whitespace of the following
 # line, which would put whitespace in the middle of the -Xrunjdwp option.
 HIVE_METASTORE_HADOOP_OPTS="-Xdebug"
 HIVE_METASTORE_HADOOP_OPTS+=" -Xrunjdwp:transport=dt_socket,server=y"
 HIVE_METASTORE_HADOOP_OPTS+=",suspend=n,address=30010"
 export HIVE_METASTORE_HADOOP_OPTS

 # If this is CDP Hive we need to manually add the sentry jars in the classpath since
 # CDH Hive metastore scripts do not do so. This is currently to make sure that we can run
 # all the tests including sentry tests
 # TODO: This can be removed when we move to Ranger completely
 #
 # SENTRY_HOME is read with an empty default so that the -n check also works under
 # "set -u" when the variable is not set at all.
 if [[ "$USE_CDP_HIVE" = "true" && -n "${SENTRY_HOME:-}" ]]; then
   for f in "${SENTRY_HOME}"/lib/sentry-binding-hive*.jar; do
     # An unmatched glob is left as the literal pattern; never add that to the classpath.
     [[ -e "$f" ]] || continue
     FILE_NAME=$(basename "$f")
     # exclude all the hive jars from being included in the classpath since Sentry
     # depends on Hive 2.1.1
     if [[ ! $FILE_NAME == hive* ]]; then
       export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${f}"
     fi
   done
 fi

 # For Hive 3, we use Tez for execution. We have to add it to the classpath.
 # NOTE: it would seem like this would only be necessary on the HS2 classpath,
 # but compactions are initiated from the HMS in Hive 3. This may change at
 # some point in the future, in which case we can add this to only the
 # HS2 classpath.
 #
 # USE_CDP_HIVE is compared as a string (as in the Sentry check) rather than being
 # executed as a command, and all paths are quoted so that a TEZ_HOME containing
 # whitespace is not word-split.
 if [[ "${USE_CDP_HIVE}" = "true" ]]; then
   # The trailing /* is kept literal: it is a classpath wildcard, not a shell glob.
   export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${TEZ_HOME}/*"
   # This is a little hacky, but Tez bundles a bunch of junk into lib/, such
   # as extra copies of the hadoop libraries, etc, and we want to avoid conflicts.
   # So, we'll be a bit choosy about what we add to the classpath here.
   for jar in "${TEZ_HOME}"/lib/* ; do
     case "$(basename "$jar")" in
       commons-*|RoaringBitmap*)
         export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${jar}"
         ;;
     esac
   done
 fi

 # Add kudu-hive.jar to the Hive Metastore classpath, so that Kudu's HMS
 # plugin can be loaded.
 # The directory is quoted (the glob itself is not) so that a path containing
 # whitespace is not word-split.
 for file in "${IMPALA_KUDU_JAVA_HOME}"/*kudu-hive*jar; do
   # An unmatched glob is left as the literal pattern; never add that to the classpath.
   [[ -e "$file" ]] || continue
   export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${file}"
 done
 # Default to skip validation on Kudu tables if KUDU_SKIP_HMS_PLUGIN_VALIDATION
 # is unset.
 export KUDU_SKIP_HMS_PLUGIN_VALIDATION="${KUDU_SKIP_HMS_PLUGIN_VALIDATION:-1}"

 # Starts a Hive Metastore Server on the specified port.
 # To debug log4j2 loading issues, add to HADOOP_CLIENT_OPTS:
 #   -Dorg.apache.logging.log4j.simplelog.StatusLogger.level=TRACE
 # The process runs in the background with stdout and stderr captured in LOGDIR.
 # Redirect targets and arguments are quoted: an unquoted redirect target that
 # expands to several words makes bash fail with "ambiguous redirect".
 HADOOP_CLIENT_OPTS="-Xmx2024m -Dhive.log.file=hive-metastore.log" hive \
   --service metastore -p "${HIVE_METASTORE_PORT}" \
   > "${LOGDIR}/hive-metastore.out" 2>&1 &

 # Wait for the Metastore to come up because HiveServer2 relies on it being live.
 "${CLUSTER_BIN}/wait-for-metastore.py" --transport="${METASTORE_TRANSPORT}"

 if [ "${ONLY_METASTORE}" -eq 0 ]; then
   # Starts a HiveServer2 instance on the port specified by the HIVE_SERVER2_THRIFT_PORT
   # environment variable. HADOOP_HEAPSIZE should be set to at least 2048 to avoid OOM
   # when loading ORC tables like widerow.
   HADOOP_CLIENT_OPTS="-Xmx2048m -Dhive.log.file=hive-server2.log" hive \
       --service hiveserver2 > "${LOGDIR}/hive-server2.out" 2>&1 &

   # Wait for the HiveServer2 service to come up because callers of this script
   # may rely on it being available.
   "${CLUSTER_BIN}/wait-for-hiveserver2.py" --transport="${HIVES2_TRANSPORT}"
 fi
	#!/bin/bash
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	# Abort on any failing command, unset variable, or failing pipeline stage.
	set -euo pipefail
	# Source report_build_error.sh and call setup_report_build_error, which that file
	# is expected to define. The path is quoted so that an IMPALA_HOME containing
	# whitespace is not word-split.
	. "${IMPALA_HOME}/bin/report_build_error.sh"
	setup_report_build_error

	# Ports, log directory and transports used when starting and probing the services.
	# NOTE(review): HIVE_SERVER_PORT is not referenced anywhere else in the visible
	# script and is not exported - confirm whether it is still needed.
	HIVE_SERVER_PORT=10000
	export HIVE_SERVER2_THRIFT_PORT=11050
	HIVE_METASTORE_PORT=9083
	LOGDIR="${IMPALA_CLUSTER_LOGS_DIR}/hive"
	HIVES2_TRANSPORT="plain_sasl"
	METASTORE_TRANSPORT="buffered"
	# Set to 1 by the -only_metastore flag; 0 means HiveServer2 is started as well.
	ONLY_METASTORE=0

	CLUSTER_BIN="${IMPALA_HOME}/testdata/bin"

	if "${CLUSTER_DIR}/admin" is_kerberized; then
	  # Making a kerberized cluster... set some more environment variables.
	  . "${MINIKDC_ENV}"

	  HIVES2_TRANSPORT="kerberos"
	  # The metastore isn't kerberized yet:
	  # METASTORE_TRANSPORT="kerberos"
	fi

	# Make sure the log directory exists before anything redirects output into it.
	mkdir -p "${LOGDIR}"

	#######################################
	# Parses the script's command-line arguments.
	# Globals:   ONLY_METASTORE (written; set to 1 by -only_metastore)
	# Arguments: the arguments that were passed to this script
	# Outputs:   usage text on stdout for -help, -h or any unrecognised argument
	# Returns:   0 when every argument was accepted; in the usage case it does not
	#            return but exits the shell with status 1
	#######################################
	parse_args() {
	  while [ -n "$*" ]; do
	    case "$1" in
	      -only_metastore)
	        ONLY_METASTORE=1
	        ;;
	      # The alternatives must be separated by unescaped '|'. With backslash-escaped
	      # pipes the whole line is one literal pattern, so -help, -h and unknown
	      # arguments would be silently ignored.
	      -help|-h|*)
	        echo "run-hive-server.sh : Starts the hive server and the metastore."
	        echo "[-only_metastore] : Only starts the hive metastore."
	        exit 1
	        ;;
	    esac
	    shift
	  done
	}

	parse_args "$@"

	# TODO: We should have a retry loop for every service we start.
	# Kill for a clean start.
	"${CLUSTER_BIN}/kill-hive-server.sh" &> /dev/null

	# JVM remote-debug (JDWP) options exported as HIVE_METASTORE_HADOOP_OPTS; the agent
	# is configured as a non-suspending server on address 30010.
	# The value is assembled piecewise instead of using a backslash-newline inside the
	# double quotes: such a continuation keeps the leading whitespace of the following
	# line, which would put whitespace in the middle of the -Xrunjdwp option.
	HIVE_METASTORE_HADOOP_OPTS="-Xdebug"
	HIVE_METASTORE_HADOOP_OPTS+=" -Xrunjdwp:transport=dt_socket,server=y"
	HIVE_METASTORE_HADOOP_OPTS+=",suspend=n,address=30010"
	export HIVE_METASTORE_HADOOP_OPTS

	# If this is CDP Hive we need to manually add the sentry jars in the classpath since
	# CDH Hive metastore scripts do not do so. This is currently to make sure that we can run
	# all the tests including sentry tests
	# TODO: This can be removed when we move to Ranger completely
	#
	# SENTRY_HOME is read with an empty default so that the -n check also works under
	# "set -u" when the variable is not set at all.
	if [[ "$USE_CDP_HIVE" = "true" && -n "${SENTRY_HOME:-}" ]]; then
	  for f in "${SENTRY_HOME}"/lib/sentry-binding-hive*.jar; do
	    # An unmatched glob is left as the literal pattern; never add that to the classpath.
	    [[ -e "$f" ]] || continue
	    FILE_NAME=$(basename "$f")
	    # exclude all the hive jars from being included in the classpath since Sentry
	    # depends on Hive 2.1.1
	    if [[ ! $FILE_NAME == hive* ]]; then
	      export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${f}"
	    fi
	  done
	fi

	# For Hive 3, we use Tez for execution. We have to add it to the classpath.
	# NOTE: it would seem like this would only be necessary on the HS2 classpath,
	# but compactions are initiated from the HMS in Hive 3. This may change at
	# some point in the future, in which case we can add this to only the
	# HS2 classpath.
	#
	# USE_CDP_HIVE is compared as a string (as in the Sentry check) rather than being
	# executed as a command, and all paths are quoted so that a TEZ_HOME containing
	# whitespace is not word-split.
	if [[ "${USE_CDP_HIVE}" = "true" ]]; then
	  # The trailing /* is kept literal: it is a classpath wildcard, not a shell glob.
	  export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${TEZ_HOME}/*"
	  # This is a little hacky, but Tez bundles a bunch of junk into lib/, such
	  # as extra copies of the hadoop libraries, etc, and we want to avoid conflicts.
	  # So, we'll be a bit choosy about what we add to the classpath here.
	  for jar in "${TEZ_HOME}"/lib/* ; do
	    case "$(basename "$jar")" in
	      # Two glob alternatives separated by an unescaped '|'. Without the trailing
	      # '*' and with an escaped pipe the pattern would be a single literal string
	      # that no jar name can match.
	      commons-*|RoaringBitmap*)
	        export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${jar}"
	        ;;
	    esac
	  done
	fi

	# Add kudu-hive.jar to the Hive Metastore classpath, so that Kudu's HMS
	# plugin can be loaded.
	# The jar is located with a glob because its file name may carry a prefix and a
	# version; a fixed name such as "kudu-hivejar" would only ever append a path that
	# does not exist. The directory is quoted so whitespace in it is not word-split.
	for file in "${IMPALA_KUDU_JAVA_HOME}"/*kudu-hive*jar; do
	  # An unmatched glob is left as the literal pattern; never add that to the classpath.
	  [[ -e "$file" ]] || continue
	  export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${file}"
	done
	# Default to skip validation on Kudu tables if KUDU_SKIP_HMS_PLUGIN_VALIDATION
	# is unset.
	export KUDU_SKIP_HMS_PLUGIN_VALIDATION="${KUDU_SKIP_HMS_PLUGIN_VALIDATION:-1}"

	# Starts a Hive Metastore Server on the specified port.
	# To debug log4j2 loading issues, add to HADOOP_CLIENT_OPTS:
	# -Dorg.apache.logging.log4j.simplelog.StatusLogger.level=TRACE
	# The process runs in the background with stdout and stderr captured in LOGDIR.
	# Redirect targets and arguments are quoted: an unquoted redirect target that
	# expands to several words makes bash fail with "ambiguous redirect".
	HADOOP_CLIENT_OPTS="-Xmx2024m -Dhive.log.file=hive-metastore.log" hive \
	  --service metastore -p "${HIVE_METASTORE_PORT}" \
	  > "${LOGDIR}/hive-metastore.out" 2>&1 &

	# Wait for the Metastore to come up because HiveServer2 relies on it being live.
	"${CLUSTER_BIN}/wait-for-metastore.py" --transport="${METASTORE_TRANSPORT}"

	if [ "${ONLY_METASTORE}" -eq 0 ]; then
	  # Starts a HiveServer2 instance on the port specified by the HIVE_SERVER2_THRIFT_PORT
	  # environment variable. HADOOP_HEAPSIZE should be set to at least 2048 to avoid OOM
	  # when loading ORC tables like widerow.
	  HADOOP_CLIENT_OPTS="-Xmx2048m -Dhive.log.file=hive-server2.log" hive \
	    --service hiveserver2 > "${LOGDIR}/hive-server2.out" 2>&1 &

	  # Wait for the HiveServer2 service to come up because callers of this script
	  # may rely on it being available.
	  "${CLUSTER_BIN}/wait-for-hiveserver2.py" --transport="${HIVES2_TRANSPORT}"
	fi