#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
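# A script that does a rolling upgrade of Pinot components from one version to
# another, followed by a rollback to the original version. Both versions must
# already be built under oldTargetDir and newTargetDir in the working directory.
# Components are upgraded in the following order:
# Controller -> Broker -> Server
# and are rolled back in the reverse order.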
#
# TODO Some ideas to explore:
# It will be nice to have the script take arguments about what is to be done.
# For example, we may want to verify the upgrade path in a different order.
# Better yet, test all orders to decide that the upgrade can be done in any order.
# Or, we may want to test upgrade of a specific component only.
#
# For now, this script runs specific yaml files as a part of testing between
# component upgrades/rollbacks. Perhaps we can change it to take a directory name
# and run all the scripts in the directory in alpha order, one script at each
# "stage" of upgrade.
#
# We may modify to choose a minimal run in which the same set of operations are run
# between any two component upgrades/rollbacks -- this may consist of adding
# one more segment to table, adding some more rows to the stream topic, and
# running some queries with the new data.
# The rm binary used for every cleanup done by this script
RM="/bin/rm"
# Counter appended to log file names so that every service start gets its own
# log file; startService increments it.
logCount=1
# Name of this script, as shown in the usage message
cmdName=$(basename "$0")
# Shared helpers that live next to this script (presumably the source of
# absPath, which is used below but is not defined in this file).
source "$(dirname "$0")"/utils.inc
# Remove the controller data directory named by the "controller.data.dir" entry
# of ${CONTROLLER_CONF}. The entry must have the form "key = value", since the
# value is taken from the third whitespace-separated field of the line.
# Nothing is done if the config file does not exist or has no such entry.
function cleanupControllerDirs() {
  # Same guard as setupControllerVariables: the config file is optional
  [ -f "${CONTROLLER_CONF}" ] || return 0
  local dirNames dirName
  dirNames=$(grep -F -- controller.data.dir "${CONTROLLER_CONF}" | awk '{print $3}')
  # One directory per matching line; quoting keeps rm from globbing the value
  while IFS= read -r dirName; do
    if [ -n "$dirName" ]; then
      ${RM} -rf -- "$dirName"
    fi
  done <<<"$dirNames"
}
# Remove the server directories named by the "pinot.server.instance.dataDir" and
# "pinot.server.instance.segmentTarDir" entries of ${SERVER_CONF}. The entries
# must have the form "key = value", since the value is taken from the third
# whitespace-separated field of the line.
# Nothing is done if the config file does not exist or has no such entries.
function cleanupServerDirs() {
  # Same guard as setupServerVariables: the config file is optional
  [ -f "${SERVER_CONF}" ] || return 0
  local key dirNames dirName
  for key in pinot.server.instance.dataDir pinot.server.instance.segmentTarDir; do
    dirNames=$(grep -F -- "$key" "${SERVER_CONF}" | awk '{print $3}')
    # One directory per matching line; quoting keeps rm from globbing the value
    while IFS= read -r dirName; do
      if [ -n "$dirName" ]; then
        ${RM} -rf -- "$dirName"
      fi
    done <<<"$dirNames"
  done
}
# Print the command-line help of this script on stdout
function usage() {
  # One array element per output line; an empty element is a blank line
  local -a helpLines=(
    "Usage: $cmdName -w <workingDir> -t <testSuiteDir> [-k]"
    "MANDATORY:"
    " -w, --working-dir Working directory where olderCommit and newCommit target files reside."
    " -t, --test-suite-dir Test suite directory"
    ""
    "OPTIONAL:"
    " -k, --keep-cluster-on-failure Keep cluster on test failure"
    " -h, --help Prints this help"
    ""
  )
  printf '%s\n' "${helpLines[@]}"
}
# Block until nc manages to connect to localhost:${ZK_PORT}.
# The port is probed once per second. There is no timeout, so this never returns
# if zookeeper does not come up.
function waitForZkReady() {
  # Local, as in waitForBrokerReady and waitForServerReady, so that the loop
  # state does not leak into the rest of the script.
  local status=1
  while [ "$status" -ne 0 ]; do
    sleep 1
    echo "Checking port ${ZK_PORT} for zk ready"
    echo x | nc localhost "${ZK_PORT}" 1>/dev/null 2>&1
    status=$?
  done
}
# Block until curl gets a response from localhost:${CONTROLLER_PORT}/health.
# The endpoint is probed once per second. There is no timeout, so this never
# returns if the controller does not come up.
function waitForControllerReady() {
  # Local, as in waitForBrokerReady and waitForServerReady, so that the loop
  # state does not leak into the rest of the script.
  local status=1
  while [ "$status" -ne 0 ]; do
    sleep 1
    echo "Checking port ${CONTROLLER_PORT} for controller ready"
    curl "localhost:${CONTROLLER_PORT}/health" 1>/dev/null 2>&1
    status=$?
  done
}
# Block until nc manages to connect to localhost:19092, the port on which kafka
# is probed by this script.
# The port is probed once per second. There is no timeout, so this never returns
# if kafka does not come up.
function waitForKafkaReady() {
  # Local, as in waitForBrokerReady and waitForServerReady, so that the loop
  # state does not leak into the rest of the script.
  local status=1
  while [ "$status" -ne 0 ]; do
    sleep 1
    echo "Checking port 19092 for kafka ready"
    echo x | nc localhost 19092 1>/dev/null 2>&1
    status=$?
  done
}
# Poll the broker once per second until curl gets a response from
# localhost:${BROKER_QUERY_PORT}/debug/routingTable. There is no timeout.
function waitForBrokerReady() {
  while true; do
    sleep 1
    echo Checking port ${BROKER_QUERY_PORT} for broker ready
    # Only the exit status of curl matters, its output is discarded
    if curl localhost:${BROKER_QUERY_PORT}/debug/routingTable >/dev/null 2>&1; then
      break
    fi
  done
}
# Poll the server once per second until curl gets a response from
# localhost:${SERVER_ADMIN_PORT}/health. There is no timeout.
function waitForServerReady() {
  while true; do
    sleep 1
    echo Checking port ${SERVER_ADMIN_PORT} for server ready
    # Only the exit status of curl matters, its output is discarded
    if curl localhost:${SERVER_ADMIN_PORT}/health >/dev/null 2>&1; then
      break
    fi
  done
}
# Wait for the broker, then the server, then kafka to respond
function waitForClusterReady() {
  local waiter
  for waiter in waitForBrokerReady waitForServerReady waitForKafkaReady; do
    "$waiter"
  done
}
# Print the "-configFileName <file>" argument if $1 is an existing regular file.
# Nothing is printed otherwise.
function setConfigFileArg() {
  local confFile=$1
  if [[ ! -f $confFile ]]; then
    return 0
  fi
  printf '%s\n' "-configFileName ${confFile}"
}
# Given a component and directory, start that version of the specific component
# Start the service in background.
# $1 is service name: zookeeper, controller, broker, server or kafka
# $2 is directory name of the Pinot build to start the service from
# $3 is the config file of the service; it is passed on only if the file exists
# The output of the service goes to ${LOG_DIR}/<service>.<logCount>.log and its
# pid is recorded in ${PID_DIR}/<service>.pid, which stopService reads.
# Returns 1 without starting anything if the service name is not known.
# NOTE: "exit" ends the whole script, not just this function, so a directory
# without the pinot-tools package aborts the run.
# TODO Return a non-zero status instead of exiting and let the caller decide.
function startService() {
  local serviceName=$1
  local dirName=$2
  echo "Starting $serviceName in $dirName"
  local configFileArg
  configFileArg=$(setConfigFileArg "$3")
  pushd "$dirName"/pinot-tools/target/pinot-tools-pkg/bin 1>/dev/null || exit 1
  # configFileArg is left unquoted on purpose below: it is either empty or the
  # two words "-configFileName <file>".
  case "$serviceName" in
    zookeeper)
      # Remove all previous zk data. Zookeeper keeps its data in
      # ${LOG_DIR}/zkdir, so that is the directory that has to go;
      # ${dirName}/zkdir is still removed as well, as it always has been.
      ${RM} -rf "${LOG_DIR}/zkdir" "${dirName}/zkdir"
      ./pinot-admin.sh StartZookeeper -dataDir "${LOG_DIR}/zkdir" 1>"${LOG_DIR}/zookeeper.${logCount}.log" 2>&1 &
      echo $! >"${PID_DIR}/zookeeper.pid"
      ;;
    controller)
      ./pinot-admin.sh StartController ${configFileArg} 1>"${LOG_DIR}/controller.${logCount}.log" 2>&1 &
      echo $! >"${PID_DIR}/controller.pid"
      ;;
    broker)
      ./pinot-admin.sh StartBroker ${configFileArg} 1>"${LOG_DIR}/broker.${logCount}.log" 2>&1 &
      echo $! >"${PID_DIR}/broker.pid"
      ;;
    server)
      ./pinot-admin.sh StartServer ${configFileArg} 1>"${LOG_DIR}/server.${logCount}.log" 2>&1 &
      echo $! >"${PID_DIR}/server.pid"
      ;;
    kafka)
      ./pinot-admin.sh StartKafka -zkAddress "localhost:${ZK_PORT}/kafka" 1>"${LOG_DIR}/kafka.${logCount}.log" 2>&1 &
      echo $! >"${PID_DIR}/kafka.pid"
      ;;
    *)
      # Do not claim that something was started when nothing was
      echo "Unknown service ${serviceName}, nothing started" >&2
      popd 1>/dev/null || exit 1
      return 1
      ;;
  esac
  # Keep log files distinct so we can debug
  logCount=$((logCount + 1))
  echo "${serviceName} started"
  popd 1>/dev/null || exit 1
}
# Given a component, check if it is known to be running and stop that specific
# component.
# $1 is service name; its pid is read from ${PID_DIR}/<service>.pid
# The process is killed with SIGKILL, and the function returns once ps no longer
# reports the pid. The pid file is removed afterwards.
# If there is no pid file, a message is printed and nothing else is done.
function stopService() {
  local serviceName=$1
  local pidFile="${PID_DIR}/${serviceName}.pid"
  if [ ! -f "$pidFile" ]; then
    echo "Pid file ${PID_DIR}/${serviceName}.pid not found. Failed to stop component ${serviceName}"
    return 0
  fi
  local pid
  pid=$(cat "$pidFile")
  kill -9 "$pid" 1>/dev/null 2>&1
  # TODO Kill without -9 and add a while loop waiting for process to die
  # Wait for as long as ps finds the process. Any non-zero status of ps ends the
  # wait, so that a failing ps cannot keep the script here forever.
  local status=0
  while [ "$status" -eq 0 ]; do
    echo "Waiting for $serviceName (pid $pid) to die"
    sleep 1
    ps -p "$pid"
    status=$?
  done
  ${RM} -f "$pidFile"
  echo "${serviceName} stopped"
}
# Bring up a complete Pinot cluster from the build in the directory $1:
# zookeeper, controller, broker, server and kafka, in that order.
function startServices() {
  local targetDir=$1

  # The controller crashes unless zookeeper is ready, so wait for zookeeper
  startService zookeeper "$targetDir" "unused"
  waitForZkReady

  # The broker crashes unless the controller is ready, so wait for the controller
  startService controller "$targetDir" "$CONTROLLER_CONF"
  waitForControllerReady

  # The remaining components are started back to back
  local component confFile
  for component in broker server kafka; do
    case "$component" in
      broker) confFile=$BROKER_CONF ;;
      server) confFile=$SERVER_CONF ;;
      *) confFile="unused" ;;
    esac
    startService "$component" "$targetDir" "$confFile"
  done

  echo "Cluster started."
  waitForClusterReady
}
# Take down every component of the running cluster, the controller first and
# kafka last.
function stopServices() {
  local component
  for component in controller broker server zookeeper kafka; do
    stopService "$component"
  done
  echo "Cluster stopped."
}
# Setup the path and classpath prefix for compatibility tester executable
# Sets COMPAT_TESTER to ${COMPAT_TESTER_PATH} resolved against the parent of the
# directory of this script, and exports CLASSPATH_PREFIX as the colon-separated
# list of the pinot-compatibility-verifier-*.jar files found in
# pinot-compatibility-verifier/target next to it.
# CLASSPATH_PREFIX is empty if that directory or the jars do not exist.
function setupCompatTester() {
  local scriptDir
  scriptDir="$(dirname "$0")"
  COMPAT_TESTER="${scriptDir}/../${COMPAT_TESTER_PATH}"
  local verifierRelDir="${scriptDir}/../pinot-compatibility-verifier/target"
  # Empty when the directory is missing, rather than silently falling back to
  # the current directory
  local verifierAbsDir
  verifierAbsDir="$(cd "${verifierRelDir}" 2>/dev/null && pwd)"
  # Collect the jars with a glob instead of parsing ls, so that paths with
  # whitespace survive
  local -a jars=()
  local jar
  if [ -n "$verifierAbsDir" ]; then
    for jar in "${verifierAbsDir}"/pinot-compatibility-verifier-*.jar; do
      # An unmatched glob stays literal, skip it
      if [ -e "$jar" ]; then
        jars+=("$jar")
      fi
    done
  fi
  if [ "${#jars[@]}" -eq 0 ]; then
    echo "No pinot-compatibility-verifier jar found in ${verifierRelDir}" >&2
  fi
  # "${jars[*]}" joins the elements with the first character of IFS
  CLASSPATH_PREFIX="$(IFS=:; printf '%s' "${jars[*]}")"
  echo "CLASSPATH_PREFIX is set as: $CLASSPATH_PREFIX"
  export CLASSPATH_PREFIX
}
# Override CONTROLLER_PORT with the "controller.port" entry of
# ${CONTROLLER_CONF}, if the file exists and has such an entry. The entry must
# have the form "key = value", since the value is taken from the third
# whitespace-separated field of the line.
function setupControllerVariables() {
  # Quoted, so that a test suite directory with whitespace in its name works
  [ -f "${CONTROLLER_CONF}" ] || return 0
  local port
  port=$(grep -F -- controller.port "${CONTROLLER_CONF}" | awk '{print $3}')
  if [ -n "$port" ]; then
    CONTROLLER_PORT=$port
  fi
}
# Override BROKER_QUERY_PORT with the "pinot.broker.client.queryPort" entry of
# ${BROKER_CONF}, if the file exists and has such an entry. The entry must have
# the form "key = value", since the value is taken from the third
# whitespace-separated field of the line.
function setupBrokerVariables() {
  # Quoted, so that a test suite directory with whitespace in its name works
  [ -f "${BROKER_CONF}" ] || return 0
  local port
  port=$(grep -F -- pinot.broker.client.queryPort "${BROKER_CONF}" | awk '{print $3}')
  if [ -n "$port" ]; then
    BROKER_QUERY_PORT=$port
  fi
}
# Override SERVER_ADMIN_PORT and SERVER_NETTY_PORT with the
# "pinot.server.adminapi.port" and "pinot.server.netty.port" entries of
# ${SERVER_CONF}, if the file exists and has such entries. An entry must have the
# form "key = value", since the value is taken from the third
# whitespace-separated field of the line.
function setupServerVariables() {
  # Quoted, so that a test suite directory with whitespace in its name works
  [ -f "${SERVER_CONF}" ] || return 0
  local port
  port=$(grep -F -- pinot.server.adminapi.port "${SERVER_CONF}" | awk '{print $3}')
  if [ -n "$port" ]; then
    SERVER_ADMIN_PORT=$port
  fi
  port=$(grep -F -- pinot.server.netty.port "${SERVER_CONF}" | awk '{print $3}')
  if [ -n "$port" ]; then
    SERVER_NETTY_PORT=$port
  fi
}
#
# Main
#
# Parses the command line, starts a cluster from <workingDir>/oldTargetDir, then
# upgrades controller, broker and server (in that order) to
# <workingDir>/newTargetDir and rolls them back in the reverse order. After each
# step the matching yaml file of the test suite, if there is one, is run through
# the compatibility tester.
#

# Run one yaml file of the test suite through the compatibility tester.
# $1 is the yaml file; nothing is done if it does not exist
# $2 is printed before the tests are run, unless it is empty
# $3 is printed if the tests fail
# genNum is incremented for every yaml file that is run and is passed to the
# tester. If the tests fail, the cluster is stopped (unless it is to be kept on
# failure) and the script exits with status 1.
function runCompatTests() {
  local yamlFile=$1
  local startMsg=$2
  local failMsg=$3
  if [ ! -f "$yamlFile" ]; then
    return 0
  fi
  if [ -n "$startMsg" ]; then
    echo "$startMsg"
  fi
  genNum=$((genNum + 1))
  if ! "$COMPAT_TESTER" "$yamlFile" "$genNum"; then
    if [ "$keepClusterOnFailure" == "false" ]; then
      stopServices
    fi
    echo "$failMsg"
    exit 1
  fi
}

# Replace the running instance of a component by the one of another build.
# $1 is the component: controller, broker or server
# $2 is the directory of the build to start the component from
# $3 is the config file of the component
# Returns once the newly started component responds.
function switchComponent() {
  local component=$1
  local targetDir=$2
  local confFile=$3
  stopService "$component"
  startService "$component" "$targetDir" "$confFile"
  case "$component" in
    controller) waitForControllerReady ;;
    broker) waitForBrokerReady ;;
    server) waitForServerReady ;;
  esac
}

workingDir=
testSuiteDir=
keepClusterOnFailure="false"
# Args while-loop
while [ "$1" != "" ]; do
  case "$1" in
    -w | --working-dir)
      shift
      workingDir=$(absPath "$1")
      ;;
    -t | --test-suite-dir)
      shift
      testSuiteDir=$(absPath "$1")
      ;;
    # The long option is documented with leading dashes. The spelling without
    # dashes was the only one accepted before and keeps working.
    -k | --keep-cluster-on-failure | keep-cluster-on-failure)
      keepClusterOnFailure="true"
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "illegal option $1"
      usage
      exit 1 # error
      ;;
  esac
  shift
done
if [ -z "$workingDir" ] || [ -z "$testSuiteDir" ]; then
  usage
  exit 1
fi
COMPAT_TESTER_PATH="pinot-compatibility-verifier/target/pinot-compatibility-verifier-pkg/bin/pinot-compat-test-runner.sh"
BROKER_CONF=${testSuiteDir}/config/BrokerConfig.properties
CONTROLLER_CONF=${testSuiteDir}/config/ControllerConfig.properties
SERVER_CONF=${testSuiteDir}/config/ServerConfig.properties
# Start from a clean slate: drop the data directories named in the config files
cleanupControllerDirs
cleanupServerDirs
# Default ports, overridden below by the config files of the test suite
BROKER_QUERY_PORT=8099
ZK_PORT=2181
CONTROLLER_PORT=9000
SERVER_ADMIN_PORT=8097
SERVER_NETTY_PORT=8098
PID_DIR=${workingDir}/pids
LOG_DIR=${workingDir}/logs
${RM} -rf "${PID_DIR}"
${RM} -rf "${LOG_DIR}"
setupControllerVariables
setupBrokerVariables
setupServerVariables
export JAVA_OPTS="-DControllerPort=${CONTROLLER_PORT} -DBrokerQueryPort=${BROKER_QUERY_PORT} -DServerAdminPort=${SERVER_ADMIN_PORT}"
# Nothing can be started or stopped without these two directories
mkdir "${PID_DIR}" || exit 1
mkdir "${LOG_DIR}" || exit 1
oldTargetDir="$workingDir"/oldTargetDir
newTargetDir="$workingDir"/newTargetDir
setupCompatTester
# check that the ports to be used are open
for port in "$SERVER_ADMIN_PORT" "$SERVER_NETTY_PORT" "$BROKER_QUERY_PORT" "$CONTROLLER_PORT" "$ZK_PORT"; do
  # -sTCP:LISTEN has to be one word: a bare -s is a different lsof option
  if [ -n "$(lsof -t -i:"${port}" -sTCP:LISTEN)" ]; then
    echo "Cannot start the components since the default ports are not open. Check any existing process that may be using the default ports."
    exit 1
  fi
done
# Setup initial cluster with olderCommit and do rolling upgrade
# Provide abspath of filepath to $COMPAT_TESTER
echo "Setting up cluster before upgrade"
startServices "$oldTargetDir"
genNum=0
runCompatTests "$testSuiteDir/pre-controller-upgrade.yaml" "" "Failed before controller upgrade"

echo "Upgrading controller"
switchComponent controller "$newTargetDir" "$CONTROLLER_CONF"
runCompatTests "$testSuiteDir/pre-broker-upgrade.yaml" "Running tests after controller upgrade" "Failed before broker upgrade"

echo "Upgrading broker"
switchComponent broker "$newTargetDir" "$BROKER_CONF"
runCompatTests "$testSuiteDir/pre-server-upgrade.yaml" "Running tests after broker upgrade" "Failed before server upgrade"

echo "Upgrading server"
switchComponent server "$newTargetDir" "$SERVER_CONF"
runCompatTests "$testSuiteDir/post-server-upgrade.yaml" "Running tests after server upgrade" "Failed after server upgrade"

echo "Downgrading server"
# Upgrade completed, now do a rollback
switchComponent server "$oldTargetDir" "$SERVER_CONF"
runCompatTests "$testSuiteDir/post-server-rollback.yaml" "Running tests after server downgrade" "Failed after server downgrade"

echo "Downgrading broker"
switchComponent broker "$oldTargetDir" "$BROKER_CONF"
runCompatTests "$testSuiteDir/post-broker-rollback.yaml" "Running tests after broker downgrade" "Failed after broker downgrade"

echo "Downgrading controller"
switchComponent controller "$oldTargetDir" "$CONTROLLER_CONF"
runCompatTests "$testSuiteDir/post-controller-rollback.yaml" "Running tests after controller downgrade" "Failed after controller downgrade"

stopServices
echo "All tests passed"
exit 0