#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script polls the specified URL (typically a service we want to see running) and
# watches the specified process.
# If the web request fails, it kills the process being monitored and exits.
# If the process is no longer alive, it exits.
# Typically used in startup scripts for services such as Solr that should be terminated if
# the server is not running.
# Usage: bigtop-monitor-service polling_interval_seconds process_id_to_kill url_to_monitor
# Example usage in a shell script: bigtop-monitor-service 60 $$ http://127.0.0.1:8983/solr
# Print one log line on stdout: the literal tag "INFO:" followed by every
# argument, all joined by single spaces.
info() {
  # Join with a single space regardless of the caller's IFS.
  local IFS=' '
  set -- "INFO:" "$@"
  printf '%s\n' "$*"
}
#######################################
# Watchdog loop: once per polling interval, probe a URL with curl and check
# that a process is still alive.
# Arguments:
#   $1 - polling interval in seconds (positive integer); also passed to curl
#        as its -m (maximum time) value
#   $2 - PID of the process to watch / kill (positive integer)
#   $3 - URL to probe
# Outputs:
#   progress messages on stdout; usage and validation errors on stderr
# Exits (never returns):
#   1 - wrong argument count, invalid PID, invalid interval (the process is
#       killed first in that case), or the watched process is no longer alive
#   0 - after killing the process on an HTTP code >= 500 or 0, or when a 4xx
#       code other than 401 is received (process left running)
#######################################
monitor() {
  local usage="$0 polling_interval_seconds process_id_to_kill url_to_monitor"
  if [ $# -ne 3 ]; then
    printf '%s\n' "$usage" >&2
    exit 1
  fi

  local interval="$1"
  local pid="$2"
  local url="$3"
  local http_code

  # Validate the PID before it can reach `kill`: a value such as "0" or "-1"
  # would make `kill -9` target a process group or every process the caller
  # may signal rather than one single process.
  case "$pid" in
    '' | *[!0-9]*)
      echo "Invalid value for process_id_to_kill $pid - must be a positive integer" >&2
      exit 1
      ;;
  esac
  if [ "$pid" -le 0 ]; then
    echo "Invalid value for process_id_to_kill $pid - must be >= 1" >&2
    exit 1
  fi

  # A watchdog that cannot poll is useless, so an invalid interval takes the
  # monitored process down with it.
  case "$interval" in
    '' | *[!0-9]*)
      echo "Invalid value for polling_interval_seconds $interval - must be a positive integer" >&2
      kill -9 "$pid"
      exit 1
      ;;
  esac
  # Plain `[` is used on purpose: it compares "08" as decimal, whereas
  # [[ ]] / (( )) would reject it as an invalid octal number.
  if [ "$interval" -le 0 ]; then
    echo "Invalid value for polling_interval_seconds $interval - must be >= 1" >&2
    kill -9 "$pid"
    exit 1
  fi

  # Close file descriptors 3..255 inherited from the caller: brace expansion
  # produces "3>&- 4>&- ... 255>&-", which eval hands to exec as redirections.
  eval exec {3..255}\>\&-
  # Do not keep the caller's working directory in use.
  cd /

  info "Starting a watchdog process monitoring process '$pid' and url '$url'"
  while :; do
    sleep "$interval"
    info "Sending a heartbeat request to $url"
    # Only the HTTP status code is captured; the response body is discarded.
    http_code=$(curl -m"$interval" --retry 5 -L -k -s --negotiate -u : -o /dev/null -w "%{http_code}" "$url")
    # No output at all (e.g. curl could not run) is treated like a failure.
    http_code=${http_code:-600}

    # If we're getting 5xx+ (server side error) kill the service and exit.
    # curl prints 000 as the HTTP code when it obtained no HTTP response, so
    # a code of 0 is treated as a failure as well.
    if [ "$http_code" -ge 500 ] || [ "$http_code" -eq 0 ]; then
      info "Got $http_code HTTP code from the server. Watchdog is now killing process: $pid"
      kill -9 "$pid"
      exit 0
    fi

    # If we're getting 4xx (client side error) we better exit silently.
    # 401 (Unauthorized) is a special case of when we should keep running.
    if [ "$http_code" -ge 400 ] && [ "$http_code" -lt 500 ] && [ "$http_code" -ne 401 ]; then
      info "Got $http_code HTTP code. This is confusing. Watchdog is now exiting..."
      exit 0
    fi

    # Signal 0 delivers nothing; it only tests whether the PID can be signalled.
    if kill -0 "$pid" >/dev/null 2>&1; then
      echo "Process $pid is alive"
    else
      echo "Process $pid is dead"
      exit 1
    fi
  done
}
# Launch the watchdog as a background job, forwarding this script's arguments
# unchanged, so the script itself does not block on the polling loop.
monitor "$@" &