| #!/usr/bin/env bash |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| |
# This script polls the specified URL (typically a service we want to see running) and the
# specified process.
# If it finds that the web request fails, it kills the process being monitored and exits.
# If it finds that the process is no longer alive, it exits.
# Typically used in startup scripts for services such as Solr that should be terminated if the
# server is not running.
# Arguments: polling_interval_seconds process_id_to_kill url_to_monitor
# Example usage in a shell script: bigtop-monitor-service 60 $$ http://127.0.0.1:8983/solr
| |
# Print an informational message on stdout.
# Arguments: any number of words; they are emitted on one line, separated by
#            single spaces and preceded by the "INFO:" tag.
info() {
  # Pin IFS locally so "$*" always joins with a single space.
  local IFS=' '
  # Prepend the tag as its own word; with no arguments the line is just "INFO:".
  set -- "INFO:" "$@"
  printf '%s\n' "$*"
}
| |
#######################################
# Watchdog loop: periodically sends a heartbeat request to a URL and checks
# that a process is still alive, killing the process when the URL reports a
# server-side failure.
# Arguments:
#   $1 - polling interval in seconds (integer >= 1); also passed to curl as
#        the per-request time limit (-m)
#   $2 - PID of the process to watch and, if necessary, kill (positive integer)
#   $3 - URL to send heartbeat requests to
# Outputs:
#   Progress messages on stdout; usage and validation errors on stderr.
# Exit status (the function calls `exit`, it never returns):
#   1 - wrong argument count, invalid PID, invalid interval (the process is
#       killed in that case), or the watched process is no longer alive
#   0 - the process was killed after an HTTP code >= 500 or 000, or the
#       watchdog gave up after a 4xx code other than 401
#######################################
monitor() {
  local usage="$0 polling_interval_seconds process_id_to_kill url_to_monitor"
  if [ $# -ne 3 ]; then
    printf '%s\n' "$usage" >&2
    exit 1
  fi

  local interval="$1"
  local pid="$2"
  local url="$3"
  local http_code

  # Reject anything that is not a plain positive PID before it can reach
  # `kill`: an empty value, 0, a negative number or a multi-word string would
  # signal nothing, the whole process group, every process we may signal, or
  # several processes at once.
  if ! [[ "$pid" =~ ^[1-9][0-9]*$ ]]; then
    echo "Invalid value for process_id_to_kill $pid - must be a positive integer" >&2
    exit 1
  fi

  # A watchdog that cannot poll is useless, so the monitored process is
  # killed when the interval is unusable.
  if ! [[ "$interval" =~ ^[0-9]+$ ]]; then
    echo "Invalid value for polling_interval_seconds $interval - must be a positive integer" >&2
    kill -9 "$pid"
    exit 1
  fi

  # `[` compares in decimal, so values with leading zeros ("08") are fine.
  if [ "$interval" -le 0 ]; then
    echo "Invalid value for polling_interval_seconds $interval - must be >= 1" >&2
    kill -9 "$pid"
    exit 1
  fi

  # Close file descriptors 3..255. `eval` is needed because redirections are
  # recognised before brace expansion happens; the evaluated text is a fixed
  # literal, no caller-supplied data is involved.
  eval exec {3..255}\>\&-
  cd /

  info "Starting a watchdog process monitoring process '$pid' and url '$url'"

  while :; do
    sleep "$interval"
    info "Sending a heartbeat request to $url"

    http_code=$(curl -m"$interval" --retry 5 -L -k -s --negotiate -u : -o /dev/null -w "%{http_code}" "$url")
    # No output at all from curl is treated as a server-side failure.
    http_code=${http_code:-600}

    # 5xx and above (server-side error): kill the service and exit.
    # curl prints 000 as the HTTP code when it got no HTTP response, so a
    # code of 0 is treated as a failure as well.
    if [ "$http_code" -ge 500 ] || [ "$http_code" -eq 0 ]; then
      info "Got $http_code HTTP code from the server. Watchdog is now killing process: $pid"
      kill -9 "$pid"
      exit 0
    fi

    # 4xx (client-side error): exit without touching the process.
    # 401 (Unauthorized) is the one case where we keep running.
    if [ "$http_code" -ge 400 ] && [ "$http_code" -lt 500 ] && [ "$http_code" -ne 401 ]; then
      info "Got $http_code HTTP code. This is confusing. Watchdog is now exiting..."
      exit 0
    fi

    # Signal 0 only tests whether the process can still be signalled.
    if kill -0 "$pid" >/dev/null 2>&1; then
      echo "Process $pid is alive"
    else
      echo "Process $pid is dead"
      exit 1
    fi
  done
}
| |
# Run the watchdog as a background job, forwarding all command-line arguments,
# so the invoking script continues immediately.
monitor "$@" &