| #!/bin/bash |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
#######################################
# Print the usage summary and terminate the script.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   does not return; exits the shell with status 1
#######################################
help() {
  # The script name is passed as a printf argument instead of being spliced
  # into the format string, so a '%' or backslash in the path used to invoke
  # the script cannot corrupt the message.
  printf 'Usage: %s\n' "$0"
  printf '%s\n' \
    '-i identifier (required for CLI compatibility; value ignored by local-only heartbeat)' \
    '-p path (required for CLI compatibility; value ignored by local-only heartbeat)' \
    '-m mount point (local path where heartbeat will be written)' \
    '-h host (host IP/name to include in heartbeat filename)' \
    '-r write/read hb log (read-check mode)' \
    '-c cleanup (trigger emergency reboot)' \
    '-t interval between read hb log'
  exit 1
}
| |
| #set -x |
# Option state. Everything starts empty (or 0 for the two mode flags) and is
# filled in by the getopts loop below.
NfsSvrIP=''
NfsSvrPath=''
MountPoint=''
HostIP=''
interval=''
rflag=0
cflag=0

# -i and -p are still accepted so existing callers keep working, but their
# values are not read anywhere else in this script.
while getopts 'i:p:m:h:t:rc' OPTION; do
  case "$OPTION" in
    i) NfsSvrIP="$OPTARG" ;;
    p) NfsSvrPath="$OPTARG" ;;
    m) MountPoint="$OPTARG" ;;
    h) HostIP="$OPTARG" ;;
    r) rflag=1 ;;
    t) interval="$OPTARG" ;;
    c) cflag=1 ;;
    *) help ;;   # unknown option or missing argument
  esac
done
| |
# A mount point is mandatory in every mode.
if [ -z "$MountPoint" ]; then
  echo "Mount point (-m) is required" >&2
  help
fi

# Storage sanity checks.
# They are skipped in cleanup mode (-c): that mode exists to reboot the host
# when the heartbeat could not be written, which is exactly the situation in
# which the mount point may be missing, unmounted or read-only. Exiting here
# would prevent the reboot from ever being triggered.
if [ "$cflag" != "1" ]; then
  if [ ! -d "$MountPoint" ]; then
    echo "Mount point directory does not exist: $MountPoint" >&2
    exit 1
  fi

  # If the 'mountpoint' utility is available, ensure this is an actual mount.
  if command -v mountpoint >/dev/null 2>&1; then
    if ! mountpoint -q "$MountPoint"; then
      echo "Mount point is not a mounted filesystem: $MountPoint" >&2
      exit 1
    fi
  fi

  # Only the heartbeat writer needs write access; read-check mode (-r) only
  # reads the heartbeat file.
  if [ "$rflag" != "1" ] && [ ! -w "$MountPoint" ]; then
    echo "Mount point is not writable: $MountPoint" >&2
    exit 1
  fi
fi
#######################################
# Best-effort SIGKILL of every process whose `ps aux` line contains "qemu"
# and a path below the given mount point.
# Arguments: $1 - mount point path, with or without a trailing slash
# Outputs:   none (errors from ps/kill are discarded)
# Returns:   0 always
#######################################
deleteVMs() {
  local mountPoint=$1
  local vmPids pid

  # An empty argument would normalise to "/" below and match almost every
  # qemu command line, so refuse to act on it.
  [ -n "$mountPoint" ] || return 0

  # Ensure it ends with a single trailing slash so /mnt/a does not match
  # /mnt/ab/...
  mountPoint="${mountPoint%/}/"

  # One awk pass with literal substring matching (index), so characters such
  # as '.' in the path are not treated as regex metacharacters. The path is
  # handed over through the environment: it never appears on awk's own
  # command line, and awk does not apply escape processing to it. The
  # script's own PID is skipped in case its arguments happen to match.
  vmPids=$(ps aux 2>/dev/null \
    | HB_MOUNT="$mountPoint" HB_SELF="$$" awk '
        $2 != ENVIRON["HB_SELF"] \
          && index($0, "qemu") \
          && index($0, ENVIRON["HB_MOUNT"]) { print $2 }' 2>/dev/null)

  # Word-splitting on $vmPids is intentional: one PID per word.
  for pid in $vmPids; do
    kill -9 "$pid" &>/dev/null
  done
  return 0
}
| |
#######################################
# Report whether a path appears as a mount target in /proc/mounts.
# Arguments: $1 - mount point path; trailing slashes are ignored
# Outputs:   none
# Returns:   0 if the second field of some /proc/mounts entry equals the
#            path, non-zero otherwise (including when /proc/mounts cannot
#            be read)
#######################################
is_mount_listed() {
  local target=$1

  # Strip trailing slashes (but keep "/" itself) so "-m /mnt/x/" is looked
  # up as "/mnt/x", which is how the mount table spells it.
  while [ "${#target}" -gt 1 ] && [ "${target%/}" != "$target" ]; do
    target=${target%/}
  done
  [ -n "$target" ] || return 1

  # Exact string comparison on the mount-target field; the path is passed
  # through the environment so it is never interpreted as a regex.
  HB_TARGET="$target" awk '
    $2 == ENVIRON["HB_TARGET"] { found = 1; exit }
    END { exit !found }' /proc/mounts 2>/dev/null
}

# If the mount point is not in the mount table and we are not in read-check
# mode, kill the VMs that run from it (best-effort).
if ! is_mount_listed "$MountPoint" && [ "$rflag" == "0" ]; then
  deleteVMs "$MountPoint"
fi

# Heartbeat location: <mount point>/KVMHA/hb-<host>
hbFolder="$MountPoint/KVMHA"
hbFile="$hbFolder/hb-$HostIP"
| |
#######################################
# Write the current epoch time (seconds) into the heartbeat file.
# Globals:   hbFolder (read), hbFile (read)
# Arguments: none
# Outputs:   a one-line diagnostic on stderr when the write fails
# Returns:   0 on success, 2 if the folder is not writable or the temporary
#            file cannot be written, 1 if the final rename fails
#######################################
write_hbLog() {
  local timestamp tmpfile

  # First write: make sure the heartbeat folder exists and is writable.
  if [ ! -e "$hbFile" ]; then
    mkdir -p "$hbFolder" &>/dev/null
    if [ ! -w "$hbFolder" ]; then
      printf 'Folder not writable: %s\n' "$hbFolder" >&2
      return 2
    fi
  fi

  timestamp=$(date +%s)

  # Write to a temporary file next to the target and rename it into place,
  # so a reader never sees a partially written heartbeat.
  tmpfile="${hbFile}.$$"
  # The braces make 2>/dev/null cover a failure to open $tmpfile as well;
  # our own message below replaces the shell's.
  if ! { printf '%s\n' "$timestamp" > "$tmpfile"; } 2>/dev/null; then
    rm -f -- "$tmpfile" 2>/dev/null
    printf 'Failed to write heartbeat to %s\n' "$tmpfile" >&2
    return 2
  fi

  if ! mv -f -- "$tmpfile" "$hbFile" 2>/dev/null; then
    # Do not leave a stale temporary file behind.
    rm -f -- "$tmpfile" 2>/dev/null
    return 1
  fi
  return 0
}
| |
#######################################
# Compare the timestamp stored in the heartbeat file with the current time.
# Globals:   hbFile (read), interval (read), hb_diff (written)
# Arguments: none
# Outputs:   none
# Returns:   0 if the heartbeat is fresh (hb_diff is set to 0);
#            1 otherwise, with hb_diff set to the age in seconds or to a
#            sentinel: 999999 file missing, 999998 file empty,
#            999997 content is not a non-negative integer
#######################################
check_hbLog() {
  local now hb diff
  hb_diff=0

  if [ ! -f "$hbFile" ]; then
    # signal large difference if file missing
    hb_diff=999999
    return 1
  fi

  now=$(date +%s)
  hb=$(cat "$hbFile" 2>/dev/null)
  if [ -z "$hb" ]; then
    hb_diff=999998
    return 1
  fi

  # Accept digits only. This keeps file content out of the arithmetic
  # evaluator unless it is a plain number.
  case "$hb" in
    *[!0-9]*)
      hb_diff=999997
      return 1
      ;;
  esac

  # Shell arithmetic instead of expr: expr exits with status 1 when the
  # result is 0, which made a heartbeat written in the current second look
  # like a parse failure. "10#" forces base 10 for values with leading zeros.
  diff=$(( now - 10#$hb ))

  if [ -z "$interval" ]; then
    # No interval provided: only a heartbeat that is not older than "now"
    # counts as success.
    if [ "$diff" -gt 0 ]; then
      hb_diff=$diff
      return 1
    fi
    hb_diff=0
    return 0
  fi

  # NOTE(review): a non-numeric interval makes this test fail with an error
  # and the heartbeat is then treated as fresh, as before. Confirm whether
  # -t should be validated at option-parsing time.
  if [ "$diff" -gt "$interval" ]; then
    hb_diff=$diff
    return 1
  fi

  hb_diff=0
  return 0
}
| |
# Mode dispatch. Read-check (-r) takes precedence over cleanup (-c); with
# neither flag the heartbeat is written.

# Read-check mode: report ALIVE/DEAD on stdout; the exit status is always 0.
if [[ "$rflag" == "1" ]]; then
  check_hbLog
  status=$?
  diff="${hb_diff:-0}"
  case "$status" in
    0)
      echo "=====> ALIVE <====="
      ;;
    *)
      echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
      ;;
  esac
  exit 0
fi

# Cleanup mode: log the reason, start a background sync, wait 5 seconds and
# then request an immediate reboot through sysrq.
if [[ "$cflag" == "1" ]]; then
  /usr/bin/logger -t heartbeat "kvmsmpheartbeat.sh will reboot system because it was unable to write the heartbeat to the storage."
  sync &
  sleep 5
  echo b > /proc/sysrq-trigger
  exit $?
fi

# Default mode: write the heartbeat and propagate its status.
write_hbLog
exit $?