blob: 6449ec713ae7bedd155b67ec28d248e78c42e568 [file]
#!/bin/bash
#
# @@@ START COPYRIGHT @@@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# @@@ END COPYRIGHT @@@
#
# sqcheck 'all' will return:
# -1 - Not up ($?=255)
# 0 - Fully up and operational
# 1 - Partially up and operational
# 2 - Partially up and NOT operational
# Print the command-line synopsis and the option descriptions to stdout.
# The defaults shown are read from the current values of max_checks,
# sleep_duration and sqck_component.
function Usage {
    echo
    # The synopsis lists -n as well, so it agrees with the option list below.
    echo "Usage: $0 [ -c <cc> | -i <nn> | -d <nn> | -n <nid> | -h | -f | -q | -v | -r | -j ]"
    echo
    echo "-i <nn> Number of times the check for Trafodion processes will be done (Default $max_checks)"
    echo "-d <nn> Duration of sleep (in seconds) between each check (Default $sleep_duration)"
    echo "-c <cc> Which component to check: [all | dtm | dcs | rms | rest ] (Default $sqck_component)"
    echo "-f fast - (it's like running $0 -i 1 -d 0)"
    echo "-r reset the iteration counter if the process count increases (as compared to the count in the last iteration)"
    echo "-j Format the output in JSON. Used by REST server"
    echo "-h Help"
    echo "-v Verbose"
    echo "-q Quiet"
    echo "-n <nid> Check if the node is up"
    echo
}
# Parse the command-line options given as arguments and store them in the
# script-level settings:
#   -c <cc>  sqck_component (one of all, dtm, dcs, rms, rest)
#   -i <nn>  max_checks          -d <nn>  sleep_duration
#   -v / -q  sqc_verbose = 1 / 0 -r       reset_counter = 1
#   -j       json_output = 1     -n <nid> check_node
#   -f       shorthand for max_checks=1 and sleep_duration=0
# Prints the usage text and exits with status 1 for -h and for a -c value
# that is not a known component.
function GetOpts {
    # Start parsing at the first argument on every call and keep the loop
    # variable out of the global namespace.
    local OPTIND=1 arg

    while getopts "c:i:d:fqvrjn:h" arg
    do
        case $arg in
            c)
                # A quoted case match cannot be broken by an empty or
                # multi-word argument, unlike an unquoted [ ... ] test.
                case "${OPTARG}" in
                    all|dtm|dcs|rms|rest)
                        sqck_component=${OPTARG}
                        ;;
                    *)
                        Usage
                        # Non-zero on purpose: status 0 means "fully up and
                        # operational" to callers of this script.
                        exit 1
                        ;;
                esac
                ;;
            d)
                sleep_duration=${OPTARG}
                ;;
            i)
                max_checks=${OPTARG}
                ;;
            v)
                sqc_verbose=1
                ;;
            q)
                sqc_verbose=0
                ;;
            f)
                sleep_duration=0
                max_checks=1
                ;;
            r)
                reset_counter=1
                ;;
            j)
                json_output=1
                ;;
            n)
                check_node=${OPTARG}
                ;;
            h)
                Usage
                exit 1
                ;;
        esac
    done
}
# Walk the whitespace-separated words of $1 while keeping a running index in
# the global `count`. When $2 is "TM", each word is also stored in the global
# array act_dtms at that index.
function fillArray {
    count=0
    for item in $1
    do
        case "$2" in
            TM) act_dtms[count]="$item" ;;
        esac
        count=$(( count + 1 ))
    done
}
# Collect the configured and the running DCS process counts.
# Reads: DCS_INSTALL_DIR (DCS installation root) and tmpjps (file holding the
#        filtered jps listing written by the caller).
# Sets:  cfg_dcsmaster_cnt, cfg_dcsserver_cnt, cfg_mxo_cnt,
#        actual_dcsmaster_cnt, actual_dcsserver_cnt,
#        down_dcsmaster_cnt and down_dcsserver_cnt (empty when configured and
#        running counts are equal).
function getDcsInfo {
    local dcs_conf="${DCS_INSTALL_DIR}/conf"
    local srv_counts

    if [[ -z "$DCS_INSTALL_DIR" || ! -d "$DCS_INSTALL_DIR" ]]; then
        # No DCS installation: report zeros instead of leaving the counters
        # unset, so the table and the JSON output stay well formed.
        cfg_dcsmaster_cnt=0
        cfg_dcsserver_cnt=0
        actual_dcsmaster_cnt=0
        actual_dcsserver_cnt=0
        down_dcsmaster_cnt=''
        down_dcsserver_cnt=''
        return 0
    fi

    ### Get the configured number of DcsMaster's
    if [[ -s "$dcs_conf/masters" ]]; then
        # Lines that are empty or contain '#' are not counted.
        cfg_dcsmaster_cnt=$(grep -Ecv '#|^$' "$dcs_conf/masters")
    else
        cfg_dcsmaster_cnt=1
    fi

    ### Get the configured number of DcsServer and mxosrvr
    if [[ -f "$dcs_conf/servers" ]]; then
        # One DcsServer per entry; the optional second column is the number
        # of mxosrvrs for that entry (1 when absent). Blank lines and '#'
        # comment lines are skipped, matching how conf/masters is counted.
        # "+ 0" makes awk print 0 rather than nothing for an empty file.
        srv_counts=$(awk '
            NF == 0 || $1 ~ /^#/ { next }
            { servers++; mxo += ($2 == "" ? 1 : $2) }
            END { print servers + 0, mxo + 0 }' "$dcs_conf/servers")
        cfg_dcsserver_cnt=${srv_counts%% *}
        cfg_mxo_cnt=${srv_counts##* }
    fi

    ### Check if there are any DcsMaster and DcsServer's that are started
    actual_dcsmaster_cnt=$(grep -c DcsMaster "$tmpjps" 2>/dev/null)
    actual_dcsserver_cnt=$(grep -c DcsServer "$tmpjps" 2>/dev/null)
    # grep prints nothing when the listing cannot be read; treat that as 0.
    : "${actual_dcsmaster_cnt:=0}" "${actual_dcsserver_cnt:=0}"

    if [[ "$cfg_dcsserver_cnt" != "$actual_dcsserver_cnt" ]]; then
        down_dcsserver_cnt=$(( cfg_dcsserver_cnt - actual_dcsserver_cnt ))
    else
        down_dcsserver_cnt=''
    fi

    if [[ "$cfg_dcsmaster_cnt" != "$actual_dcsmaster_cnt" ]]; then
        down_dcsmaster_cnt=$(( cfg_dcsmaster_cnt - actual_dcsmaster_cnt ))
    else
        down_dcsmaster_cnt=''
    fi
}
# Collect the REST server process counts.
# Reads: REST_INSTALL_DIR (REST installation root) and tmpjps (file holding
#        the filtered jps listing written by the caller).
# Sets:  actual_restserver_cnt, cfg_restserver_cnt and down_restserver_cnt.
function getRestInfo {
    if [[ -z "$REST_INSTALL_DIR" || ! -d "$REST_INSTALL_DIR" ]]; then
        # No REST installation: report zeros instead of leaving the counters
        # unset, so the table and the JSON output stay well formed.
        actual_restserver_cnt=0
        cfg_restserver_cnt=0
        down_restserver_cnt=''
        return 0
    fi

    ### Check if there are any TrafodionRest servers that are started
    actual_restserver_cnt=$(grep -c TrafodionRest "$tmpjps" 2>/dev/null)
    # grep prints nothing when the listing cannot be read; treat that as 0.
    : "${actual_restserver_cnt:=0}"

    # No configuration source is consulted here: the configured count is
    # defined as the running count, so no REST server is ever reported down.
    cfg_restserver_cnt=$actual_restserver_cnt
    down_restserver_cnt=''
}
# Report whether the node selected with -n is up, then terminate the script.
# Reads: check_node (node id passed to "sqshell -c zone nid").
# Adds the number of rows whose 4th column contains "up" (any letter case)
# to act_lnode_up_cnt.
# Exits 0 when that total is greater than zero, otherwise 255 (-1).
function getNodeStatus {
    local up_rows

    # Count in a pipeline instead of through temporary files: this function
    # always exits, so scratch files created here would never be removed.
    up_rows=$(sqshell -c zone nid "$check_node" \
        | awk 'tolower($4) ~ /up/ { n++ } END { print n + 0 }')
    act_lnode_up_cnt=$(( act_lnode_up_cnt + ${up_rows:-0} ))

    if (( act_lnode_up_cnt > 0 )); then
        exit 0
    else
        exit 255
    fi
}
# ---------------------------------------------------------------------------
# Script start: scratch file, defaults, option parsing, counter setup.
# ---------------------------------------------------------------------------
jpscmd=$JAVA_HOME/bin/jps

# Scratch file that later receives the filtered jps listing.
tmpjps=`mktemp -t`
if [[ $? != 0 ]]; then
    echo "Error while getting a temporary file for tmpjps. Exiting."
    exit 3
fi
# Remove the jps scratch file on every exit path (-h, -n, errors, normal end).
# The trap only runs rm, so the script's exit status is not altered.
trap 'rm -f "$tmpjps"' EXIT

begin_time_seconds=$(date +%s)

# User-tunable settings; GetOpts overrides them from the command line.
declare -i max_checks=2
declare -i sleep_duration=1
declare -i sqc_verbose=0
declare -i reset_counter=0
declare -i json_output=0
declare -i check_node=-1

# Set debug_display to 1 to enable debugging display of values
let debug_display=0

sqck_component="all"
ft_pair=""

# Forward every argument unchanged: "$@" keeps arguments beyond the ninth
# and does not re-split or glob-expand option values.
GetOpts "$@"

if (( sqc_verbose == 1 )); then
    echo "Max number of times to check the Trafodion environment: $max_checks, Sleep Duration: $sleep_duration seconds"
fi

# Integer-typed working variables used by the checks below.
declare -i num_checks
declare -i l_result
declare -i cmp_result
declare -i sq_up
declare -i cfg_dtm_cnt
declare -i cfg_rms_cnt
declare -i cfg_mxo_cnt
declare -i cfg_lnode_cnt
declare -i cfg_pnode_cnt

# Running ("act") counters
act_dtm_cnt=0
act_rms_cnt=0
act_mxo_cnt=0
act_lnode_down_cnt=0
act_pnode_down_cnt=0
act_lnode_up_cnt=0
act_pnode_up_cnt=0
# Configured ("cfg") counters
cfg_lnode_cnt=0
cfg_pnode_cnt=0
cfg_mxo_cnt=0
act_mxo_down_cnt=0
# Counts seen in the previous loop iteration (used by -r)
last_dtm_cnt=0
last_rms_cnt=0
# Loop control and results
num_checks=0
let chk_cnt_result=1
cmp_result=0
sq_up=0

if [[ -z $SQSCRIPTS_DIR ]]; then
    SQSCRIPTS_DIR=$TRAF_HOME/sql/scripts
fi

# -n <nid> was given: report on that node only. getNodeStatus exits.
# check_node is an integer, so it is compared numerically.
if (( check_node > -1 )); then
    getNodeStatus
fi
### CONFIGURED NODEs
# Read a node listing on stdin and print four totals on one line:
#   <logical up> <physical up> <logical down> <physical down>
# A row is counted when its 4th column (logical) or 3rd column (physical)
# contains "up" or "down", compared without regard to letter case.
function sqc_count_node_states {
    awk '
        { pstate = tolower($3); lstate = tolower($4) }
        lstate ~ /up/   { lup++ }
        pstate ~ /up/   { pup++ }
        lstate ~ /down/ { ldn++ }
        pstate ~ /down/ { pdn++ }
        END { print lup + 0, pup + 0, ldn + 0, pdn + 0 }
    '
}

# One pass over the sqshell output; no temporary files are needed.
read -r sq_lnode_up sq_pnode_up sq_lnode_dn sq_pnode_dn \
    <<< "$(sqshell -c node info | sqc_count_node_states)"

act_lnode_up_cnt=$(( act_lnode_up_cnt + ${sq_lnode_up:-0} ))
act_pnode_up_cnt=$(( act_pnode_up_cnt + ${sq_pnode_up:-0} ))
act_lnode_down_cnt=$(( act_lnode_down_cnt + ${sq_lnode_dn:-0} ))
act_pnode_down_cnt=$(( act_pnode_down_cnt + ${sq_pnode_dn:-0} ))

# The configured node counts are only derived when at least one logical
# node is up; otherwise they keep their previous values.
if (( act_lnode_up_cnt > 0 )); then
    cfg_lnode_cnt=$(( act_lnode_up_cnt + act_lnode_down_cnt ))
    cfg_pnode_cnt=$(( act_pnode_up_cnt + act_pnode_down_cnt ))
fi
### CONFIGURED DTMs (based on logical nodes configured)
cfg_dtm_cnt=$(( cfg_lnode_cnt ))
# Expected DTM process names: $TM0, $TM1, ... one per logical node.
for (( num_dtms = 0; num_dtms < cfg_dtm_cnt; num_dtms++ )); do
    cfg_dtms[num_dtms]="\$TM${num_dtms}"
done

# Trace print
#echo ${cfg_dtms[*]}

### CONFIGURED MXOSRVR
# Print the number of mxosrvrs configured in the DCS servers file $1.
# Each entry contributes its second column, or 1 when that column is absent.
# Blank lines and '#' comment lines are skipped; an empty file yields 0.
function sqc_count_mxosrvrs {
    awk '
        NF == 0 || $1 ~ /^#/ { next }
        { total += ($2 == "" ? 1 : $2) }
        END { print total + 0 }
    ' "$1"
}

if [[ -z "${DCS_INSTALL_DIR}" ]]; then
    cfg_mxo_cnt=0
elif [[ -f "${DCS_INSTALL_DIR}/conf/servers" ]]; then
    cfg_mxo_cnt=$(sqc_count_mxosrvrs "${DCS_INSTALL_DIR}/conf/servers")
fi

### CONFIGURED RMSs
if [[ $SQ_START_RMS == "1" ]]; then
    # One SSCP and one SSMP process are expected per logical node.
    cfg_sscp_cnt=$(( cfg_lnode_cnt ))
    cfg_ssmp_cnt=$(( cfg_lnode_cnt ))
    cfg_rms_cnt=$(( cfg_sscp_cnt + cfg_ssmp_cnt ))

    # Expected RMS process names: $ZSC<n> first, then $ZSM<n>.
    # No temporary file is created here: nothing in this script read it.
    num_rms=0
    for (( num_sscp = 0; num_sscp < cfg_lnode_cnt; num_sscp++ )); do
        cfg_rms[num_rms]="\$ZSC${num_sscp}"
        num_rms=$(( num_rms + 1 ))
    done
    for (( num_ssmp = 0; num_ssmp < cfg_lnode_cnt; num_ssmp++ )); do
        cfg_rms[num_rms]="\$ZSM${num_ssmp}"
        num_rms=$(( num_rms + 1 ))
    done
else
    cfg_rms_cnt=0
fi

# Trace print
#echo ${cfg_rms[*]}
# Scratch file that receives the sqps process listing on every check.
if ! sq_tmp_ps=$(mktemp -t); then
    echo "Error while getting a temporary file. Exiting."
    exit 3
fi

# The banner is only shown for the plain-text (non-JSON) output.
case "$json_output" in
    0)
        printf '\n%s\n\n%s\n' \
            "*** Checking Trafodion Environment ***" \
            "Checking if processes are up."
        ;;
esac

# pcmp_result: set to 1 by the checks when the environment is only partially up.
# total_procs_result: DTM + RMS process total, filled in after the loop.
declare -i pcmp_result=0
declare -i total_procs_result=0
# Polling loop: take a process snapshot, compare the running DTM / RMS /
# DCS / REST processes with the configured ones, and stop as soon as the
# selected component is found up (cmp_result == 1) or the permitted number
# of attempts (max_checks) is used up.
# NOTE(review): there is no branch for the "rest" component below, so
# cmp_result is never set to 1 for "-c rest" -- confirm whether intended.
while [ $chk_cnt_result '==' 1 ];
do
    let ++num_checks
    # Becomes 0 on the last permitted attempt, which ends the loop.
    let chk_cnt_result=(num_checks '<' max_checks)
    current_time_seconds=$(date +%s)
    let elapsed_seconds=current_time_seconds-begin_time_seconds
    if [ $json_output '==' 0 ]; then
        echo -en "\rChecking attempt: $num_checks; user specified max: $max_checks. Execution time in seconds: $elapsed_seconds."
    fi

    # Get the current process status (all processes)
    sqps > $sq_tmp_ps 2>&1

    #DTM
    let act_dtm_cnt=`< $sq_tmp_ps grep -a ' DTM ' | wc -l`
    if [ $reset_counter '==' 1 ]; then
        # -r: more DTMs than last time restarts the attempt counter.
        let lv_cnt_check=(act_dtm_cnt '>' last_dtm_cnt)
        if [ $lv_cnt_check '==' 1 ]; then
            let num_checks=0
        fi
        let last_dtm_cnt=act_dtm_cnt
    fi
    let duplicate=0
    if ( [ $act_dtm_cnt '!=' $cfg_dtm_cnt ] &&
         ( [ "$sqck_component" == "all" ] || [ "$sqck_component" == "dtm" ] ) ); then
        # Record every configured DTM name missing from the snapshot, once.
        for dtms in ${cfg_dtms[*]}; do
            if [ -z "`grep -i \"$dtms\>\" $sq_tmp_ps`" ]; then
                for dups in ${down_dtms[*]}; do
                    if [[ "$dups" == "$dtms" ]]; then
                        let duplicate=1
                    fi
                done
                if [ $duplicate '==' 0 ]; then
                    down_dtms[${#down_dtms[@]}]=$dtms
                fi
                let duplicate=0
            fi
        done
    fi

    #Get the list of DcsMaster, DcsServer and TrafodionRest
    # pdsh is used only when SQ_PDSH names an existing file; an empty
    # SQ_PDSH must not pass the existence test.
    if [[ -n "$SQ_PDSH" && -e "$SQ_PDSH" ]]; then
        $SQ_PDSH $MY_NODES $jpscmd |egrep 'DcsMaster|DcsServer|TrafodionRest' > $tmpjps
    else
        $jpscmd |egrep 'DcsMaster|DcsServer|TrafodionRest' > $tmpjps
    fi

    #TrafodionRestServer
    getRestInfo

    #MXOSRVR
    getDcsInfo
    let act_mxo_cnt=`< $sq_tmp_ps egrep -a -i ' mxosrvr' | wc -l`
    if ( [ "$cfg_mxo_cnt" '!=' "$act_mxo_cnt" ] ); then
        let act_mxo_down_cnt=cfg_mxo_cnt-act_mxo_cnt
    else
        act_mxo_down_cnt=''
    fi

    #Cleanup the tmpjps file
    rm -f $tmpjps

    #RMS
    let act_rms_cnt=`< $sq_tmp_ps egrep -a -i ' mxssmp| mxsscp' | wc -l`
    if [ $reset_counter '==' 1 ]; then
        # -r: more RMS processes than last time restarts the attempt counter.
        let lv_cnt_check=(act_rms_cnt '>' last_rms_cnt)
        if [ $lv_cnt_check '==' 1 ]; then
            let num_checks=0
        fi
        let last_rms_cnt=act_rms_cnt
    fi
    let duplicate=0
    if ( [ $act_rms_cnt '!=' $cfg_rms_cnt ] &&
         ( [ "$sqck_component" == "all" ] || [ "$sqck_component" == "rms" ] ) ); then
        # Record every configured RMS name missing from the snapshot, once.
        for rms in ${cfg_rms[*]}; do
            # Same end-of-word anchor as the DTM lookup above, so that a
            # running $ZSC10 is not taken for $ZSC1.
            if [ -z "`grep -i \"$rms\>\" $sq_tmp_ps`" ]; then
                for dups in ${down_rms[*]}; do
                    if [[ "$dups" == "$rms" ]]; then
                        let duplicate=1
                    fi
                done
                if [ $duplicate '==' 0 ]; then
                    down_rms[${#down_rms[@]}]=$rms
                fi
                let duplicate=0
            fi
        done
    fi

    if [[ $sqck_component == "all" ]]; then
        # Get nodes up count
        let cmp_result=(act_lnode_up_cnt '>' 0)
        if [ $debug_display '==' 1 ]; then
            echo
            echo "cmp_result=$cmp_result act_lnode_up_cnt=$act_lnode_up_cnt"
        fi
        # No logical node is up: nothing further to check.
        if [ $cmp_result '==' 0 ]; then
            break
        fi

        # Check DTM
        # When the running count differs from the configured count, accept
        # one DTM per node that is up and flag the result as partial.
        let tmp_cfg_dtm=(act_lnode_up_cnt)
        if ( [ $act_dtm_cnt '!=' $cfg_dtm_cnt ] ); then
            let cmp_result=(cmp_result '&&' tmp_cfg_dtm '>' 0 '&&' act_dtm_cnt '==' tmp_cfg_dtm)
            let pcmp_result=(act_dtm_cnt '==' tmp_cfg_dtm '&&' cmp_result)
        else
            let cmp_result=(cmp_result '&&' act_dtm_cnt '==' cfg_dtm_cnt)
        fi
        if [ $debug_display '==' 1 ]; then
            echo "cmp_result=$cmp_result pcmp_result=$pcmp_result act_dtm_cnt=$act_dtm_cnt cfg_dtm_cnt=$cfg_dtm_cnt tmp_cfg_dtm=$tmp_cfg_dtm"
        fi

        # Check RMS
        # Two RMS processes are expected per node that is up.
        let tmp_cfg_rms=(act_lnode_up_cnt*2)
        if ( [ $act_rms_cnt '!=' $cfg_rms_cnt ] ); then
            let cmp_result=(cmp_result '&&' tmp_cfg_rms '>' 0 '&&' act_rms_cnt '==' tmp_cfg_rms)
            let pcmp_result=(cmp_result)
        else
            let cmp_result=(cmp_result '&&' act_rms_cnt '==' cfg_rms_cnt)
        fi
        if [ $debug_display '==' 1 ]; then
            echo "cmp_result=$cmp_result pcmp_result=$pcmp_result act_rms_cnt=$act_rms_cnt cfg_rms_cnt=$cfg_rms_cnt tmp_cfg_rms=$tmp_cfg_rms"
        fi
    elif [[ $sqck_component == "dtm" ]]; then
        let tmp_cfg_dtm=(act_lnode_up_cnt)
        if ( [ $act_dtm_cnt '!=' $cfg_dtm_cnt ] ); then
            let cmp_result=(tmp_cfg_dtm '>' 0 '&&' act_dtm_cnt '==' tmp_cfg_dtm)
            let pcmp_result=(cmp_result)
        else
            let cmp_result=(act_dtm_cnt '==' cfg_dtm_cnt)
        fi
    elif [[ $sqck_component == "rms" ]]; then
        let tmp_cfg_rms=(act_lnode_up_cnt*2)
        if ( [ $act_rms_cnt '!=' $cfg_rms_cnt ] ); then
            let cmp_result=(tmp_cfg_rms '>' 0 '&&' act_rms_cnt '==' tmp_cfg_rms)
            let pcmp_result=(cmp_result)
        else
            let cmp_result=(act_rms_cnt '==' cfg_rms_cnt)
        fi
    elif [[ $sqck_component == "dcs" ]]; then
        # DCS counts as up as soon as one mxosrvr is running.
        let cmp_result=(act_mxo_cnt '>' 0 )
    fi

    if [ $debug_display '==' 1 ]; then
        echo cmp_result=$cmp_result
    fi

    if [ $cmp_result '==' 1 ]; then
        let sq_up=1
        if [ $json_output '==' 0 ]; then
            if [[ $sqck_component == "all" ]]; then
                if ([ $act_lnode_up_cnt == $cfg_lnode_cnt ] ); then
                    echo
                    echo
                    echo "The Trafodion environment is up!"
                else
                    echo
                    echo
                    echo "The Trafodion environment is up! Node(s) are down and environment is in a degraded state."
                    echo
                    echo
                    sqshell -c node info
                fi
            fi
        fi
        break
    fi

    # Pause only when another attempt follows; after the final attempt the
    # loop ends anyway, so waiting would only delay the result.
    if [ $chk_cnt_result '==' 1 ]; then
        sleep $sleep_duration
    fi
done
# Total of the DTM and RMS processes seen in the last snapshot.
total_procs_result=$(( act_dtm_cnt + act_rms_cnt ))

if [[ "$debug_display" == 1 ]]; then
    echo "act_dtm_cnt=$act_dtm_cnt"
    echo "act_rms_cnt=$act_rms_cnt"
    echo "total_procs_result=$total_procs_result"
fi

# The process snapshot is no longer needed.
rm -f "$sq_tmp_ps"

# Plain-text verdict; only given when every component was checked.
if [[ "$json_output" == 0 ]]; then
    echo
    if [[ "$sqck_component" == "all" ]]; then
        if [[ "$sq_up" != 1 ]]; then
            echo
            echo "The Trafodion environment is not up at all, or partially up and not operational. Check the logs."
        elif [[ "$pcmp_result" == 1 ]]; then
            echo
            echo "The Trafodion environment is partially up and operational."
        fi
    fi
fi
# Print the body (without the surrounding braces) of one JSON object that
# describes a process class.
#   $1 process label   $2 configured count   $3 actual count   $4 down text
# An empty count is written as 0 so that the document remains valid JSON
# when a component's counters were never filled in.
function sqc_json_entry {
    printf '"PROCESS":"%s","CONFIGURED":%s,"ACTUAL":%s,"DOWN":"%s"' \
        "$1" "${2:-0}" "${3:-0}" "$4"
}

# Final report: a JSON array with -j, otherwise a tab-separated table.
if [[ "$json_output" == 1 ]]; then
    case "$sqck_component" in
        all)
            json_dtm=$(sqc_json_entry DTM "$cfg_dtm_cnt" "$act_dtm_cnt" "${down_dtms[*]}")
            json_rms=$(sqc_json_entry RMS "$cfg_rms_cnt" "$act_rms_cnt" "${down_rms[*]}")
            json_mxosrvr=$(sqc_json_entry MXOSRVR "$cfg_mxo_cnt" "$act_mxo_cnt" "${act_mxo_down_cnt}")
            json_dcsmaster=$(sqc_json_entry DCSMASTER "$cfg_dcsmaster_cnt" "$actual_dcsmaster_cnt" "${down_dcsmaster_cnt}")
            json_restserver=$(sqc_json_entry RESTSERVER "$cfg_restserver_cnt" "$actual_restserver_cnt" "${down_restserver_cnt}")
            echo "[{$json_dtm},{$json_rms},{$json_dcsmaster},{$json_mxosrvr}, {$json_restserver}]"
            ;;
        dtm)
            json_dtm=$(sqc_json_entry DTM "$cfg_dtm_cnt" "$act_dtm_cnt" "${down_dtms[*]}")
            echo "[{$json_dtm}]"
            ;;
        rms)
            json_rms=$(sqc_json_entry RMS "$cfg_rms_cnt" "$act_rms_cnt" "${down_rms[*]}")
            echo "[{$json_rms}]"
            ;;
        dcs)
            json_mxosrvr=$(sqc_json_entry MXOSRVR "$cfg_mxo_cnt" "$act_mxo_cnt" "${act_mxo_down_cnt}")
            json_dcsmaster=$(sqc_json_entry DCSMASTER "$cfg_dcsmaster_cnt" "$actual_dcsmaster_cnt" "${down_dcsmaster_cnt}")
            echo "[{$json_dcsmaster},{$json_mxosrvr}]"
            ;;
        rest)
            json_restserver=$(sqc_json_entry RESTSERVER "$cfg_restserver_cnt" "$actual_restserver_cnt" "${down_restserver_cnt}")
            echo "[{$json_restserver}]"
            ;;
    esac
else
    echo
    echo -e "Process\t\tConfigured\tActual\t Down"
    echo -e "-------\t\t----------\t------\t ----"
    case "$sqck_component" in
        all)
            echo -e "DTM\t\t$cfg_dtm_cnt\t\t$act_dtm_cnt\t ${down_dtms[*]}"
            echo -e "RMS\t\t$cfg_rms_cnt\t\t$act_rms_cnt\t ${down_rms[*]}"
            echo -e "DcsMaster\t$cfg_dcsmaster_cnt\t\t$actual_dcsmaster_cnt\t $down_dcsmaster_cnt"
            echo -e "DcsServer\t$cfg_dcsserver_cnt\t\t$actual_dcsserver_cnt\t $down_dcsserver_cnt"
            echo -e "mxosrvr\t\t$cfg_mxo_cnt\t\t$act_mxo_cnt\t ${act_mxo_down_cnt}"
            echo -e "RestServer\t$cfg_restserver_cnt\t\t$actual_restserver_cnt\t $down_restserver_cnt\n"
            ;;
        dtm)
            echo -e "DTM\t\t$cfg_dtm_cnt\t\t$act_dtm_cnt\t ${down_dtms[*]}\n"
            ;;
        rms)
            echo -e "RMS\t\t$cfg_rms_cnt\t\t$act_rms_cnt\t ${down_rms[*]}\n"
            ;;
        dcs)
            echo -e "DcsMaster\t$cfg_dcsmaster_cnt\t\t$actual_dcsmaster_cnt\t $down_dcsmaster_cnt"
            echo -e "DcsServer\t$cfg_dcsserver_cnt\t\t$actual_dcsserver_cnt\t $down_dcsserver_cnt"
            echo -e "mxosrvr\t\t$cfg_mxo_cnt\t\t$act_mxo_cnt\t ${act_mxo_down_cnt}\n"
            ;;
        rest)
            echo -e "RestServer\t$cfg_restserver_cnt\t\t$actual_restserver_cnt\t $down_restserver_cnt\n"
            ;;
    esac
fi
# Translate the outcome into the exit status:
#   0  up                    1  up, but only partially (pcmp_result set)
#   2  not up, yet some DTM/RMS processes are running
#  -1  not up and no DTM/RMS process is running (seen by callers as 255)
if [[ "$sq_up" == 1 ]]; then
    if [[ "$pcmp_result" == 1 ]]; then
        exit 1
    fi
    exit 0
fi

if [[ "$total_procs_result" != 0 ]]; then
    exit 2
fi

if [[ "$json_output" == 0 ]]; then
    echo
    echo "The Trafodion environment is down."
fi
exit -1