| #!/bin/sh |
| # ------------------------------------------------------------------------ |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ------------------------------------------------------------------------ |
| # |
| ## diag.sh - a script to run diagnostic commands and save output to a log file |
| # |
| # Author: Cal Lott <calvin.lott@ihg.com> |
| # |
# Append common binary directories to the search path so the diagnostic
# tools invoked below can be located.
PATH=$PATH:$HOME/bin:/bin/:/usr/bin:/sbin:/usr/sbin

# Name of this script without its directory part; used in the usage text and
# in the default log file name. $0 is quoted so that a script path containing
# whitespace is handed to basename as a single argument.
SCRIPT_NAME=`basename "$0"`
# The command line as invoked, recorded in the log before option parsing
# consumes the positional parameters.
ORIGINAL_COMMAND_LINE="$0 $*"
# Short host name: the node name up to (not including) the first dot.
LOCALHOST=`uname -n | awk -F. '{print $1}'`
| |
| # Declare some functions. |
| |
# dateprint - write one timestamped line to stdout.
#   $* : the message; all arguments are joined with single spaces.
# The line has the form "[YYYYMMDD-HH:MM:SS] message".
# Sets the global dateprint_stamp as a scratch variable.
dateprint() {
    dateprint_stamp=`date '+%Y%m%d-%H:%M:%S'`
    echo "[${dateprint_stamp}] $*"
}
| |
# Separator line printed before and after each command's log section.
BAR_SEPARATOR='------------------------------------------------'

# run - execute a command and log it with timestamps and its exit status.
#   $* : the command and its arguments. The words are joined and re-split by
#        the shell, so `run "ls -l"` and `run ls -l` behave the same.
# Writes to stdout: a START line, the path reported by `which` for the first
# word (empty when `which` prints nothing), the command's own output, its
# exit status and an END line, framed by $BAR_SEPARATOR.
# Globals written:
#   executable            first word of the command
#   last_exit_code        exit status of the command just run
#   cumulative_exit_code  running sum of the exit statuses of all commands
run() {
    echo
    echo "$BAR_SEPARATOR"
    dateprint "START: $*"
    executable=`echo $* | awk '{print $1}'`
    dateprint "path to executable: `which $executable 2>/dev/null`"
    # Deliberately unquoted: the argument list is executed as one command line.
    $*
    last_exit_code=$?
    # Start the running total at 0 when it has not been set yet; an empty
    # operand makes expr see "+ N", which is a syntax error on some systems.
    cumulative_exit_code=`expr ${cumulative_exit_code:-0} + $last_exit_code`
    dateprint "exit $last_exit_code"
    dateprint "END: $*"
    echo "$BAR_SEPARATOR"
    echo
}
| |
# usage - print the usage/help text to stdout.
# Reads the global SCRIPT_NAME. The here-document is expanded by the shell,
# so the backslash-escaped variables are shown literally in the output.
usage() {
cat << EOF

$SCRIPT_NAME - run a set of standard diagnostic commands

Usage: $SCRIPT_NAME [-l logfile] [-jz] [-hv] <pid1> <pid2> ... <pidN>

This script runs several diagnostic commands on the local system and saves the
output to a gzipped log file. You may also specify an optional list of pids,
which will cause additional diagnostic commands (lsof, jstat, etc.) to be run
against those specific processes only.

The default path for the datestamped log file is as follows:

\$HOME/log/$SCRIPT_NAME.\$LOCALHOST.YYYYMMDD-HHMMSS.log

You can override the default file name with the -l option. (See below.)


Options:
-j Also process any pids listed in output of "jps" command.
-l <logfile> Use specified log file instead of default.
-z Do not gzip log file.

Other Options:
-h Print this message and exit.

EOF
}
| |
# help - show help for the script.
# The help text is the usage message, so this simply delegates to usage().
help() { usage; }
| |
# Command line options processing.
# Variables set here and read later in the script:
#   USE_JPS='TRUE'                 (-j)
#   LOGFILE=<path>                 (-l <logfile>)
#   COMPRESSION_OPTION='DISABLED'  (-z)
#   PIDLIST                        the arguments left after the options
while getopts "hjl:vz" OPTION
do
    case $OPTION in
        h)
            help
            exit 0
            ;;
        j)
            USE_JPS='TRUE'
            ;;
        l)
            LOGFILE=$OPTARG
            ;;
        v)
            # Accepted by the getopts string and listed in the usage line,
            # but no behaviour is attached to it: treat it as a no-op.
            :
            ;;
        z)
            COMPRESSION_OPTION='DISABLED'
            ;;
        \?)
            # getopts sets OPTION to a literal "?" for an unknown option or a
            # missing option argument. The backslash matters: a bare ? is a
            # glob that matches every single-character option.
            usage
            exit 1
            ;;
    esac
done
shift `expr $OPTIND - 1`    # remove processed arguments from $*
PIDLIST=$*                  # save list of remaining args as pidlist
| |
# unique_pids - print the distinct arguments in ascending numeric order,
# separated by single spaces on one line (an empty line for no arguments).
# Sets the globals unique_pid and unique_pids_sorted as scratch variables.
unique_pids() {
    # Emit one pid per line: sort and uniq work on whole lines, so a single
    # space-separated line would pass through them unchanged.
    unique_pids_sorted=`for unique_pid in "$@"; do echo "$unique_pid"; done | sort -n | uniq`
    # Unquoted on purpose: folds the newline-separated list back onto one line.
    echo $unique_pids_sorted
}

# If requested, use jps to get an additional list of java pids.
if [ "$USE_JPS" = 'TRUE' ]
then
    # Save the current list of pids.
    arg_pids=$PIDLIST
    # Take the first column of the jps output, skipping rows whose second
    # column starts with "Jps" (the jps process itself).
    jps_pids=`jps 2>/dev/null | awk '$2 !~ /^Jps/ {print $1}'`
    # Merge both groups into one sorted list without duplicates.
    PIDLIST=`unique_pids $arg_pids $jps_pids`
fi
| |
# Use the default logfile if the user did not specify one.
if [ "$LOGFILE" = '' ]
then
    LOGFILE="$HOME/log/$SCRIPT_NAME.$LOCALHOST.`date +%Y%m%d-%H%M%S`.log"
fi
# Create the log directory, including any missing parent directories.
# Errors are discarded here; the touch below is the check that matters.
LOGDIR=`dirname "$LOGFILE"`
mkdir -p "$LOGDIR" >/dev/null 2>&1

# Print start message to stdout.
dateprint "BEGIN - running diagnostics for host: `uname -n`"

# Complain and die if we cannot create the logfile.
if touch "$LOGFILE" >/dev/null 2>&1
then
    dateprint "logfile created, running diagnostic commands . . ."
else
    dateprint "ERROR: cannot create $LOGFILE, exiting!"
    exit 1
fi
| |
# Save the stdout and stderr descriptors, then send all output to the logfile.
exec 6>&1               # fd 6 duplicates stdout so it can be restored later
exec 7>&2               # fd 7 duplicates stderr so it can be restored later
exec >"$LOGFILE" 2>&1   # stdout and stderr now both go to the logfile
                        # (quoted so a path with spaces is one redirect target)

##
## Begin diagnostic output
##
dateprint "BEGIN DIAGNOSTIC OUTPUT"
dateprint "running on host: `uname -n`"
dateprint "unprocessed command line: \"$ORIGINAL_COMMAND_LINE\""
dateprint "after options processing: \"$0 $PIDLIST > $LOGFILE\""
| |
# System-wide diagnostics. Each command is passed through run() exactly once,
# in this order.
run crontab -l
run id
run ifconfig -a
run df -kl
run env
run iostat -x
run jps -v
run netstat -anp
run netstat -r
run ps -ef
run sar -A
run ulimit -a
run uname -a
run uptime
run w
| |
# Diagnostics whose command or flags depend on the flavor of Unix, selected
# by the kernel name reported by uname -s. Each is passed through run() once.
OS=`uname -s`
if [ "$OS" = 'Linux' ]
then
    run swapon -s
    run top -b -n1
elif [ "$OS" = 'SunOS' ]
then
    run swap -s
    run top -b -d1
else
    dateprint "WARNING: OS \"$OS\" unknown - running no OS-specific commands"
fi
| |
# Per-process diagnostics: each command below is run once for every pid in
# PIDLIST, one pid at a time.
for pid in $PIDLIST
do
    run lsof -p $pid
    run jstat -gcutil $pid
    run jstack -l $pid
done
| |
dateprint "END DIAGNOSTIC OUTPUT"
dateprint "cumulative exit code: $cumulative_exit_code"

exec 1>&6 6>&-  # Restore stdout and close file descriptor 6.
exec 2>&7 7>&-  # Restore stderr and close file descriptor 7.
##
## End diagnostic output
##

# Compress the logfile unless compression was disabled, then report where
# the output is stored.
if [ "$COMPRESSION_OPTION" = 'DISABLED' ]
then
    dateprint "FINISH - output stored at: $LOGFILE"
else
    # gzip runs in the background; the script does not wait for it.
    nohup gzip "$LOGFILE" >/dev/null 2>&1 &
    dateprint "FINISH - output stored at: $LOGFILE.gz"
fi

# An exit status only carries values 0-255, so a larger sum would wrap
# around (256 would be reported as 0, i.e. success). Cap it at 255, and
# treat a sum that was never set as 0.
final_exit_code=${cumulative_exit_code:-0}
if [ "$final_exit_code" -gt 255 ]
then
    final_exit_code=255
fi
exit $final_exit_code