blob: acc90cf80a32c9ab6ebc1d7bd80906381350c285 [file]
#!/bin/bash
# @@@ START COPYRIGHT @@@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# @@@ END COPYRIGHT @@@
#
#
# sqcore - script to collect any user core files generated in /database or $TRAF_HOME
# across the cluster, and move them to a central location on the head node. Currently,
# the script assumes it is run on the head node. The script assumes that the user
# will take space limitations into consideration.
#
# Usage - write the command synopsis and the option summary to stdout.
# The synopsis names the script exactly as it was invoked ($0).
function Usage {
  # One printf call, one argument per output line; empty arguments
  # produce the blank separator lines.
  printf '%s\n' \
    "" \
    "Usage: $0 [ -d <directory> | -q | -r | -h ]" \
    "" \
    "-d Head node directory where the users cluster core files are to be moved" \
    " The default location is \$TRAF_HOME/logs" \
    "-q Quiet mode (no prompts)" \
    "-r Remove all of a users cluster core files (excluding head node)" \
    "-h Help" \
    ""
}
# GetOpts - parse the sqcore command-line options.
#
# Arguments: the script's positional parameters (call as: GetOpts "$@").
# Globals written:
#   to_path   - set to the argument of -d
#   SQ_QUIET  - set to 1 by -q
#   SQ_REMOVE - set to 1 by -r
# Exits:     prints the usage text and exits with status 1 for -h, for an
#            unknown option, and for -d given without an argument.
function GetOpts {
  # Keep getopts' bookkeeping local: OPTIND restarts at 1 on every call (so
  # the function can be called more than once) and neither it, OPTARG nor
  # the loop variable leak into the caller.
  local OPTIND=1 OPTARG arg

  # The option string previously also listed "i:", but no -i handler exists;
  # it could only ever fall into the catch-all arm. It is dropped so that -i
  # is reported by getopts like any other unknown option.
  while getopts "d:qrh" arg; do
    case "$arg" in
      d)
        to_path=${OPTARG}
        ;;
      q)
        SQ_QUIET=1
        ;;
      r)
        SQ_REMOVE=1
        ;;
      h)
        Usage
        exit 1
        ;;
      *)
        # getopts reports unknown options and missing arguments as '?'.
        Usage
        exit 1
        ;;
    esac
  done
}
# DisplayInput - print the settings this run will use.
#
# Globals read:
#   head    - head node name
#   current - name of the node the script is running on
#   to_path - directory the core files are moved to
# Outputs:   a blank line, the three settings, and a closing blank line
#            on stdout. "Not Specified" is shown when to_path is empty.
function DisplayInput {
  echo
  echo "Headnode : $head"
  echo "Current Node : $current"
  echo -n "Directory to move files (-d) : "
  # Quoted on purpose: a path containing whitespace used to make the old
  # "[ ! -z $to_path ]" test fail and report "Not Specified", and an unquoted
  # echo would collapse whitespace or expand glob characters in the path.
  if [[ -n "$to_path" ]]; then
    printf '%s\n' "$to_path"
  else
    echo "Not Specified"
  fi
  echo
}
###########################################################
# MAIN portion of sqcore begins
###########################################################
# Option flags and the overall result flag; all are integers.
declare -i SQ_QUIET=0   # 1 when -q is given: do not prompt before proceeding
declare -i SQ_REMOVE=0  # 1 when -r is given: remove core files instead of moving them
declare -i ERR_FLAG=0   # becomes 1 when moving a core file fails

# "headnode" is an external helper that is not defined in this file; its
# output is compared with this machine's node name below.
head=$(headnode)
current=$(uname -n)
to_path=$TRAF_HOME/logs # default destination, replaced by -d <directory>

# "$@" must be quoted: unquoted, an argument such as -d "/cores/my dir"
# would be split into several words before GetOpts sees it.
GetOpts "$@"

# Both sides are quoted so the names are compared as literal strings rather
# than the right-hand side being used as a glob pattern.
if [[ "$head" != "$current" ]]; then
  echo
  echo "The $0 script must be run on the headnode ($head)."
  echo
  exit 1
fi

# The destination must already exist; the script never creates it.
if [[ ! -d "$to_path" ]]; then
  echo
  echo "Directory to hold core file does not exist."
  echo "Please create $to_path (or validate path) and retry. Exiting..."
  echo
  exit 2
fi

DisplayInput

# Unless running quietly, say what is about to happen and give the user a
# chance to back out. Only the exact answer "n" aborts.
if (( SQ_QUIET == 0 )); then
  if (( SQ_REMOVE == 1 )); then
    echo "All of the users core files will be removed from cluster (excluding head node)"
  fi
  echo -n "Do you want to continue (Enter n to exit, Any other key to continue): "
  # -r keeps backslashes in the answer from being interpreted.
  read -r ans
  if [[ "$ans" == "n" ]]; then
    echo "Exiting..."
    exit 3
  fi
  echo
fi
# Create a list of directories to check for core files by searching
# $TRAF_HOME/sql, $TRAF_HOME/export and /database/u*/$USER on the selected nodes.
# Note that '$' is used as the field separator to parse out node, volume, and path.
# The following can be used to pull the database locations from the sqconfig file,
# which assumes that the file is correctly configured. Note that, if it is used, the
# zero-based node calculation in the "if" statement below should be un-commented.
#dirlist=`grep database $TRAF_HOME/sql/scripts/sqconfig | /bin/gawk '{print $2"$"$3"/"$1}'; $SQ_PDSH -a "find -L $TRAF_HOME/sql -name core\.\* | /bin/gawk -F\/core '{print \\\$1}' | uniq" | /bin/gawk '{sub(":",""); print substr($1,2)"$"$2}'`
# Node selection handed to $PDSH: " -a " (presumably pdsh's all-nodes flag -
# confirm) unless MY_NODES is set, in which case its value is used verbatim.
MY_NODES_PRM=" -a "
if [ -n "$MY_NODES" ];then
MY_NODES_PRM=$MY_NODES
fi
# The quoted string is the command run through $PDSH: three find pipelines, each
# printing the part of every matching path that precedes the first "/core".
# The final gawk runs locally: it deletes the first ":" of each output line and
# prints "<field 1 without its first character>$<field 2>", e.g. a line
# "n3: /some/dir" becomes "3$/some/dir" (assumes a "host: " line prefix - confirm).
# NOTE(review): the whole command sits inside backticks, which strip one level
# of backslashes (\\ and \$); re-check every escape before converting to $( ).
dirlist=`$PDSH $MY_NODES_PRM $PDSH_SSH_CMD "find -L $TRAF_HOME/sql -name core\.\* 2> /dev/null | /bin/gawk -F\/core '{print \\\$1}' | uniq; find -L $TRAF_HOME/export -name core\.\* 2> /dev/null | /bin/gawk -F\/core '{print \\\$1}' | uniq; find -L /database/u*/$USER -name core\.\* 2> /dev/null | /bin/gawk -F \/core '{print \\\$1}'" | /bin/gawk '{sub(":",""); print substr($1,2)"$"$2}' 2> /dev/null`
# An empty result means no directory was reported; stop with exit status 4.
if [[ $dirlist == "" ]]; then
echo "No core files were found. Exiting..."
echo
exit 4;
fi
# Walk every "<node>$<path>[$<volume>]" entry of dirlist, list the core files
# in that directory on that node, and either remove them (-r) or move them to
# $to_path on the head node. Word splitting of $dirlist and $corelist is
# intentional: both are whitespace-separated lists.
for i in $dirlist; do
  # Split the entry on '$' into node number, base path and optional volume
  # name; any further fields are discarded.
  IFS='$' read -r node basevolpath volume _ <<< "$i"

  # Create path w/escape chars in order to recognize the "\$" in pdsh/scp commands
  if [[ -n "$volume" ]]; then
    # Need to add one to the zero-based node number if pulled from sqconfig
    #node=$((1 + `echo $i | /bin/gawk -F$ '{print $1}'`))
    path="$basevolpath\\\$$volume"
  else
    # Drop everything from the first "/core" onwards.
    path=${basevolpath%%/core*}
  fi

  echo "Searching for core files in n$node:$path..."
  corelist=$($PDSH -w "n$node" $PDSH_SSH_CMD "ls $path | grep ^core.")

  # Create a list of core files
  for j in $corelist; do
    # Words of the listing that do not contain "core" yield an empty name
    # here and are ignored.
    file=$(grep -o "core.*" <<< "$j")
    [[ -n "$file" ]] || continue

    # Remove core file if -r supplied
    if (( SQ_REMOVE == 1 )); then
      echo " Removing $path/$j..."
      $PDSH -w "n$node" $PDSH_SSH_CMD rm -rf "$path/$j"
      continue
    fi

    # Otherwise move core file to head node; in order to prevent overwrites
    # the core files will be moved using the following format:
    # <filename>.<node>.<volume>

    # Copy core file only if have proper ownership. Field 4 is taken from the
    # last output line (assumes a "host: " prefix shifts ls -l's owner column
    # from 3 to 4 - confirm).
    cowner=$($PDSH -w "n$node" $PDSH_SSH_CMD ls -l "$path/$j" | tail -1 | /bin/gawk '{print $4}')
    if [[ "$cowner" != "$USER" ]]; then
      echo " Current user ($USER) cannot move $path/$j ($cowner). Skipping..."
      continue
    fi

    # Files that did not come from a volume directory are tagged TRAF_HOME.
    dest="$to_path/$file.n$node.${volume:-TRAF_HOME}"

    # Refuse to overwrite, but test for the exact destination name. The old
    # test, "ls $to_path | grep $file", was an unanchored regex match against
    # every file name, so a core with the same (or a prefix) name from another
    # node silently blocked the move.
    if [[ -e "$dest" ]]; then
      echo " $dest already exists. Skipping $path/$j..."
      continue
    fi

    echo " Moving $path/$j..."
    if scp "n$node:$path/$j" "$head:$dest" &> /dev/null; then
      # If successfully copied off the file, now remove it.
      # Is there an scp equivalent for moving files?
      $PDSH -w "n$node" $PDSH_SSH_CMD rm -rf "$path/$j"
    else
      echo " Error while moving n$node:$path/$j..."
      ERR_FLAG=1
    fi
  done
done
# Final report. ERR_FLAG is 1 when at least one move failed; SQ_REMOVE is 1
# when the run was a removal (-r) rather than a move.
echo
if (( ERR_FLAG == 1 )); then
  echo "ERROR: All core files were NOT moved to: $to_path"
  echo
  # The script used to exit 0 here even though it had just reported an
  # error. Exit codes 1-4 are already used for the earlier exits, so a
  # failed move is reported as 5 to let callers detect it.
  exit 5
fi

if (( SQ_REMOVE == 1 )); then
  echo "All user core files were removed (excluding head node)."
else
  echo "Moved core files reside in: $to_path"
fi
echo "Done with $0."
echo
exit 0