| #!/bin/bash |
| |
| # @@@ START COPYRIGHT @@@ |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| # @@@ END COPYRIGHT @@@ |
| # |
| # |
| # sqcore - script to collect any user core files generated in /database or $TRAF_HOME |
| # across the cluster, and move them to a central location on the head node. Currently, |
| # the script assumes it is run on the head node. The script assumes that the user |
| # will take space limitations into consideration. |
| # |
| |
#######################################
# Print the command-line synopsis and a one-line description of each option.
# Globals:   none
# Arguments: none
# Outputs:   usage text on stdout, with a blank line before and after it
#######################################
Usage() {
  # The delimiter is left unquoted on purpose: $0 is expanded to the script
  # name, while \$TRAF_HOME is emitted as the literal text "$TRAF_HOME".
  cat <<EOF

Usage: $0 [ -d <directory> | -q | -r | -h ]

-d Head node directory where the users cluster core files are to be moved
 The default location is \$TRAF_HOME/logs
-q Quiet mode (no prompts)
-r Remove all of a users cluster core files (excluding head node)
-h Help

EOF
}
| |
#######################################
# Parse the sqcore command-line options.
# Globals:
#   to_path    (written) directory supplied with -d
#   SQ_QUIET   (written) set to 1 by -q
#   SQ_REMOVE  (written) set to 1 by -r
# Arguments:
#   the script's command-line arguments
# Outputs:
#   the usage text (through Usage) for -h and for any invalid option
# Returns:
#   0 when all options were accepted. Does not return for -h (exit 0) or
#   for an unknown option / missing option argument (exit 1).
#######################################
function GetOpts {

  # Keep the getopts state local so each call parses from the first argument
  # and no parser variable leaks into the main script.
  local OPTIND=1
  local arg

  # Only the documented options are listed. An undocumented "i:" used to be
  # accepted here without any handler, which produced misleading
  # "option requires an argument -- i" diagnostics.
  while getopts "d:qrh" arg; do
    case "$arg" in
      d)
        to_path=${OPTARG}
        ;;
      q)
        SQ_QUIET=1
        ;;
      r)
        SQ_REMOVE=1
        ;;
      h)
        # Help was explicitly requested: that is a successful outcome.
        Usage
        exit 0
        ;;
      *)
        # getopts has already printed its own diagnostic on stderr.
        Usage
        exit 1
        ;;
    esac
  done

}
| |
#######################################
# Echo the settings the script is about to use.
# Globals:
#   head     (read) head node name
#   current  (read) name of the node the script runs on
#   to_path  (read) destination directory for collected core files
# Arguments: none
# Outputs:   a short summary on stdout, framed by blank lines
#######################################
function DisplayInput {

  echo
  echo "Headnode : $head"
  echo "Current Node : $current"
  echo -n "Directory to move files (-d) : "
  # The expansion must be quoted: an unquoted path containing whitespace made
  # the old "[ ! -z $to_path ]" test fail and report "Not Specified".
  if [[ -n "$to_path" ]]; then
    # printf prints the value verbatim (no word splitting, no globbing, and
    # no misinterpretation of values such as "-n").
    printf '%s\n' "$to_path"
  else
    echo "Not Specified"
  fi
  echo

}
| |
| |
###########################################################
# MAIN portion of sqcore begins
#
# Exit codes used in this section:
#   1 - not running on the head node (or bad command line, see GetOpts)
#   2 - destination directory does not exist
#   3 - user answered "n" at the confirmation prompt
###########################################################
declare -i SQ_QUIET=0
declare -i SQ_REMOVE=0
declare -i ERR_FLAG=0
# headnode is an external command; presumably it prints the name of the
# cluster head node in the same form as "uname -n" - TODO confirm.
head=$(headnode)
current=$(uname -n)
to_path=$TRAF_HOME/logs

# "$@" must be quoted, otherwise an argument such as -d "/dir with space"
# is re-split into several words before getopts sees it.
GetOpts "$@"

# Both sides are quoted so the comparison is a plain string comparison; an
# unquoted right-hand side inside [[ ]] is interpreted as a glob pattern.
if [[ "$head" != "$current" ]]; then
  echo
  echo "The $0 script must be run on the headnode ($head)."
  echo
  exit 1
fi

if [[ ! -d "$to_path" ]]; then
  echo
  echo "Directory to hold core file does not exist."
  echo "Please create $to_path (or validate path) and retry. Exiting..."
  echo
  exit 2
fi

DisplayInput

# Ask for confirmation unless -q was given.
if (( SQ_QUIET == 0 )); then
  if (( SQ_REMOVE == 1 )); then
    echo "All of the users core files will be removed from cluster (excluding head node)"
  fi

  echo -n "Do you want to continue (Enter n to exit, Any other key to continue): "
  # -r keeps backslashes in the answer literal.
  read -r ans
  if [[ "$ans" == "n" ]]; then
    echo "Exiting..."
    exit 3
  fi
  echo
fi
| |
# Create a list of directories to check for core files (/database, $TRAF_HOME/sql and $TRAF_HOME/export searches).
# Note that '$' is used as the field separator to parse out node, volume, and path.
| |
| # The following can be used to pull the database locations from the sqconfig file, |
| # which assumes that the file is correctly configured. Note, that if used the zero-based |
| # node calculation should be un-commented from the below "if" statement. |
| #dirlist=`grep database $TRAF_HOME/sql/scripts/sqconfig | /bin/gawk '{print $2"$"$3"/"$1}'; $SQ_PDSH -a "find -L $TRAF_HOME/sql -name core\.\* | /bin/gawk -F\/core '{print \\\$1}' | uniq" | /bin/gawk '{sub(":",""); print substr($1,2)"$"$2}'` |
| |
# Nodes to query: every node (pdsh -a) unless MY_NODES supplies its own
# pdsh node-selection arguments.
MY_NODES_PRM=" -a "
if [ -n "$MY_NODES" ]; then
  MY_NODES_PRM=$MY_NODES
fi

# Ask the selected nodes for the directories that contain core.* files under
# $TRAF_HOME/sql, $TRAF_HOME/export and /database/u*/$USER, then rewrite each
# answer as "<node>$<directory>".
#  - $TRAF_HOME and $USER are expanded locally, before the command is sent.
#  - Backticks are kept on purpose: the \\\$1 escapes are written for backtick
#    quoting rules, so that a literal $1 reaches the remote gawk.
#  - The local gawk removes the first ":" of every line and drops the first
#    character of the first field; presumably pdsh prefixes each line with
#    "n<node>: " - TODO confirm.
#  - The remote "uniq" only drops adjacent repeats and the /database search has
#    none at all, so the merged list is de-duplicated here with "sort -u";
#    otherwise a directory holding several core files is scanned repeatedly.
dirlist=`$PDSH $MY_NODES_PRM $PDSH_SSH_CMD "find -L $TRAF_HOME/sql -name core\.\* 2> /dev/null | /bin/gawk -F\/core '{print \\\$1}' | uniq; find -L $TRAF_HOME/export -name core\.\* 2> /dev/null | /bin/gawk -F\/core '{print \\\$1}' | uniq; find -L /database/u*/$USER -name core\.\* 2> /dev/null | /bin/gawk -F \/core '{print \\\$1}'" | /bin/gawk '{sub(":",""); print substr($1,2)"$"$2}' 2> /dev/null | sort -u`

# Nothing to do when no node reported a core file (exit code 4).
if [[ -z "$dirlist" ]]; then
  echo "No core files were found. Exiting..."
  echo
  exit 4
fi
| |
# Process every "<node>$<path>[$<volume>]" entry of dirlist: list the core
# files in that directory on that node and either remove them (-r) or move
# them to $to_path on the head node.
#   Reads : dirlist, to_path, head, USER, SQ_REMOVE, PDSH, PDSH_SSH_CMD
#   Writes: ERR_FLAG (set to 1 when a copy fails)
# $dirlist and $corelist are expanded unquoted on purpose: their entries are
# whitespace separated. $PDSH and $PDSH_SSH_CMD are unquoted too, since they
# may carry several words.
for i in $dirlist; do
  node=$(echo "$i" | /bin/gawk -F'$' '{print $1}')
  basevolpath=$(echo "$i" | /bin/gawk -F'$' '{print $2}')
  volume=$(echo "$i" | /bin/gawk -F'$' '{print $3}')

  # Create path w/escape chars in order to recognize the "\$" in pdsh/scp commands
  if [[ -n "$volume" ]]; then
    # Need to add one to the zero-based node number if pulled from sqconfig
    #node=$((1 + `echo $i | /bin/gawk -F$ '{print $1}'`))
    path="$basevolpath\\\$$volume"
  else
    path=$(echo "$basevolpath" | /bin/gawk -F/core '{print $1}')
  fi

  # Never run the remote ls / rm -rf below with an empty node or directory:
  # "rm -rf $path/$j" would then address "/<corefile>" on the remote node.
  if [[ -z "$node" || -z "$basevolpath" || -z "$path" ]]; then
    echo "Skipping malformed directory entry: $i"
    continue
  fi

  echo "Searching for core files in n$node:$path..."
  corelist=$($PDSH -w "n$node" $PDSH_SSH_CMD "ls $path | grep ^core.")

  # Create a list of core files
  for j in $corelist; do
    # Words of the pdsh output that are not core file names (presumably the
    # "n<node>:" prefix) yield an empty $file and are ignored.
    file=$(echo "$j" | grep -o "core.*")
    if [[ -z "$file" ]]; then
      continue
    fi

    # Remove core file if -r supplied
    if (( SQ_REMOVE == 1 )); then
      echo " Removing $path/$j..."
      $PDSH -w "n$node" $PDSH_SSH_CMD rm -rf "$path/$j"
      continue
    fi

    # Otherwise move core file to head node; in order to prevent overwrites
    # the core files will be moved using the following format:
    # <filename>.<node>.<volume>

    # Copy core file only if have proper ownership.
    # NOTE(review): field 4 is taken as the owner, which presumably relies on
    # pdsh prefixing the "ls -l" line with the node name - confirm.
    cowner=$($PDSH -w "n$node" $PDSH_SSH_CMD ls -l "$path/$j" | tail -1 | /bin/gawk '{print $4}')
    if [[ "$cowner" != "$USER" ]]; then
      echo " Current user ($USER) cannot move $path/$j ($cowner). Skipping...";
      continue
    fi

    if [[ -n "$volume" ]]; then
      dest="$to_path/$file.n$node.$volume"
    else
      dest="$to_path/$file.n$node.TRAF_HOME"
    fi

    # Only the exact destination name is checked. The previous
    # "ls $to_path | grep $file" test was an unanchored regex match on every
    # file name, so a core with the same name from another node or volume
    # (or one whose name is a prefix of another) was silently left behind.
    # The script is only run on the head node, so $to_path is a local path.
    if [[ -e "$dest" ]]; then
      echo " $dest already exists on the head node. Skipping..."
      continue
    fi

    echo " Moving $path/$j..."
    if scp "n$node:$path/$j" "$head:$dest" &> /dev/null; then
      # If successfully copied off the file, now remove it.
      # Is there an scp equivalent for moving files?
      $PDSH -w "n$node" $PDSH_SSH_CMD rm -rf "$path/$j"
    else
      echo " Error while moving n$node:$path/$j..."
      ERR_FLAG=1
    fi
  done
done
| |
# Final report. A failed copy (ERR_FLAG set to 1) is reflected in the exit
# status instead of always returning success; 5 is used because the earlier
# exits of this script already use the codes 1 to 4.
echo
if (( ERR_FLAG == 1 )); then
  echo "ERROR: All core files were NOT moved to: $to_path"
  echo
  exit 5
fi

if (( SQ_REMOVE == 1 )); then
  echo "All user core files were removed (excluding head node)."
else
  echo "Moved core files reside in: $to_path"
fi
echo "Done with $0."
echo

exit 0
| |