| #!/usr/bin/perl -w |
| ############################################################################### |
| # $Id$ |
| ############################################################################### |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| ############################################################################### |
| |
| =head1 NAME |
| |
| VCL::Module::Provisioning::xCAT - VCL module to support the xCAT provisioning engine |
| |
| =head1 SYNOPSIS |
| |
| From another VCL module instantiated normally for a reservation: |
| $self->provisioner->load(); |
| |
| From a script: |
| my $xcat = VCL::Module::Provisioning::xCAT->new(); |
| |
| =head1 DESCRIPTION |
| |
| This module provides VCL support for xCAT (Extreme Cluster Administration |
| Toolkit) version 2.x. xCAT is a scalable distributed computing management and |
| provisioning tool that provides a unified interface for hardware control, |
| discovery, and OS diskful/diskfree deployment. http://xcat.sourceforge.net |
| |
| =cut |
| |
| ############################################################################### |
| package VCL::Module::Provisioning::xCAT; |
| |
| # Specify the lib path using FindBin |
| use FindBin; |
| use lib "$FindBin::Bin/../../.."; |
| |
| # Configure inheritance |
| use base qw(VCL::Module::Provisioning); |
| |
| # Specify the version of this module |
| our $VERSION = '2.5.1'; |
| |
| # Specify the version of Perl to use |
| use 5.008000; |
| |
| use strict; |
| use warnings; |
| use diagnostics; |
| use English qw(-no_match_vars); |
| |
| use VCL::utils; |
| use Fcntl qw(:DEFAULT :flock); |
| use File::Copy; |
| use IO::Seekable; |
| use Socket; |
| use version; |
| |
| ############################################################################### |
| |
| =head1 CLASS ATTRIBUTES |
| |
| =cut |
| |
| =head2 $XCAT_ROOT |
| |
| Data type : scalar |
| Description : $XCAT_ROOT stores the location of the xCAT binary files. xCAT |
| should set the XCATROOT environment variable. This is used if |
| it is set. If XCATROOT is not set, /opt/xcat is used. |
| |
| =cut |
| |
| # File-scoped xCAT root directory, assigned by initialize() from $ENV{XCATROOT} or else '/opt/xcat' |
| my $XCAT_ROOT; |
| |
| ############################################################################### |
| |
| =head1 OBJECT METHODS |
| |
| =cut |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
=head2 initialize

 Parameters  : none
 Returns     : boolean
 Description : Determines the xCAT root directory and checks that xCAT 2.x
               appears to be installed on the management node. The XCATROOT
               environment variable is used if it contains a value, otherwise
               /opt/xcat is used. Returns 1 if the root directory exists and
               both bin/rpower and bin/lsdef under it are executable.
               Returns undefined otherwise.

=cut

sub initialize {
	my $self = shift;
	
	# Location used when the XCATROOT environment variable is missing or empty
	my $default_xcat_root = '/opt/xcat';
	
	# Check the XCATROOT environment variable, it should be set by xCAT
	if ($ENV{XCATROOT}) {
		$XCAT_ROOT = $ENV{XCATROOT};
	}
	elsif (defined($ENV{XCATROOT})) {
		# The variable exists in the environment but is empty (or '0'), the previous message incorrectly claimed it was not defined
		notify($ERRORS{'OK'}, 0, "XCATROOT environment variable is defined but does not contain a usable value, using $default_xcat_root");
		$XCAT_ROOT = $default_xcat_root;
	}
	else {
		notify($ERRORS{'OK'}, 0, "XCATROOT environment variable is not set, using $default_xcat_root");
		$XCAT_ROOT = $default_xcat_root;
	}
	
	# Remove trailing slashes from $XCAT_ROOT
	# The lookbehind requires a non-slash character before the slashes so that a root of '/' is not reduced to an empty string
	$XCAT_ROOT =~ s{(?<=[^/])/+$}{};
	
	# Make sure the xCAT root path is valid
	if (!-d $XCAT_ROOT) {
		notify($ERRORS{'WARNING'}, 0, "unable to initialize xCAT module, $XCAT_ROOT directory does not exist");
		return;
	}
	
	# Check to make sure one of the expected executables is where it should be
	if (!-x "$XCAT_ROOT/bin/rpower") {
		notify($ERRORS{'WARNING'}, 0, "unable to initialize xCAT module, expected executable was not found: $XCAT_ROOT/bin/rpower");
		return;
	}
	
	# Check to make sure one of the xCAT 2.x executables not included in 1.x exists
	if (!-x "$XCAT_ROOT/bin/lsdef") {
		notify($ERRORS{'WARNING'}, 0, "unable to initialize xCAT module, xCAT version is not supported, expected xCAT 2.x+ executable was not found: $XCAT_ROOT/bin/lsdef");
		return;
	}
	
	notify($ERRORS{'DEBUG'}, 0, "xCAT module initialized");
	return 1;
} ## end sub initialize
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
=head2 unload

 Parameters  : none
 Returns     : boolean
 Description : Unloads the computer by calling power_off(). Returns 1 if
               power_off() returns true and 0 if it returns false. Returns
               undefined if not called on an xCAT module object.

=cut

sub unload {
	my $self = shift;
	
	# Refuse to run unless invoked on an object whose class name contains 'xCAT'
	unless (ref($self) =~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	# Normalize the result of power_off() to 1 or 0
	return $self->power_off() ? 1 : 0;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
=head2 load

 Parameters  : none
 Returns     : boolean
 Description : Loads a computer with the image defined in the reservation data.
               The following steps are performed:
               * The xCAT nodetype and nodelist tables are edited for the node
               * Waits up to 30 minutes for _is_throttle_limit_reached() to
                 report false. The limit is read from the 'xcat|throttle'
                 variables, default: 10
               * nodeset is called with 'install' and the node's power is reset
               * /var/log/messages is watched for DHCP activity from the node's
                 MAC address, nodestat and nodeset status are polled until the
                 installation completes or a timeout is reached
               * The OS module's post_load() subroutine is called if it is
                 implemented
               Monitoring fails if no progress is detected for the image reload
               time (minimum: 10 minutes) or if the overall timeout is reached
               (the greater of 60 minutes or 2x the image reload time).
               Returns 1 if successful, undefined otherwise.

=cut

sub load {
	my $self = shift;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	# Get the data
	my $reservation_id = $self->data->get_reservation_id();
	my $image_name = $self->data->get_image_name();
	my $image_reload_time_minutes = $self->data->get_image_reload_time() || 10;
	my $computer_id = $self->data->get_computer_id();
	my $computer_node_name = $self->data->get_computer_node_name();
	my $management_node_hostname = $self->data->get_management_node_hostname();
	
	insertloadlog($reservation_id, $computer_id, "startload", "$computer_node_name $image_name");
	
	# Insert a computerloadlog record and edit nodetype table to set the image information for the computer
	insertloadlog($reservation_id, $computer_id, "editnodetype", "updating nodetype table");
	$self->_edit_nodetype($computer_node_name, $image_name) || return;
	
	# Edit nodelist table to set the xCAT groups for the computer
	$self->_edit_nodelist($computer_node_name, $image_name) || return;
	
	# Check to see if management node throttle is configured
	# The management node-specific variables take precedence over the general 'xcat|throttle' variable
	my $throttle_limit = get_variable("xcat|throttle|$management_node_hostname", 0) || get_variable("$management_node_hostname|xcat|throttle", 0) || get_variable("xcat|throttle", 0);
	if (!$throttle_limit || $throttle_limit !~ /^\d+$/) {
		$throttle_limit = 10;
		notify($ERRORS{'DEBUG'}, 0, "xCAT load throttle limit variable is NOT set in database: 'xcat|throttle', using default value: $throttle_limit");
	}
	else {
		notify($ERRORS{'DEBUG'}, 0, "xCAT load throttle limit variable is set in database: $throttle_limit");
	}
	
	# Wait until _is_throttle_limit_reached returns false
	my $throttle_limit_wait_seconds = (30 * 60);
	if (!$self->code_loop_timeout(sub{!$self->_is_throttle_limit_reached(@_)}, [$throttle_limit], 'checking throttle limit', $throttle_limit_wait_seconds, 1, 10)) {
		notify($ERRORS{'WARNING'}, 0, "failed to load image due to throttle limit, waited $throttle_limit_wait_seconds seconds");
		return;
	}
	
	# Set the computer to install on next boot
	$self->_nodeset($computer_node_name, 'install') || return;
	
	# Restart the node
	$self->power_reset($computer_node_name) || return;
	
	# Run lsdef to retrieve the node's configuration including its MAC address
	my $node_info = $self->_lsdef($computer_node_name);
	if (!$node_info) {
		notify($ERRORS{'WARNING'}, 0, "unable to monitor loading of $computer_node_name, failed to retrieve node info");
		return;
	}
	my $mac_address = $node_info->{mac};
	if ($mac_address) {
		notify($ERRORS{'DEBUG'}, 0, "retrieved MAC address of $computer_node_name: $mac_address");
	}
	else {
		notify($ERRORS{'WARNING'}, 0, "unable to monitor loading of $computer_node_name, node info does not contain the MAC address:\n" . format_data($node_info));
		return;
	}
	
	# nodeset changes xCAT state to 'install'
	# node is power cycled or powered on (nodeset/nodestat status: install/noping)
	# Wait for node to boot from network (may take from 30 seconds to several minutes if node is using UEFI)
	# In /var/log/messages:, node makes DHCP request & requests PXE boot information from DHCP server running on management node:
	# Apr 1 09:36:39 vclmgt dhcpd: DHCPDISCOVER from xx:xx:xx:xx:xx:xx via ethX
	# Apr 1 09:36:39 vclmgt dhcpd: DHCPOFFER on 10.yy.yy.yy to xx:xx:xx:xx:xx:xx via ethX
	# Apr 1 09:36:43 vclmgt dhcpd: DHCPREQUEST for 10.yy.yy.yy (10.mn.mn.mn) from xx:xx:xx:xx:xx:xx via ethX
	# Apr 1 09:36:43 vclmgt dhcpd: DHCPACK on 10.yy.yy.yy to xx:xx:xx:xx:xx:xx via ethX
	#
	# Node requests PXE boot files from TFTP server running on management node:
	# Apr 1 09:36:43 vclmgt atftpd[27522]: Serving pxelinux.0 to 10.yy.yy.yy:2070
	# Apr 1 09:36:43 vclmgt atftpd[27522]: Serving pxelinux.0 to 10.yy.yy.yy:2071
	# Apr 1 09:36:43 vclmgt atftpd[27522]: Serving pxelinux.cfg/xx-xx-xx-xx-xx-xx to 10.yy.yy.yy:57089
	# Apr 1 09:36:43 vclmgt atftpd[27522]: Serving pxelinux.cfg/0A0A0132 to 10.yy.yy.yy:57090
	# Apr 1 09:36:43 vclmgt atftpd[27522]: Serving xcat/rhel6/x86_64/vmlinuz to 10.yy.yy.yy:57091
	# Apr 1 09:36:43 vclmgt atftpd[27522]: Serving xcat/rhel6/x86_64/initrd.img to 10.yy.yy.yy:57092
	#
	# Node boots using files downloaded from TFTP/PXE server, makes another DHCP request:
	# Apr 1 09:37:15 vclmgt dhcpd: DHCPDISCOVER from xx:xx:xx:xx:xx:xx via ethX
	# Apr 1 09:37:15 vclmgt dhcpd: DHCPOFFER on 10.yy.yy.yy to xx:xx:xx:xx:xx:xx via ethX
	# Apr 1 09:37:15 vclmgt dhcpd: DHCPREQUEST for 10.yy.yy.yy (10.mn.mn.mn) from xx:xx:xx:xx:xx:xx via ethX
	# Apr 1 09:37:15 vclmgt dhcpd: DHCPACK on 10.yy.yy.yy to xx:xx:xx:xx:xx:xx via ethX
	# OS installation begins (nodeset/nodestat status: install/installing prep)
	# If Kickstart, Linux packages are installed (nodestat status: 'installing <package> (x%)')
	# If Kickstart, postscripts are installed (nodestat status: 'installing post scripts')
	# When installation is complete, xCAT status is changed to 'boot' and node is restarted (nodeset/nodestat status: boot/noping)
	# Node boots from hard drive (nodeset/nodestat status: boot/boot)
	
	# Open the /var/log/messages file for reading
	my $messages_file_path = '/var/log/messages';
	my $log = IO::File->new($messages_file_path, "r");
	if (!$log) {
		my $error = $! || 'none';
		notify($ERRORS{'WARNING'}, 0, "failed to open $messages_file_path for reading, error: $error");
		return;
	}
	# Go to the end of the messages file so that only lines written from now on are read
	# A seek failure is reported but monitoring continues, lines already in the file will be read on the first iteration
	if (!$log->seek(0, SEEK_END)) {
		my $error = $! || 'none';
		notify($ERRORS{'CRITICAL'}, 0, "failed to seek end of $messages_file_path, error: $error");
	}
	
	insertloadlog($reservation_id, $computer_id, "xcatstage5", "loading image $image_name");
	
	# Never wait less than 10 minutes for a change to be detected
	if ($image_reload_time_minutes < 10) {
		$image_reload_time_minutes = 10;
	}
	my $nochange_timeout_seconds = ($image_reload_time_minutes * 60);
	
	my $monitor_start_time = time;
	my $last_change_time = $monitor_start_time;
	my $nochange_timeout_time = ($last_change_time + $nochange_timeout_seconds);
	
	# Sanity check, timeout the load monitoring after a set amount of time
	# This is done in case there is an endless loop which causes the node status to change over and over again
	# Overall timeout is the greater of 60 minutes or 2x image reload time
	my $overall_timeout_minutes;
	if ($image_reload_time_minutes < 30) {
		$overall_timeout_minutes = 60;
	}
	else {
		$overall_timeout_minutes = ($image_reload_time_minutes * 2);
	}
	my $overall_timeout_time = ($monitor_start_time + $overall_timeout_minutes * 60);
	
	# Number of seconds to wait between checks
	# Set to a short delay at the beginning of monitoring, this will be increased once installation start is detected
	my $monitor_delay_seconds = 5;
	
	# Keep track of when reservation.lastcheck was last updated
	my $update_lastcheck_interval_seconds = 60;
	my $update_lastcheck_time = time;
	update_reservation_lastcheck($reservation_id);
	
	# Compile the pattern used to find the node's DHCP lines once
	# The MAC address is quoted so that any regex metacharacters in the value retrieved from lsdef are matched literally
	my $dhcp_line_regex = qr/dhcpd:.+DHCP.+\s\Q$mac_address\E\s/i;
	
	my $previous_nodestat_status;
	my $previous_nodeset_status;
	my $current_time;
	my $install_started = 0;
	
	# Flags used to insert the DHCP computerloadlog records only once
	# The node normally makes more than one DHCP request while it is being loaded
	my $dhcp_request = 0;
	my $dhcp_ack = 0;
	
	MONITOR_LOADING: while (($current_time = time) < $nochange_timeout_time && $current_time < $overall_timeout_time) {
		my $total_elapsed_seconds = ($current_time - $monitor_start_time);
		my $nochange_elapsed_seconds = ($current_time - $last_change_time);
		my $nochange_remaining_seconds = ($nochange_timeout_time - $current_time);
		my $overall_remaining_seconds = ($overall_timeout_time - $current_time);
		notify($ERRORS{'DEBUG'}, 0, "monitoring $image_name loading on $computer_node_name\n" .
			"seconds since monitor start/until unconditional timeout: $total_elapsed_seconds/$overall_remaining_seconds\n" .
			"seconds since last change/until no change timeout: $nochange_elapsed_seconds/$nochange_remaining_seconds"
		);
		
		# Flag to set if anything changes
		my $reset_timeout = 0;
		
		# Check if any lines have shown up in /var/log/messages for the node since the previous iteration
		my @lines = $log->getlines;
		my @dhcp_lines = grep { $_ =~ $dhcp_line_regex } @lines;
		if (@dhcp_lines) {
			if (!$dhcp_request && grep(/DHCPREQUEST/i, @dhcp_lines)) {
				insertloadlog($reservation_id, $computer_id, "xcatstage1", "requested DHCP lease");
				$dhcp_request = 1;
			}
			
			if (my ($dhcpack_line) = grep(/DHCPACK/i, @dhcp_lines)) {
				notify($ERRORS{'DEBUG'}, 0, "$computer_node_name acquired DHCP lease: '$dhcpack_line'");
				if (!$dhcp_ack) {
					insertloadlog($reservation_id, $computer_id, "xcatstage2", "acquired DHCP lease");
					insertloadlog($reservation_id, $computer_id, "xcatround2", "waiting for boot flag");
					$dhcp_ack = 1;
				}
			}
			
			# Any DHCP activity from the node counts as progress
			$reset_timeout = 1;
			notify($ERRORS{'DEBUG'}, 0, "DHCP activity detected in $messages_file_path:\n" . join("\n", @dhcp_lines));
		}
		
		# Get the current status of the node
		# Set previous status to current status if this is the first iteration
		my $current_nodestat_status = $self->_nodestat($computer_node_name);
		$previous_nodestat_status = $current_nodestat_status if !defined($previous_nodestat_status);
		
		my $current_nodeset_status = $self->_nodeset($computer_node_name, 'stat');
		$previous_nodeset_status = $current_nodeset_status if !defined($previous_nodeset_status);
		
		if (!defined($current_nodestat_status) || !defined($current_nodeset_status)) {
			# A status could not be retrieved, treat this iteration as no status change rather than comparing undefined values
			notify($ERRORS{'DEBUG'}, 0, "unable to check status of $computer_node_name, nodestat status: " . (defined($current_nodestat_status) ? $current_nodestat_status : '<undefined>') . ", nodeset status: " . (defined($current_nodeset_status) ? $current_nodeset_status : '<undefined>'));
		}
		else {
			if (!$install_started) {
				# Check if the installation has started
				if ($current_nodestat_status =~ /(install|partimage)/i) {
					# Slow down the monitor looping
					$monitor_delay_seconds = 20;
					notify($ERRORS{'DEBUG'}, 0, "installation has started, increasing wait between monitoring checks to $monitor_delay_seconds seconds");
					$install_started = 1;
				}
				
				# If installation start was missed, nodeset will go from install to boot
				if ($previous_nodeset_status =~ /install/i && $current_nodeset_status eq 'boot') {
					notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is finished loading image, nodeset status changed: $previous_nodeset_status --> $current_nodeset_status");
					insertloadlog($reservation_id, $computer_id, "bootstate", "$computer_node_name image load complete: $current_nodestat_status, $current_nodeset_status");
					last MONITOR_LOADING;
				}
			}
			else {
				# nodestat will return 'sshd' if the computer is responding to SSH while it is being installed instead of the more detailed information
				# Try to get the installation status directly using a socket
				if ($current_nodestat_status eq 'sshd') {
					$current_nodestat_status = $self->_get_install_status($computer_node_name) || 'sshd';
				}
				
				# Check if the installation has completed
				if ($current_nodestat_status =~ /^(boot|complete)$/i || $current_nodeset_status =~ /^(boot)$/i) {
					notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is finished loading image, current nodestat status: $current_nodestat_status, nodeset status: $current_nodeset_status");
					insertloadlog($reservation_id, $computer_id, "bootstate", "$computer_node_name image load complete: $current_nodestat_status, $current_nodeset_status");
					last MONITOR_LOADING;
				}
			}
			
			# Check if the nodestat or nodeset status changed from previous iteration
			if ($current_nodestat_status ne $previous_nodestat_status || $current_nodeset_status ne $previous_nodeset_status) {
				$reset_timeout = 1;
				notify($ERRORS{'DEBUG'}, 0, "status of $computer_node_name changed");
				
				# Set previous status to the current status
				$previous_nodestat_status = $current_nodestat_status;
				$previous_nodeset_status = $current_nodeset_status;
			}
			else {
				notify($ERRORS{'DEBUG'}, 0, "status of $computer_node_name has not changed: $current_nodestat_status");
			}
		}
		
		# If any changes were detected, reset the nochange timeout
		if ($reset_timeout) {
			$last_change_time = $current_time;
			$nochange_timeout_time = ($last_change_time + $nochange_timeout_seconds);
			
			# Check how long ago reservation.lastcheck was updated
			# Update it occasionally - used by parent reservation in cluster requests to detect that child reservations are still loading
			# Updating reservation.lastcheck prevents the parent from timing out while waiting for children to finish loading
			my $update_lastcheck_elapsed = ($current_time - $update_lastcheck_time);
			if ($update_lastcheck_elapsed >= $update_lastcheck_interval_seconds) {
				update_reservation_lastcheck($reservation_id);
				$update_lastcheck_time = time;
			}
		}
		
		#notify($ERRORS{'DEBUG'}, 0, "sleeping for $monitor_delay_seconds seconds");
		sleep $monitor_delay_seconds;
	}
	
	$log->close;
	
	# Check if timeout was reached
	# $current_time holds the value assigned by the last evaluation of the loop condition, it is less than both timeouts if the loop was exited via last
	if ($current_time >= $nochange_timeout_time) {
		notify($ERRORS{'WARNING'}, 0, "failed to load $image_name on $computer_node_name, timed out because no progress was detected for $nochange_timeout_seconds seconds, start of installation detected: " . ($install_started ? 'yes' : 'no'));
		return;
	}
	elsif ($current_time >= $overall_timeout_time) {
		notify($ERRORS{'CRITICAL'}, 0, "failed to load $image_name on $computer_node_name, timed out because loading took longer than $overall_timeout_minutes minutes, start of installation detected: " . ($install_started ? 'yes' : 'no'));
		return;
	}
	
	# Call the OS module's post_load() subroutine if implemented
	insertloadlog($reservation_id, $computer_id, "xcatround3", "initiating OS post-load configuration");
	if ($self->os->can("post_load")) {
		if ($self->os->post_load()) {
			notify($ERRORS{'OK'}, 0, "performed OS post-load tasks on $computer_node_name");
		}
		else {
			notify($ERRORS{'WARNING'}, 0, "failed to perform OS post-load tasks on VM $computer_node_name");
			return;
		}
	}
	else {
		notify($ERRORS{'OK'}, 0, "OS post-load tasks not necessary on $computer_node_name");
	}
	
	return 1;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
=head2 capture

 Parameters  : none
 Returns     : boolean
 Description : Captures the image which is currently loaded on the computer.
               The following steps are performed:
               * The OS module's pre_capture() subroutine is called with
                 end_state => 'off', the OS module must implement it
               * Waits up to 2 minutes for the node to power off, powers it
                 off if it is still on
               * The xCAT nodetype table, image template, and nodelist table
                 are updated, nodeset is called with 'image'
               * The node is powered on and the capture is monitored until
                 <image name>.img.capturedone or <image name>.img.capturefailed
                 appears in the image repository directory, the node status
                 changes to boot with an image size > 0, no progress is
                 detected for 20 minutes, or 6 hours have elapsed
               * The permissions of the captured image files are set to 644
               Returns 1 if successful, undefined otherwise.

=cut

sub capture {
	my $self = shift;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	my $image_name = $self->data->get_image_name();
	my $computer_node_name = $self->data->get_computer_node_name();
	
	# Get the image repository path
	my $image_repository_path = $self->get_image_repository_directory_path($image_name);
	if (!$image_repository_path) {
		notify($ERRORS{'CRITICAL'}, 0, "xCAT image repository information could not be determined");
		return;
	}
	
	# Marker files checked while monitoring to determine whether the capture finished or failed
	my $capture_done_file_path = "$image_repository_path/$image_name.img.capturedone";
	my $capture_failed_file_path = "$image_repository_path/$image_name.img.capturefailed";
	
	# Print some preliminary information
	notify($ERRORS{'OK'}, 0, "attempting to capture image '$image_name' on $computer_node_name");
	
	# Check if pre_capture() subroutine has been implemented by the OS module
	if ($self->os->can("pre_capture")) {
		# Call OS pre_capture() - it should perform all OS steps necessary to capture an image
		# pre_capture() should shut down the computer when it is done
		if (!$self->os->pre_capture({end_state => 'off'})) {
			notify($ERRORS{'WARNING'}, 0, "OS module pre_capture() failed");
			return;
		}
		
		# The OS module should turn the computer power off
		# Wait up to 2 minutes for the computer's power status to be off
		if ($self->_wait_for_off($computer_node_name, 120)) {
			notify($ERRORS{'OK'}, 0, "computer $computer_node_name power is off");
		}
		else {
			notify($ERRORS{'WARNING'}, 0, "$computer_node_name power is still on, turning computer off");
			
			# Attempt to power off computer
			if ($self->power_off()) {
				notify($ERRORS{'OK'}, 0, "$computer_node_name was powered off");
			}
			else {
				notify($ERRORS{'WARNING'}, 0, "failed to power off $computer_node_name");
				return;
			}
		}
	}
	else {
		# The image cannot be captured without pre_capture(), the previous message was missing the word 'not'
		notify($ERRORS{'WARNING'}, 0, "OS module does not implement a pre_capture() subroutine");
		return;
	}
	
	# Set the xCAT nodetype to the new image for the node
	$self->_edit_nodetype($computer_node_name, $image_name) || return;
	
	# Create the .tmpl file for the image
	$self->_create_template($image_name) || return;
	
	# Edit xCAT's nodelist table to set the correct node groups
	$self->_edit_nodelist($computer_node_name, $image_name) || return;
	
	# Call xCAT's nodeset to configure xCAT to save image on next reboot
	$self->_nodeset($computer_node_name, 'image') || return;
	
	# Power on the node in order to capture the image
	if (!$self->power_on()) {
		notify($ERRORS{'WARNING'}, 0, "failed to power on computer before monitoring image capture");
		return;
	}
	
	# Fail if neither the image size nor the node status changes for this long
	my $nochange_timeout_minutes = 20;
	my $nochange_timeout_seconds = ($nochange_timeout_minutes * 60);
	my $monitor_delay_seconds = 30;
	
	my $monitor_start_time = time;
	my $last_change_time = $monitor_start_time;
	my $nochange_timeout_time = ($last_change_time + $nochange_timeout_seconds);
	
	# Sanity check, timeout the monitoring after 6 hours
	my $overall_timeout_hours = 6;
	my $overall_timeout_minutes = ($overall_timeout_hours * 60);
	my $overall_timeout_time = ($monitor_start_time + $overall_timeout_minutes * 60);
	
	my $previous_status;
	my $previous_image_size = 0;
	my $current_time;
	MONITOR_CAPTURE: while (($current_time = time) < $nochange_timeout_time && $current_time < $overall_timeout_time) {
		my $total_elapsed_seconds = ($current_time - $monitor_start_time);
		my $nochange_elapsed_seconds = ($current_time - $last_change_time);
		my $nochange_remaining_seconds = ($nochange_timeout_time - $current_time);
		my $overall_remaining_seconds = ($overall_timeout_time - $current_time);
		notify($ERRORS{'DEBUG'}, 0, "monitoring capture of $image_name on $computer_node_name:\n" .
			"seconds since monitor start/until unconditional timeout: $total_elapsed_seconds/$overall_remaining_seconds\n" .
			"seconds since last change/until no change timeout: $nochange_elapsed_seconds/$nochange_remaining_seconds"
		);
		
		# Check for the marker files, remove the marker once it has been detected
		if ($self->mn_os->file_exists($capture_done_file_path)) {
			notify($ERRORS{'OK'}, 0, "capture of $image_name on $computer_node_name complete, file exists: $capture_done_file_path");
			$self->mn_os->delete_file($capture_done_file_path);
			last MONITOR_CAPTURE;
		}
		elsif ($self->mn_os->file_exists($capture_failed_file_path)) {
			notify($ERRORS{'WARNING'}, 0, "failed to capture $image_name on $computer_node_name, file exists: $capture_failed_file_path");
			$self->mn_os->delete_file($capture_failed_file_path);
			return;
		}
		
		# Check if the image size has changed
		# Treat a size which could not be determined as 0 so that it can safely be compared below
		my $current_image_size = $self->get_image_size($image_name) || 0;
		if ($current_image_size ne $previous_image_size) {
			notify($ERRORS{'DEBUG'}, 0, "size of $image_name changed: $previous_image_size --> $current_image_size, reset monitoring timeout to $nochange_timeout_seconds seconds");
			
			# Set previous image size to the current image size
			$previous_image_size = $current_image_size;
			
			$last_change_time = $current_time;
			$nochange_timeout_time = ($last_change_time + $nochange_timeout_seconds);
		}
		else {
			# Get the current status of the node
			my $current_status = $self->_nodestat($computer_node_name);
			if (!defined($current_status)) {
				# The status could not be retrieved, treat this iteration as no change rather than comparing an undefined value
				notify($ERRORS{'DEBUG'}, 0, "unable to check status of $computer_node_name, nodestat status could not be retrieved");
			}
			else {
				# Set previous status to current status if this is the first iteration
				$previous_status = $current_status if !defined($previous_status);
				if ($current_status ne $previous_status) {
					
					# If the node status changed to 'boot' and the image size > 0, assume image capture complete
					if ($current_status =~ /boot/ && $current_image_size > 0) {
						notify($ERRORS{'DEBUG'}, 0, "image capture appears to be complete, node status changed: $previous_status --> $current_status, image size > 0: $current_image_size");
						last MONITOR_CAPTURE;
					}
					
					notify($ERRORS{'DEBUG'}, 0, "status of $computer_node_name changed: $previous_status --> $current_status, reset monitoring timeout to $nochange_timeout_seconds seconds");
					
					# Set previous status to the current status
					$previous_status = $current_status;
					
					$last_change_time = $current_time;
					$nochange_timeout_time = ($last_change_time + $nochange_timeout_seconds);
				}
			}
		}
		
		notify($ERRORS{'DEBUG'}, 0, "sleeping for $monitor_delay_seconds seconds");
		sleep $monitor_delay_seconds;
	}
	
	# Check if timeout was reached
	# $current_time holds the value assigned by the last evaluation of the loop condition, it is less than both timeouts if the loop was exited via last
	if ($current_time >= $nochange_timeout_time) {
		notify($ERRORS{'WARNING'}, 0, "failed to capture $image_name on $computer_node_name, timed out because no progress was detected for $nochange_timeout_minutes minutes");
		return;
	}
	elsif ($current_time >= $overall_timeout_time) {
		notify($ERRORS{'CRITICAL'}, 0, "failed to capture $image_name on $computer_node_name, timed out because capture took longer than $overall_timeout_hours hours");
		return;
	}
	
	# Set the permissions on the captured image files
	$self->mn_os->set_file_permissions("$image_repository_path/$image_name\*", 644, 1);
	
	notify($ERRORS{'OK'}, 0, "successfully captured $image_name on $computer_node_name");
	return 1;
}
| #////////////////////////////////////////////////////////////////////////////// |
| |
=head2 does_image_exist

 Parameters  : $image_name (optional)
 Returns     : boolean
 Description : Checks the management node's local image repository for the
               existence of the requested image and xCAT template (.tmpl) file.
               The image name stored in the reservation data is used if the
               argument is not supplied.
               
               For images whose OS install type is not 'kickstart':
               * If the image files exist but the .tmpl file does not, it
                 creates the .tmpl file
               * If a .tmpl file exists but the image files do not, it deletes
                 the orphaned .tmpl file
               
               Returns 1 if both the image files and the .tmpl file exist, 0 if
               either is missing, and undefined if an error occurred.
               
               This subroutine does not attempt to copy the image from another
               management node. The retrieve_image() subroutine does this.
               Callers of does_image_exist must also call retrieve_image if
               image library retrieval functionality is desired.

=cut

sub does_image_exist {
	my $self = shift;
	unless (ref($self) && $self->isa('VCL::Module')) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a VCL::Module module object method");
		return;
	}
	
	# Get the image name, first try passed argument, then data
	my $image_name = shift || $self->data->get_image_name();
	if (!$image_name) {
		notify($ERRORS{'WARNING'}, 0, "unable to determine image name");
		return;
	}
	
	# Get the image install type
	my $image_os_install_type = $self->data->get_image_os_install_type();
	if (!$image_os_install_type) {
		notify($ERRORS{'WARNING'}, 0, "image OS install type could not be determined");
		return;
	}
	else {
		notify($ERRORS{'DEBUG'}, 0, "image OS install type: $image_os_install_type");
	}
	
	# Get the image repository path
	my $image_repository_path = $self->get_image_repository_directory_path($image_name);
	if (!$image_repository_path) {
		notify($ERRORS{'WARNING'}, 0, "image repository path could not be determined");
		return;
	}
	else {
		notify($ERRORS{'DEBUG'}, 0, "image repository path: $image_repository_path");
	}
	
	# Run du to get the size of the image files if the image exists
	# The entire repository directory is measured for kickstart images, otherwise only the files containing the image name
	my $du_command;
	if ($image_os_install_type eq 'kickstart') {
		$du_command = "du -c $image_repository_path 2>&1 | grep total 2>&1"
	}
	else {
		$du_command = "du -c $image_repository_path/*$image_name* 2>&1 | grep total 2>&1"
	}
	
	my ($du_exit_status, $du_output) = $self->mn_os->execute($du_command);
	if (!defined($du_output)) {
		notify($ERRORS{'WARNING'}, 0, "failed to execute command $du_command");
		return;
	}
	
	# Determine whether or not the image files exist based on the du output
	my $image_files_exist = 0;
	if (grep(/no such file/i, @$du_output)) {
		# du reported that the path does not exist on this management node
		notify($ERRORS{'OK'}, 0, "$image_name does NOT exist");
	}
	elsif (!grep(/\d+\s+total/i, @$du_output)) {
		notify($ERRORS{'WARNING'}, 0, "du output does not contain a total line:\n" . join("\n", @$du_output));
		return;
	}
	else {
		# Extract the size from the total line, 'du -c' prints the grand total last so use the last matching line
		# The total is not assumed to be the first line of output
		my @total_sizes = map { /(\d+)\s+total/i ? $1 : () } @$du_output;
		my $image_size = $total_sizes[-1];
		
		# The image files exist if the total size > 0
		if ($image_size && $image_size > 0) {
			# NOTE(review): assumes du reports its default 1K blocks - confirm for the management node's du
			my $image_size_mb = int($image_size / 1024);
			notify($ERRORS{'DEBUG'}, 0, "$image_name exists in $image_repository_path, size: $image_size_mb MB");
			$image_files_exist = 1;
		}
		else {
			notify($ERRORS{'DEBUG'}, 0, "image does NOT exist: $image_name");
		}
	}
	
	# Make sure template (.tmpl) file exists
	# Get the tmpl repository path
	my $tmpl_repository_path = $self->_get_tmpl_directory_path($image_name);
	if (!$tmpl_repository_path) {
		notify($ERRORS{'WARNING'}, 0, "image template path could not be determined for $image_name");
		return;
	}
	else {
		notify($ERRORS{'DEBUG'}, 0, "template repository path for $image_name: $tmpl_repository_path");
	}
	
	# Check if template file exists for the image
	# -s File has nonzero size
	my $tmpl_file_exists;
	if (-s "$tmpl_repository_path/$image_name.tmpl") {
		$tmpl_file_exists = 1;
		notify($ERRORS{'DEBUG'}, 0, "template file exists: $image_name.tmpl");
	}
	else {
		$tmpl_file_exists = 0;
		notify($ERRORS{'DEBUG'}, 0, "template file does not exist: $tmpl_repository_path/$image_name.tmpl");
	}
	
	# Check if either tmpl file or image files exist, but not both
	# Attempt to correct the situation unless this is a kickstart image:
	# tmpl file exists but not image files: delete tmpl file
	# image files exist but not tmpl file: create tmpl file
	if ($tmpl_file_exists && !$image_files_exist && $image_os_install_type ne 'kickstart') {
		notify($ERRORS{'WARNING'}, 0, "template file exists but image files do not for $image_name");
		
		# Attempt to delete the orphaned tmpl file for the image
		if ($self->_delete_template($image_name)) {
			notify($ERRORS{'OK'}, 0, "deleted orphaned template file for image $image_name");
			$tmpl_file_exists = 0;
		}
		else {
			notify($ERRORS{'WARNING'}, 0, "failed to delete orphaned template file for image $image_name, returning undefined");
			return;
		}
	}
	elsif (!$tmpl_file_exists && $image_files_exist && $image_os_install_type ne 'kickstart') {
		notify($ERRORS{'WARNING'}, 0, "image files exist but template file does not for $image_name");
		
		# Attempt to create the missing tmpl file for the image
		if ($self->_create_template($image_name)) {
			notify($ERRORS{'OK'}, 0, "created missing template file for image $image_name");
			$tmpl_file_exists = 1;
		}
		else {
			notify($ERRORS{'WARNING'}, 0, "failed to create missing template file for image $image_name, returning undefined");
			return;
		}
	}
	
	# Check if both image files and tmpl file were found and return
	if ($tmpl_file_exists && $image_files_exist) {
		notify($ERRORS{'DEBUG'}, 0, "image $image_name exists on this management node");
		return 1;
	}
	else {
		notify($ERRORS{'DEBUG'}, 0, "image $image_name does NOT exist on this management node");
		return 0;
	}
	
} ## end sub does_image_exist
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 get_image_size |
| |
| Parameters : $image_name (optional) |
| Returns : integer |
| Description : Retrieves the image size in megabytes. |
| |
| =cut |
| |
# Returns the combined size, in megabytes, of all files in the image
# repository directory whose names begin with the image name.
# Parameters : $image_name (optional, defaults to the reservation image)
# Returns    : integer number of MB, 0 if the image files do not exist,
#              undefined if the size could not be determined
sub get_image_size {
	my $self = shift;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	# Either use a passed parameter as the image name or use the one stored in this object's DataStructure
	my $image_name = shift || $self->data->get_image_name();
	if (!$image_name) {
		notify($ERRORS{'CRITICAL'}, 0, "image name could not be determined");
		return;
	}
	
	my $image_repository_path = $self->get_image_repository_directory_path($image_name);
	if (!$image_repository_path) {
		notify($ERRORS{'CRITICAL'}, 0, "unable to determine image repository location, returning undefined");
		return;
	}
	
	# Execute the command
	# -c appends a grand total line
	# -k forces 1 KB blocks so the result does not depend on environment
	#    variables such as BLOCKSIZE or POSIXLY_CORRECT
	my $du_command = "du -ck $image_repository_path/$image_name* 2>&1";
	#notify($ERRORS{'DEBUG'}, 0, "du command: $du_command");
	my $du_output = `$du_command`;
	
	# Save the exit status
	my $du_exit_status = $? >> 8;
	
	# Make sure du produced output
	if (!defined($du_output) || length($du_output) == 0) {
		notify($ERRORS{'WARNING'}, 0, "du did not product any output, du exit status: $du_exit_status");
		return;
	}
	
	# Check if image doesn't exist
	# The total line must be exactly 0, anchoring prevents a total such as
	# '120 total' from being mistaken for '0 total'
	if ($du_output =~ /No such file/i && $du_output =~ /^0\s+total\s*$/m) {
		notify($ERRORS{'OK'}, 0, "image does not exist: $image_repository_path/$image_name.*, returning 0");
		return 0;
	}
	
	# Check the du command output, only accept the grand total line
	my ($size_kb) = $du_output =~ /^(\d+)\s+total\s*$/m;
	if (!defined $size_kb) {
		notify($ERRORS{'WARNING'}, 0, "du command did not produce expected output, du exit staus: $du_exit_status, output:\n$du_output");
		return;
	}
	
	# Calculate the size in MB, du reported the total in KB
	my $size_mb = int($size_kb / 1024);
	notify($ERRORS{'DEBUG'}, 0, "returning image size: $size_mb MB ($size_kb KB)");
	return $size_mb;
	
} ## end sub get_image_size
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 get_nodetype_image_os_name |
| |
| Parameters : $image_name |
| Returns : string |
| Description : Determines the name of the directory where installation files |
| should reside under the management node's install path. |
| Examples: |
| * image |
| * centos5 |
| * rhels7.2 |
| * ubuntu16.04.1 |
| |
| The path is determined by first checking if a directory exists |
| matching the database values: |
| * managementnode.installpath (ex: /install) |
| * OS.sourcepath (ex: rhel7) |
| * image.architecture (ex: x86_64) |
| |
| Based on these values, the default path will be: |
| /install/rhel7/x86_64 |
| |
| If a directory exactly matching OS.sourcepath cannot be located |
| on the management node, an attempt is made to locate an |
| alternate suitable directory matching the distribution and major |
| version. Example, if OS.sourcepath = 'rhel7' and the default |
| directory does not exist: |
| /install/rhel7/x86_64 |
| |
| Any of the following paths which exist on the management node may |
| be returned: |
| /install/rhel7.1/x86_64 |
| /install/rhels7.2/x86_64 |
| |
| If all of these paths exist, the path with the highest version is |
| returned: |
| rhels7.2 |
| |
| Note: for 'rhel', both 'rhel' and 'rhels' are checked. |
| |
| =cut |
| |
| |
# Determines the directory name under the management node's install path
# which is used as the xCAT nodetype.os value for an image.
# Parameters : $image_name (optional, defaults to the reservation image)
# Returns    : string, or undefined if required image or management node
#              data is missing or the OS install type is not supported
# The result is cached in $self->{xcat_image_os_name}{$image_name}.
sub get_nodetype_image_os_name {
	my $self = shift;
	unless (ref($self) && $self->isa('VCL::Module')) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a VCL::Module module object method");
		return;
	}
	
	# Get the image name argument
	my $image_name = shift || $self->data->get_image_name();
	if (!$image_name) {
		# The image name is used as a hash key below, it must be defined
		notify($ERRORS{'WARNING'}, 0, "image name argument was not specified and could not be retrieved from \$self->data");
		return;
	}
	
	# Check if path has already been determined
	if (defined($self->{xcat_image_os_name}{$image_name})) {
		return $self->{xcat_image_os_name}{$image_name};
	}
	
	my $management_node_hostname = $self->data->get_management_node_hostname();
	my $management_node_install_path = $self->data->get_management_node_install_path() || return;
	
	# Create a DataStructure object containing info about the image
	my $image_data = $self->create_datastructure_object({image_identifier => $image_name}) || return;
	my $os_install_type = $image_data->get_image_os_install_type() || return;
	my $os_source_path = $image_data->get_image_os_source_path() || return;
	my $image_architecture = $image_data->get_image_architecture() || return;
	
	if ($os_install_type =~ /image/i) {
		notify($ERRORS{'DEBUG'}, 0, "OS install type for image $image_name is $os_install_type, returning 'image'");
		$self->{xcat_image_os_name}{$image_name} = 'image';
		return 'image';
	}
	elsif ($os_install_type !~ /(kickstart|netboot)/) {
		notify($ERRORS{'WARNING'}, 0, "unable to determine nodetype image OS name for image $image_name, OS install type is not supported: $os_install_type");
		return;
	}
	
	# Remove trailing / from $management_node_install_path if exists
	$management_node_install_path =~ s/\/+$//g;
	
	# Remove leading and trailing slashes from $os_source_path if exists
	$os_source_path =~ s/^\/+//g;
	$os_source_path =~ s/\/+$//g;
	
	notify($ERRORS{'DEBUG'}, 0, "attempting to determine nodetype OS name for image on $management_node_hostname:\n" .
		"image name : $image_name\n" .
		"OS install type : $os_install_type\n" .
		"install path : $management_node_install_path\n" .
		"OS source path : $os_source_path\n" .
		"architecture : $image_architecture"
	);
	
	my $installation_repository_directory_path = "$management_node_install_path/$os_source_path/$image_architecture";
	
	# Check if the default path exists - it's often named something different
	# xCAT's copycds command will use something like /install/rhels6.6
	# OS.sourcepath is probably set to rhel6
	# Creating a symlink doesn't work correctly because xCAT fails to parse directory names which don't contain a period correctly
	if ($self->mn_os->file_exists($installation_repository_directory_path)) {
		$self->{xcat_image_os_name}{$image_name} = $os_source_path;
		notify($ERRORS{'DEBUG'}, 0, "default installation repository directory exists: $installation_repository_directory_path, returning '$self->{xcat_image_os_name}{$image_name}'");
		return $self->{xcat_image_os_name}{$image_name};
	}
	
	# Parse the version of the requested OS source path
	# The pattern is anchored at both ends so it can match at most once
	my ($os_distribution_name, $os_version_string, $major_os_version_string) = $os_source_path =~ /^([a-z]+)((\d+)[\d\.]*)$/i;
	if (!defined($os_distribution_name) || !defined($os_version_string) || !defined($major_os_version_string)) {
		$self->{xcat_image_os_name}{$image_name} = $os_source_path;
		notify($ERRORS{'WARNING'}, 0, "failed to determine nodetype OS name for image $image_name, OS.sourcepath could not be parsed: $os_source_path, returning default path: '$self->{xcat_image_os_name}{$image_name}'");
		return $self->{xcat_image_os_name}{$image_name};
	}
	
	notify($ERRORS{'DEBUG'}, 0, "default installation repository directory path does not exist: $installation_repository_directory_path, attempting to locate another suitable path matching distribution: $os_distribution_name, version: $os_version_string, major version: $major_os_version_string");
	
	# Fix regex for 'rhel' and 'rhels'
	# $os_distribution_name only contains letters so it is safe to use in a pattern
	my $os_distribution_regex = $os_distribution_name;
	if ($os_distribution_name =~ /^rhels?$/) {
		$os_distribution_regex = 'rhels?';
	}
	
	my $highest_version_string;
	my $highest_version_numified;
	my $highest_version_directory_path;
	my $highest_version_nodetype_os_name;
	
	# Retrieve list of directories under the root management node install path
	my @check_directory_paths = $self->mn_os->find_files($management_node_install_path, "*", 0, 'd');
	for my $check_directory_path (@check_directory_paths) {
		# Remove trailing slash
		$check_directory_path =~ s/\/+$//g;
		
		next if $check_directory_path eq $management_node_install_path;
		
		# Extract the directory name, this is the value which would be returned
		my ($check_nodetype_os_name) = $check_directory_path =~ /\/([^\/]+)$/;
		if (!defined($check_nodetype_os_name)) {
			notify($ERRORS{'WARNING'}, 0, "ignoring directory: $check_directory_path, failed to parse directory name (nodetype OS name)");
			next;
		}
		
		# Ignore directories whose name does not begin with the Linux OS distribution name
		# Only the directory name is checked, not the full path, otherwise the
		# install path itself or an unrelated substring could produce a match
		if ($check_nodetype_os_name !~ /^$os_distribution_regex/) {
			#notify($ERRORS{'DEBUG'}, 0, "ignoring directory: $check_directory_path, it does not match the pattern for the OS distribution: '$os_distribution_regex'");
			next;
		}
		
		# Parse the version and major version from the directory name
		# Only well-formed dotted versions are captured (7, 7.2, 16.04.1) so the
		# string can always be passed to version->declare
		my ($directory_version_string, $directory_major_version_string) = $check_nodetype_os_name =~ /^$os_distribution_regex((\d+)(?:\.\d+)*)/;
		if (!defined($directory_version_string) || !defined($directory_major_version_string)) {
			notify($ERRORS{'DEBUG'}, 0, "ignoring directory: $check_directory_path, version could not be determined");
			next;
		}
		
		# Make sure the major version matches
		if ($directory_major_version_string ne $major_os_version_string) {
			notify($ERRORS{'DEBUG'}, 0, "ignoring directory: $check_directory_path, major version $directory_major_version_string does not match requested major version $major_os_version_string");
			next;
		}
		
		# Make sure the correct architecture subdirectory exists
		my $check_installation_repository_directory_path = "$check_directory_path/$image_architecture";
		if (!$self->mn_os->file_exists($check_installation_repository_directory_path)) {
			notify($ERRORS{'DEBUG'}, 0, "ignoring directory: $check_directory_path, '$image_architecture' subdirectory does not exist");
			next;
		}
		
		# Use version->declare->numify to correctly compare versions, otherwise 6.9 > 6.10
		my $matching_version_numified = version->declare("$directory_version_string")->numify;
		
		if (!defined($highest_version_string)) {
			notify($ERRORS{'DEBUG'}, 0, "1st matching directory is possibly an alternate path: $check_installation_repository_directory_path, version: $directory_version_string");
		}
		elsif ($matching_version_numified <= $highest_version_numified) {
			# Version isn't higher than one previously checked
			notify($ERRORS{'DEBUG'}, 0, "directory ignored, version $directory_version_string ($matching_version_numified) is not higher than $highest_version_string ($highest_version_numified): $check_directory_path");
			next;
		}
		else {
			notify($ERRORS{'DEBUG'}, 0, "directory version $directory_version_string ($matching_version_numified) is greater than $highest_version_string ($highest_version_numified): $check_installation_repository_directory_path");
		}
		
		# Remember this directory as the best candidate found so far
		$highest_version_string = $directory_version_string;
		$highest_version_numified = $matching_version_numified;
		$highest_version_directory_path = $check_installation_repository_directory_path;
		$highest_version_nodetype_os_name = $check_nodetype_os_name;
	}
	
	if ($highest_version_nodetype_os_name) {
		$self->{xcat_image_os_name}{$image_name} = $highest_version_nodetype_os_name;
		notify($ERRORS{'OK'}, 0, "located alternate repository directory path on the local management node for kickstart image $image_name: $highest_version_directory_path, returning nodetype OS name: $self->{xcat_image_os_name}{$image_name}");
		return $self->{xcat_image_os_name}{$image_name};
	}
	else {
		# Nothing suitable was found, fall back to OS.sourcepath
		$self->{xcat_image_os_name}{$image_name} = $os_source_path;
		notify($ERRORS{'WARNING'}, 0, "failed to locate repository directory path on the local management node for kickstart image $image_name, returning default nodetype OS name: $self->{xcat_image_os_name}{$image_name}");
		return $self->{xcat_image_os_name}{$image_name};
	}
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 get_image_repository_directory_path |
| |
| Parameters : $image_name, $management_node_identifier (optional) |
| Returns : string |
| Description : Determines the path where the image resides on the management |
| node. Examples: |
| Partimage image: /install/image/x86 |
| Kickstart image: /install/centos5/x86_64 |
| |
| =cut |
| |
# Works out the directory on a management node which holds the files of an
# image. The result is cached per image name and management node hostname.
# Parameters : $image_name (optional), $management_node_identifier (optional)
# Returns    : string, undefined if required data could not be retrieved
sub get_image_repository_directory_path {
	my ($self, $image_name, $management_node_identifier) = @_;
	unless (ref($self) && $self->isa('VCL::Module')) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a VCL::Module module object method");
		return;
	}
	
	# Fall back to the reservation image if no image name was passed
	$image_name ||= $self->data->get_image_name();
	
	# Resolve the hostname, either of the specified or of the default management node
	my $management_node_hostname;
	if (!$management_node_identifier) {
		$management_node_hostname = $self->data->get_management_node_hostname();
	}
	else {
		$management_node_hostname = $self->data->get_management_node_hostname($management_node_identifier);
		unless ($management_node_hostname) {
			notify($ERRORS{'WARNING'}, 0, "management node hostname could not be determined from argument: $management_node_identifier");
			return;
		}
		notify($ERRORS{'DEBUG'}, 0, "management node identifier argument was specified: $management_node_identifier, hostname: $management_node_hostname");
	}
	
	# Return the cached value if the path was determined earlier
	if (defined($self->{xcat_image_repository_directory_path}{$image_name}{$management_node_hostname})) {
		return $self->{xcat_image_repository_directory_path}{$image_name}{$management_node_hostname};
	}
	
	my $management_node_install_path = $self->data->get_management_node_install_path($management_node_identifier);
	return unless $management_node_install_path;
	
	# Retrieve the image attributes via a DataStructure object created for the image
	my $image_data = $self->create_datastructure_object({image_identifier => $image_name});
	return unless $image_data;
	my $os_install_type = $image_data->get_image_os_install_type();
	return unless $os_install_type;
	my $os_source_path = $image_data->get_image_os_source_path();
	return unless $os_source_path;
	my $image_architecture = $image_data->get_image_architecture();
	return unless $image_architecture;
	
	# Strip trailing slashes from the install path and the OS source path
	for my $path_component ($management_node_install_path, $os_source_path) {
		$path_component =~ s{/+$}{};
	}
	
	notify($ERRORS{'DEBUG'}, 0, join("\n",
		"attempting to determine repository path for image on $management_node_hostname:",
		"install path : $management_node_install_path",
		"image name : $image_name",
		"OS install type : $os_install_type",
		"OS source path : $os_source_path",
		"architecture : $image_architecture",
	));
	
	my $image_repository_directory_path;
	if (index($os_source_path, '/') == 0) {
		# A leading / in the image OS source path is assumed to mean an absolute path
		$image_repository_directory_path = $os_source_path;
	}
	else {
		# Partimage images reside directly under OS.sourcepath, for Kickstart
		# the directory name is looked up and may differ from OS.sourcepath
		my $os_directory_name = $os_source_path;
		if ($os_install_type eq 'kickstart') {
			$os_directory_name = $self->get_nodetype_image_os_name($image_name) || $os_source_path;
		}
		$image_repository_directory_path = join('/', $management_node_install_path, $os_directory_name, $image_architecture);
	}
	
	$self->{xcat_image_repository_directory_path}{$image_name}{$management_node_hostname} = $image_repository_directory_path;
	notify($ERRORS{'DEBUG'}, 0, "determined repository directory path: $image_repository_directory_path");
	return $image_repository_directory_path;
} ## end sub get_image_repository_directory_path
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 get_image_repository_search_paths |
| |
| Parameters : $management_node_identifier (optional) |
| Returns : array |
| Description : Returns an array containing all of the possible paths where an |
| image may reside on the management node. |
| |
| =cut |
| |
# Assembles the list of glob patterns under which the files of the
# reservation image may be found on a management node.
# Parameters : $management_node_identifier (optional)
# Returns    : array of unique patterns, in the order they were generated
sub get_image_repository_search_paths {
	my ($self, $management_node_identifier) = @_;
	if (ref($self) !~ /VCL::Module/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	$management_node_identifier ||= $self->data->get_management_node_hostname();
	my $management_node_install_path = $self->data->get_management_node_install_path($management_node_identifier);
	return unless $management_node_install_path;
	my $image_name = $self->data->get_image_name();
	my $image_architecture = $self->data->get_image_architecture();
	
	# Strip trailing forward and backslashes
	$management_node_install_path =~ s{[\\/]+$}{};
	
	my %already_added;
	my @repository_search_paths;
	for my $base_directory_path ($management_node_install_path, '/install') {
		# The base directory, its image(s) subdirectories, then the
		# architecture-specific variations of each
		my @candidate_directory_paths = (
			$base_directory_path,
			"$base_directory_path/image",
			"$base_directory_path/images",
			map {
				(
					"$base_directory_path/image/$_",
					"$base_directory_path/images/$_",
					"$base_directory_path/$_",
				)
			} ($image_architecture, "x86", "x86_64"),
		);
		
		# Each directory is searched for <image name>-* and <image name>.*
		# Duplicates are dropped, the 1st occurrence keeps its position
		for my $candidate_directory_path (@candidate_directory_paths) {
			for my $search_path ("$candidate_directory_path/$image_name-*", "$candidate_directory_path/$image_name.*") {
				next if $already_added{$search_path}++;
				push @repository_search_paths, $search_path;
			}
		}
	}
	
	notify($ERRORS{'DEBUG'}, 0, "repository search paths on $management_node_identifier:\n" . join("\n", @repository_search_paths));
	return @repository_search_paths;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 power_reset |
| |
| Parameters : $computer_node_name (optional) |
| Returns : boolean |
| Description : Powers off and then powers on the computer. |
| |
| =cut |
| |
# Power cycles a computer by calling power_off followed by power_on. Each
# step is tried up to 3 times with a 2 second pause between tries.
# Parameters : $computer_node_name (optional)
# Returns    : 1 if both steps succeeded, undefined otherwise
sub power_reset {
	my ($self, $computer_node_name) = @_;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	# Fall back to the reservation computer if no name was passed
	$computer_node_name ||= $self->data->get_computer_node_name();
	unless ($computer_node_name) {
		notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified and could not be retrieved from \$self->data");
		return;
	}
	
	# Turn computer off
	for my $off_attempt (1 .. 3) {
		last if $self->power_off($computer_node_name);
		if ($off_attempt == 3) {
			notify($ERRORS{'WARNING'}, 0, "failed to turn $computer_node_name off, rpower status not is off after 3 attempts");
			return;
		}
		sleep 2;
	}
	
	# Turn computer on
	for my $on_attempt (1 .. 3) {
		last if $self->power_on($computer_node_name);
		if ($on_attempt == 3) {
			notify($ERRORS{'WARNING'}, 0, "failed to turn $computer_node_name on, rpower status not is on after 3 attempts");
			return;
		}
		sleep 2;
	}
	
	notify($ERRORS{'OK'}, 0, "successfully reset power on $computer_node_name");
	return 1;
} ## end sub power_reset
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 power_on |
| |
| Parameters : $computer_node_name (optional) |
| Returns : boolean |
| Description : Powers on the computer then checks to verify the computer is |
| powered on. |
| |
| =cut |
| |
# Powers on a computer via rpower and verifies that it is powered on.
# Parameters : $computer_node_name (optional)
# Returns    : 1 if the computer is on, undefined if it is not on after 3
#              attempts
sub power_on {
	my $self = shift;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	# Get the computer name argument
	my $computer_node_name = shift || $self->data->get_computer_node_name();
	if (!$computer_node_name) {
		notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified and could not be retrieved from \$self->data");
		return;
	}
	
	# Turn computer on
	# All 3 attempts are actually made, the failure is only reported after
	# the last attempt did not result in the computer being on
	my $attempt_limit = 3;
	for my $on_attempt (1 .. $attempt_limit) {
		$self->_rpower($computer_node_name, 'on');
		
		# Wait up to 1 minute for the computer power status to be on
		if ($self->_wait_for_on($computer_node_name, 60)) {
			notify($ERRORS{'OK'}, 0, "successfully powered on $computer_node_name");
			return 1;
		}
		
		# power_status returns undefined if the status could not be retrieved,
		# this is treated as 'not on' and another attempt is made
		my $power_status = $self->power_status($computer_node_name);
		if (defined($power_status) && $power_status =~ /on/) {
			notify($ERRORS{'OK'}, 0, "successfully powered on $computer_node_name");
			return 1;
		}
	}
	
	notify($ERRORS{'WARNING'}, 0, "failed to turn $computer_node_name on, rpower status not is on after $attempt_limit attempts");
	return;
} ## end sub power_on
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 power_off |
| |
| Parameters : $computer_node_name (optional) |
| Returns : boolean |
| Description : Powers off the computer then checks to verify the computer is |
| powered off. |
| |
| =cut |
| |
# Powers off a computer via rpower and verifies that it is powered off.
# Parameters : $computer_node_name (optional)
# Returns    : 1 if the computer is off, undefined if it is not off after 3
#              attempts or if the power status could not be determined
sub power_off {
	my $self = shift;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	# Get the computer name argument
	my $computer_node_name = shift || $self->data->get_computer_node_name();
	if (!$computer_node_name) {
		notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified and could not be retrieved from \$self->data");
		return;
	}
	
	# Turn computer off
	# All 3 attempts are actually made, the failure is only reported after
	# the last attempt did not result in the computer being off
	my $attempt_limit = 3;
	for my $off_attempt (1 .. $attempt_limit) {
		# Attempt to run rpower <node> off
		$self->_rpower($computer_node_name, 'off');
		
		# Wait up to 1 minute for the computer power status to be off
		if ($self->_wait_for_off($computer_node_name, 60)) {
			notify($ERRORS{'OK'}, 0, "successfully powered off $computer_node_name");
			return 1;
		}
		
		# Give up immediately if the power status can't be retrieved at all
		my $power_status = $self->power_status($computer_node_name);
		if (!defined($power_status)) {
			notify($ERRORS{'WARNING'}, 0, "failed to powered off $computer_node_name, failed to determine power_status");
			return;
		}
		elsif ($power_status =~ /off/) {
			notify($ERRORS{'OK'}, 0, "successfully powered off $computer_node_name");
			return 1;
		}
	}
	
	notify($ERRORS{'WARNING'}, 0, "failed to turn $computer_node_name off, rpower status not is off after $attempt_limit attempts");
	return;
} ## end sub power_off
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 power_status |
| |
| Parameters : $computer_node_name (optional) |
| Returns : string |
| Description : Retrieves the power status of the computer. The return value will |
| either be 'on', 'off', or undefined if an error occurred. |
| |
| =cut |
| |
# Queries the power state of a computer by calling _rpower with 'stat'.
# Parameters : $computer_node_name (optional)
# Returns    : 'on' or 'off' (lowercase), undefined if the status could not
#              be retrieved or was something other than on/off
sub power_status {
	my ($self, $computer_node_name) = @_;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	# Fall back to the reservation computer if no name was passed
	$computer_node_name ||= $self->data->get_computer_node_name();
	unless ($computer_node_name) {
		notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified and could not be retrieved from \$self->data");
		return;
	}
	
	# Call rpower to determine power status
	my $rpower_stat = $self->_rpower($computer_node_name, 'stat');
	unless (defined($rpower_stat)) {
		notify($ERRORS{'WARNING'}, 0, "failed to retrieve power status of $computer_node_name");
		return;
	}
	
	# Only an exact on/off value is accepted, case is ignored
	if (my ($power_state) = $rpower_stat =~ /^(on|off)$/i) {
		notify($ERRORS{'DEBUG'}, 0, "retrieved power status of $computer_node_name: $rpower_stat");
		return lc($power_state);
	}
	
	notify($ERRORS{'WARNING'}, 0, "failed to determine power status, unexpected output returned from rpower: $rpower_stat");
	return;
} ## end sub power_status
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _edit_nodelist |
| |
| Parameters : $computer_node_name, $image_name |
| Returns : boolean |
| Description : Edits the nodelist table to assign the xCAT node to the correct |
| groups. For image-based images: all,blade,image. Otherwise, |
| image.project is checked. If image.project = 'vcl', the groups |
| are all,blade,compute. If image.project is something other than |
| 'vcl', the groups are all,blade,<image.project>. |
| |
| =cut |
| |
# Sets nodelist.groups of an xCAT node by running nodech on the management
# node. The group list is chosen from the request state and image.project.
# Parameters : $computer_node_name, $image_name
# Returns    : 1 if nodech ran without reporting an error, undefined otherwise
sub _edit_nodelist {
	my ($self, $computer_node_name, $image_name) = @_;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	# Both arguments are required
	unless ($computer_node_name) {
		notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
		return;
	}
	unless ($image_name) {
		notify($ERRORS{'WARNING'}, 0, "image name argument was not specified");
		return;
	}
	
	# Retrieve the image attributes via a DataStructure object created for the image
	# Nothing is done unless both the OS install type and the project are available
	my $image_data = $self->create_datastructure_object({image_identifier => $image_name});
	return unless $image_data;
	return unless $image_data->get_image_os_install_type();
	my $image_project = $image_data->get_image_project();
	return unless $image_project;
	
	my $request_state_name = $self->data->get_request_state_name();
	
	# Determine the postscript group name
	#    image or checkpoint request state: 'image'
	#    image project is 'vcl': 'compute'
	#    otherwise: same as the image project
	# For HPC, use image project = vclhpc. There should be an xCAT postscript group named 'vclhpc' configured with specific HPC postscripts
	my $postscript_group;
	if ($request_state_name =~ /(image|checkpoint)/) {
		$postscript_group = 'image';
	}
	elsif ($image_project eq "vcl") {
		$postscript_group = 'compute';
	}
	else {
		# Likely a Kickstart based install
		$postscript_group = $image_project;
	}
	my $groups = "all,blade,$postscript_group";
	
	my $command = "$XCAT_ROOT/bin/nodech $computer_node_name nodelist.groups=$groups";
	my (undef, $output) = $self->mn_os->execute($command);
	unless (defined($output)) {
		notify($ERRORS{'WARNING'}, 0, "failed to execute command to set xCAT groups for $computer_node_name");
		return;
	}
	
	if (grep(/Error/i, @$output)) {
		notify($ERRORS{'WARNING'}, 0, "failed to set xCAT groups for $computer_node_name\ncommand: '$command'\noutput:\n" . join("\n", @$output));
		return;
	}
	
	if (grep(/\w/, @$output)) {
		# nodech normally doesn't produce any output if successful, display a warning if the output is not blank
		notify($ERRORS{'WARNING'}, 0, "unexpected output encountered attempting to set xCAT groups for $computer_node_name\ncommand: '$command'\noutput:\n" . join("\n", @$output));
	}
	else {
		notify($ERRORS{'OK'}, 0, "set xCAT groups for $computer_node_name, command: '$command'");
	}
	return 1;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _edit_nodetype |
| |
| Parameters : $computer_node_name, $image_name |
| Returns : boolean |
| Description : Edits the nodetype table for the computer to set nodetype.os, |
| nodetype.arch, and nodetype.profile to the image. |
| |
| =cut |
| |
# Sets nodetype.os, nodetype.arch and nodetype.profile of an xCAT node by
# running nodech on the management node.
# Parameters : $computer_node_name, $image_name
# Returns    : 1 if nodech ran without reporting an error, undefined if an
#              argument or required image attribute is missing or nodech failed
sub _edit_nodetype {
	my $self = shift;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	# Get the computer name argument
	my $computer_node_name = shift;
	if (!$computer_node_name) {
		notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
		return;
	}
	
	# Get the image name argument
	my $image_name = shift;
	if (!$image_name) {
		notify($ERRORS{'WARNING'}, 0, "image name argument was not specified");
		return;
	}
	
	# Create a DataStructure object containing info about the image
	my $image_data = $self->create_datastructure_object({image_identifier => $image_name}) || return;
	
	my $image_architecture = $image_data->get_image_architecture();
	my $image_os_install_type = $image_data->get_image_os_install_type();
	my $image_os_name = $image_data->get_image_os_name();
	
	# Don't continue with incomplete image data, an undefined value would be
	# interpolated into the nodech command as an empty attribute value
	if (!$image_architecture || !$image_os_install_type) {
		notify($ERRORS{'WARNING'}, 0, "unable to edit xCAT nodetype table for $computer_node_name, image architecture or OS install type could not be determined for image $image_name");
		return;
	}
	
	my $request_state_name = $self->data->get_request_state_name();
	$request_state_name = '' if !defined($request_state_name);
	
	my $nodetype_os;
	if ($request_state_name =~ /(image|checkpoint)/ || $image_os_install_type =~ /image/) {
		$nodetype_os = 'image';
	}
	elsif ($image_os_install_type =~ /kickstart/i) {
		# Try to dynamically determine the value for nodetype.os
		$nodetype_os = $self->get_nodetype_image_os_name($image_name);
	}
	else {
		$nodetype_os = $image_os_name;
	}
	
	# Both sources above may return undefined
	if (!$nodetype_os) {
		notify($ERRORS{'WARNING'}, 0, "unable to edit xCAT nodetype table for $computer_node_name, nodetype.os value could not be determined for image $image_name");
		return;
	}
	
	my $command = "$XCAT_ROOT/bin/nodech $computer_node_name nodetype.os=$nodetype_os nodetype.arch=$image_architecture nodetype.profile=$image_name";
	my ($exit_status, $output) = $self->mn_os->execute($command);
	if (!defined($output)) {
		notify($ERRORS{'WARNING'}, 0, "failed to execute command to edit xCAT configuration of $computer_node_name: $command");
		return;
	}
	elsif (grep(/Error/i, @$output)) {
		# If an error occurs the output will look like this:
		# Error: Invalid nodes and/or groups in noderange: vclh3-00
		notify($ERRORS{'WARNING'}, 0, "failed to edit xCAT configuration of $computer_node_name, command: '$command'\noutput:\n" . join("\n", @$output));
		return;
	}
	elsif (grep(/\w/, @$output)) {
		# nodech normally doesn't produce any output if successful, display a warning if the output is not blank
		notify($ERRORS{'WARNING'}, 0, "unexpected output encountered attempting to edit xCAT configuration of $computer_node_name\ncommand: '$command'\noutput:\n" . join("\n", @$output));
		return 1;
	}
	else {
		notify($ERRORS{'OK'}, 0, "edited xCAT configuration of $computer_node_name, command: '$command'");
		return 1;
	}
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _lsdef |
| |
| Parameters : $computer_node_name |
| Returns : hash reference |
| Description : Runs lsdef to retrieve the xCAT object definition of the node. |
| |
| =cut |
| |
sub _lsdef {
    # Runs lsdef on the management node for a single node and converts the
    # indented "property=value" lines of the output into a hash reference.
    # Returns nothing if the command could not be executed, if the output
    # contains an error, or if no properties could be parsed.
    my ($self, $computer_node_name) = @_;

    unless (ref($self) =~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    # The computer name argument is required
    unless ($computer_node_name) {
        notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
        return;
    }

    my $command = "$XCAT_ROOT/bin/lsdef $computer_node_name";
    my (undef, $output) = $self->mn_os->execute($command);
    unless (defined($output)) {
        notify($ERRORS{'WARNING'}, 0, "failed to execute lsdef command for $computer_node_name");
        return;
    }

    # Expected output:
    # Object name: vclh3-4
    #     arch=x86_64
    #     cons=blade
    #     currchain=boot
    #     currstate=install centos5-x86_64-centos5-base641008-v0
    #     installnic=eth0
    #     kernel=xcat/centos5/x86_64/vmlinuz
    #     mac=xx:xx:xx:xx:xx:xx
    #     ...

    # Only indented lines of the form property=value contribute to the result
    my %properties;
    OUTPUT_LINE: foreach my $output_line (@$output) {
        next OUTPUT_LINE unless $output_line =~ /^[\s\t]+(\w[^=]+)=(.+)$/;
        $properties{$1} = $2;
    }

    # An error line anywhere in the output, or no parsed properties, means failure
    my $error_line_count = grep(/Error:/i, @$output);
    if ($error_line_count || !%properties) {
        notify($ERRORS{'WARNING'}, 0, "failed to run lsdef for $computer_node_name, output:\n" . join("\n", @$output));
        return;
    }

    my $node_definition = \%properties;
    notify($ERRORS{'DEBUG'}, 0, "retrieved xCAT object definition for $computer_node_name:\n" . format_data($node_definition));
    return $node_definition;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _nodestat |
| |
| Parameters : $computer_name |
| Returns : string |
| Description : Runs nodestat and returns the status string reported for the node. |
| |
| =cut |
| |
sub _nodestat {
    # Runs nodestat on the management node for a single node and returns the
    # status text that follows "<node name>:" in the output.
    # Parameters : $computer_node_name
    # Returns    : status string, or nothing if the command could not be
    #              executed or no line for the node was found in the output
    my $self = shift;
    if (ref($self) !~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    # Get the computer name argument
    my $computer_node_name = shift;
    if (!$computer_node_name) {
        notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
        return;
    }

    my $command = "$XCAT_ROOT/bin/nodestat $computer_node_name";
    my ($exit_status, $output) = $self->mn_os->execute($command);
    if (!defined($output)) {
        notify($ERRORS{'WARNING'}, 0, "failed to execute nodestat command for $computer_node_name");
        return;
    }

    # Expected output:
    # vclh3-4: installing prep
    for my $line (@$output) {
        # \Q...\E makes the node name match literally; without it a '.' in
        # the name would match any character and could select the wrong line
        my ($status) = $line =~ /^\Q$computer_node_name\E:\s+(.+)$/;
        if ($status) {
            notify($ERRORS{'DEBUG'}, 0, "retrieved nodestat status of $computer_node_name: '$status'");
            return $status;
        }
    }

    # Line containing node name was not found
    notify($ERRORS{'WARNING'}, 0, "failed to retrieve nodestat status of $computer_node_name\ncommand: '$command'\noutput:\n" . join("\n", @$output));
    return;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _nodeset |
| |
| Parameters : $computer_name, $nodeset_option |
| Returns : boolean or string |
| Description : Runs nodeset to set the boot state of the node. |
| |
| =cut |
| |
sub _nodeset {
    # Runs nodeset on the management node for a single node.
    # Parameters : $computer_node_name, $nodeset_option
    # Returns    : if the option is 'stat', the status string reported for the
    #              node; for any other option, 1 when a line for the node is
    #              present in the output; nothing on failure
    my $self = shift;
    if (ref($self) !~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    # Get the computer name argument
    my $computer_node_name = shift;
    if (!$computer_node_name) {
        notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
        return;
    }

    # Get the nodeset option argument
    my $nodeset_option = shift;
    if (!$nodeset_option) {
        notify($ERRORS{'WARNING'}, 0, "nodeset option argument was not specified");
        return;
    }

    my $command = "$XCAT_ROOT/sbin/nodeset $computer_node_name $nodeset_option";
    my ($exit_status, $output) = $self->mn_os->execute($command);
    if (!defined($output)) {
        notify($ERRORS{'WARNING'}, 0, "failed to execute nodeset command for $computer_node_name");
        return;
    }
    elsif (grep(/(Error:|nodeset failure)/, @$output)) {
        notify($ERRORS{'WARNING'}, 0, "failed to execute nodeset command for $computer_node_name\ncommand: $command\noutput:\n" . join("\n", @$output));
        return;
    }

    # Expected output:
    # $ nodeset vclh3-4 boot
    # vclh3-4: boot
    # $ nodeset vclh3-4 image
    # vclh3-4: image image-x86-centos5image-arktest-v0
    # Find the line containing the node name
    for my $line (@$output) {
        # \Q...\E makes the node name match literally; without it a '.' in
        # the name would match any character and could select the wrong line
        my ($status) = $line =~ /^\Q$computer_node_name\E:\s+(.+)$/;
        if ($status) {
            if ($nodeset_option eq 'stat') {
                notify($ERRORS{'DEBUG'}, 0, "retrieved nodeset status of $computer_node_name: '$status'");
                return $status;
            }
            else {
                notify($ERRORS{'DEBUG'}, 0, "set nodeset status of $computer_node_name to $nodeset_option, output:\n" . join("\n", @$output));
                return 1;
            }
        }
    }

    # Line containing node name was not found
    if ($nodeset_option eq 'stat') {
        notify($ERRORS{'WARNING'}, 0, "failed to retrieve nodeset status of $computer_node_name\ncommand: '$command'\noutput:\n" . join("\n", @$output));
    }
    else {
        notify($ERRORS{'WARNING'}, 0, "failed to set nodeset status of $computer_node_name to $nodeset_option\ncommand: '$command'\noutput:\n" . join("\n", @$output));
    }
    return;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _get_nodeset_all_stat_info |
| |
| Parameters : none |
| Returns : hash reference |
| Description : Calls 'nodeset all stat' to retrieve the status of all nodes. A |
| hash is constructed. The keys are the node names. The values are |
| the status. |
| |
| =cut |
| |
sub _get_nodeset_all_stat_info {
    # Runs 'nodeset all stat' on the management node and builds a hash
    # reference mapping each node name to the status text reported for it.
    # Lines which cannot be parsed as "<node>: <status>" are reported with a
    # warning and left out of the result. Returns nothing if the command
    # could not be executed or an error line is found in the output.
    my ($self) = @_;

    unless (ref($self) =~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    my $command = "$XCAT_ROOT/sbin/nodeset all stat";
    my (undef, $output) = $self->mn_os->execute($command);

    if (!defined($output)) {
        notify($ERRORS{'WARNING'}, 0, "failed to execute command to retrieve xCAT nodeset status for all nodes");
        return;
    }
    if (grep(/^Error:/i, @$output)) {
        notify($ERRORS{'WARNING'}, 0, "failed to retrieve xCAT nodeset status for all nodes\ncommand: '$command'\noutput:\n" . join("\n", @$output));
        return;
    }

    my %status_for_node;
    foreach my $output_line (@$output) {
        my ($node_name, $node_status) = $output_line =~ /^([^:]+):\s+(.+)$/;

        # Both parts must be present and true for the line to be recorded
        unless ($node_name && $node_status) {
            notify($ERRORS{'WARNING'}, 0, "unable to parse nodeset stat output line: '$output_line'");
            next;
        }

        $status_for_node{$node_name} = $node_status;
    }

    return \%status_for_node;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _wait_for_on |
| |
| Parameters : $computer_node_name, $total_wait_seconds (optional) |
| Returns : boolean |
| Description : Loops until the computer's power status is 'on'. The default wait |
| time is 1 minute. |
| |
| =cut |
| |
sub _wait_for_on {
    # Repeatedly checks the power status of a node via code_loop_timeout
    # until the status contains 'on'. The total wait defaults to 60 seconds
    # when the second argument is omitted or false.
    my ($self, $computer_node_name, $total_wait_seconds) = @_;

    unless (ref($self) =~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    unless ($computer_node_name) {
        notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
        return;
    }

    $total_wait_seconds ||= 60;

    # Condition evaluated on every pass: true once the power status matches 'on'
    my $is_powered_on = sub {
        my $power_status = $self->power_status(@_) || '';
        return ($power_status =~ /on/i) ? 1 : 0;
    };

    my $loop_message = "waiting for $computer_node_name to power on";
    return $self->code_loop_timeout($is_powered_on, [$computer_node_name], $loop_message, $total_wait_seconds, 5);
} ## end sub _wait_for_on
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _wait_for_off |
| |
| Parameters : $computer_node_name, $total_wait_seconds (optional) |
| Returns : boolean |
| Description : Loops until the computer's power status is 'off'. The default |
| wait time is 1 minute. |
| |
| =cut |
| |
sub _wait_for_off {
    # Repeatedly checks the power status of a node via code_loop_timeout
    # until the status contains 'off'. The total wait defaults to 60 seconds
    # when the second argument is omitted or false.
    my ($self, $computer_node_name, $total_wait_seconds) = @_;

    unless (ref($self) =~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    unless ($computer_node_name) {
        notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
        return;
    }

    $total_wait_seconds ||= 60;

    # Condition evaluated on every pass: true once the power status matches 'off'
    my $is_powered_off = sub {
        my $power_status = $self->power_status(@_) || '';
        return ($power_status =~ /off/i) ? 1 : 0;
    };

    my $loop_message = "waiting for $computer_node_name to power off";
    return $self->code_loop_timeout($is_powered_off, [$computer_node_name], $loop_message, $total_wait_seconds, 5);
} ## end sub _wait_for_off
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _rpower |
| |
| Parameters : $computer_name, $rpower_option |
| Returns : string |
| Description : Controls the power of the node by running the xCAT rpower |
| command. Options: |
| on - Turn power on |
| off - Turn power off |
| stat | state - Return the current power state |
| reset - Send a hardware reset |
| boot - If off, then power on. If on, then hard reset. |
| cycle - Power off, then on |
| |
| Multiple rpower attempts will be made if an error is |
| detected. For non-timeout errors, the default number of attempts |
| is 3. This can be overridden if either of the following variables |
| exist in the variable table in the database: |
| xcat|rpower_error_limit|<management node hostname> |
| xcat|rpower_error_limit |
| |
| Timeout errors are counted separately and do not count towards |
| the general error limit. The default number of timeout errors |
| which may be encountered is 5. This can be overridden if either |
| of the following variables exist in the variable table in the |
| database: |
| xcat|timeout_error_limit|<management node hostname> |
| xcat|timeout_error_limit |
| |
| =cut |
| |
sub _rpower {
    # Issues an xCAT rpower command for a node on the management node and
    # returns the last whitespace-delimited word of the node's status line.
    # Parameters : $computer_node_name, $rpower_option
    # Returns    : string (e.g. 'on', 'off', 'reset'), or nothing on failure
    #
    # The command is retried when an error is detected. Non-timeout errors
    # are limited by $rpower_error_limit (default 3). Timeout errors are
    # counted separately, extend the number of attempts allowed, and are
    # limited by $timeout_error_limit (default 5).
    my $self = shift;
    if (ref($self) !~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    my $computer_node_name = shift;
    if (!$computer_node_name) {
        notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
        return;
    }

    my $rpower_option = shift;
    if (!$rpower_option) {
        notify($ERRORS{'WARNING'}, 0, "rpower option argument was not specified");
        return;
    }

    my $management_node_hostname = $self->data->get_management_node_hostname();

    my $command = "$XCAT_ROOT/bin/rpower $computer_node_name $rpower_option";

    # The management node specific variable takes precedence over the general one
    my $rpower_attempt = 0;
    my $rpower_error_limit = get_variable("xcat|rpower_error_limit|$management_node_hostname", 0) || get_variable("xcat|rpower_error_limit", 0);
    if (!$rpower_error_limit || $rpower_error_limit !~ /^\d+$/) {
        $rpower_error_limit = 3;
    }

    my $timeout_error_count = 0;
    my $timeout_error_limit = get_variable("xcat|timeout_error_limit|$management_node_hostname", 0) || get_variable("xcat|timeout_error_limit", 0);
    if (!$timeout_error_limit || $timeout_error_limit !~ /^\d+$/) {
        $timeout_error_limit = 5;
    }

    my $rinv_attempted = 0;

    # The attempt counter is incremented inside the loop, so the comparison
    # must be strict: '<=' would allow one attempt more than the limit
    # (e.g. "attempt 4/3"). Each timeout error adds one allowed attempt.
    RPOWER_ATTEMPT: while ($rpower_attempt < ($rpower_error_limit + $timeout_error_count)) {
        $rpower_attempt++;

        if ($rpower_attempt > 1) {
            # Wait a random amount of time to prevent several cluster reservations from reattempting at the same time
            # This delay applies to every retry, including retries after a timeout error
            my $rpower_attempt_delay = int(rand($rpower_attempt*2))+1;

            my $notify_string = "attempt $rpower_attempt/$rpower_error_limit";
            if ($timeout_error_count) {
                $notify_string .= "+$timeout_error_count (timeout errors: $timeout_error_count/$timeout_error_limit)";
            }
            $notify_string .= ": waiting $rpower_attempt_delay before issuing rpower $rpower_option command for $computer_node_name";
            notify($ERRORS{'DEBUG'}, 0, $notify_string);
            sleep $rpower_attempt_delay;
        }

        my ($exit_status, $output) = $self->mn_os->execute($command);
        if (!defined($output)) {
            notify($ERRORS{'WARNING'}, 0, "failed to execute rpower command for $computer_node_name");
            return;
        }
        elsif (grep(/Error: Timeout/, @$output)) {
            # blade2f3-14: Error: Timeout
            $timeout_error_count++;
            if ($timeout_error_count >= $timeout_error_limit) {
                notify($ERRORS{'WARNING'}, 0, "attempt $rpower_attempt: failed to issue rpower $rpower_option command for $computer_node_name, timeout error limit reached: $timeout_error_count");
                return;
            }
            else {
                # The retry delay is applied at the top of the next iteration
                notify($ERRORS{'DEBUG'}, 0, "attempt $rpower_attempt: encountered timeout error $timeout_error_count/$timeout_error_limit");
                next RPOWER_ATTEMPT;
            }
        }
        elsif (grep(/Error:/, @$output)) {
            notify($ERRORS{'WARNING'}, 0, "attempt $rpower_attempt: failed to issue rpower command for $computer_node_name\ncommand: $command\noutput:\n" . join("\n", @$output));

            # Attempt to run rinv once if an error was detected, it may fix the following error:
            # Error: Invalid nodes and/or groups in noderange: bladex
            if (!$rinv_attempted) {
                # Attempt to run rinv to fix any inventory problems with the blade
                notify($ERRORS{'DEBUG'}, 0, "attempt $rpower_attempt: failed to initiate rpower for $computer_node_name, attempting to run rinv");
                $self->_rinv($computer_node_name);
                $rinv_attempted = 1;
            }

            next RPOWER_ATTEMPT;
        }

        # Expected output:
        # Invalid node is specified (exit status = 0):
        #    [root@managementnode]# rpower vclb2-8x stat
        #    invalid node, group, or range: vclb2-8x
        # Successful off (exit status = 0):
        #    [root@managementnode]# rpower vclb2-8 off
        #    vclb2-8: off
        # Successful reset (exit status = 0):
        #    [root@managementnode test]# rpower vclb2-8 reset
        #    vclb2-8: reset
        # Successful stat (exit status = 0):
        #    [root@managementnode test]# rpower vclb2-8 stat
        #    vclb2-8: on
        # Successful cycle (exit status = 0):
        #    [root@managementnode test]# rpower vclb2-8 cycle
        #    vclb2-8: off on

        # Find the line containing the node name
        for my $line (@$output) {
            # \Q...\E makes the node name match literally; the last word on
            # the line is returned (e.g. 'on' for "vclb2-8: off on")
            my ($status) = $line =~ /^\Q$computer_node_name\E:.*\s([^\s]+)$/;
            if ($status) {
                notify($ERRORS{'DEBUG'}, 0, "issued rpower $rpower_option command for $computer_node_name, status line: '$line', returning '$status'");
                return $status;
            }
        }

        # Output could not be parsed, fall through to the next attempt
        notify($ERRORS{'WARNING'}, 0, "failed to parse rpower output\ncommand: $command\noutput:\n" . join("\n", @$output));
    }

    notify($ERRORS{'WARNING'}, 0, "failed to issue rpower command for $computer_node_name, made $rpower_attempt attempts");
    return;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _rinv |
| |
| Parameters : $computer_name |
| Returns : hash reference |
| Description : Retrieves the hardware inventory of the node. A hash is returned, |
| usually containing the following parameters: |
| { |
| "BIOS" => "1.14 (MJE133AUS 03/13/2009)", |
| "BMC/Mgt processor" => "1.30 (MJBT30A)", |
| "Diagnostics" => "1.03 (MJYT17AUS 03/07/2008)", |
| "MAC Address 1" => "xx:xx:xx:xx:xx:xx", |
| "MAC Address 2" => "yy:yy:yy:yy:yy:yy", |
| "Machine Type/Model" => 7995, |
| "Management Module firmware" => "50 (BPET50P 03/26/2010)", |
| "Serial Number" => "wwwwwww" |
| } |
| |
| =cut |
| |
sub _rinv {
    # Runs rinv on the management node for a single node and parses the
    # "<node>: <parameter>: <value>" lines of the output.
    # Parameters : $computer_node_name
    # Returns    : hash reference of parameter => value pairs, or nothing if
    #              the command failed or no inventory lines could be parsed
    my $self = shift;
    if (ref($self) !~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    # Get the computer name argument
    my $computer_node_name = shift;
    if (!$computer_node_name) {
        notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
        return;
    }

    my $command = "$XCAT_ROOT/bin/rinv $computer_node_name";
    my ($exit_status, $output) = $self->mn_os->execute($command);
    if (!defined($output)) {
        notify($ERRORS{'WARNING'}, 0, "failed to execute rinv command for $computer_node_name");
        return;
    }
    elsif (grep(/Error:/, @$output)) {
        notify($ERRORS{'WARNING'}, 0, "failed to issue rinv command for $computer_node_name\ncommand: $command\noutput:\n" . join("\n", @$output));
        return;
    }

    # Expected output:
    # vclh3-4: Machine Type/Model: 7995
    # vclh3-4: Serial Number: wwwww
    # vclh3-4: MAC Address 1: xx:xx:xx:xx:xx:xx
    # vclh3-4: MAC Address 2: yy:yy:yy:yy:yy:yy
    # vclh3-4: BIOS: 1.14 (MJE133AUS 03/13/2009)
    # vclh3-4: Diagnostics: 1.03 (MJYT17AUS 03/07/2008)
    # vclh3-4: BMC/Mgt processor: 1.30 (MJBT30A)
    # vclh3-4: Management Module firmware: 50 (BPET50P 03/26/2010)

    # Collect every line which begins with the node name
    my $rinv_info;
    for my $line (@$output) {
        # \Q...\E makes the node name match literally; without it a '.' in
        # the name would match any character
        my ($parameter, $value) = $line =~ /^\Q$computer_node_name\E:\s+([^:]+):\s+(.+)$/;
        if (defined($parameter) && defined($value)) {
            $rinv_info->{$parameter} = $value;
        }
    }

    if ($rinv_info) {
        notify($ERRORS{'DEBUG'}, 0, "retrieved inventory of $computer_node_name:\n" . format_data($rinv_info));
        return $rinv_info;
    }
    else {
        # Line containing node name was not found
        notify($ERRORS{'WARNING'}, 0, "failed to issue rinv command for $computer_node_name\ncommand: '$command'\noutput:\n" . join("\n", @$output));
        return;
    }
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _get_tmpl_directory_path |
| |
| Parameters : $image_name, $management_node_identifier (optional) |
| Returns : string |
| Description : Determines the directory where the image template file resides |
| for the image. Example: |
| /opt/xcat/share/xcat/install/rh |
| |
| =cut |
| |
sub _get_tmpl_directory_path {
    # Builds the path of the directory which holds the template file for an
    # image: <xCAT root>/share/xcat/install/<normalized OS source path>.
    # The OS source path of the image is normalized by removing periods and
    # trailing digits and by collapsing rh* to 'rh' and esx* to 'esx'.
    # Returns nothing if the image information cannot be retrieved.
    my ($self, $image_name, $management_node_identifier) = @_;

    if (!ref($self) || !$self->isa('VCL::Module')) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a VCL::Module module object method");
        return;
    }

    # The image name argument is required
    if (!$image_name) {
        notify($ERRORS{'WARNING'}, 0, "image name argument was not specified");
        return;
    }

    # The management node identifier is optional and is only reported here
    if ($management_node_identifier) {
        notify($ERRORS{'DEBUG'}, 0, "management node identifier argument was specified: $management_node_identifier");
    }

    # Retrieve the image properties, giving up as soon as one is unavailable
    my $image_data = $self->create_datastructure_object({image_identifier => $image_name});
    return unless $image_data;

    my $image_os_source_path = $image_data->get_image_os_source_path();
    return unless $image_os_source_path;

    my $image_os_install_type = $image_data->get_image_os_install_type();
    return unless $image_os_install_type;

    # Drop a trailing / from the xCAT root and from the OS source path
    my $xcat_root = $XCAT_ROOT;
    $xcat_root =~ s/\/$//;
    $image_os_source_path =~ s/\/$//;

    # Convert the OS source path to the xCAT 2.x form:
    #    periods are removed
    #    centos5 --> centos
    #    rhas5   --> rh
    #    esxi    --> esx
    (my $xcat2_image_os_source_path = $image_os_source_path) =~ s/\.//g;
    $xcat2_image_os_source_path =~ s/\d+$//g;
    $xcat2_image_os_source_path =~ s/^rh.*/rh/;
    $xcat2_image_os_source_path =~ s/^esx.*/esx/i;

    notify($ERRORS{'DEBUG'}, 0, join("\n",
        "attempting to determine template path for image:",
        "image name: $image_name",
        "OS install type: $image_os_install_type",
        "OS source path: $image_os_source_path",
        "xCAT 2.x OS source path: $xcat2_image_os_source_path",
        "",
    ));

    my $image_template_path = join('/', $xcat_root, 'share/xcat/install', $xcat2_image_os_source_path);
    notify($ERRORS{'DEBUG'}, 0, "returning: $image_template_path");
    return $image_template_path;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _create_template |
| |
| Parameters : $image_name |
| Returns : boolean |
| Description : Creates a template file (.tmpl) for the image. |
| |
| =cut |
| |
sub _create_template {
    # Creates the template file (<image name>.tmpl) for an image by copying
    # a base template located in the image's template directory. The base
    # template is chosen from most to least specific: <OS name>.tmpl, then
    # <OS type>.tmpl, then default.tmpl. Returns 1 on success, nothing on
    # failure.
    my ($self, $image_name) = @_;

    unless (ref($self) =~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    # The image name argument is required
    unless ($image_name) {
        notify($ERRORS{'WARNING'}, 0, "image name argument was not specified");
        return;
    }

    # Retrieve the image properties, giving up as soon as one is unavailable
    my $image_data = $self->create_datastructure_object({image_identifier => $image_name});
    return unless $image_data;

    my $image_os_name = $image_data->get_image_os_name();
    return unless $image_os_name;

    my $image_os_type = $image_data->get_image_os_type_name();
    return unless $image_os_type;

    # Get the image template directory path
    my $template_directory_path = $self->_get_tmpl_directory_path($image_name);
    unless ($template_directory_path) {
        notify($ERRORS{'WARNING'}, 0, "template directory path could not be determined") ;
        return;
    }

    # Candidate base templates in order of preference, each paired with the
    # description used in the log message when it is selected
    my @base_template_candidates = (
        ["$image_os_name.tmpl", 'OS specific'],
        ["$image_os_type.tmpl", 'OS type specific'],
        ['default.tmpl',        'default'],
    );

    my $base_template_file_name;
    CANDIDATE: foreach my $candidate (@base_template_candidates) {
        my ($candidate_file_name, $candidate_description) = @$candidate;
        next CANDIDATE unless $self->mn_os->file_exists("$template_directory_path/$candidate_file_name");

        $base_template_file_name = $candidate_file_name;
        notify($ERRORS{'DEBUG'}, 0, "$candidate_description base image template file found: $template_directory_path/$candidate_file_name");
        last CANDIDATE;
    }

    unless (defined($base_template_file_name)) {
        notify($ERRORS{'WARNING'}, 0, "failed to find suitable base image template file in $template_directory_path");
        return;
    }

    my $base_template_file_path = "$template_directory_path/$base_template_file_name";
    my $image_template_file_path = "$template_directory_path/$image_name.tmpl";

    notify($ERRORS{'DEBUG'}, 0, join("\n",
        "attempting to create template file for image: $image_name",
        "base template file: $base_template_file_path",
        "image template file: $image_template_file_path",
    ));

    # Create a copy of the base template file
    unless ($self->mn_os->copy_file($base_template_file_path, $image_template_file_path)) {
        notify($ERRORS{'WARNING'}, 0, "failed to create template file: $base_template_file_path --> $image_template_file_path");
        return;
    }

    # A false size (undefined or 0) is treated as a failed copy
    my $template_file_size_bytes = $self->mn_os->get_file_size($image_template_file_path);
    unless ($template_file_size_bytes) {
        notify($ERRORS{'WARNING'}, 0, "failed to retrieve size of new image template file: $image_template_file_path");
        return;
    }
    notify($ERRORS{'DEBUG'}, 0, "verified image template file exists and is not blank: $image_template_file_path, size: $template_file_size_bytes bytes");

    notify($ERRORS{'OK'}, 0, "created image template file: $image_template_file_path");
    return 1;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _delete_template |
| |
| Parameters : $image_name |
| Returns : boolean |
| Description : Deletes a template file (.tmpl) for the image. |
| |
| =cut |
| |
sub _delete_template {
    # Deletes the template file (<image name>.tmpl) of an image from the
    # image's template directory by running /bin/rm, then verifies that the
    # file no longer exists.
    # Parameters : $image_name
    # Returns    : 1 if the file was deleted (or did not exist), nothing on
    #              failure
    my $self = shift;
    if (ref($self) !~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    # Get the image name argument
    my $image_name = shift;
    if (!$image_name) {
        notify($ERRORS{'WARNING'}, 0, "image name argument was not specified");
        return;
    }

    notify($ERRORS{'OK'}, 0, "attempting to delete tmpl file for image: $image_name");

    # Get the image template repository path
    my $tmpl_repository_path = $self->_get_tmpl_directory_path($image_name);
    if (!$tmpl_repository_path) {
        notify($ERRORS{'WARNING'}, 0, "xCAT template repository information could not be determined");
        return;
    }

    my $tmpl_file_path = "$tmpl_repository_path/$image_name.tmpl";

    # Delete the template file
    # $? is captured immediately so that later statements cannot change it
    my $rm_output = `/bin/rm -fv $tmpl_file_path 2>&1`;
    my $child_status = $?;
    my $rm_exit_status = $child_status >> 8;

    # Check if $? = -1, this likely means a Perl CHLD signal bug was encountered
    if ($child_status == -1) {
        notify($ERRORS{'OK'}, 0, "\$? is set to $child_status, setting exit status to 0, Perl bug likely encountered");
        $rm_exit_status = 0;
    }

    if ($rm_exit_status == 0) {
        notify($ERRORS{'DEBUG'}, 0, "deleted $tmpl_file_path, output:\n$rm_output");
    }
    else {
        notify($ERRORS{'WARNING'}, 0, "failed to delete $tmpl_file_path, returning undefined, exit status: $rm_exit_status, output:\n$rm_output");
        return;
    }

    # Make sure template file was deleted
    # -e is used rather than -s so that a leftover empty file is also detected
    if (-e $tmpl_file_path) {
        notify($ERRORS{'WARNING'}, 0, "template file should have been deleted but still exists: $tmpl_file_path, returning undefined");
        return;
    }
    else {
        notify($ERRORS{'DEBUG'}, 0, "confirmed template file was deleted: $tmpl_file_path");
    }

    notify($ERRORS{'OK'}, 0, "successfully deleted template file: $tmpl_file_path");
    return 1;
} ## end sub _delete_template
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 _is_throttle_limit_reached |
| |
| Parameters : $throttle_limit |
| Returns : boolean |
| Description : Checks the status of all nodes and counts how many are currently |
| installing or capturing an image (nodeset status is either |
| 'install' or 'image'). The processes running on the management |
| node are then checked to determine if a vcld process is actually |
| running for each of the active nodes reported by nodeset. Nodes |
| only count against the throttle limit if a process is running. |
| |
| =cut |
| |
sub _is_throttle_limit_reached {
    # Determines whether the number of other nodes currently being installed
    # or captured has reached the throttle limit.
    # Parameters : $throttle_limit
    # Returns    : 1 if the limit is reached, 0 if it is not, nothing if the
    #              state could not be determined
    #
    # A node is counted only if its nodeset status begins with 'install' or
    # 'image' AND a running vcld process whose name contains the node name
    # is found. The node of this reservation is never counted.
    my $self = shift;
    if (ref($self) !~ /xCAT/i) {
        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
        return;
    }

    # Get the throttle limit argument
    my $throttle_limit = shift;
    if (!defined($throttle_limit)) {
        notify($ERRORS{'WARNING'}, 0, "throttle limit argument was not supplied");
        return;
    }

    my $computer_node_name = $self->data->get_computer_node_name();

    # Get the nodeset status for all nodes
    my $nodeset_all_stat_info = $self->_get_nodeset_all_stat_info();
    if (!defined($nodeset_all_stat_info)) {
        notify($ERRORS{'WARNING'}, 0, "unable to determine if throttle limit is reached, failed to retrieve nodeset status of all nodes");
        return;
    }
    #notify($ERRORS{'DEBUG'}, 0, "retrieved nodeset status of all nodes:\n" . format_data($nodeset_all_stat_info));

    my @nodeset_active_nodes;
    for my $node_name (keys %$nodeset_all_stat_info) {
        my $node_status = $nodeset_all_stat_info->{$node_name};

        # Ignore this computer
        if ($node_name eq $computer_node_name) {
            next;
        }

        if ($node_status =~ /^(install|image)/i) {
            push @nodeset_active_nodes, $node_name;
        }
    }

    # Check if throttle limit has been reached according to nodeset
    my $nodeset_active_node_count = scalar(@nodeset_active_nodes);
    if ($nodeset_active_node_count < $throttle_limit) {
        notify($ERRORS{'DEBUG'}, 0, "throttle limit has NOT been reached according to nodeset:\nnodes currently being installed or captured: $nodeset_active_node_count\nthrottle limit: $throttle_limit");
        return 0;
    }

    # nodeset reports that the throttle limit has been reached
    # This doesn't necessarily mean all those nodes are really being installed or captured
    # If problems occur, a vcld process may die and leave nodes in the install or image state
    # Verify that a running process exists for each node
    notify($ERRORS{'DEBUG'}, 0, "throttle limit has been reached according to nodeset:\nnodes currently being installed or captured: $nodeset_active_node_count\nthrottle limit: $throttle_limit");

    # Get the list of all vcld processes running on the management node
    my $process_identifier = $PROCESSNAME;
    if ($PROCESSNAME ne 'vcld') {
        $process_identifier .= "|vcld";
    }
    my $vcld_processes = is_management_node_process_running($process_identifier);
    if (!$vcld_processes) {
        notify($ERRORS{'WARNING'}, 0, "unable to determine if nodes are actively being loaded or captured, failed to retrieve names of any running vcld processes");
        return;
    }

    # Only the values of the returned hash (the process names) are used here
    my @vcld_process_names = values(%$vcld_processes);
    notify($ERRORS{'DEBUG'}, 0, "vcld process names:\n" . join("\n", @vcld_process_names));

    my $active_process_node_count = 0;
    for my $node_name (sort { $a cmp $b } @nodeset_active_nodes) {
        my $nodeset_status = $nodeset_all_stat_info->{$node_name};

        # \Q...\E makes the node name match literally; the name must be
        # surrounded by whitespace within the process name
        my @node_process_names = grep(/\s\Q$node_name\E\s/, @vcld_process_names);
        my $node_process_count = scalar(@node_process_names);
        if (!$node_process_count) {
            # No running process was found, the node does not count against the limit
            #notify($ERRORS{'DEBUG'}, 0, "ignoring $node_name from throttle limit consideration, nodeset status is '$nodeset_status' but running vcld process NOT detected");
        }
        elsif ($node_process_count == 1) {
            notify($ERRORS{'DEBUG'}, 0, "including $node_name in throttle limit consideration, nodeset status is '$nodeset_status' and 1 running vcld process detected: " . $node_process_names[0]);
            $active_process_node_count++;
        }
        else {
            # Multiple processes are unexpected but the node is still counted once
            notify($ERRORS{'WARNING'}, 0, "including $node_name in throttle limit consideration, nodeset status is '$nodeset_status', multiple running vcld processes detected: $node_process_count\n" . join("\n", @node_process_names));
            $active_process_node_count++;
        }
    }

    if ($active_process_node_count < $throttle_limit) {
        notify($ERRORS{'DEBUG'}, 0, "throttle limit has NOT been reached according to number of processes running:\nnodes currently being installed or captured: $active_process_node_count\nthrottle limit: $throttle_limit");
        return 0;
    }
    else {
        notify($ERRORS{'DEBUG'}, 0, "throttle limit has been reached according to number of processes running:\nnodes currently being installed or captured: $active_process_node_count\nthrottle limit: $throttle_limit");
        return 1;
    }
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
=head2 _get_install_status

 Parameters  : $computer_node_name
 Returns     : string, or undefined if the status could not be retrieved
 Description : Attempts to connect to TCP port 3001 on a node to retrieve the
               installation status. This is done to overcome a problem which
               occurs if the node is responding to SSH while it is being
               installed and nodestat returns 'sshd' instead of the more
               detailed status. The string "stat \n" is sent to the node and
               everything the node sends back before the connection is closed
               is returned. Undefined is returned if the node name cannot be
               resolved, the connection cannot be established, or the
               response does not contain any word characters.

=cut

sub _get_install_status {
	my $self = shift;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}

	# Get the computer name argument
	my $computer_node_name = shift;
	if (!$computer_node_name) {
		notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
		return;
	}

	my $protocol = 'tcp';
	my $port = 3001;

	# Resolve the node name first, sockaddr_in() dies if it is passed an
	# undefined address so a failed lookup must be caught here
	my $host_by_name = gethostbyname($computer_node_name);
	if (!$host_by_name) {
		notify($ERRORS{'DEBUG'}, 0, "unable to retrieve install status from $computer_node_name, failed to resolve host name");
		return;
	}

	my $socket;
	if (!socket($socket, PF_INET, SOCK_STREAM, getprotobyname($protocol))) {
		return;
	}

	# A failed connection is not reported, undefined is silently returned
	my $sockaddr_in = sockaddr_in($port, $host_by_name);
	if (!connect($socket, $sockaddr_in)) {
		close($socket);
		return;
	}

	print $socket "stat \n";
	$socket->flush;

	# Read everything the node sends until the connection is closed, a lexical
	# is used for each line so that the caller's $_ is not overwritten
	my $status = '';
	while (my $line = <$socket>) {
		$status .= $line;
	}
	close($socket);

	if ($status =~ /\w/) {
		notify($ERRORS{'DEBUG'}, 0, "retrieved install status from $computer_node_name: '$status'");
		return $status;
	}

	# Nothing useful was received
	return;
}
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
=head2 check_image_os

 Parameters  : none
 Returns     : boolean
 Description : For image captures, checks the OS in the VCL database of the
               image to be captured. If capturing a Kickstart-based image, the
               image OS needs to be changed from the Kickstart OS entry to the
               corresponding image OS entry. The image and imagerevision names
               are changed to begin with the new OS name and the
               DataStructure is refreshed afterwards. Returns 1 if no change
               is needed or if the update succeeded, 0 if the new image name
               cannot be determined, the database update fails, or the
               DataStructure refresh fails.

=cut

sub check_image_os {
	my $self = shift;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}

	my $image_id = $self->data->get_image_id();
	my $image_name = $self->data->get_image_name();
	my $image_os_name = $self->data->get_image_os_name();
	my $imagerevision_id = $self->data->get_imagerevision_id();
	my $image_architecture = $self->data->get_image_architecture();

	# NOTE(review): every pattern below captures a single digit, so a
	# multi-digit version such as 'centos10' maps to 'centos1image' - confirm
	# whether this is intended before relying on it for newer OS versions
	my $image_os_name_new;
	if ($image_os_name =~ /^(rh)el[s]?([0-9])/ || $image_os_name =~ /^rh(fc)([0-9])/) {
		# Change rhelX --> rhXimage, rhfcX --> fcXimage
		$image_os_name_new = "$1$2image";
	}
	elsif ($image_os_name =~ /^(centos)([0-9])/) {
		# Change centosX --> centosXimage
		$image_os_name_new = "$1$2image";
	}
	elsif ($image_os_name =~ /^fedora([0-9])/) {
		# Change fedoraX --> fcXimage
		# Only the version digit is captured so that 'fedora' itself is not
		# inserted into the new OS name
		$image_os_name_new = "fc$1image";
	}
	else {
		notify($ERRORS{'DEBUG'}, 0, "no corrections need to be made to image OS: $image_os_name");
		return 1;
	}

	# Change the image name: replace everything before the first hyphen with
	# the new OS name
	# The match result must be checked, otherwise a stale capture from the OS
	# name patterns above would be used to build the new image name
	my ($image_name_suffix) = $image_name =~ /^[^-]+-(.*)/;
	if (!defined($image_name_suffix)) {
		notify($ERRORS{'WARNING'}, 0, "unable to determine new image name, image name does not contain a hyphen after the OS name: $image_name, returning 0");
		return 0;
	}
	my $image_name_new = "$image_os_name_new-$image_name_suffix";

	# The architecture stored for the image is changed from x86_64 to x86,
	# any other architecture value is written back unchanged
	my $new_architecture = $image_architecture;
	if ($image_architecture eq "x86_64") {
		$new_architecture = "x86";
	}

	notify($ERRORS{'OK'}, 0, "Kickstart image OS needs to be changed: $image_os_name -> $image_os_name_new, image name: $image_name -> $image_name_new");

	# Update the image table, change the OS for this image
	# The OS table is joined by name so that image.OSid is set to the id of the
	# new OS entry, no rows are updated if that OS entry does not exist
	my $sql_statement = <<EOF;
UPDATE
OS,
image,
imagerevision
SET
image.OSid = OS.id,
image.architecture = '$new_architecture',
image.name = '$image_name_new',
imagerevision.imagename = '$image_name_new'
WHERE
image.id = $image_id
AND imagerevision.id = $imagerevision_id
AND OS.name = '$image_os_name_new'
EOF

	# Update the image and imagerevision tables
	if (database_execute($sql_statement)) {
		notify($ERRORS{'OK'}, 0, "image ($image_id) and imagerevision ($imagerevision_id) tables updated: $image_name -> $image_name_new");
	}
	else {
		notify($ERRORS{'WARNING'}, 0, "failed to update image and imagerevision tables: $image_name -> $image_name_new, returning 0");
		return 0;
	}

	# Reload the DataStructure so that it reflects the values just written
	if (!$self->data->refresh()) {
		notify($ERRORS{'WARNING'}, 0, "failed to refresh DataStructure after correcting image OS");
		return 0;
	}

	return 1;
} ## end sub check_image_os
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
=head2 DESTROY

 Parameters  : none
 Returns     : nothing
 Description : Runs when an xCAT provisioning object is destroyed. If the
               DataStructure and management node OS objects are available and
               the request state is new, reload, image, or checkpoint, an
               attempt is made to set the nodeset state of the node to
               'boot'. The parent class destructor is called afterwards if
               one exists.

=cut

sub DESTROY {
	my $self = shift;

	# Destructors may run at any time, including while an exception is being
	# handled or while the process is exiting. Localize the global status
	# variables so that nothing done below changes the values seen by the code
	# which caused this object to be destroyed.
	local ($., $@, $!, $^E, $?);

	if (!defined($self)) {
		notify($ERRORS{'DEBUG'}, 0, "skipping xCAT DESTROY tasks, \$self is not defined");
		return;
	}

	# The object reference is formatted as a hexadecimal number for the log
	my $address = sprintf('%x', $self);
	my $type = ref($self);
	notify($ERRORS{'DEBUG'}, 0, "destroying $type object, address: $address");

	if (!$self->data(0)) {
		notify($ERRORS{'DEBUG'}, 0, "skipping xCAT DESTROY tasks, \$self->data is not defined");
	}
	elsif (!$self->mn_os(0)) {
		notify($ERRORS{'DEBUG'}, 0, "skipping xCAT DESTROY tasks, \$self->mn_os is not defined");
	}
	else {
		my $node = $self->data->get_computer_node_name(0);
		my $request_state_name = $self->data->get_request_state_name(0);

		if (!defined($node) || !defined($request_state_name)) {
			notify($ERRORS{'DEBUG'}, 0, "skipping xCAT DESTROY tasks, unable to retrieve node name and request state name from DataStructure");
		}
		elsif ($request_state_name =~ /^(new|reload|image|checkpoint)$/) {
			# Only these request states cause the nodeset state to be reset
			notify($ERRORS{'DEBUG'}, 0, "request state is '$request_state_name', attempting to set nodeset state of $node to 'boot'");
			$self->_nodeset($node, 'boot');
		}
		else {
			notify($ERRORS{'DEBUG'}, 0, "request state is '$request_state_name', skipping setting nodeset state of $node to 'boot'");
		}
	}

	# Check for an overridden destructor
	$self->SUPER::DESTROY if $self->can("SUPER::DESTROY");
} ## end sub DESTROY
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
# Call initialize() when this file is loaded unless $XCAT_ROOT already holds a
# true value
unless ($XCAT_ROOT) {
	initialize();
}

#//////////////////////////////////////////////////////////////////////////////

# A module file must end with a true value
1;
| __END__ |
| |
| =head1 SEE ALSO |
| |
| L<http://cwiki.apache.org/VCL/> |
| |
| =cut |