| #!/usr/bin/perl -w |
| ############################################################################### |
| # $Id$ |
| ############################################################################### |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| ############################################################################### |
| |
| =head1 NAME |
| |
| VCL::Core::State - VCL state base module |
| |
| =head1 SYNOPSIS |
| |
| use base qw(VCL::Module::State); |
| |
| =head1 DESCRIPTION |
| |
| This is the base module for all of the state objects which are instantiated by |
| vcld (new.pm, reserved.pm, etc). |
| |
| =cut |
| |
| ############################################################################### |
| package VCL::Module::State; |
| |
| # Specify the lib path using FindBin |
| use FindBin; |
| use lib "$FindBin::Bin/../.."; |
| |
| # Configure inheritance |
| use base qw(VCL::Module); |
| |
| # Specify the version of this module |
| our $VERSION = '2.5.1'; |
| |
| # Specify the version of Perl to use |
| use 5.008000; |
| |
| use strict; |
| use warnings; |
| use diagnostics; |
| use English '-no_match_vars'; |
| use POSIX qw(floor); |
| |
| use VCL::utils; |
| use VCL::DataStructure; |
| |
| ############################################################################### |
| |
| =head1 OBJECT METHODS |
| |
| =cut |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 initialize |
| |
| Parameters : none |
| Returns : boolean |
| Description : Prepares VCL::Module::State objects to process a reservation. |
| - Renames the process |
| - Updates reservation.lastcheck |
| - Creates OS, management node OS, VM host OS (conditional), and |
| provisioner objects |
| - If this is a cluster request parent reservation, waits for |
| child reservations to begin |
| - Updates request.state to 'pending' |
| |
| =cut |
| |
| sub initialize { |
| my $self = shift; |
| notify($ERRORS{'DEBUG'}, 0, "initializing VCL::Module::State object"); |
| |
| $self->{start_time} = time; |
| |
| my $request_id = $self->data->get_request_id(); |
| my $reservation_id = $self->data->get_reservation_id(); |
| my $request_state_name = $self->data->get_request_state_name(); |
| my $computer_id = $self->data->get_computer_id(); |
| my $is_vm = $self->data->get_computer_vmhost_id(0); |
| my $is_parent_reservation = $self->data->is_parent_reservation(); |
| my $reservation_count = $self->data->get_reservation_count(); |
| my $nathost_id = $self->data->get_nathost_id(0); |
| |
| # Initialize the database handle count |
| $ENV->{dbh_count} = 0; |
| |
| # Attempt to get a database handle |
| if ($ENV->{dbh} = getnewdbh()) { |
| notify($ERRORS{'DEBUG'}, 0, "obtained a database handle for this state process, stored as \$ENV->{dbh}"); |
| } |
| else { |
| notify($ERRORS{'CRITICAL'}, 0, "unable to obtain a database handle for this state process"); |
| return; |
| } |
| |
| # Update reservation lastcheck value to prevent processes from being forked over and over if a problem occurs |
| my $reservation_lastcheck = update_reservation_lastcheck($reservation_id); |
| if ($reservation_lastcheck) { |
| $self->data->set_reservation_lastcheck_time($reservation_lastcheck); |
| } |
| |
| # If this is a cluster request, wait for all reservations to begin before proceeding |
| if ($reservation_count > 1) { |
| if (!$self->wait_for_all_reservations_to_begin('begin', 300, 5)) { |
| $self->reservation_failed("failed to detect start of processing for all reservation processes", 'available'); |
| } |
| } |
| |
| # Parent reservation needs to update the request state to pending |
| if ($is_parent_reservation) { |
| if ($reservation_count > 1) { |
| # Check if any reservations have failed |
| if (my @failed_reservation_ids = $self->does_loadstate_exist_any_reservation('failed')) { |
| notify($ERRORS{'WARNING'}, 0, "reservations failed: " . join(', ', @failed_reservation_ids)); |
| $self->state_exit('failed'); |
| } |
| } |
| |
| # Update the request state to pending for this reservation |
| if (!update_request_state($request_id, "pending", $request_state_name)) { |
| # Check if request was deleted |
| if (is_request_deleted($request_id)) { |
| exit; |
| } |
| |
| # Check the current state |
| my ($current_request_state, $current_request_laststate) = get_request_current_state_name($request_id); |
| if (!$current_request_state) { |
| # Request probably complete and already removed |
| notify($ERRORS{'DEBUG'}, 0, "current request state could not be retrieved, it was probably completed by another vcld process"); |
| exit; |
| } |
| if ($current_request_state =~ /^(deleted|complete)$/ || $current_request_laststate =~ /^(deleted)$/) { |
| notify($ERRORS{'DEBUG'}, 0, "current request state: $current_request_state/$current_request_laststate, exiting"); |
| exit; |
| } |
| |
| $self->reservation_failed("failed to update request state to pending"); |
| } |
| } |
| else { |
| notify($ERRORS{'DEBUG'}, 0, "child reservation, not updating request state to 'pending'"); |
| } |
| |
| # Set the PID and PPID in the DataStructure |
| # These will be wrong if set in get_request_info before the state process is forked |
| $self->data->set_process_pid($PID); |
| $self->data->set_process_ppid(getppid() || '<unknown>'); |
| |
| # Create an OS object |
| if (my $os = $self->create_os_object()) { |
| $self->set_os($os); |
| } |
| else { |
| $self->reservation_failed("failed to create OS object"); |
| } |
| |
| # Set the os under mn_os to the OS object for the computer being loaded |
| # This allows the $self->mn_os object to call $self->os to retrieve the OS object for the computer being loaded |
| # This is useful because the DataStructure object changes when mn_os is created and it would otherwise not have access to the original data |
| if ($self->mn_os()) { |
| $self->mn_os->set_os($self->os); |
| } |
| |
| # Create a VM host OS object if vmhostid is set for the computer |
| my $vmhost_os; |
| if ($is_vm) { |
| $vmhost_os = $self->create_vmhost_os_object(); |
| if (!$vmhost_os) { |
| $self->reservation_failed("failed to create VM host OS object"); |
| } |
| $self->set_vmhost_os($vmhost_os); |
| } |
| |
| # Create a NAT host OS object if computer is mapped to a NAT host |
| my $nathost_os; |
| if ($nathost_id) { |
| $nathost_os = $self->create_nathost_os_object(); |
| if (!$nathost_os) { |
| $self->reservation_failed("failed to create NAT host OS object"); |
| } |
| $self->set_nathost_os($nathost_os); |
| |
| # Allow the OS object to access the nathost_os object |
| # This is necessary to allow the OS code to call the subroutines to forward ports |
| $self->os->set_nathost_os($self->nathost_os()); |
| |
| # Allow the NAT host OS object to access the OS object |
| # This allows the NAT host OS object to retrieve info about the computer being loaded |
| $nathost_os->set_os($self->os()); |
| } |
| |
| # Create a provisioning object |
| if (my $provisioner = $self->create_provisioning_object()) { |
| $self->set_provisioner($provisioner); |
| |
| # Allow the provisioning object to access the OS object |
| $self->provisioner->set_os($self->os()); |
| |
| # Allow the OS object to access the provisioning object |
| # This is necessary to allow the OS code to be able to call the provisioning power* subroutines if the OS reboot or shutdown fails |
| $self->os->set_provisioner($self->provisioner()); |
| } |
| else { |
| $self->reservation_failed("failed to create provisioning object"); |
| } |
| |
| # Create a VM host OS object if vmhostid is set for the computer |
| if ($is_vm) { |
| # Check if provisioning object already has a VM host OS object |
| my $provisioner_vmhost_os = $self->provisioner->vmhost_os(0); |
| |
| if (ref($provisioner_vmhost_os) ne ref($vmhost_os)) { |
| $self->set_vmhost_os($provisioner_vmhost_os); |
| } |
| } |
| |
| return 1; |
| } ## end sub initialize |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 user_connected |
| |
| Parameters : none |
| Returns : boolean |
| Description : Checks if the user is connected to the computer. If the user |
| isn't connected and this is a cluster request, checks if a |
| computerloadlog 'connected' entry exists for any of the other |
| reservations in cluster. |
| |
| =cut |
| |
| sub user_connected { |
| my $self = shift; |
| if (ref($self) !~ /VCL::/) { |
| notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a class method of a VCL object"); |
| return; |
| } |
| |
| my $request_id = $self->data->get_request_id(); |
| my @reservation_ids = $self->data->get_reservation_ids(); |
| my $reservation_id = $self->data->get_reservation_id(); |
| my $reservation_lastcheck = $self->data->get_reservation_lastcheck_time(); |
| my $reservation_count = $self->data->get_request_reservation_count(); |
| my $computer_id = $self->data->get_computer_id(); |
| my $computer_short_name = $self->data->get_computer_short_name(); |
| my $server_request_id = $self->data->get_server_request_id(); |
| my $request_duration_epoch_secs = $self->data->get_request_duration_epoch(); |
| my $request_duration_hrs = floor($request_duration_epoch_secs / 60 / 60); |
| my $ignore_connections_gte_min = $self->os->get_timings('ignore_connections_gte'); |
| my $ignore_connections_gte = floor($ignore_connections_gte_min / 60); |
| |
| # Check if the request state changed for any reason |
| # This will occur if the user deletes the request, makeproduction is initiated, reboot is initiated, image capture is started |
| if ($self->request_state_changed()) { |
| $self->state_exit(); |
| } |
| |
| # Check if this is a server request, causes process to exit if server request |
| if ($server_request_id) { |
| notify($ERRORS{'DEBUG'}, 0, "server reservation detected, set as user is connected"); |
| insertloadlog($reservation_id, $computer_id, "connected", "user connected to $computer_short_name"); |
| return 1; |
| } |
| |
| # If duration is >= 24 hrs set as connected and return |
| if ($request_duration_hrs >= $ignore_connections_gte) { |
| notify($ERRORS{'OK'}, 0, "reservation duration is $request_duration_hrs hrs is >= to ignore_connections setting $ignore_connections_gte hrs, skipping inuse checks"); |
| insertloadlog($reservation_id, $computer_id, "connected", "user connected to $computer_short_name"); |
| return 1; |
| } |
| |
| # Check if the user has connected to the reservation being processed |
| if ($self->os->is_user_connected()) { |
| insertloadlog($reservation_id, $computer_id, "connected", "user connected to $computer_short_name"); |
| |
| # If this is a cluster request, update the lastcheck value for all reservations |
| # This signals the other reservation inuse processes that a connection was detected on another computer |
| if ($reservation_count > 1) { |
| update_reservation_lastcheck(@reservation_ids); |
| } |
| return 1; |
| } |
| |
| if ($reservation_count > 1) { |
| my $current_reservation_lastcheck = get_current_reservation_lastcheck($reservation_id); |
| if ($current_reservation_lastcheck ne $reservation_lastcheck) { |
| notify($ERRORS{'DEBUG'}, 0, "user connected to another computer in the cluster, reservation.lastcheck updated since this process began: $reservation_lastcheck --> $current_reservation_lastcheck"); |
| return 1; |
| } |
| else { |
| notify($ERRORS{'DEBUG'}, 0, "no connection to another computer in the cluster detected, reservation.lastcheck has not been updated since this process began: $reservation_lastcheck"); |
| } |
| } |
| |
| return 0; |
| } |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 request_state_changed |
| |
| Parameters : none |
| Returns : boolean |
| Description : Returns true if the neither current request state changed after |
| the process began, including: |
| * Request deleted |
| * Request deleted and makeproduction initiated |
| * Image capture initiated |
| * Checkpoint capture initiated |
| * Reboot initiated |
| |
| =cut |
| |
| sub request_state_changed { |
| my $self = shift; |
| if (ref($self) !~ /VCL::/) { |
| notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method, reservation failure tasks not attempted, process exiting"); |
| exit 1; |
| } |
| |
| my $request_id = $self->data->get_request_id(); |
| my $processing_request_state_name = $self->data->get_request_state_name(); |
| |
| my ($current_state_name, $current_laststate_name) = get_request_current_state_name($request_id); |
| if (!$current_state_name || !$current_laststate_name) { |
| notify($ERRORS{'WARNING'}, 0, "request $request_id state data could not be retrieved, assuming request is deleted and was removed from the database, returning true"); |
| return 1; |
| } |
| elsif (($current_state_name ne 'pending' and $current_state_name ne $processing_request_state_name) || |
| ($current_state_name eq 'pending' and $current_laststate_name ne $processing_request_state_name)) { |
| notify($ERRORS{'OK'}, 0, "request state changed after this process began: $processing_request_state_name --> $current_state_name/$current_laststate_name, returning true"); |
| return 1; |
| } |
| else { |
| #notify($ERRORS{'DEBUG'}, 0, "request state has NOT changed after this process began: $processing_request_state_name --> $current_state_name/$current_laststate_name, returning false"); |
| return 0; |
| } |
| } |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 reservation_failed |
| |
| Parameters : $message |
| Returns : exits |
| Description : Performs the steps required when a reservation fails: |
| - Checks if request was deleted, if so: |
| - Sets computer.state to 'available' |
| - Exits with status 0 |
| - Inserts 'failed' computerloadlog table entry |
| - Updates log.ending to 'failed' |
| - Updates computer.state to 'failed' |
| - Updates request.state to 'failed', laststate to request's |
| previous state |
| - Removes computer from blockcomputers table if this is a block |
| request |
| - Exits with status 1 |
| |
| =cut |
| |
| sub reservation_failed { |
| my $self = shift; |
| if (ref($self) !~ /VCL::/) { |
| notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method, reservation failure tasks not attempted, process exiting"); |
| exit 1; |
| } |
| |
| # Check if a message was passed as an argument |
| my $message = shift; |
| if (!$message) { |
| $message = 'reservation failed'; |
| } |
| |
| # Check if computer needs to be marked as failed |
| my $computer_input_state = shift; |
| if (!$computer_input_state) { |
| $computer_input_state = 0; |
| } |
| |
| # Get the required data |
| my $request_id = $self->data->get_request_id(); |
| my $reservation_id = $self->data->get_reservation_id(); |
| my $computer_id = $self->data->get_computer_id(); |
| my $computer_short_name = $self->data->get_computer_short_name(); |
| my $request_state_name = $self->data->get_request_state_name(); |
| my $request_laststate_name = $self->data->get_request_laststate_name(); |
| my $computer_state_name = $self->data->get_computer_state_name(); |
| |
| # Determine if the failure occurred during initialization |
| my $calling_subroutine = get_calling_subroutine(); |
| my $initialize_failed = 0; |
| if ($calling_subroutine =~ /initialize/) { |
| $initialize_failed = 1; |
| } |
| |
| # Check if the request has been deleted |
| # Ignore if this process's state is deleted |
| # If a 'deleted' request fails during initialization and before the request state was changed to 'pending', vcld will try to process over and over again |
| if ($request_state_name ne 'deleted' && is_request_deleted($request_id)) { |
| notify($ERRORS{'OK'}, 0, "request has been deleted, setting computer state to available and exiting"); |
| |
| # Update the computer state to available |
| if ($computer_state_name !~ /^(maintenance)/) { |
| if (update_computer_state($computer_id, "available")) { |
| notify($ERRORS{'OK'}, 0, "$computer_short_name ($computer_id) state set to 'available'"); |
| } |
| else { |
| notify($ERRORS{'OK'}, 0, "failed to set $computer_short_name ($computer_id) state to 'available'"); |
| } |
| } |
| else { |
| notify($ERRORS{'WARNING'}, 0, "computer $computer_short_name ($computer_id) state NOT set to available because the current state is $computer_state_name"); |
| } |
| |
| notify($ERRORS{'OK'}, 0, "exiting 0"); |
| exit 0; |
| } ## end if (is_request_deleted($request_id)) |
| |
| |
| my $new_request_state_name; |
| my $new_computer_state_name; |
| my $request_log_ending; |
| |
| if ($request_state_name =~ /(inuse|reboot|server)/) { |
| # Check if the request end time has not been reached |
| my $request_end_time_epoch = convert_to_epoch_seconds($self->data->get_request_end_time()); |
| my $current_time_epoch = time; |
| if ($request_end_time_epoch <= $current_time_epoch) { |
| # If the end has been reached, set the request state to complete and the computer state to failed |
| # This was likely caused by this process failing to initialize all of its module objects |
| $new_request_state_name = 'complete'; |
| $new_computer_state_name = 'failed'; |
| $request_log_ending = 'EOR'; |
| notify($ERRORS{'CRITICAL'}, 0, ($initialize_failed ? 'process failed to initialize: ' : '') . "$message, request end time has been reached, setting request state to $new_request_state_name, computer state to $new_computer_state_name"); |
| } |
| else { |
| # End time has not been reached, never set inuse requests to failed, set the state back to inuse |
| notify($ERRORS{'WARNING'}, 0, ($initialize_failed ? 'process failed to initialize: ' : '') . "$message, setting request and computer states back to 'inuse'"); |
| $self->state_exit('inuse', 'inuse'); |
| } |
| } |
| else { |
| # Display the message |
| notify($ERRORS{'CRITICAL'}, 0, "reservation failed on $computer_short_name" . ($initialize_failed ? ', process failed to initialize' : '') . ": $message"); |
| |
| if ($request_state_name =~ /(image|checkpoint)/) { |
| $new_request_state_name = 'maintenance'; |
| $new_computer_state_name = 'maintenance'; |
| } |
| elsif ($request_state_name eq 'deleted') { |
| $new_request_state_name = 'complete'; |
| $new_computer_state_name = 'failed'; |
| } |
| elsif ($computer_input_state) { |
| $new_request_state_name = 'failed'; |
| $new_computer_state_name = $computer_input_state; |
| } |
| else { |
| $new_request_state_name = 'failed'; |
| $new_computer_state_name = 'failed'; |
| } |
| } |
| |
| if ($request_state_name =~ /^(new|reserved)/) { |
| # Update log table ending column to failed for this request |
| $request_log_ending = 'failed'; |
| } |
| |
| # Insert a row into the computerloadlog table |
| if (insertloadlog($reservation_id, $computer_id, "failed", $message)) { |
| notify($ERRORS{'OK'}, 0, "inserted computerloadlog 'failed' entry for reservation $reservation_id"); |
| } |
| else { |
| notify($ERRORS{'WARNING'}, 0, "failed to insert computerloadlog entry"); |
| } |
| |
| # Check if computer is part of a blockrequest, if so pull out of blockcomputers table |
| if (is_inblockrequest($computer_id)) { |
| notify($ERRORS{'OK'}, 0, "$computer_short_name in blockcomputers table"); |
| if (clearfromblockrequest($computer_id)) { |
| notify($ERRORS{'OK'}, 0, "removed $computer_short_name from blockcomputers table"); |
| } |
| else { |
| notify($ERRORS{'CRITICAL'}, 0, "failed to remove $computer_short_name from blockcomputers table"); |
| } |
| } |
| else { |
| notify($ERRORS{'OK'}, 0, "$computer_short_name is NOT in blockcomputers table"); |
| } |
| |
| $self->state_exit($new_request_state_name, $new_computer_state_name, $request_log_ending); |
| } ## end sub reservation_failed |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 does_loadstate_exist_all_reservations |
| |
| Parameters : $loadstate_name, $ignore_current_reservation (optional) |
| Returns : boolean |
| Description : Checks the computerloadlog entries for all reservations belonging |
| to the request. True is returned if an entry matching the |
| $loadstate_name argument exists for all reservations. The |
| $ignore_current_reservation argument may be used to check all |
| reservations other than the one currently being processed. This |
| may be used by a parent reservation to determine when all child |
| reservations have begun to be processed. |
| |
| =cut |
| |
| sub does_loadstate_exist_all_reservations { |
| my $self = shift; |
| if (ref($self) !~ /VCL/) { |
| notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a class method of a VCL object"); |
| return; |
| } |
| |
| my $loadstate_name = shift; |
| if (!defined($loadstate_name)) { |
| notify($ERRORS{'WARNING'}, 0, "computerloadlog loadstate name argument was not supplied"); |
| return; |
| } |
| |
| my $ignore_current_reservation = shift; |
| |
| my $request_id = $self->data->get_request_id(); |
| my $request_state = $self->data->get_request_state_name(); |
| my $reservation_id = $self->data->get_reservation_id(); |
| |
| # Retrieve computerloadlog entries for all reservations |
| my $request_loadstate_names = get_request_loadstate_names($request_id); |
| if (!$request_loadstate_names) { |
| notify($ERRORS{'WARNING'}, 0, "failed to retrieve request loadstate names"); |
| return; |
| } |
| |
| my @exists; |
| my @does_not_exist; |
| for my $check_reservation_id (sort {$a <=> $b} keys %$request_loadstate_names) { |
| # Ignore the current reservation |
| if ($ignore_current_reservation && $check_reservation_id eq $reservation_id) { |
| next; |
| } |
| |
| my @loadstate_names = @{$request_loadstate_names->{$check_reservation_id}}; |
| if (grep { $_ eq $loadstate_name } @loadstate_names) { |
| push @exists, $check_reservation_id; |
| } |
| else { |
| push @does_not_exist, $check_reservation_id; |
| } |
| } |
| |
| if (@does_not_exist) { |
| notify($ERRORS{'DEBUG'}, 0, "computerloadlog '$loadstate_name' entry does NOT exist for all reservations:\n" . |
| "exists for reservation IDs: " . join(', ', @exists) . "\n" . |
| "does not exist for reservation IDs: " . join(', ', @does_not_exist) |
| ); |
| } |
| else { |
| notify($ERRORS{'DEBUG'}, 0, "computerloadlog '$loadstate_name' entry exists for all reservations"); |
| } |
| |
| if (wantarray) { |
| return (\@exists, \@does_not_exist); |
| } |
| else { |
| return !scalar(@does_not_exist); |
| } |
| } |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 does_loadstate_exist_any_reservation |
| |
| Parameters : $loadstate_name, $ignore_current_reservation (optional) |
| Returns : array or integer |
| Description : Checks the computerloadlog entries for all reservations belonging |
| to the request. An array is returned containing reservation IDs |
| of any reservations for which have a corresponding |
| computerloadlog $loadstate_name entry. The |
| $ignore_current_reservation argument may be used to check all |
| reservations other than the one currently being processed. |
| |
| =cut |
| |
| sub does_loadstate_exist_any_reservation { |
| my $self = shift; |
| if (ref($self) !~ /VCL/) { |
| notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a class method of a VCL object"); |
| return; |
| } |
| |
| my $loadstate_name = shift; |
| if (!defined($loadstate_name)) { |
| notify($ERRORS{'WARNING'}, 0, "computerloadlog loadstate name argument was not supplied"); |
| return; |
| } |
| |
| my $ignore_current_reservation = shift; |
| |
| my $request_id = $self->data->get_request_id(); |
| my $request_state = $self->data->get_request_state_name(); |
| my $reservation_id = $self->data->get_reservation_id(); |
| |
| # Retrieve computerloadlog entries for all reservations |
| my $request_loadstate_names = get_request_loadstate_names($request_id); |
| if (!$request_loadstate_names) { |
| notify($ERRORS{'WARNING'}, 0, "failed to retrieve request loadstate names"); |
| return; |
| } |
| |
| my @exists; |
| my @does_not_exist; |
| for my $check_reservation_id (keys %$request_loadstate_names) { |
| # Ignore the current reservation |
| if ($ignore_current_reservation && $check_reservation_id eq $reservation_id) { |
| next; |
| } |
| |
| my @loadstate_names = @{$request_loadstate_names->{$check_reservation_id}}; |
| if (grep { $_ eq $loadstate_name } @loadstate_names) { |
| push @exists, $check_reservation_id; |
| } |
| else { |
| push @does_not_exist, $check_reservation_id; |
| } |
| } |
| |
| if (@exists) { |
| notify($ERRORS{'DEBUG'}, 0, "computerloadlog '$loadstate_name' entry exists for reservation:\n" . |
| "exists for reservation IDs: " . join(', ', @exists) . "\n" . |
| "does not exist for reservation IDs: " . join(', ', @does_not_exist) |
| ); |
| } |
| else { |
| notify($ERRORS{'DEBUG'}, 0, "computerloadlog '$loadstate_name' entry does NOT exist for any reservation"); |
| } |
| return (wantarray) ? @exists : scalar(@exists); |
| } |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 wait_for_all_reservations_to_begin |
| |
| Parameters : $loadstate_name (optional), $total_wait_seconds (optional), $attempt_delay_seconds (optional) |
| Returns : boolean |
| Description : Loops until a computerloadlog entry exists for all child |
| reservations matching the loadstate specified by the |
| $loadstate_name argument. Returns false if the loop times out. |
| Exits if the request has been deleted. The default |
| $total_wait_seconds value is 300 seconds. The default |
| $attempt_delay_seconds value is 15 seconds. |
| |
| =cut |
| |
| sub wait_for_all_reservations_to_begin { |
| my $self = shift; |
| if (ref($self) !~ /VCL/) { |
| notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a class method of a VCL object"); |
| return; |
| } |
| |
| my $loadstate_name = shift; |
| if (!$loadstate_name) { |
| notify($ERRORS{'WARNING'}, 0, "computerloadlog loadstate name argument was not supplied"); |
| return; |
| } |
| |
| my $total_wait_seconds = shift || 300; |
| my $attempt_delay_seconds = shift || 30; |
| |
| my $request_id = $self->data->get_request_id(); |
| my $request_state_name = $self->data->get_request_state_name(); |
| |
| my $result = $self->code_loop_timeout( |
| sub { |
| if ($request_state_name ne 'deleted' && is_request_deleted($request_id)) { |
| notify($ERRORS{'OK'}, 0, "request has been deleted, exiting"); |
| exit; |
| } |
| |
| return $self->does_loadstate_exist_all_reservations($loadstate_name, 1); |
| }, |
| [], |
| "waiting for all reservation processes to begin", $total_wait_seconds, $attempt_delay_seconds |
| ); |
| |
| if (!$result) { |
| my ($exists, $not_exists) = $self->does_loadstate_exist_all_reservations($loadstate_name, 1); |
| if (!defined($exists) || !defined($not_exists)) { |
| notify($ERRORS{'WARNING'}, 0, "failed to determine if all reservation processes have begun, does_loadstate_exist_all_reservations returned a null value"); |
| return; |
| } |
| elsif (!ref($exists) || !ref($not_exists) || ref($exists) ne 'ARRAY' || ref($not_exists) ne 'ARRAY') { |
| notify($ERRORS{'WARNING'}, 0, "failed to determine if all reservation processes have begun, does_loadstate_exist_all_reservations did not return 2 array references:\n1st item returned:\n" . format_data($exists) . "\n2nd item returned:\n" . format_data($not_exists)); |
| return; |
| } |
| |
| if (scalar(@$not_exists) == 0) { |
| notify($ERRORS{'DEBUG'}, 0, "detected all reservation processes have begun after loop timed out"); |
| return 1; |
| } |
| |
| my $string = ''; |
| for my $reservation_id (@$not_exists) { |
| my $management_node_hostname = get_reservation_management_node_hostname($reservation_id) || '<unknown>'; |
| $string .= "$reservation_id: $management_node_hostname\n" |
| } |
| $string =~ s/\n$//; |
| |
| notify($ERRORS{'WARNING'}, 0, "failed to determine if processes for the following reservations have begun, computerloadlog '$loadstate_name' entry does not exist:\n$string"); |
| return; |
| } |
| |
| |
| return $result; |
| } |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 wait_for_reservation_loadstate |
| |
| Parameters : $reservation_id, $loadstate_name, $total_wait_seconds (optional), $attempt_delay_seconds (optional) |
| Returns : boolean |
| Description : Waits for a computerloadlog entry to exist for a particular |
| reservation. |
| |
| =cut |
| |
| sub wait_for_reservation_loadstate { |
| my $self = shift; |
| if (ref($self) !~ /VCL/) { |
| notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a class method of a VCL object"); |
| return; |
| } |
| |
| my ($reservation_id, $loadstate_name, $total_wait_seconds, $attempt_delay_seconds) = @_; |
| if (!$reservation_id) { |
| notify($ERRORS{'WARNING'}, 0, "computerloadlog loadstate name argument was not supplied"); |
| return; |
| } |
| elsif (!$loadstate_name) { |
| notify($ERRORS{'WARNING'}, 0, "computerloadlog loadstate name argument was not supplied"); |
| return; |
| } |
| |
| $total_wait_seconds = 300 unless defined($total_wait_seconds); |
| $attempt_delay_seconds = 30 unless defined($attempt_delay_seconds); |
| |
| my $request_id = $self->data->get_request_id(); |
| my $request_state_name = $self->data->get_request_state_name(); |
| |
| my $result = $self->code_loop_timeout( |
| sub { |
| if ($request_state_name ne 'deleted' && is_request_deleted($request_id)) { |
| notify($ERRORS{'OK'}, 0, "request has been deleted, exiting"); |
| exit; |
| } |
| return get_reservation_computerloadlog_time($reservation_id, $loadstate_name); |
| }, |
| [], |
| "waiting for reservation $reservation_id to generate a $loadstate_name computerloadlog entry", $total_wait_seconds, $attempt_delay_seconds |
| ); |
| |
| if ($result) { |
| return $result; |
| } |
| else { |
| notify($ERRORS{'WARNING'}, 0, "computerloadlog '$loadstate_name' entry does not exist for reservation $reservation_id, waited $total_wait_seconds seconds"); |
| return; |
| } |
| } |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 wait_for_child_reservations_to_exit |
| |
| Parameters : $total_wait_seconds (optional), $attempt_delay_seconds (optional) |
| Returns : boolean |
| Description : Loops until an 'exited' computerloadlog entry exists for all |
| child reservations which also have a 'begin' entry. Returns false |
| if the loop times out. The default $total_wait_seconds value is |
| 300 seconds. The default $attempt_delay_seconds value is 15 |
| seconds. |
| |
| =cut |
| |
| sub wait_for_child_reservations_to_exit { |
| my $self = shift; |
| if (ref($self) !~ /VCL/) { |
| notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a class method of a VCL object"); |
| return; |
| } |
| |
| my $total_wait_seconds = shift || 300; |
| my $attempt_delay_seconds = shift || 15; |
| |
| my $request_id = $self->data->get_request_id(); |
| my $request_state_name = $self->data->get_request_state_name(); |
| |
| my $subroutine_name = get_current_subroutine_name(); |
| |
| return $self->code_loop_timeout( |
| sub { |
| # Commented out - causes problems for cluster requests |
| # Example: request deleted while in pending/reserved, waiting for acknowledgement |
| # Parent sees state=deleted, and doesn't wait for child reserved processes to exit |
| # Parent's deleted/reclaim.pm process starts up |
| # -Parent sees 'begin' entries for the child reservations |
| # -Sets request state to pending/deleted |
| # -reclaim.pm processes are never created for children |
| # Child computer state gets left in 'reserved' |
| #if (is_request_deleted($request_id)) { |
| # notify($ERRORS{'OK'}, 0, "request has been deleted, exiting"); |
| # exit; |
| #} |
| |
| my ($exited, $not_exited) = $self->does_loadstate_exist_all_reservations('exited', 1); |
| # If no reservations are missing an 'exited' entry return true |
| if (!@$not_exited) { |
| notify($ERRORS{'DEBUG'}, 0, "$subroutine_name: computerloadlog 'exited' entry exists for all reservations"); |
| return 1; |
| } |
| |
| # Some reservations are missing an 'exited' entry |
| # Ignore reservations missing both an 'exited' and 'begin' entry |
| my ($began, $not_began) = $self->does_loadstate_exist_all_reservations('begin', 1); |
| my @began_not_exited = get_array_intersection($began, $not_exited); |
| if (@began_not_exited) { |
| notify($ERRORS{'DEBUG'}, 0, "$subroutine_name: reservation exists with a computerloadlog 'begin' entry but no 'exited' entry, returning false\n" . join(', ', @began_not_exited)); |
| return 0; |
| } |
| else { |
| notify($ERRORS{'DEBUG'}, 0, "$subroutine_name: no reservations have a computerloadlog 'begin' entry but no 'exited' entry, returning true"); |
| return 1; |
| } |
| }, |
| [$self, 'exited', 1], |
| "waiting for child reservation processes to exit", $total_wait_seconds, $attempt_delay_seconds |
| ); |
| } |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 state_exit |
| |
| Parameters : $request_state_name_new (optional), $computer_state_name_new (optional), $request_log_ending (optional) |
| Returns : none, exits |
| Description : Performs common tasks before a reservation process exits and then |
| exits. |
| |
| =cut |
| |
| sub state_exit { |
| my $self = shift; |
| if (ref($self) !~ /VCL/) { |
| notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a class method of a VCL object"); |
| return; |
| } |
| |
| # Set flag to avoid this subroutine from being called more than once |
| $ENV->{state_exit} = 1; |
| |
| my ($request_state_name_new, $computer_state_name_new, $request_log_ending) = @_; |
| notify($ERRORS{'DEBUG'}, 0, "beginning state module exit tasks, " . |
| "request state argument: " . ($request_state_name_new ? $request_state_name_new : '<not specified>') . ', ' . |
| "computer state argument: " . ($computer_state_name_new ? $computer_state_name_new : '<not specified>') . ', ' . |
| "log ending argument: " . ($request_log_ending ? $request_log_ending : '<not specified>') |
| ); |
| |
| |
| my $calling_sub = get_calling_subroutine(); |
| |
| my $request_id = $self->data->get_request_id(); |
| my $request_logid = $self->data->get_request_log_id(0); |
| my $reservation_id = $self->data->get_reservation_id(); |
| my @reservation_ids = $self->data->get_reservation_ids(); |
| my $reservation_count = $self->data->get_reservation_count(); |
| my $is_parent_reservation = $self->data->is_parent_reservation(); |
| my $request_state_name_old = $self->data->get_request_state_name(); |
| my $request_laststate_name_old = $self->data->get_request_laststate_name(); |
| my $computer_id = $self->data->get_computer_id(); |
| my $computer_shortname = $self->data->get_computer_short_name(); |
| my $nathost_hostname = $self->data->get_nathost_hostname(0); |
| |
| if ($is_parent_reservation) { |
| # If parent of a cluster request, wait for child processes to exit before switching the state |
| if ($reservation_count > 1) { |
| # Check frequently if reservation timed out to cause Reservations page to remove the Connect button ASAP |
| if ($request_state_name_new && $request_state_name_new =~ /(timeout)/) { |
| $self->wait_for_child_reservations_to_exit(300, 3); |
| } |
| else { |
| $self->wait_for_child_reservations_to_exit(); |
| } |
| |
| # Check if any reservations failed |
| my @failed_reservation_ids = $self->does_loadstate_exist_any_reservation('failed'); |
| if (@failed_reservation_ids && (!$request_state_name_new || $request_state_name_new ne 'failed')) { |
| notify($ERRORS{'OK'}, 0, "another reservation failed, request state will be updated to 'failed'"); |
| $request_state_name_new = 'failed'; |
| } |
| |
| if ($request_state_name_new && $request_state_name_new eq 'failed') { |
| # Child reservations will leave the state of the computer to 'reloading' if they didn't fail |
| # Need to change state back to available for child reservations which didn't fail |
| for my $cluster_reservation_id (@reservation_ids) { |
| next if $cluster_reservation_id eq $reservation_id; |
| |
| my $reservation_data = $self->data->get_reservation_data($cluster_reservation_id) || next; |
| my $reservation_computer_id = $reservation_data->get_computer_id() || next; |
| my $reservation_computer_hostname = $reservation_data->get_computer_hostname() || next; |
| if (!(grep { $_ eq $cluster_reservation_id } @failed_reservation_ids)) { |
| notify($ERRORS{'DEBUG'}, 0, "child reservation $cluster_reservation_id did not fail, checking state of computer assigned to reservation: $reservation_computer_id"); |
| |
| my $computer_current_state_name = get_computer_current_state_name($reservation_computer_id) || next; |
| if ($computer_current_state_name =~ /(reloading)/) { |
| notify($ERRORS{'DEBUG'}, 0, "state of computer $reservation_computer_id assigned to child reservation $cluster_reservation_id is $computer_current_state_name, reservation did not fail, changing state to available"); |
| update_computer_state($reservation_computer_id, 'available'); |
| } |
| else { |
| notify($ERRORS{'DEBUG'}, 0, "state of computer $reservation_computer_id assigned to child reservation $cluster_reservation_id is $computer_current_state_name, reservation did not fail, state of computer will not be changed"); |
| } |
| } |
| } |
| } |
| } |
| |
| if ($request_state_name_new) { |
| # Never set request state to failed if previous state is image |
| # Allow pending/checkpoint --> reserved/checkpoint |
| if ($request_state_name_old =~ /(image|checkpoint)/ && $request_state_name_new !~ /(reserved|complete|maintenance)/) { |
| notify($ERRORS{'CRITICAL'}, 0, "previous request state is $request_state_name_old, not setting request state to $request_state_name_new, setting request and computer state to maintenance"); |
| $request_state_name_new = 'maintenance'; |
| $computer_state_name_new = 'maintenance'; |
| } |
| elsif ($request_state_name_old =~ /(inuse|reboot|server)/ && $request_state_name_new !~ /(inuse|timeout|maintenance)/) { |
| notify($ERRORS{'CRITICAL'}, 0, "previous request state is $request_state_name_old, not setting request state to $request_state_name_new, setting request and computer state to inuse"); |
| $request_state_name_new = 'inuse'; |
| $computer_state_name_new = 'inuse'; |
| } |
| } |
| } |
| |
| |
| # If $request_log_ending was passed this should be the end of the reservation |
| # If NAT is used, rules added to the NAT host should be removed |
| if ($nathost_hostname) { |
| my $nat_sanitize_needed = 0; |
| if ($request_log_ending) { |
| notify($ERRORS{'DEBUG'}, 0, "attempting to sanitize firewall rules created for reservation $reservation_id on NAT host $nathost_hostname, \$request_log_ending argument was specified"); |
| $nat_sanitize_needed = 1; |
| } |
| elsif ($request_state_name_new && $request_state_name_new =~ /(timeout|deleted|complete|image|checkpoint|failed)/) { |
| notify($ERRORS{'DEBUG'}, 0, "attempting to sanitize firewall rules created for reservation $reservation_id on NAT host $nathost_hostname, next request state is '$request_state_name_new'"); |
| $nat_sanitize_needed = 1; |
| } |
| |
| if ($nat_sanitize_needed) { |
| $self->nathost_os->firewall->nat_sanitize_reservation(); |
| } |
| } |
| |
| |
| # Update the computer state if argument was supplied |
| if ($computer_state_name_new) { |
| my $computer_state_name_old = $self->data->get_computer_state_name(); |
| |
| if ($computer_state_name_new eq $computer_state_name_old) { |
| notify($ERRORS{'DEBUG'}, 0, "state of computer $computer_shortname not updated, already set to $computer_state_name_old"); |
| } |
| elsif (!update_computer_state($computer_id, $computer_state_name_new)) { |
| notify($ERRORS{'CRITICAL'}, 0, "failed update state of computer $computer_shortname: $computer_state_name_old->$computer_state_name_new"); |
| } |
| } |
| |
| |
| if ($is_parent_reservation) { |
| # Clean computerloadlog as late as possible |
| if ($request_state_name_old =~ /(new|reserved)/) { |
| # Only delete computerloadlog entries with loadstatename = 'begin' for all reservations in this request |
| delete_computerloadlog_reservation(\@reservation_ids, '(begin)'); |
| } |
| else { |
| # Delete all computerloadlog entries for all reservations in this request |
| delete_computerloadlog_reservation(\@reservation_ids); |
| } |
| |
| # Update log.ending if this is the parent reservation and argument was supplied |
| if ($request_logid && $request_log_ending) { |
| if (!update_log_ending($request_logid, $request_log_ending)) { |
| notify($ERRORS{'CRITICAL'}, 0, "failed to set log ending to $request_log_ending, log ID: $request_logid"); |
| } |
| } |
| |
| # Update the reservation.lastcheck time to now if the next request state is inuse |
| # Do this to ensure that reservations are not processed again quickly after this process exits |
| # For cluster requests, the parent may have had to wait a while for child processes to exit |
| # Resetting reservation.lastcheck causes reservations to wait the full interval between inuse checks |
| if ($request_state_name_new && $request_state_name_new =~ /(reserved|inuse|reboot|server)/) { |
| update_reservation_lastcheck(@reservation_ids); |
| } |
| } |
| |
| # Insert a computerloadlog 'exited' entry |
| # This is used by the parent cluster reservation |
| # Do this as late as possible, if request.state is changed to 'complete', vcld may begin processing it before this process exits |
| # Warning will be generated if request is deleted before insertloadlog is executed |
| insertloadlog($reservation_id, $computer_id, "exited", "vcld process exiting"); |
| |
| |
| if ($is_parent_reservation && $request_state_name_new) { |
| # Update the request state |
| if ($request_state_name_old ne 'deleted') { |
| if (is_request_deleted($request_id)) { |
| notify($ERRORS{'OK'}, 0, "request has been deleted, request state not updated: $request_state_name_old --> $request_state_name_new"); |
| } |
| else { |
| # Check if the request state has already been updated |
| # This can occur if another reservation in a cluster failed |
| my ($request_state_name_current, $request_laststate_name_current) = get_request_current_state_name($request_id); |
| if ($request_state_name_current eq $request_state_name_new && $request_laststate_name_current eq $request_state_name_old) { |
| notify($ERRORS{'OK'}, 0, "request has NOT been deleted, current state already set to: $request_state_name_current/$request_laststate_name_current"); |
| } |
| else { |
| notify($ERRORS{'OK'}, 0, "request has NOT been deleted, updating request state: $request_state_name_old/$request_laststate_name_old --> $request_state_name_new/$request_state_name_old"); |
| if (!update_request_state($request_id, $request_state_name_new, $request_state_name_old)) { |
| notify($ERRORS{'WARNING'}, 0, "failed to change request state: $request_state_name_old/$request_laststate_name_old --> $request_state_name_new/$request_state_name_old"); |
| } |
| } |
| } |
| } |
| else { |
| # Current request state = 'deleted', always set the request state to 'complete' |
| if (!update_request_state($request_id, 'complete', $request_state_name_old)) { |
| notify($ERRORS{'WARNING'}, 0, "failed to change request state: $request_state_name_old/$request_laststate_name_old --> $request_state_name_new/$request_state_name_old"); |
| } |
| } |
| } |
| |
| |
| # Don't call exit if this was called from DESTROY or else DESTROY gets called again |
| if ($calling_sub) { |
| if ($calling_sub =~ /DESTROY/) { |
| notify($ERRORS{'DEBUG'}, 0, "calling subroutine: $calling_sub, skipping call to exit"); |
| return; |
| } |
| else { |
| notify($ERRORS{'DEBUG'}, 0, "calling subroutine: $calling_sub, calling exit"); |
| } |
| } |
| else { |
| notify($ERRORS{'DEBUG'}, 0, "calling subroutine not defined, calling exit"); |
| } |
| exit; |
| } |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| =head2 DESTROY |
| |
| Parameters : none |
| Returns : exits |
| Description : Performs VCL::State module cleanup actions: |
| - Removes computerloadlog 'begin' entries for reservation |
| - If this is a cluster parent reservation, removes |
| computerloadlog 'begin' entries for all reservations in request |
| - Closes the database connection |
| |
| =cut |
| |
| sub DESTROY { |
| my $self = shift; |
| |
| my $address = sprintf('%x', $self); |
| notify($ERRORS{'DEBUG'}, 0, ref($self) . " destructor called, address: $address"); |
| |
| my $calling_sub = get_calling_subroutine(); |
| |
| # Check if normal module object data is available |
| if ($calling_sub && $self && $self->data(0) && !$self->data->is_blockrequest()) { |
| if (!$ENV->{state_exit}) { |
| my $request_id = $self->data->get_request_id(); |
| my @reservation_ids = $self->data->get_reservation_ids(); |
| if (@reservation_ids && $request_id) { |
| $self->state_exit(); |
| #notify($ERRORS{'DEBUG'}, 0, "computerloadlog states remaining after process exits:\n" . format_data(get_request_loadstate_names($request_id))); |
| } |
| elsif (!$SETUP_MODE) { |
| notify($ERRORS{'WARNING'}, 0, "failed to retrieve the reservation ID, computerloadlog 'begin' rows not removed"); |
| } |
| } |
| } |
| |
| # Uncomment to enable database metrics |
| # Print the number of database handles this process created for testing/development |
| #if (defined $ENV->{dbh_count}) { |
| # notify($ERRORS{'DEBUG'}, 0, "number of database handles state process created: $ENV->{dbh_count}"); |
| #} |
| #if (defined $ENV->{database_select_count}) { |
| # notify($ERRORS{'DEBUG'}, 0, "database select queries: $ENV->{database_select_count}"); |
| #} |
| #if (defined $ENV->{database_select_calls}) { |
| # my $database_select_calls_string; |
| # my %hash = %{$ENV->{database_select_calls}}; |
| # my @sorted_keys = sort { $hash{$b} <=> $hash{$a} } keys(%hash); |
| # for my $key (@sorted_keys) { |
| # $database_select_calls_string .= "$ENV->{database_select_calls}{$key}: $key\n"; |
| # } |
| # notify($ERRORS{'DEBUG'}, 0, "database select called from:\n$database_select_calls_string"); |
| #} |
| #if (defined $ENV->{database_execute_count}) { |
| # notify($ERRORS{'DEBUG'}, 0, "database execute queries: $ENV->{database_execute_count}"); |
| #} |
| |
| # Close the database handle |
| if (defined $ENV->{dbh}) { |
| if (!$ENV->{dbh}->disconnect) { |
| notify($ERRORS{'WARNING'}, 0, "\$ENV->{dbh}: database disconnect failed, " . DBI::errstr()); |
| } |
| } |
| |
| # Check for an overridden destructor |
| $self->SUPER::DESTROY if $self->can("SUPER::DESTROY"); |
| |
| # Determine how long process took to run |
| if ($self->{start_time}) { |
| my $duration = (time - $self->{start_time}); |
| notify($ERRORS{'OK'}, 0, ref($self) . " process duration: $duration seconds"); |
| } |
| } ## end sub DESTROY |
| |
| #////////////////////////////////////////////////////////////////////////////// |
| |
| 1; |
| __END__ |
| |
| =head1 SEE ALSO |
| |
| L<http://cwiki.apache.org/VCL/> |
| |
| =cut |