// blob: b9edd47e895e5ce074aa864b362c2bd0a4e89f22 [file] [log] [blame]
///////////////////////////////////////////////////////////////////////////////
//
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
//
///////////////////////////////////////////////////////////////////////////////
// Persistent process test for the monitor
#include <stdio.h>
#include <stdlib.h>
#include "clio.h"
#include "sqevlog/evl_sqlog_writer.h"
#include "montestutil.h"
#include "xmpi.h"
// Test helper object; every monitor request issued by this file goes
// through it (requestSet, requestNewProcess, requestProcInfo, ...).
MonTestUtil util;
// Not referenced in this file.
// NOTE(review): presumably read by the local IO / tracing support code
// that this test links against -- confirm before removing.
long trace_settings = 0;
// Not referenced in this file.
// NOTE(review): presumably the trace output stream expected by the local
// IO library -- confirm.
FILE *shell_locio_trace_file = NULL;
// Set from util.getTrace() in main(); gates the process-death notice
// printout and is passed (as 1/0) to requestNewProcess.
bool tracing = false;
// Set from util.getProcName() in main(); used as the "[name]" prefix of
// every message printed by this test.
const char *MyName;
// Not referenced in this file.
int MyRank = -1;
int gv_ms_su_nid = -1; // Local IO nid to make compatible w/ Seabed
// Not referenced in this file.
// NOTE(review): by its name, the verifier counterpart of gv_ms_su_nid -- confirm.
SB_Verif_Type gv_ms_su_verif = -1;
char ga_ms_su_c_port[MPI_MAX_PORT_NAME] = {0}; // connect
// Callback for notices delivered through local IO (registered in main()
// under the "notice" key).
//
// Handling by notice type:
//   - MsgType_ProcessDeath: details are printed only when tracing is on.
//   - MsgType_NodeDown:     always printed.
//   - anything else:        reported as an unexpected notice.
//
// recv_msg : the notice message to inspect.
// (second, unnamed int parameter is ignored.)
void recv_notice_msg(struct message_def *recv_msg, int )
{
    switch ( recv_msg->type )
    {
    case MsgType_ProcessDeath:
        // Death notices are frequent during this test, so only show them
        // when tracing was requested.
        if ( tracing )
        {
            printf("[%s] Process death notice received for %s (%d, %d:%d),"
                   " trans_id=%lld.%lld.%lld.%lld., aborted=%d\n",
                   MyName,
                   recv_msg->u.request.u.death.process_name,
                   recv_msg->u.request.u.death.nid,
                   recv_msg->u.request.u.death.pid,
                   recv_msg->u.request.u.death.verifier,
                   recv_msg->u.request.u.death.trans_id.txid[0],
                   recv_msg->u.request.u.death.trans_id.txid[1],
                   recv_msg->u.request.u.death.trans_id.txid[2],
                   recv_msg->u.request.u.death.trans_id.txid[3],
                   recv_msg->u.request.u.death.aborted);
        }
        break;

    case MsgType_NodeDown:
        printf("[%s] Node %d (%s) is DOWN.\n",
               MyName,
               recv_msg->u.request.u.down.nid,
               recv_msg->u.request.u.down.node_name);
        break;

    default:
        printf("[%s] unexpected notice, type=%s\n",
               MyName,
               MessageTypeString( recv_msg->type));
        break;
    }
}
// Exercise the monitor's persistent-process handling for process $ABC.
//
// Steps performed, in order:
//   1. Set PERSIST_ZONES to "<persistNode1>,<persistNode2>" and
//      PERSIST_RETRIES to "<maxRetries>,<retryResetTime>" for $ABC.
//   2. Start $ABC on persistNode1 and check that it is reported there.
//   3. Kill $ABC; check that it is reported on persistNode1 again with a
//      different pid (i.e. it was restarted).
//   4. Kill $ABC again; check that process info is no longer available
//      (no restart, retry count exceeded).
//   5. Start $ABC again on persistNode1, down that node, poll until the
//      node is reported down, then check $ABC is reported on persistNode2.
//   6. Kill $ABC once more; check that it is not restarted.
//
// Returns true when every check passes, false otherwise.  Most failures
// return immediately; the last two checks (steps 5/6 placement and final
// no-restart) only clear the result flag so that the remaining steps
// still run.
bool testPersistent ()
{
    bool testSuccess = true;
    int procNid;
    int procPid;
    Verifier_t procVerifier;
    char procName[25];
    char *serverArgs[1] = {(char *) "-t"};
    enum { TEST_FAILED=0 };   // converts to false when returned
    char value[25];

    // Nodes on which $ABC is allowed to run, in order of preference.
    const int persistNode1 = 2;
    const int persistNode2 = 4;

    // Node state value that this test treats as "down".
    // NOTE(review): presumably matches the monitor's State_Down enumerator
    // -- confirm against the message definitions.
    const int nodeStateDown = 2;

    // Set persistent process registry values for process $ABC
    printf("[%s] For process $ABC setting PERSIST_ZONES=%d,%d\n",
           MyName, persistNode1, persistNode2);
    // Bounded formatting: value is a fixed-size buffer.
    snprintf(value, sizeof(value), "%d,%d", persistNode1, persistNode2);
    if (!util.requestSet ( ConfigType_Process, "$ABC", "PERSIST_ZONES",
                           value))
    {
        return TEST_FAILED;
    }

    int maxRetries = 1;
    int retryResetTime = 10;
    printf("[%s] For process $ABC setting PERSIST_RETRIES=%d,%d\n",
           MyName, maxRetries, retryResetTime);
    snprintf(value, sizeof(value), "%d,%d", maxRetries, retryResetTime);
    // Set count of times to restart and persistent "max time".
    if (!util.requestSet ( ConfigType_Process, "$ABC", "PERSIST_RETRIES",
                           value))
    {
        return TEST_FAILED;
    }

    // Start the server process
    if (!util.requestNewProcess ( persistNode1, ProcessType_Generic, false,
                                  "$ABC", "server", "", "",
                                  ((tracing) ? 1: 0), serverArgs,
                                  procNid, procPid, procVerifier, procName))
    {
        return TEST_FAILED;
    }

    // Allow time for process creation
    sleep(1);

    // Verify process is running
    int statNid1;
    int statPid1;
    Verifier_t statVerifier1;
    if (util.requestProcInfo ( "$ABC", statNid1, statPid1, statVerifier1))
    {
        if ( statNid1 != persistNode1 )
        {
            printf ("[%s] process $ABC (%d, %d:%d) is not running on node %d "
                    "as expected.\n", MyName, statNid1, statPid1,
                    statVerifier1, persistNode1);
            return TEST_FAILED;
        }
        else
        {
            printf ("[%s] Started persistent process $ABC (%d, %d:%d)\n",
                    MyName, statNid1, statPid1, statVerifier1 );
        }
    }
    else
    {
        printf ("[%s] Started persisten process $ABC but unable to get "
                "process info for it.\n", MyName);
        return TEST_FAILED;
    }

    printf ("[%s] Killing process $ABC\n", MyName);

    // Kill the server process, expect it to be restarted on the same node
    util.requestKill ( "$ABC", procVerifier );

    // Allow time for process kill and after-effects
    sleep(1);

    // Verify process was restarted on original node
    int statNid2;
    int statPid2;
    Verifier_t statVerifier2;
    if (util.requestProcInfo ( "$ABC", statNid2, statPid2, statVerifier2))
    {
        if ( statNid2 != persistNode1 )
        {
            printf ("[%s] process $ABC (%d, %d:%d) is not running on node %d "
                    "as expected.\n", MyName, statNid2, statPid2,
                    statVerifier2, persistNode1);
            return TEST_FAILED;
        }
        // Same pid as before the kill means no new process was created.
        if ( statPid2 == statPid1 )
        {
            printf ("[%s] process $ABC apparently not restarted, old pid is "
                    "the same as the current pid (%d)\n", MyName, statPid2);
            return TEST_FAILED;
        }
        else
        {
            printf ("[%s] Persistent process $ABC (%d, %d:%d) was restarted as "
                    "expected.\n",
                    MyName, statNid2, statPid2, statVerifier2);
        }
    }
    else
    {
        printf ("[%s] Unable to get process info for $ABC\n", MyName);
        return TEST_FAILED;
    }

    printf ("[%s] Killing process $ABC\n", MyName);

    // Kill the server process again, do not expect restart since persistent
    // retry count has been exceeded.
    util.requestKill ( "$ABC", -1 );

    // Allow time for process kill and after-effects
    sleep(1);

    if (util.requestProcInfo ( "$ABC", statNid2, statPid2, statVerifier2))
    {
        printf("[%s] Unexpectedly got $ABC (%d, %d:%d) process status\n",
               MyName, statNid2, statPid2, statVerifier2);
        return TEST_FAILED;
    }
    else
    {
        printf("[%s] Confirmed: process $ABC was not restarted.\n",
               MyName);
    }

    // Start the server process
    if (!util.requestNewProcess ( persistNode1, ProcessType_Generic, false,
                                  "$ABC", "server", "", "",
                                  ((tracing) ? 1: 0), serverArgs,
                                  procNid, procPid, procVerifier, procName))
    {
        return TEST_FAILED;
    }

    // Allow time for process creation
    sleep(1);

    // Verify process is running
    if (util.requestProcInfo ( "$ABC", statNid1, statPid1, statVerifier1))
    {
        if ( statNid1 != persistNode1 )
        {
            printf ("[%s] process $ABC (%d, %d:%d) is not running on node %d "
                    "as expected.\n", MyName, statNid1, statPid1, statVerifier1,
                    persistNode1);
            return TEST_FAILED;
        }
        else
        {
            printf ("[%s] Started persistent process $ABC (%d, %d:%d)\n",
                    MyName, statNid1, statPid1, statVerifier1);
        }
    }
    else
    {
        printf ("[%s] Unable to get process info for $ABC\n", MyName);
        return TEST_FAILED;
    }

    printf ("[%s] Downing node %d\n", MyName, persistNode1);

    // Down the node
    util.requestNodeDown ( persistNode1 );

    // Verify node is down: poll up to 30 times, 2 seconds apart, and
    // remember the last state seen so the check after the loop does not
    // have to touch nodeData again.
    struct NodeInfo_reply_def * nodeData = NULL;
    int lastNodeState = -1;
    for (int i=0; i<30; ++i)
    {
        if ( !util.requestNodeInfo ( -1, false, -1, -1, nodeData )
             || nodeData == NULL )
        {   // Failed to get node data
            printf ("[%s] Unable to get node info\n", MyName);
            return TEST_FAILED;
        }

        // Got node data.  The entry examined is the one for the node that
        // was just downed (persistNode1) rather than a literal index.
        lastNodeState = nodeData->node[persistNode1].state;
        if ( lastNodeState == nodeStateDown )
        {
            printf ("[%s] Process status for node %d is DOWN.\n", MyName,
                    persistNode1);
            break;
        }

        printf ("[%s] Process status for node %d is %d.\n", MyName,
                persistNode1, lastNodeState);
        sleep(2);
    }

    if ( lastNodeState != nodeStateDown )
    {
        printf ("[%s] After downing node %d, node state=%d but expected "
                "state=%d\n", MyName, persistNode1, lastNodeState,
                nodeStateDown);
        return TEST_FAILED;
    }

    // Verify process was restarted on new node
    if (util.requestProcInfo ( "$ABC", statNid1, statPid1, statVerifier1))
    {
        if ( statNid1 != persistNode2 )
        {
            printf ("[%s] process $ABC (%d, %d:%d) is not running on node %d "
                    "as expected.\n", MyName, statNid1, statPid1,
                    statVerifier1, persistNode2);
            // Record the failure but continue so the process still gets
            // killed below.
            testSuccess = false;
        }
        else
        {
            printf ("[%s] Persistent process $ABC (%d, %d:%d) was restarted as "
                    "expected.\n",
                    MyName, statNid1, statPid1, statVerifier1);
        }
    }
    else
    {
        printf ("[%s] Unable to get process info for $ABC\n", MyName);
        return TEST_FAILED;
    }

    printf ("[%s] Killing process $ABC\n", MyName);

    // Kill the server process again, do not expect restart because
    // restart count exceeded.
    util.requestKill ( "$ABC", -1 );

    // Allow time for process kill and after-effects
    sleep(1);

    // Verify process not restarted
    if (util.requestProcInfo( "$ABC", statNid2, statPid2, statVerifier2 ))
    {
        printf("[%s] Unexpectedly got $ABC (%d, %d:%d) process status\n",
               MyName, statNid2, statPid2, statVerifier2);
        testSuccess = false;
    }
    else
    {
        printf("[%s] Confirmed: process $ABC was not restarted.\n",
               MyName);
    }

    return testSuccess;
}
// Test driver.
//
// Processes the command line, initializes local IO, registers the notice
// callback, sends the startup message, and then runs the persistent
// process test if the node count check (6 nodes) succeeds.  Prints a
// PASSED/FAILED summary and exits with status 0 on success, 1 on failure.
int main (int argc, char *argv[])
{
    util.processArgs (argc, argv);
    tracing = util.getTrace();
    MyName = util.getProcName();

    util.InitLocalIO( );
    assert (gp_local_mon_io);

    // Route "notice" messages from local IO to recv_notice_msg.
    gp_local_mon_io->set_cb(recv_notice_msg, "notice");

    // Announce ourselves to the monitor.
    util.requestStartup ();

    // The persistent test only runs when the node count check passes
    // (short-circuit evaluation).
    const bool testSuccess = util.validateNodeCount(6) && testPersistent();

    const char *verdict = testSuccess ? "PASSED" : "FAILED";
    printf("Persistent Process Test:\t%s\n", verdict);

    // Tell the monitor we are exiting, then release the port.
    util.requestExit ( );
    XMPI_Close_port (util.getPort());

    // Deleting a null pointer is a no-op, so no guard is needed.
    delete gp_local_mon_io;

    const int exitStatus = testSuccess ? 0 : 1;
    exit ( exitStatus );
}