blob: 69a186d6709831ace91b5dd467cd82847f40d9f3 [file] [log] [blame]
///////////////////////////////////////////////////////////////////////////////
//
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
//
///////////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <iostream>
#include "/usr/include/linux/watchdog.h"
#include "redirector.h"
using namespace std;
#include "msgdef.h"
#include "internal.h"
#include "monlogging.h"
#include "monsonar.h"
#include "montrace.h"
#include "monitor.h"
#include "clusterconf.h"
#include "lnode.h"
#include "pnode.h"
#include "mlio.h"
extern bool IsRealCluster;
extern CommType_t CommType;
extern CNodeContainer *Nodes;
extern CNode *MyNode;
extern CMonitor *Monitor;
extern CMonStats *MonStats;
extern bool usingCpuAffinity;
extern bool usingTseCpuAffinity;
// Render the low totalCores bits of coreMask as a NUL-terminated string of
// '0'/'1' characters. The highest-numbered core is written first, so core 0
// ends up as the rightmost character. The caller supplies a buffer of at
// least totalCores + 1 bytes.
void CoreMaskString( char *str, cpu_set_t coreMask, int totalCores )
{
    // Terminate (truncate) the string at the total core count
    str[totalCores] = '\0';

    // Fill left to right while the core number runs from high to low
    for ( int pos = 0; pos < totalCores; ++pos )
    {
        const int core = totalCores - 1 - pos;
        if ( CPU_ISSET( core, &coreMask ) )
        {
            str[pos] = '1';
        }
        else
        {
            str[pos] = '0';
        }
    }
}
// Map a ZoneType value to the comma-separated role list used when printing
// a node's roles. Values not listed below yield "Undefined".
const char *RoleTypeString( ZoneType type )
{
    switch( type )
    {
        case ZoneType_Edge:
            return( "connection" );
        case ZoneType_Excluded:
            return( "excluded" );
        case ZoneType_Aggregation:
            return( "aggregation" );
        case ZoneType_Storage:
            return( "storage" );
        case ZoneType_Frontend:
            return( "connection,aggregation" );
        case ZoneType_Backend:
            return( "aggregation,storage" );
        case ZoneType_Any:
            return( "connection,aggregation,storage" );
        default:
            break;
    }
    return( "Undefined" );
}
// Construct a logical node with the given node id, core mask, processor
// count and zone type, owned by the 'lnodes' container. All counters and
// list links start out cleared; numCores_ and firstCore_ are then derived
// from the core mask.
CLNode::CLNode( CLNodeContainer *lnodes
              , int nid
              , cpu_set_t &coreMask
              , int processors
              , ZoneType zoneType
              )
       :CProcessContainer()
       ,Nid(nid)
       ,CoreMask(coreMask)
       ,NumProcessors(processors)
       ,NodeZoneType(zoneType)
       ,ChangeState( false )
       ,tseCnt_(0)
       ,tseBackupCnt_(0)
       ,lnodes_(lnodes)
       ,cpuUser_(0)
       ,cpuNice_(0)
       ,cpuSystem_(0)
       ,cpuIdle_(0)
       ,cpuIowait_(0)
       ,cpuIrq_(0)
       ,cpuSoftIrq_(0)
       ,numCores_(0)
       ,firstCore_(-1)
       ,lastTseCoreAssigned_(-1)
       ,lastBackupTseCoreAssigned_(-1)
       ,next_(NULL)
       ,prev_(NULL)
       ,nextP_(NULL)
       ,prevP_(NULL)
       ,SSMProc(NULL)
{
    const char method_name[] = "CLNode::CLNode";
    TRACE_ENTRY;

    // Eyecatcher marks a live object when inspecting memory
    memcpy(&eyecatcher_, "LNOD", 4);

    // Count the cores present in the mask and remember the lowest one.
    // Both values are used when retrieving processor statistics.
    for ( int core = 0; core < MAX_CORES; ++core )
    {
        if ( !CPU_ISSET( core, &coreMask ) )
        {
            continue;
        }
        if ( firstCore_ == -1 )
        {
            firstCore_ = core;
        }
        numCores_++;
    }

    if ( firstCore_ == -1 )
    {
        // The mask unexpectedly names no processors for this logical
        // node; fall back to a single core, core 0.
        firstCore_ = 0;
        numCores_ = 1;
    }

    if ( trace_settings & (TRACE_PROCESS | TRACE_PROCESS_DETAIL) )
    {
        // The pidMap location helps when reading later trace output
        trace_printf("%s@%d Nid %d, pidMap_ (%p)\n",
                     method_name, __LINE__, Nid, GetPidMap());
    }

    TRACE_EXIT;
}
// Destructor: traces the node id being destroyed and flips the eyecatcher
// to lower case so a deleted object can be recognized in memory.
CLNode::~CLNode (void)
{
    const char method_name[] = "CLNode::~CLNode";
    TRACE_ENTRY;

    const bool traceIt = (trace_settings & (TRACE_INIT | TRACE_REQUEST)) != 0;
    if ( traceIt )
    {
        trace_printf( "%s@%d nid=%d\n", method_name, __LINE__, Nid );
    }

    // Lower-case eyecatcher identifies a deleted object
    memcpy(&eyecatcher_, "lnod", 4);

    TRACE_EXIT;
}
// Unlink this node from the next_/prev_ list whose ends are *head and
// *tail. The list ends are moved off this node first, then the neighbours
// are stitched together. This node's own next_/prev_ are left untouched.
void CLNode::DeLink (CLNode **head, CLNode **tail)
{
    const char method_name[] = "CLNode::DeLink";
    TRACE_ENTRY;

    if ( this == *head )
    {
        *head = next_;
    }
    if ( this == *tail )
    {
        *tail = prev_;
    }

    if ( prev_ != NULL )
    {
        prev_->next_ = next_;
    }
    if ( next_ != NULL )
    {
        next_->prev_ = prev_;
    }

    TRACE_EXIT;
}
// Unlink this node from the nextP_/prevP_ list whose ends are *head and
// *tail. The list ends are moved off this node first, then the neighbours
// are stitched together. This node's own nextP_/prevP_ are left untouched.
void CLNode::DeLinkP(CLNode **head, CLNode **tail)
{
    const char method_name[] = "CLNode::DeLinkP";
    TRACE_ENTRY;

    if ( this == *head )
    {
        *head = nextP_;
    }
    if ( this == *tail )
    {
        *tail = prevP_;
    }

    if ( prevP_ != NULL )
    {
        prevP_->nextP_ = nextP_;
    }
    if ( nextP_ != NULL )
    {
        nextP_->prevP_ = prevP_;
    }

    TRACE_EXIT;
}
// Broadcast a "node added" notice for this logical node to the processes of
// the local node. Nothing is sent unless MyNode is in State_Up, and the
// whole body is compiled out for the name server process.
void CLNode::Added( void )
{
    const char method_name[] = "CLNode::Added";
    TRACE_ENTRY;

#ifndef NAMESERVER_PROCESS
    if ( MyNode->GetState() == State_Up )
    {
        // Build the node added notice
        struct message_def *msg = new struct message_def;
        msg->type = MsgType_NodeAdded;
        msg->noreply = true;
        msg->u.request.type = ReqType_Notice;
        msg->u.request.u.node_added.nid = Nid;
        msg->u.request.u.node_added.zid = GetNode()->GetZone();

        const char *nodeName = GetNode()->GetName();
        if (IsRealCluster)
        {
            STRCPY(msg->u.request.u.node_added.node_name, nodeName);
        }
        else
        {
            // Outside a real cluster the name is qualified with the nid.
            // Bounded write: assumes node_name is a fixed-size char array
            // inside message_def (as the STRCPY usage suggests) - confirm.
            snprintf( msg->u.request.u.node_added.node_name
                    , sizeof(msg->u.request.u.node_added.node_name)
                    , "%s:%d", nodeName, Nid );
        }

        if (trace_settings & (TRACE_INIT | TRACE_REQUEST))
        {
            trace_printf( "%s@%d - Broadcasting node added nid=%d, zid=%d, name=(%s)\n"
                        , method_name, __LINE__
                        , msg->u.request.u.node_added.nid
                        , msg->u.request.u.node_added.zid
                        , msg->u.request.u.node_added.node_name );
        }

        // The message is deleted here right after Bcast returns
        MyNode->Bcast( msg );
        delete msg;
    }
#endif

    TRACE_EXIT;
}
// Broadcast a "node changed" notice for this logical node to the processes
// of the local node. Core range, processor count and roles are taken from
// lnodeConfig. Nothing is sent unless MyNode is in State_Up. In the name
// server build the function only traces entry and exit.
void CLNode::Changed( CLNodeConfig *lnodeConfig )
{
    const char method_name[] = "CLNode::Changed";
    TRACE_ENTRY;

#ifdef NAMESERVER_PROCESS
    (void) lnodeConfig; // unused in the name server build
#else
    if ( MyNode->GetState() == State_Up )
    {
        // Build the node changed notice
        struct message_def *msg = new struct message_def;
        msg->type = MsgType_NodeChanged;
        msg->noreply = true;
        msg->u.request.type = ReqType_Notice;
        msg->u.request.u.node_changed.nid = Nid;
        msg->u.request.u.node_changed.pnid = GetNode()->GetPNid();
        msg->u.request.u.node_changed.zid = GetNode()->GetZone();
        msg->u.request.u.node_changed.first_core = lnodeConfig->GetFirstCore();
        msg->u.request.u.node_changed.last_core = lnodeConfig->GetLastCore();
        msg->u.request.u.node_changed.processors = lnodeConfig->GetProcessors();
        msg->u.request.u.node_changed.roles = static_cast<int>(lnodeConfig->GetZoneType());

        const char *nodeName = GetNode()->GetName();
        if (IsRealCluster)
        {
            STRCPY(msg->u.request.u.node_changed.node_name, nodeName);
        }
        else
        {
            // Outside a real cluster the name is qualified with the nid.
            // Bounded write: assumes node_name is a fixed-size char array
            // inside message_def (as the STRCPY usage suggests) - confirm.
            snprintf( msg->u.request.u.node_changed.node_name
                    , sizeof(msg->u.request.u.node_changed.node_name)
                    , "%s:%d", nodeName, Nid );
        }

        if (trace_settings & (TRACE_INIT | TRACE_REQUEST))
        {
            trace_printf( "%s@%d - Broadcasting node changed nid=%d, zid=%d, "
                          "pnid=%d, name=(%s), cores=%d:%d, "
                          "processors=%d, roles=(%s)\n"
                        , method_name, __LINE__
                        , msg->u.request.u.node_changed.nid
                        , msg->u.request.u.node_changed.zid
                        , msg->u.request.u.node_changed.pnid
                        , msg->u.request.u.node_changed.node_name
                        , msg->u.request.u.node_changed.first_core
                        , msg->u.request.u.node_changed.last_core
                        , msg->u.request.u.node_changed.processors
                        , RoleTypeString(static_cast<ZoneType>(msg->u.request.u.node_changed.roles)) );
        }

        // The message is deleted here right after Bcast returns
        MyNode->Bcast( msg );
        delete msg;
    }
#endif

    TRACE_EXIT;
}
// Broadcast a "node deleted" notice for this logical node to the processes
// of the local node. Nothing is sent unless MyNode is in State_Up, and the
// whole body is compiled out for the name server process.
void CLNode::Deleted( void )
{
    const char method_name[] = "CLNode::Deleted";
    TRACE_ENTRY;

#ifndef NAMESERVER_PROCESS
    if ( MyNode->GetState() == State_Up )
    {
        // Build the node deleted notice
        struct message_def *msg = new struct message_def;
        msg->type = MsgType_NodeDeleted;
        msg->noreply = true;
        msg->u.request.type = ReqType_Notice;
        msg->u.request.u.node_deleted.nid = Nid;
        msg->u.request.u.node_deleted.zid = GetNode()->GetZone();

        const char *nodeName = GetNode()->GetName();
        if (IsRealCluster)
        {
            STRCPY(msg->u.request.u.node_deleted.node_name, nodeName);
        }
        else
        {
            // Outside a real cluster the name is qualified with the nid.
            // Bounded write: assumes node_name is a fixed-size char array
            // inside message_def (as the STRCPY usage suggests) - confirm.
            snprintf( msg->u.request.u.node_deleted.node_name
                    , sizeof(msg->u.request.u.node_deleted.node_name)
                    , "%s:%d", nodeName, Nid );
        }

        if (trace_settings & (TRACE_INIT | TRACE_REQUEST))
        {
            trace_printf( "%s@%d - Broadcasting node deleted nid=%d, zid=%d, name=(%s)\n"
                        , method_name, __LINE__
                        , msg->u.request.u.node_deleted.nid
                        , msg->u.request.u.node_deleted.zid
                        , msg->u.request.u.node_deleted.node_name );
        }

        // The message is deleted here right after Bcast returns
        MyNode->Bcast( msg );
        delete msg;
    }
#endif

    TRACE_EXIT;
}
// Broadcast a "node down" notice for this logical node to the processes of
// the local node, bumping the sonar node-down counter when sonar monitoring
// is enabled. Nothing is sent unless MyNode is in State_Up, and the whole
// body is compiled out for the name server process.
void CLNode::Down( void )
{
    const char method_name[] = "CLNode::Down";
    TRACE_ENTRY;

#ifndef NAMESERVER_PROCESS
    if ( MyNode->GetState() == State_Up )
    {
        // Record statistics (sonar counters)
        if (sonar_verify_state(SONAR_ENABLED | SONAR_MONITOR_ENABLED))
            MonStats->notice_node_down_Incr();

        // Build the node down notice
        struct message_def *msg = new struct message_def;
        msg->type = MsgType_NodeDown;
        msg->noreply = true;
        msg->u.request.type = ReqType_Notice;
        msg->u.request.u.down.nid = Nid;
        msg->u.request.u.down.takeover = GetNode()->IsActivatingSpare();
#ifdef USE_SEQUENCE_NUM
        msg->u.request.u.down.seqnum = Monitor->GetTimeSeqNum();
#endif

        const char *nodeName = GetNode()->GetName();
        if (IsRealCluster)
        {
            STRCPY(msg->u.request.u.down.node_name, nodeName);
        }
        else
        {
            // Outside a real cluster the name is qualified with the nid.
            // Bounded write: assumes node_name is a fixed-size char array
            // inside message_def (as the STRCPY usage suggests) - confirm.
            snprintf( msg->u.request.u.down.node_name
                    , sizeof(msg->u.request.u.down.node_name)
                    , "%s:%d", nodeName, Nid );
        }

        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
        {
            trace_printf( "%s@%d - Broadcasting Node Down nid=%d, name=(%s), takeover=%d\n"
                        , method_name, __LINE__, GetNid()
                        , GetNode()->GetName(), msg->u.request.u.down.takeover );
        }

        // The message is deleted here right after Bcast returns
        MyNode->Bcast( msg );
        delete msg;
    }
#endif

    TRACE_EXIT;
}
// Look up a process by pid by forwarding to the owning node's GetProcess().
// Returns whatever that lookup returns.
CProcess *CLNode::GetProcessL(int pid)
{
    const char method_name[] = "CLNode::GetProcessL";
    TRACE_ENTRY;

    // Temporary trace
    if (trace_settings & TRACE_PROCESS_DETAIL)
    {
        trace_printf("%s@%d - pid %d\n",
                     method_name, __LINE__, pid);
    }

    CProcess *const found = GetNode()->GetProcess(pid);

    TRACE_EXIT;
    return found;
}
// Look up a process by name by forwarding to the owning node's
// GetProcess(name, checkstate). Returns whatever that lookup returns.
CProcess *CLNode::GetProcessL(char *name, bool checkstate)
{
    const char method_name[] = "CLNode::GetProcessL";
    TRACE_ENTRY;

    // Temporary trace
    if (trace_settings & TRACE_PROCESS_DETAIL)
    {
        trace_printf("%s@%d - name=%s, checkstate=%d\n",
                     method_name, __LINE__, name, checkstate);
    }

    CProcess *const found = GetNode()->GetProcess(name, checkstate);

    TRACE_EXIT;
    return found;
}
// Look up a process by pid and verifier by forwarding to the owning node's
// GetProcess(pid, verifier, checkstate). Traces the request and the result
// when process-detail tracing is on.
CProcess *CLNode::GetProcessL( int pid
                             , Verifier_t verifier
                             , bool checkstate )
{
    const char method_name[] = "CLNode::GetProcessL(pid,verifier)";
    TRACE_ENTRY;

    // Temporary trace
    if (trace_settings & TRACE_PROCESS_DETAIL)
    {
        trace_printf("%s@%d - nid, %d, pid=%d, verifier=%d, checkstate=%d\n",
                     method_name, __LINE__, GetNid(), pid, verifier, checkstate);
    }

    CProcess *const found = GetNode()->GetProcess( pid, verifier, checkstate );

    // Temporary trace
    if (trace_settings & TRACE_PROCESS_DETAIL)
    {
        const char *foundName = "";
        if ( found != NULL )
        {
            foundName = found->GetName();
        }
        trace_printf("%s@%d - entry=%p, pid=%d, Name=%s\n",
                     method_name, __LINE__, found, pid,
                     foundName);
    }

    TRACE_EXIT;
    return found;
}
// Look up a process by name and verifier by forwarding to the owning node's
// GetProcess(name, verifier, checkstate). Traces the request and the result
// when process-detail tracing is on.
CProcess *CLNode::GetProcessL( const char *name
                             , Verifier_t verifier
                             , bool checkstate )
{
    const char method_name[] = "CLNode::GetProcessL(name,verifier)";
    TRACE_ENTRY;

    // Temporary trace
    if (trace_settings & TRACE_PROCESS_DETAIL)
    {
        trace_printf("%s@%d - name=%s, verifier=%d, checkstate=%d\n",
                     method_name, __LINE__, name, verifier, checkstate);
    }

    CProcess *const found = GetNode()->GetProcess( name, verifier, checkstate );

    // Temporary trace
    if (trace_settings & TRACE_PROCESS_DETAIL)
    {
        const char *foundName = "";
        if ( found != NULL )
        {
            foundName = found->GetName();
        }
        trace_printf("%s@%d - entry=%p, Name=%s\n",
                     method_name, __LINE__, found,
                     foundName);
    }

    TRACE_EXIT;
    return found;
}
// Forward a process-startup completion to the owning node's
// CompleteProcessStartup() with the same arguments and return its result.
CProcess *CLNode::CompleteProcessStartup( char *process_name,
                                          char *port,
                                          int os_pid,
                                          bool event_messages,
                                          bool system_messages,
                                          struct timespec *creation_time,
                                          int origPNidNs )
{
    const char method_name[] = "CLNode::CompleteProcessStartup";
    TRACE_ENTRY;

    CNode *const owner = GetNode();
    CProcess *const started = owner->CompleteProcessStartup( process_name
                                                           , port
                                                           , os_pid
                                                           , event_messages
                                                           , system_messages
                                                           , creation_time
                                                           , origPNidNs );

    TRACE_EXIT;
    return started;
}
// Return the node object reported by this logical node's container.
CNode *CLNode::GetNode( void )
{
    const char method_name[] = "CLNode::GetNode";
    TRACE_ENTRY;
    TRACE_EXIT;

    CNode *const owner = lnodes_->GetNode();
    return( owner );
}
// Return the state of the node reported by this logical node's container,
// or State_Unknown when the container has no node.
STATE CLNode::GetState( void )
{
    CNode *owner = lnodes_->GetNode();
    const char method_name[] = "CLNode::GetState";
    TRACE_ENTRY;
    TRACE_EXIT;

    if ( owner == NULL )
    {
        return( State_Unknown );
    }
    return( owner->GetState() );
}
int CLNode::GetZone( void )
{
const char method_name[] = "CLNode::GetZone";
TRACE_ENTRY;
TRACE_EXIT;
return( lnodes_->GetNode()->GetZone() );
}
bool CLNode::IsKillingNode( void )
{
const char method_name[] = "CLNode::IsKillingNode";
TRACE_ENTRY;
TRACE_EXIT;
return( lnodes_->GetNode()->IsKillingNode() );
}
// Append 'entry' after this node in the next_/prev_ list and return
// 'entry'.
CLNode *CLNode::Link (CLNode * entry)
{
    const char method_name[] = "CLNode::Link";
    TRACE_ENTRY;

    // Forward link from this node, then back link from the new entry
    this->next_ = entry;
    entry->prev_ = this;

    TRACE_EXIT;
    return entry;
}
// Append 'entry' after this node in the nextP_/prevP_ list and return
// 'entry'.
CLNode *CLNode::LinkP(CLNode * entry)
{
    const char method_name[] = "CLNode::LinkP";
    TRACE_ENTRY;

    // Forward link from this node, then back link from the new entry
    this->nextP_ = entry;
    entry->prevP_ = this;

    TRACE_EXIT;
    return entry;
}
#ifndef NAMESERVER_PROCESS
void CLNode::PrepareForTransactions( bool activatingSpare )
{
const char method_name[] = "CLNode::PrepareForTransactions";
TRACE_ENTRY;
struct message_def *msg;
if ( trace_settings &
(TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC | TRACE_INIT) )
{
trace_printf( "%s@%d - %s (nid=%d, state=%d) sending prepare notice to DTM and SPX\n"
, method_name, __LINE__
, MyNode->GetName(), MyNode->GetPNid(), MyNode->GetState());
}
if ( MyNode->GetState() == State_Up )
{
CLNode *lnode = MyNode->GetFirstLNode();
for ( ; lnode; lnode = lnode->GetNextP() )
{
// Send local DTM processes a node prepare message for each
// logical node activated by spare node
CProcess *process = lnode->GetProcessLByType( ProcessType_DTM );
if ( process )
{
// Record statistics (sonar counters)
if (sonar_verify_state(SONAR_ENABLED | SONAR_MONITOR_ENABLED))
MonStats->notice_node_up_Incr();
// send node prepare notice to our node's DTM process
msg = new struct message_def;
msg->type = MsgType_NodePrepare;
msg->noreply = true;
msg->u.request.type = ReqType_Notice;
msg->u.request.u.prepare.nid = Nid;
msg->u.request.u.prepare.takeover = activatingSpare ? true : false;
const char * nodeName = GetNode()->GetName();
STRCPY(msg->u.request.u.prepare.node_name, nodeName);
SQ_theLocalIOToClient->putOnNoticeQueue( process->GetPid()
, process->GetVerifier()
, msg
, NULL);
if ( trace_settings &
(TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC | TRACE_INIT) )
{
trace_printf( "%s@%d - Sending node %d (takeover=%d) prepare notice to DTM %s (pid=%d)\n"
, method_name, __LINE__
, Nid , msg->u.request.u.prepare.takeover
, process->GetName(), process->GetPid() );
}
}
// Send local SPX processes a node prepare message for each
// logical node activated by spare node
process = lnode->GetProcessLByType( ProcessType_SPX );
if ( process )
{
// Record statistics (sonar counters)
if (sonar_verify_state(SONAR_ENABLED | SONAR_MONITOR_ENABLED))
MonStats->notice_node_up_Incr();
// send node prepare notice to our node's DTM process
msg = new struct message_def;
msg->type = MsgType_NodePrepare;
msg->noreply = true;
msg->u.request.type = ReqType_Notice;
msg->u.request.u.prepare.nid = Nid;
msg->u.request.u.prepare.takeover = activatingSpare ? true : false;
const char * nodeName = GetNode()->GetName();
STRCPY(msg->u.request.u.prepare.node_name, nodeName);
SQ_theLocalIOToClient->putOnNoticeQueue( process->GetPid()
, process->GetVerifier()
, msg
, NULL);
if ( trace_settings &
(TRACE_RECOVERY | TRACE_REQUEST | TRACE_INIT) )
{
trace_printf( "%s@%d - Sending node %d prepare notice to SPX %s (pid=%d)\n"
, method_name, __LINE__, Nid
, process->GetName(), process->GetPid());
}
}
}
}
TRACE_EXIT;
}
#endif
#ifndef NAMESERVER_PROCESS
void CLNode::SendDTMRestarted( void )
{
const char method_name[] = "CLNode::SendDTMRestarted";
TRACE_ENTRY;
struct message_def *msg;
if ( trace_settings &
(TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC | TRACE_INIT) )
{
trace_printf( "%s@%d - %s (pnid=%d, state=%d) sending DTM restarted in nid=%d notice to local DTMs\n"
, method_name, __LINE__
, MyNode->GetName(), MyNode->GetPNid(), MyNode->GetState(), GetNid() );
}
if ( MyNode->GetState() == State_Up )
{
CLNode *lnode = MyNode->GetFirstLNode();
for ( ; lnode; lnode = lnode->GetNextP() )
{
// Send local DTM processes a DTM restarted message
CProcess *process = lnode->GetProcessLByType( ProcessType_DTM );
if ( process )
{
// send node prepare notice to our node's DTM process
msg = new struct message_def;
msg->type = MsgType_TmRestarted;
msg->noreply = true;
msg->u.request.type = ReqType_Notice;
msg->u.request.u.tm_restart.nid = Nid;
msg->u.request.u.tm_restart.pnid = GetNode()->GetPNid();
const char * nodeName = GetNode()->GetName();
STRCPY(msg->u.request.u.tm_restart.node_name, nodeName);
SQ_theLocalIOToClient->putOnNoticeQueue( process->GetPid()
, process->GetVerifier()
, msg
, NULL);
if ( trace_settings &
(TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC | TRACE_INIT) )
{
trace_printf( "%s@%d - Sending nid=%d DTM restarted notice to DTM %s (nid=%d,pid=%d)\n"
, method_name, __LINE__
, Nid , process->GetName(), process->GetNid(), process->GetPid() );
}
}
}
}
TRACE_EXIT;
}
#endif
#ifndef NAMESERVER_PROCESS
// Set the CPU affinity of process 'pid'.
//
// When TSE affinity is enabled and the pid resolves to a primary (non
// backup) TSE process, successive calls pin the process to cores 0, 1, 2,
// 3, 0, ... using tseCnt_ as the rotation counter. Every other process is
// allowed to run on cores 0 .. GetNumCores()-1 of the node. A failure of
// sched_setaffinity() is logged and otherwise ignored.
void CLNode::SetAffinity( pid_t pid, PROCESSTYPE type )
{
    const char method_name[] = "CLNode::SetAffinity";
    TRACE_ENTRY;

    // Number of cores the primary TSEs are rotated over (cores 0..3)
    const int tseCoreCount = 4;

    char la_buf[MON_STRING_BUF_SIZE];
    cpu_set_t mask;
    // Always start from an empty mask so it is never passed uninitialized
    CPU_ZERO( &mask );

    CProcess *process = GetProcessL( pid );

    if ( usingTseCpuAffinity && type == ProcessType_TSE
      && process != NULL && (!process->IsBackup()) )
    {
        // tseCnt_ is also advanced by the SetAffinity(CProcess*) overload,
        // which can leave it outside 0..3; restart the rotation in that
        // case instead of using an unset mask.
        int core = tseCnt_;
        if ( core < 0 || core >= tseCoreCount )
        {
            core = 0;
        }
        tseCnt_ = (core + 1) % tseCoreCount;
        CPU_SET( core, &mask );
    }
    else
    {
        // Let it float
        const int numCores = lnodes_->GetNode()->GetNumCores();
        for ( int i = 0; i < numCores; i++ )
        {
            CPU_SET( i, &mask );
        }
    }

    const int rc = sched_setaffinity( pid, sizeof(cpu_set_t), &mask );
    if ( rc )
    {
        snprintf( la_buf, sizeof(la_buf), "%s"
                , "[CLNode::SetAffinity], Can't set processor affinity.\n" );
        mon_log_write( MON_LNODE_SETAFFINITY_1, SQ_LOG_ERR, la_buf );
    }

    TRACE_EXIT;
}
#endif
#ifndef NAMESERVER_PROCESS
// Set the CPU affinity of 'process' with sched_setaffinity(). One of four
// policies is chosen, in this order:
//   1. TSE process while usingTseCpuAffinity is set: pin to a single core,
//      rotating through numCores_ consecutive core numbers that start at
//      firstCore_. Primaries and backups rotate independently.
//   2. Generic process for which isCmpOrEsp() is true, while CommType is
//      CommType_InfiniBand: cores 0 .. GetNumCores()-2 (at least core 0).
//   3. usingCpuAffinity set: this logical node's configured CoreMask.
//   4. Otherwise: the physical node's affinity mask.
// A failure of sched_setaffinity() is logged and otherwise ignored.
// NOTE(review): 'process' is dereferenced without a NULL check - callers
// are presumably expected to pass a valid pointer; confirm.
void CLNode::SetAffinity( CProcess *process )
{
int rc = 0;
char coreMaskStr[MAX_CORES+1];
cpu_set_t mask;
char la_buf[MON_STRING_BUF_SIZE];
const char method_name[] = "CLNode::SetAffinity";
TRACE_ENTRY;
if ( usingTseCpuAffinity && process->GetType( ) == ProcessType_TSE )
{
// round-robin the TSE affinity within the logical node mask
if ( !process->IsBackup() )
{
// Primary TSE: lastTseCoreAssigned_ / tseCnt_ hold the rotation state
if ( lastTseCoreAssigned_ == -1 )
{
// always start with the first core and count it
lastTseCoreAssigned_ = firstCore_;
++tseCnt_;
}
else
{
// check for wrap around time
if ( tseCnt_ == numCores_ )
{
// All numCores_ cores have been handed out; restart at the first core
tseCnt_ = 1;
lastTseCoreAssigned_ = firstCore_;
}
else
{
// Advance to the next core number
++tseCnt_;
++lastTseCoreAssigned_;
}
}
if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
{
trace_printf("%s@%d - TSE process %s%s (%d, %d) affinity set to core=%d, tseCount=%d, numCores=%d." "\n"
, method_name, __LINE__, process->GetName(), process->IsBackup() ? "-B" : "-P"
, process->GetNid(), process->GetPid(), lastTseCoreAssigned_, tseCnt_, numCores_ );
}
// Pin to exactly the core selected above
CPU_ZERO( &mask );
CPU_SET( lastTseCoreAssigned_, &mask );
}
else
{
// Backup TSE: same rotation, tracked in lastBackupTseCoreAssigned_ /
// tseBackupCnt_
if ( lastBackupTseCoreAssigned_ == -1 )
{
// always start with the first core and count it
lastBackupTseCoreAssigned_ = firstCore_;
++tseBackupCnt_;
}
else
{
// check for wrap around time
if ( tseBackupCnt_ == numCores_ )
{
// All numCores_ cores have been handed out; restart at the first core
tseBackupCnt_ = 1;
lastBackupTseCoreAssigned_ = firstCore_;
}
else
{
// Advance to the next core number
++tseBackupCnt_;
++lastBackupTseCoreAssigned_;
}
}
if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
{
trace_printf("%s@%d - TSE process %s%s (%d, %d) affinity set to core=%d, tseCount=%d, numCores=%d." "\n"
, method_name, __LINE__, process->GetName(), process->IsBackup() ? "-B" : "-P"
, process->GetNid(), process->GetPid(), lastBackupTseCoreAssigned_, tseBackupCnt_, numCores_ );
}
// Pin to exactly the core selected above
CPU_ZERO( &mask );
CPU_SET( lastBackupTseCoreAssigned_, &mask );
}
rc = sched_setaffinity( process->GetPid(), sizeof(cpu_set_t), &mask );
}
else if ( CommType == CommType_InfiniBand &&
process->GetType( ) == ProcessType_Generic &&
process->isCmpOrEsp( ) )
{
// Build a mask of cores 0 .. GetNumCores()-2. The do-while always sets
// core 0, so the mask is never empty; the highest-numbered core is left
// out whenever the node reports more than one core.
CPU_ZERO( &mask );
short lv_corenum = 0;
do
{
CPU_SET( lv_corenum, &mask );
lv_corenum++;
}
while ( lv_corenum < (GetNode( )->GetNumCores( ) - 1) );
if ( trace_settings & (TRACE_REQUEST | TRACE_PROCESS) )
{
CoreMaskString( coreMaskStr, mask, GetNode( )->GetNumCores( ) );
trace_printf( "%s@%d - Generic process %s (%d, %d), cores=%d, affinity set to mask=%s" "\n"
, method_name, __LINE__, process->GetName( )
, process->GetNid( ), process->GetPid( ), GetNode( )->GetNumCores( ), coreMaskStr );
}
rc = sched_setaffinity( process->GetPid( ), sizeof(cpu_set_t), &mask );
}
else if ( usingCpuAffinity )
{
if (trace_settings & (TRACE_REQUEST | TRACE_PROCESS))
{
CoreMaskString( coreMaskStr, CoreMask, GetNode()->GetNumCores() );
trace_printf("%s@%d - process %s (%d, %d), cores=%d, affinity set to mask=%s" "\n"
, method_name, __LINE__, process->GetName()
, process->GetNid(), process->GetPid(), GetNode()->GetNumCores(), coreMaskStr );
}
// Use the configured cores
rc = sched_setaffinity( process->GetPid(), sizeof(cpu_set_t), &CoreMask );
}
else
{
// Let it float in the physical node
mask = GetNode( )->GetAffinityMask( );
if ( trace_settings & (TRACE_REQUEST | TRACE_PROCESS) )
{
CoreMaskString( coreMaskStr, mask, GetNode( )->GetNumCores( ) );
trace_printf( "%s@%d - process %s (%d, %d), cores=%d, affinity set to mask=%s" "\n"
, method_name, __LINE__, process->GetName( )
, process->GetNid( ), process->GetPid( ), GetNode( )->GetNumCores( ), coreMaskStr );
}
rc = sched_setaffinity( process->GetPid( ), sizeof(cpu_set_t), &mask );
}
// A non-zero return from sched_setaffinity is logged; no other action
if ( rc )
{
sprintf( la_buf, "[CLNode::SetAffinity], Can't set processor affinity.\n" );
mon_log_write( MON_LNODE_SETAFFINITY_2, SQ_LOG_ERR, la_buf );
}
TRACE_EXIT;
}
#endif
void CLNode::Up( void )
{
const char method_name[] = "CLNode::Up";
TRACE_ENTRY;
#ifndef NAMESERVER_PROCESS
struct message_def *msg;
char la_buf[MON_STRING_BUF_SIZE];
sprintf(la_buf, "[CLNode::Up], Node %d (%s) is up.\n", GetNid(), GetNode()->GetName());
mon_log_write(MON_LNODE_MARKUP, SQ_LOG_INFO, la_buf);
// Record statistics (sonar counters)
if (sonar_verify_state(SONAR_ENABLED | SONAR_MONITOR_ENABLED))
MonStats->notice_node_up_Incr();
// send node up message to our node's processes
msg = new struct message_def;
msg->type = MsgType_NodeUp;
msg->noreply = true;
msg->u.request.type = ReqType_Notice;
msg->u.request.u.up.nid = Nid;
msg->u.request.u.up.takeover = GetNode()->IsActivatingSpare();
#ifdef USE_SEQUENCE_NUM
msg->u.request.u.up.seqnum = Monitor->GetTimeSeqNum();
#endif
const char * nodeName = GetNode()->GetName();
if (IsRealCluster)
{
STRCPY(msg->u.request.u.up.node_name, nodeName);
}
else
{
sprintf(msg->u.request.u.up.node_name,"%s:%d", nodeName, Nid);
}
if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
{
trace_printf( "%s@%d - Broadcasting Node Up nid=%d, name=(%s), takeover=%d\n"
, method_name, __LINE__, GetNid()
, GetNode()->GetName(), msg->u.request.u.up.takeover );
}
MyNode->Bcast( msg );
delete msg;
#endif
TRACE_EXIT;
}
// Construct an empty logical node container. 'node' is stored in node_;
// the destructor, AddLNodeP and DeleteLNode treat a NULL node_ as the main
// (cluster) container and a non-NULL node_ as a physical node's container.
CLNodeContainer::CLNodeContainer(CNode *node)
                :LNode(NULL)
                ,LastNid(0)
                ,lnodesCount_(0)
                ,indexToNid_(NULL)
                ,node_(node)
                ,head_(NULL)
                ,tail_(NULL)
{
    const char method_name[] = "CLNodeContainer::CLNodeContainer";
    TRACE_ENTRY;

    // Eyecatcher marks a live object when inspecting memory
    memcpy(&eyecatcher_, "LCTR", 4);

    TRACE_EXIT;
}
// Destructor. Only the main container (node_ == NULL) deletes the logical
// node objects on its list; a physical node's container leaves them alone.
CLNodeContainer::~CLNodeContainer (void)
{
    const char method_name[] = "CLNodeContainer::~CLNodeContainer";
    TRACE_ENTRY;

    if ( !node_ )
    {
        // In the main logical nodes container: unlink and delete from the
        // front until the list is empty
        while ( head_ != NULL )
        {
            CLNode *doomed = head_;
            doomed->DeLink(&head_, &tail_);
            delete doomed;
        }
    }

    // Lower-case eyecatcher identifies a deleted object
    memcpy(&eyecatcher_, "lctr", 4);

    TRACE_EXIT;
}
// Create a CLNode from lnodeConfig, owned by node's logical node container,
// append it to this container's next_/prev_ list and return it.
CLNode *CLNodeContainer::AddLNode( CLNodeConfig *lnodeConfig, CNode *node )
{
    const char method_name[] = "CLNodeContainer::AddLNode";
    TRACE_ENTRY;

    assert( lnodeConfig != NULL );

    CLNode *created = new CLNode( node->GetLNodeContainer()
                                , lnodeConfig->GetNid()
                                , lnodeConfig->GetCoreMask()
                                , lnodeConfig->GetProcessors()
                                , lnodeConfig->GetZoneType()
                                );
    assert( created != NULL );

    const bool traceInit = (trace_settings & TRACE_INIT) != 0;
    if ( traceInit )
    {
        trace_printf( "%s@%d - Adding logical node object "
                      "(nid=%d) to lnodes container, "
                      "lnodesCount=%d\n"
                    , method_name, __LINE__
                    , created->GetNid()
                    , lnodesCount_ );
    }

    lnodesCount_++;

    // Append at the tail; the first entry becomes both head and tail
    if ( head_ != NULL )
    {
        tail_ = tail_->Link(created);
    }
    else
    {
        head_ = tail_ = created;
    }

    if ( traceInit )
    {
        trace_printf( "%s@%d - Added logical node object "
                      "(nid=%d) to lnodes container, "
                      "lnodesCount=%d\n"
                    , method_name, __LINE__
                    , created->GetNid()
                    , lnodesCount_ );
    }

    TRACE_EXIT;
    return created;
}
// Append an existing logical node to this container's nextP_/prevP_ list.
// Aborts when called on a container that has no node_ set.
void CLNodeContainer::AddLNodeP( CLNode *lnode )
{
    const char method_name[] = "CLNodeContainer::AddLNodeP";
    TRACE_ENTRY;

    if ( node_ == NULL )
    {
        // Must only be called from physical node's logical node container
        abort();
    }

    assert( lnode != NULL );

    const bool traceInit = (trace_settings & TRACE_INIT) != 0;
    if ( traceInit )
    {
        trace_printf( "%s@%d - Adding logical node object "
                      "(nid=%d) to (pnid=%d) lnodes container, "
                      "lnodesCount=%d\n"
                    , method_name, __LINE__
                    , lnode->GetNid()
                    , lnode->GetLNodeContainer()->GetNode()->GetPNid()
                    , lnodesCount_ );
    }

    lnodesCount_++;

    // Append at the tail; the first entry becomes both head and tail
    if ( head_ != NULL )
    {
        tail_ = tail_->LinkP(lnode);
    }
    else
    {
        head_ = tail_ = lnode;
    }

    if ( traceInit )
    {
        trace_printf( "%s@%d - Added logical node object "
                      "(nid=%d) to (pnid=%d) lnodes container, "
                      "lnodesCount=%d\n"
                    , method_name, __LINE__
                    , lnode->GetNid()
                    , lnode->GetLNodeContainer()->GetNode()->GetPNid()
                    , lnodesCount_ );
    }

    TRACE_EXIT;
}
#ifndef NAMESERVER_PROCESS
void CLNodeContainer::CancelDeathNotification( int nid
, int pid
, int verifier
, _TM_Txid_External trans_id )
{
CLNode *lnode;
const char method_name[] = "CLNodeContainer::CancelDeathNotification";
TRACE_ENTRY;
for ( lnode=head_; lnode; lnode=lnode->GetNextP() )
{
lnode->CancelDeathNotification( nid, pid, verifier, trans_id);
}
TRACE_EXIT;
}
#endif
// Not implemented: calling this aborts the program.
void CLNodeContainer::CheckForPendingCreates ( CProcess *process )
{
    // Parameter intentionally unused
    (void) process;

    //TODO
    abort();
}
// Unlink lnode from this container's next_/prev_ list, delete it and
// decrement the node count. Aborts when called on a container that has a
// node_ set.
void CLNodeContainer::DeleteLNode( CLNode *lnode )
{
    const char method_name[] = "CLNodeContainer::DeleteLNode";
    TRACE_ENTRY;

    const int doomedNid = lnode->GetNid();

    if (trace_settings & (TRACE_INIT | TRACE_REQUEST))
    {
        trace_printf( "%s@%d Deleting nid=%d)\n", method_name, __LINE__, doomedNid );
    }

    if ( node_ != NULL )
    {
        // Must only be called from main (cluster's) logical node container
        abort();
    }

    // Remove the logical node from the list, then destroy it
    lnode->DeLink(&head_, &tail_);
    delete lnode;
    --lnodesCount_;

    TRACE_EXIT;
}
// Walk the next_ list from head_ and return the first logical node whose
// nid matches, or NULL when there is none.
CLNode *CLNodeContainer::GetLNode(int nid)
{
    const char method_name[] = "CLNodeContainer::GetLNode";
    TRACE_ENTRY;

    CLNode *match = NULL;
    for ( CLNode *current = head_; current != NULL; current = current->GetNext() )
    {
        if ( current->GetNid() == nid )
        {
            match = current;
            break;
        }
    }

    TRACE_EXIT;
    return match;
}
// Find the logical node hosting the process named process_name.
//
// The next_ list is walked from head_. Only nodes that are not spare nodes
// and are in State_Up or State_Shutdown are searched. The search stops at
// the first match that is not a backup process; backup matches are
// remembered while the search continues.
//
// process_name - name handed to CProcessContainer::GetProcess()
// process      - out: the primary process when found; otherwise the last
//                backup seen when backupOk is true; otherwise NULL
// checkstate   - passed through to CProcessContainer::GetProcess()
// backupOk     - whether a backup may be returned when no primary is found
//
// Returns the LNode[] entry for the returned process's nid, or NULL when no
// process is returned.
CLNode *CLNodeContainer::GetLNode( char *process_name, CProcess **process,
                                   bool checkstate, bool backupOk )
{
    CLNode *lnode = head_;
    CNode *node = lnode ? lnode->GetNode() : NULL;
    CLNode *p_lnode = NULL;
    CProcess *p_process = NULL;
    CLNode *b_lnode = NULL;
    CProcess *b_process = NULL;
    const char method_name[] = "CLNodeContainer::GetLNode";
    TRACE_ENTRY;

    // Initialize return value
    *process = NULL;

    while (node)
    {
        if ( !node->IsSpareNode() &&
             (node->GetState() == State_Up ||
              node->GetState() == State_Shutdown) )
        {
            CProcess *candidate =
                node->CProcessContainer::GetProcess(process_name, checkstate);
            if (candidate)
            {
                if (trace_settings & (TRACE_REQUEST_DETAIL | TRACE_PROCESS_DETAIL))
                    trace_printf("%s@%d - process %s (%d, %d), backup=%d, backupOk=%d\n",
                                 method_name, __LINE__,
                                 candidate->GetName(), candidate->GetNid(),
                                 candidate->GetPid(), candidate->IsBackup(),
                                 backupOk);
                if (!candidate->IsBackup())
                {
                    // Primary found, stop searching
                    p_process = candidate;
                    p_lnode = LNode[candidate->GetNid()];
                    break;
                }
                // Save backup process and lnode, keep looking for the primary
                b_process = candidate;
                b_lnode = LNode[candidate->GetNid()];
            }
        }
        lnode = lnode->GetNext ();
        node = lnode ? lnode->GetNode() : NULL;
    }

    // Decide the result from what was actually found. Previously a backup
    // matched on the last searched node stayed in *process (with a NULL
    // lnode returned) even when backupOk was false.
    CLNode *result = NULL;
    if ( p_process )
    {
        *process = p_process;
        result = p_lnode;
    }
    else if ( backupOk )
    {
        // We did not find the primary and it's ok to return the backup
        *process = b_process;
        result = b_lnode;
    }

    TRACE_EXIT;
    return result;
}
// Translate 'index' to a nid through indexToNid_ and return the matching
// LNode[] entry. Returns NULL when index is outside
// 0 .. GetLNodesCount()-1, or when either lookup table has not been set.
CLNode *CLNodeContainer::GetLNodeByMap(int index )
{
    // Trace name now matches the class that defines this method
    const char method_name[] = "CLNodeContainer::GetLNodeByMap";
    TRACE_ENTRY;

    CClusterConfig *clusterConfig = Nodes->GetClusterConfig();
    CLNode *lnode = NULL;

    // Both tables are NULL after construction; guard before indexing
    if ( LNode != NULL && indexToNid_ != NULL &&
         index >= 0 && index < clusterConfig->GetLNodesCount() )
    {
        lnode = LNode[indexToNid_[index]];
    }

    TRACE_EXIT;
    return lnode;
}
// Return the first index i in 0 .. GetLNodesCount()-1 whose LNode[i] entry
// has the given nid, or -1 when there is none. Empty (NULL) entries are
// skipped.
int CLNodeContainer::GetNidIndex( int nid )
{
    // Trace name now matches the class that defines this method
    const char method_name[] = "CLNodeContainer::GetNidIndex";
    TRACE_ENTRY;

    int index = -1;

    // LNode is NULL after construction; guard before indexing
    if ( LNode != NULL )
    {
        CClusterConfig *clusterConfig = Nodes->GetClusterConfig();
        const int lnodesCount = clusterConfig->GetLNodesCount();
        for ( int i = 0; i < lnodesCount; i++ )
        {
            if ( LNode[i] != NULL && LNode[i]->GetNid() == nid )
            {
                index = i;
                break;
            }
        }
    }

    // Single exit so TRACE_EXIT runs on the found path as well
    TRACE_EXIT;
    return(index);
}
// Return the next logical node after 'nid', wrapping around.
//
// LNode[] is scanned from index nid+1 for an entry with a nid greater than
// 'nid'. If none qualifies, the scan restarts at index 0 looking for an
// entry with a nid less than or equal to 'nid'. When checkstate is true
// only logical nodes in State_Up qualify. Returns NULL when no entry
// qualifies.
//
// NOTE(review): the previous code broke out of the loop in both branches of
// the checkstate test, so checkstate had no effect. With this fix, callers
// passing checkstate=true can now get NULL when no node is up - confirm
// they handle it.
CLNode *CLNodeContainer::GetLNodeNext( int nid, bool checkstate )
{
    const char method_name[] = "CLNodeContainer::GetLNodeNext";
    TRACE_ENTRY;

    CClusterConfig *clusterConfig = Nodes->GetClusterConfig();
    const int lnodesCount = clusterConfig->GetLNodesCount();
    CLNode *found = NULL;

    // LNode is NULL after construction; guard before indexing
    if ( LNode != NULL )
    {
        // Forward scan; never start below index 0
        const int start = (nid + 1 > 0) ? (nid + 1) : 0;
        for ( int i = start; i < lnodesCount && found == NULL; i++ )
        {
            CLNode *candidate = LNode[i];
            if ( candidate != NULL
              && candidate->GetNid() > nid
              && (!checkstate || candidate->GetState() == State_Up) )
            {
                found = candidate; // found it
            }
        }

        // Wrap around to the start. 'found' is only ever a qualifying
        // entry, so a non-matching last entry can no longer suppress this.
        for ( int i = 0; i < lnodesCount && found == NULL; i++ )
        {
            CLNode *candidate = LNode[i];
            if ( candidate != NULL
              && candidate->GetNid() <= nid
              && (!checkstate || candidate->GetState() == State_Up) )
            {
                found = candidate; // found it
            }
        }
    }

    TRACE_EXIT;
    return found;
}
bool CLNodeContainer::IsMyNode( int nid )
{
bool found = false;
CLNode *lnode;
const char method_name[] = "CLNodeContainer::IsMyNode";
TRACE_ENTRY;
for ( lnode = head_; lnode; lnode = lnode->GetNextP() )
{
if ( lnode->Nid == nid )
{
found = true;
break;
}
}
TRACE_EXIT;
return found;
}
// Unlink lnode from this container's nextP_/prevP_ list via
// CLNode::DeLinkP, which also moves head_/tail_ off the node when it was at
// either end. The node itself is not deleted.
// NOTE(review): AddLNodeP increments lnodesCount_ but this does not
// decrement it - confirm whether that is intended.
void CLNodeContainer::RemoveLNodeP( CLNode *lnode )
{
lnode->DeLinkP(&head_, &tail_);
}