[REEF-1719] Log machine status in TaskHostBase
JIRA:
[REEF-1719](https://issues.apache.org/jira/browse/REEF-1719)
This closes #1231
diff --git a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/IMRUDriver.cs b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/IMRUDriver.cs
index 5f18aaf..a169575 100644
--- a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/IMRUDriver.cs
+++ b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/IMRUDriver.cs
@@ -756,6 +756,7 @@
if (waitingTasks.Any())
{
+ Logger.Log(Level.Info, "There are {0} tasks that timed out", waitingTasks.Count);
WaitingForCloseTaskNoResponseAction(waitingTasks);
}
break;
diff --git a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/TaskManager.cs b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/TaskManager.cs
index 4ba9745..72e1d75 100644
--- a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/TaskManager.cs
+++ b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/Driver/TaskManager.cs
@@ -552,11 +552,12 @@
{
try
{
- return string.Format("State={0}, taskId={1}, ContextId={2}, evaluatorId={3}, evaluatorHost={4}",
+ return string.Format("State={0}, taskId={1}, ContextId={2}, evaluatorId={3}, TimeInCurrentStateinMs {4}, evaluatorHost={5}",
t.Value.TaskState.CurrentState,
t.Key,
t.Value.ActiveContext.Id,
t.Value.ActiveContext.EvaluatorId,
+ (long)(DateTime.Now - t.Value.TimeStateUpdated).TotalMilliseconds, // whole elapsed time in ms; TimeSpan.Milliseconds is only the 0-999 component
t.Value.ActiveContext.EvaluatorDescriptor.NodeDescriptor.HostName);
}
catch (Exception ex)
diff --git a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/IMRUTasks/TaskHostBase.cs b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/IMRUTasks/TaskHostBase.cs
index 718a794..1bf1ff4 100644
--- a/lang/cs/Org.Apache.REEF.IMRU/OnREEF/IMRUTasks/TaskHostBase.cs
+++ b/lang/cs/Org.Apache.REEF.IMRU/OnREEF/IMRUTasks/TaskHostBase.cs
@@ -20,6 +20,7 @@
using System.Net.Sockets;
using System.Runtime.Remoting;
using System.Threading;
+using Org.Apache.REEF.Common.Runtime;
using Org.Apache.REEF.Common.Tasks;
using Org.Apache.REEF.Common.Tasks.Events;
using Org.Apache.REEF.IMRU.OnREEF.Driver;
@@ -64,6 +65,11 @@
protected readonly CancellationTokenSource _cancellationSource;
/// <summary>
+ /// Machine status, used for logging purposes
+ /// </summary>
+ private readonly MachineStatus _machineStatus = new MachineStatus();
+
+ /// <summary>
/// Task host base class to hold the common stuff of both mapper and update tasks
/// </summary>
/// <param name="groupCommunicationsClient">Group Communication Client</param>
@@ -74,6 +80,7 @@
TaskCloseCoordinator taskCloseCoordinator,
bool invokeGc)
{
+ Logger.Log(Level.Info, "Entering TaskHostBase constructor with machine status {0}.", _machineStatus.ToString());
_groupCommunicationsClient = groupCommunicationsClient;
_communicationGroupClient = groupCommunicationsClient.GetCommunicationGroup(IMRUConstants.CommunicationGroupName);
@@ -88,7 +95,7 @@
/// </summary>
public byte[] Call(byte[] memento)
{
- Logger.Log(Level.Info, "Entering {0} Call().", TaskHostName);
+ Logger.Log(Level.Info, "Entering {0} Call() with machine status {1}.", TaskHostName, _machineStatus.ToString());
try
{
_groupCommunicationsClient.Initialize(_cancellationSource);
@@ -111,6 +118,7 @@
}
finally
{
+ Logger.Log(Level.Info, "TaskHostBase::Finally");
_taskCloseCoordinator.SignalTaskStopped();
}
Logger.Log(Level.Info, "{0} returned with cancellation token:{1}.", TaskHostName, _cancellationSource.IsCancellationRequested);