clustermanagerservice: in vmupdate, handle MigrateTrans->Exited transition explicitly
nodemanagerservice: name my threads
qemu: use self.controlledVMs in matchHostPids thread
qemu: let NM know of an exit regardless of whether it was migrating out
git-svn-id: https://svn.apache.org/repos/asf/incubator/tashi/trunk@1349038 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/tashi/clustermanager/clustermanagerservice.py b/src/tashi/clustermanager/clustermanagerservice.py
index 28a29c5..081862e 100644
--- a/src/tashi/clustermanager/clustermanagerservice.py
+++ b/src/tashi/clustermanager/clustermanagerservice.py
@@ -642,18 +642,24 @@
if (instance.state == InstanceState.Exited):
# determine why a VM has exited
hostname = self.data.getHost(oldInstance.hostId).name
+
if (oldInstance.state not in [InstanceState.ShuttingDown, InstanceState.Destroying, InstanceState.Suspending]):
self.log.warning('Unexpected exit on %s of instance %s (vmId %d)' % (hostname, oldInstance.name, oldInstance.vmId))
+
if (oldInstance.state == InstanceState.Suspending):
self.__stateTransition(oldInstance, InstanceState.Suspending, InstanceState.Suspended)
oldInstance.hostId = None
oldInstance.vmId = None
self.data.releaseInstance(oldInstance)
+
+ if (oldInstance.state == InstanceState.MigrateTrans):
+ # Just await update from target host
+ self.data.releaseInstance(oldInstance)
+
else:
del self.instanceLastContactTime[oldInstance.id]
- self.log.worning("removing %s" % oldInstance)
self.data.removeInstance(oldInstance)
- self.log.warning("done remove %s" % oldInstance)
+
else:
if (instance.state):
# XXXstroucki does this matter?
diff --git a/src/tashi/nodemanager/nodemanagerservice.py b/src/tashi/nodemanager/nodemanagerservice.py
index 3af587e..f536ea1 100755
--- a/src/tashi/nodemanager/nodemanagerservice.py
+++ b/src/tashi/nodemanager/nodemanagerservice.py
@@ -78,8 +78,8 @@
self.id = self.cm.registerNodeManager(self.host, self.instances.values())
# start service threads
- threading.Thread(target=self.__registerWithClusterManager).start()
- threading.Thread(target=self.__statsThread).start()
+ threading.Thread(name="registerWithClusterManager", target=self.__registerWithClusterManager).start()
+ threading.Thread(name="statsThread", target=self.__statsThread).start()
def __initAccounting(self):
self.accountBuffer = []
diff --git a/src/tashi/nodemanager/vmcontrol/qemu.py b/src/tashi/nodemanager/vmcontrol/qemu.py
index 0caedfc..865a5ba 100644
--- a/src/tashi/nodemanager/vmcontrol/qemu.py
+++ b/src/tashi/nodemanager/vmcontrol/qemu.py
@@ -162,14 +162,14 @@
"""Will return a dict of instances by vmId to the caller"""
return dict((x, self.controlledVMs[x].instance) for x in self.controlledVMs.keys())
- def __matchHostPids(self, controlledVMs):
+ def __matchHostPids(self):
"""This is run in a separate polling thread and it must do things that are thread safe"""
- vmIds = controlledVMs.keys()
+ vmIds = self.controlledVMs.keys()
pids = self.__getHostPids()
for vmId in vmIds:
- child = controlledVMs[vmId]
+ child = self.controlledVMs[vmId]
instance = child.instance
name = instance.name
@@ -180,9 +180,9 @@
# remove info file
os.unlink(self.INFO_DIR + "/%d"%(vmId))
- # XXXstroucki why not use self.controlledVMs
- # argument, so why modify this fn's formal?
- del controlledVMs[vmId]
+ # XXXstroucki python should handle
+ # locking here (?)
+ del self.controlledVMs[vmId]
# remove any stats (appropriate?)
try:
@@ -230,8 +230,11 @@
# let the NM know
try:
- if (not child.migratingOut):
- self.nm.vmStateChange(vmId, None, InstanceState.Exited)
+ # XXXstroucki: we don't want to treat
+ # the source VM of a migration exiting
+ # as an actual
+ # exit, but the NM should probably know.
+ self.nm.vmStateChange(vmId, None, InstanceState.Exited)
except Exception:
log.exception("vmStateChange failed for VM %s" % (name))
else:
@@ -290,7 +293,7 @@
while True:
try:
time.sleep(self.POLL_DELAY)
- self.__matchHostPids(self.controlledVMs)
+ self.__matchHostPids()
except:
log.exception("Exception in poolVMsLoop")