merge stroucki-dropthrift from trunk
Nodemanager.cfg, TashiDefaults.cfg: remove thrift config items


git-svn-id: https://svn.apache.org/repos/asf/incubator/tashi/branches/stroucki-dropthrift@1297655 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/etc/NodeManager.cfg b/etc/NodeManager.cfg
index a47bccf..48f4044 100644
--- a/etc/NodeManager.cfg
+++ b/etc/NodeManager.cfg
@@ -80,7 +80,6 @@
 statsInterval = 0.0
 ;accountingHost = clustermanager
 ;accountingPort = 2228
-;bind = 0.0.0.0 ; not supported (Thrift is missing support to specify what to bind to!)
 
 [Security]
 authAndEncrypt = False
diff --git a/etc/TashiDefaults.cfg b/etc/TashiDefaults.cfg
index fd034eb..101da69 100644
--- a/etc/TashiDefaults.cfg
+++ b/etc/TashiDefaults.cfg
@@ -57,7 +57,6 @@
 allowDuplicateNames = False
 ;accountingHost = clustermanager
 ;accountingPort = 2228
-;bind = 0.0.0.0 ; not supported (Thrift is missing support to specify what to bind to!)
 
 [GetentOverride]
 baseData = tashi.clustermanager.data.Pickled
@@ -110,7 +109,6 @@
 clusterManagerHost = localhost 
 clusterManagerPort = 9882
 statsInterval = 0.0
-;bind = 0.0.0.0 ; not supported (Thrift is missing support to specify what to bind to!)
 
 [Qemu]
 qemuBin = /usr/bin/kvm
diff --git a/src/tashi/accounting/accounting.py b/src/tashi/accounting/accounting.py
index 93d2999..6856e49 100755
--- a/src/tashi/accounting/accounting.py
+++ b/src/tashi/accounting/accounting.py
@@ -17,6 +17,8 @@
 # specific language governing permissions and limitations
 # under the License.    
 
+import os
+import time
 import sys
 import signal
 import logging.config
@@ -26,13 +28,13 @@
 #from rpyc.utils.authenticators import TlsliteVdbAuthenticator
 
 #from tashi.rpycservices.rpyctypes import *
-from tashi.util import getConfig, createClient, instantiateImplementation, boolean, debugConsole, signalHandler
+from tashi.util import getConfig, createClient, instantiateImplementation, boolean, debugConsole
 import tashi
 
 class Accounting(object):
-	def __init__(self, config, cmclient):
+	def __init__(self, config):
 		self.config = config
-		self.cm = cmclient
+		self.cm = createClient(config)
 		self.hooks = []
 		self.log = logging.getLogger(__file__)
 
@@ -62,25 +64,43 @@
 
 		debugConsole(globals())
 
-		try:
-			t.start()
-		except KeyboardInterrupt:
-			self.handleSIGTERM(signal.SIGTERM, None)
-
-	@signalHandler(signal.SIGTERM)
-	def handleSIGTERM(self, signalNumber, stackFrame):
-		self.log.info('Exiting cluster manager after receiving a SIGINT signal')
+		t.start()
+		# shouldn't exit by itself
 		sys.exit(0)
 
 def main():
 	(config, configFiles) = getConfig(["Accounting"])
 	publisher = instantiateImplementation(config.get("Accounting", "publisher"), config)
 	tashi.publisher = publisher
-	cmclient = createClient(config)
 	logging.config.fileConfig(configFiles)
-	accounting = Accounting(config, cmclient)
+	log = logging.getLogger(__name__)
+	log.info('Using configuration file(s) %s' % configFiles)
 
-	accounting.initAccountingServer()
+	accounting = Accounting(config)
+
+	# handle keyboard interrupts (http://code.activestate.com/recipes/496735-workaround-for-missed-sigint-in-multithreaded-prog/)
+	child = os.fork()
+
+	if child == 0:
+		accounting.initAccountingServer()
+		# shouldn't exit by itself
+		sys.exit(0)
+
+	else:
+		# main
+		try:
+			os.waitpid(child, 0)
+		except KeyboardInterrupt:
+			log.info("Exiting accounting service after receiving a SIGINT signal")
+			os._exit(0)
+		except Exception:
+			log.exception("Abnormal termination of accounting service")
+			os._exit(-1)
+
+		log.info("Exiting accounting service after service thread exited")
+		os._exit(-1)
+
+	return
 
 if __name__ == "__main__":
 	main()
diff --git a/src/tashi/agents/primitive.py b/src/tashi/agents/primitive.py
index d21b12f..2e70ec9 100755
--- a/src/tashi/agents/primitive.py
+++ b/src/tashi/agents/primitive.py
@@ -19,6 +19,7 @@
 
 import time
 import logging.config
+import sys
 
 from tashi.rpycservices.rpyctypes import Errors, HostState, InstanceState, TashiException
 
@@ -26,9 +27,9 @@
 import tashi
 
 class Primitive(object):
-	def __init__(self, config, cmclient):
+	def __init__(self, config):
 		self.config = config
-		self.cm = cmclient
+		self.cm = createClient(config)
 		self.hooks = []
 		self.log = logging.getLogger(__file__)
 		self.scheduleDelay = float(self.config.get("Primitive", "scheduleDelay"))
@@ -250,7 +251,7 @@
 				for i in oldInstances:
 					# XXXstroucki what about paused and saved VMs?
 					# XXXstroucki: do we need to look at Held VMs here?
-					if (i not in self.instances and (oldInstances[i].state == InstanceState.Running or oldInstances[i].state == InstanceState.Destroying)):
+					if (i not in self.instances and (oldInstances[i].state == InstanceState.Running or oldInstances[i].state == InstanceState.Destroying or oldInstances[i].state == InstanceState.ShuttingDown)):
 						self.log.info("VM exited: %s" % (oldInstances[i].name))
 						for hook in self.hooks:
 							hook.postDestroy(oldInstances[i])
@@ -283,10 +284,17 @@
 	(config, configFiles) = getConfig(["Agent"])
 	publisher = instantiateImplementation(config.get("Agent", "publisher"), config)
 	tashi.publisher = publisher
-	cmclient = createClient(config)
 	logging.config.fileConfig(configFiles)
-	agent = Primitive(config, cmclient)
-	agent.start()
+	agent = Primitive(config)
+
+	try:
+		agent.start()
+	except KeyboardInterrupt:
+		pass
+
+	log = logging.getLogger(__file__)
+	log.info("Primitive exiting")
+	sys.exit(0)
 
 if __name__ == "__main__":
 	main()
diff --git a/src/tashi/clustermanager/clustermanager.py b/src/tashi/clustermanager/clustermanager.py
index db61194..818bbb0 100755
--- a/src/tashi/clustermanager/clustermanager.py
+++ b/src/tashi/clustermanager/clustermanager.py
@@ -17,11 +17,12 @@
 # specific language governing permissions and limitations
 # under the License.    
 
+import os
 import sys
-import signal
+import time
 import logging.config
 
-from tashi.util import signalHandler, boolean, instantiateImplementation, getConfig, debugConsole
+from tashi.util import boolean, instantiateImplementation, getConfig, debugConsole
 import tashi
 
 from tashi.rpycservices import rpycservices
@@ -54,19 +55,11 @@
 	t.service._type = 'ClusterManagerService'
 
 	debugConsole(globals())
-	
-	try:
-		t.start()
-	except KeyboardInterrupt:
-		handleSIGTERM(signal.SIGTERM, None)
 
-@signalHandler(signal.SIGTERM)
-def handleSIGTERM(signalNumber, stackFrame):
-	global log
+	t.start()
+	# shouldn't exit by itself
+	return
 
-	log.info('Exiting cluster manager after receiving a SIGINT signal')
-	sys.exit(0)
-	
 def main():
 	global log
 	
@@ -80,7 +73,32 @@
 	
 	# bind the database
 	log.info('Starting cluster manager')
-	startClusterManager(config)
+
+	# handle keyboard interrupts (http://code.activestate.com/recipes/496735-workaround-for-missed-sigint-in-multithreaded-prog/)
+	child = os.fork()
+
+	if child == 0:
+		startClusterManager(config)
+		# shouldn't exit by itself
+		sys.exit(0)
+
+	else:
+		# main
+		try:
+			os.waitpid(child, 0)
+		except KeyboardInterrupt:
+			log.info("Exiting cluster manager after receiving a SIGINT signal")
+			os._exit(0)
+		except Exception:
+			log.exception("Abnormal termination of cluster manager")
+			os._exit(-1)
+
+		log.info("Exiting cluster manager after service thread exited")
+		os._exit(-1)
+
+	return
+
+
 
 if __name__ == "__main__":
 	main()
diff --git a/src/tashi/nodemanager/nodemanager.py b/src/tashi/nodemanager/nodemanager.py
index 66d2d5b..2c39903 100755
--- a/src/tashi/nodemanager/nodemanager.py
+++ b/src/tashi/nodemanager/nodemanager.py
@@ -20,8 +20,10 @@
 import logging.config
 import signal
 import sys
+import os
+import time
 
-from tashi.util import instantiateImplementation, getConfig, debugConsole, signalHandler
+from tashi.util import instantiateImplementation, getConfig, debugConsole
 import tashi
 from tashi import boolean
 
@@ -29,12 +31,8 @@
 from rpyc.utils.server import ThreadedServer
 from rpyc.utils.authenticators import TlsliteVdbAuthenticator
 
-@signalHandler(signal.SIGTERM)
-def handleSIGTERM(signalNumber, stackFrame):
-	sys.exit(0)
-
 def main():
-	global config, dfs, vmm, service, server, log, notifier
+	global config, log
 	
 	(config, configFiles) = getConfig(["NodeManager"])
 	publisher = instantiateImplementation(config.get("NodeManager", "publisher"), config)
@@ -42,6 +40,35 @@
 	logging.config.fileConfig(configFiles)
 	log = logging.getLogger(__name__)
 	log.info('Using configuration file(s) %s' % configFiles)
+
+	# handle keyboard interrupts (http://code.activestate.com/recipes/496735-workaround-for-missed-sigint-in-multithreaded-prog/)
+	child = os.fork()
+	
+	if child == 0:
+		startNodeManager()
+		# shouldn't exit by itself
+		sys.exit(0)
+
+	else:
+		# main
+		try:
+			os.waitpid(child, 0)
+		except KeyboardInterrupt:
+			log.info("Exiting node manager after receiving a SIGINT signal")
+			os._exit(0)
+		except Exception:
+			log.exception("Abnormal termination of node manager")
+			os._exit(-1)
+
+		log.info("Exiting node manager after service thread exited")
+		os._exit(-1)
+
+	return
+
+def startNodeManager():
+	global config, dfs, vmm, service, server, log, notifier
+	publisher = instantiateImplementation(config.get("NodeManager", "publisher"), config)
+	tashi.publisher = publisher
 	dfs = instantiateImplementation(config.get("NodeManager", "dfs"), config)
 	vmm = instantiateImplementation(config.get("NodeManager", "vmm"), config, dfs, None)
 	service = instantiateImplementation(config.get("NodeManager", "service"), config, vmm)
@@ -59,14 +86,11 @@
 	t.service._type = 'NodeManagerService'
 
 	debugConsole(globals())
-	
-	try:
-		t.start()
-	except KeyboardInterrupt:
-		handleSIGTERM(signal.SIGTERM, None)
-	except Exception, e:
-		sys.stderr.write(str(e) + "\n")
-		sys.exit(-1)
+
+	t.start()
+	# shouldn't exit by itself
+	sys.exit(0)
+
 
 if __name__ == "__main__":
 	main()
diff --git a/src/tashi/nodemanager/vmcontrol/qemu.py b/src/tashi/nodemanager/vmcontrol/qemu.py
index e89a1a2..99c7edc 100644
--- a/src/tashi/nodemanager/vmcontrol/qemu.py
+++ b/src/tashi/nodemanager/vmcontrol/qemu.py
@@ -867,10 +867,14 @@
 		myTicks = userTicks + sysTicks
 		vsize = (int(ws[22]))/1024.0/1024.0
 		rss = (int(ws[23])*4096)/1024.0/1024.0
-		cpuSeconds = myTicks/ticksPerSecond
-		lastCpuSeconds = cpuStats.get(vmId, cpuSeconds)
-		cpuLoad = (cpuSeconds - lastCpuSeconds)/(now - last)
-		cpuStats[vmId] = cpuSeconds
+		cpuSeconds = myTicks/self.ticksPerSecond
+		# XXXstroucki be more exact here?
+		last = time.time() - self.statsInterval
+		lastCpuSeconds = self.cpuStats.get(vmId, cpuSeconds)
+		if lastCpuSeconds is None:
+			lastCpuSeconds = cpuSeconds
+		cpuLoad = (cpuSeconds - lastCpuSeconds)/(time.time() - last)
+		self.cpuStats[vmId] = cpuSeconds
 		try:
 			child = self.controlledVMs[vmId]
 		except:
@@ -880,7 +884,7 @@
 		(recvMBs, sendMBs, recvBytes, sendBytes) = (0.0, 0.0, 0.0, 0.0)
 		for i in range(0, len(child.instance.nics)):
 			netDev = "%s%d.%d" % (self.ifPrefix, child.instance.id, i)
-			(tmpRecvMBs, tmpSendMBs, tmpRecvBytes, tmpSendBytes) = netStats.get(netDev, (0.0, 0.0, 0.0, 0.0))
+			(tmpRecvMBs, tmpSendMBs, tmpRecvBytes, tmpSendBytes) = self.netStats.get(netDev, (0.0, 0.0, 0.0, 0.0))
 			(recvMBs, sendMBs, recvBytes, sendBytes) = (recvMBs + tmpRecvMBs, sendMBs + tmpSendMBs, recvBytes + tmpRecvBytes, sendBytes + tmpSendBytes)
 		self.stats[vmId] = self.stats.get(vmId, {})
 		child = self.controlledVMs.get(vmId, None)
@@ -902,9 +906,9 @@
 
 	# thread
 	def statsThread(self):
-		ticksPerSecond = float(os.sysconf('SC_CLK_TCK'))
-		netStats = {}
-		cpuStats = {}
+		self.ticksPerSecond = float(os.sysconf('SC_CLK_TCK'))
+		self.netStats = {}
+		self.cpuStats = {}
 		# XXXstroucki be more exact here?
 		last = time.time() - self.statsInterval
 		while True:
@@ -920,7 +924,7 @@
 						ws = ld.split()
 						recvBytes = float(ws[0])
 						sendBytes = float(ws[8])
-						(recvMBs, sendMBs, lastRecvBytes, lastSendBytes) = netStats.get(dev, (0.0, 0.0, recvBytes, sendBytes))
+						(recvMBs, sendMBs, lastRecvBytes, lastSendBytes) = self.netStats.get(dev, (0.0, 0.0, recvBytes, sendBytes))
 						if (recvBytes < lastRecvBytes):
 							# We seem to have overflowed
 							# XXXstroucki How likely is this to happen?
@@ -936,7 +940,7 @@
 								lastSendBytes = lastSendBytes - 2**32
 						recvMBs = (recvBytes-lastRecvBytes)/(now-last)/1024.0/1024.0
 						sendMBs = (sendBytes-lastSendBytes)/(now-last)/1024.0/1024.0
-						netStats[dev] = (recvMBs, sendMBs, recvBytes, sendBytes)
+						self.netStats[dev] = (recvMBs, sendMBs, recvBytes, sendBytes)
 
 
 				for vmId in self.controlledVMs:
diff --git a/src/tashi/util.py b/src/tashi/util.py
index 03006a7..dd8a28d 100644
--- a/src/tashi/util.py
+++ b/src/tashi/util.py
@@ -211,27 +211,27 @@
 		raise Exception("No config file could be found: %s" % (str(allLocations)))
 	return (config, configFiles)
 
-def __getShellFn():
+def __getShellFn(globalDict):
 	if sys.version_info < (2, 6, 1):
 		from IPython.Shell import IPShellEmbed
-		return IPShellEmbed()
+		return IPShellEmbed(user_ns=globalDict)
 	else:
 		import IPython
-		return IPython.embed()
+		return IPython.embed(user_ns=globalDict)
 
 def debugConsole(globalDict):
 	"""A debugging console that optionally uses pysh"""
 	def realDebugConsole(globalDict):
 		try :
 			import atexit
-			shellfn = __getShellFn()
+			shellfn = __getShellFn(globalDict)
 			def resetConsole():
 # XXXpipe: make input window sane
 				(stdin, stdout) = os.popen2("reset")
 				stdout.read()
 			dbgshell = shellfn()
 			atexit.register(resetConsole)
-			dbgshell(local_ns=globalDict, global_ns=globalDict)
+			dbgshell()
 		except Exception:
 			CONSOLE_TEXT=">>> "
 			input = " "