merge Luke's branch with the trunk


git-svn-id: https://svn.apache.org/repos/asf/incubator/tashi/branches/luke-zoni-staging@1351880 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/INSTALL b/INSTALL
index 6240c35..127cc2b 100644
--- a/INSTALL
+++ b/INSTALL
@@ -124,6 +124,7 @@
 given by the hostname command. If you plan on eventually having several 
 hosts and networks, feel free to add them now.
 
+root@grml:/usr/local/tashi# cd bin
 root@grml:/usr/local/tashi/bin# DEBUG=1 ./clustermanager
 2012-01-26 23:12:33,972 [./clustermanager:INFO] Using configuration file(s) ['/usr/local/tashi/etc/TashiDefaults.cfg']
 2012-01-26 23:12:33,972 [./clustermanager:INFO] Starting cluster manager
@@ -152,15 +153,14 @@
 
 In [1]: from tashi.rpycservices.rpyctypes import Host, HostState, Network
 
-In [2]: data.baseDataObject.hosts[1] = Host(d={'id':1,'name':'grml','state': HostState.Normal,'up':False})
+In [2]: data.baseDataObject.hosts[0] = Host(d={'id':0,'name':'grml','state': HostState.Normal,'up':False})
 
-In [3]: data.baseDataObject.networks[1]=Network(d={'id':0,'name':'default'})
+In [3]: data.baseDataObject.networks[0]=Network(d={'id':0,'name':'My Network'})
 
 In [4]: data.baseDataObject.save()
 
-In [5]: import os
-
-In [6]: os.kill(os.getpid(), 9)
+In [5]: (^C)
+2012-03-07 20:00:00,456 [./bin/clustermanager:INFO] Exiting cluster manager after receiving a SIGINT signal
 
 Run the cluster manager in the background:
 root@grml:/usr/local/tashi/bin# ./clustermanager &
diff --git a/Makefile b/Makefile
index 57655a4..618050d 100644
--- a/Makefile
+++ b/Makefile
@@ -33,6 +33,7 @@
 	mkdir apache-tashi
 	cp -rp doc etc Makefile src DISCLAIMER INSTALL LICENSE NOTICE README apache-tashi/
 	find apache-tashi -type d -name ".svn"|xargs rm -rf
+	-chgrp -R incubator apache-tashi
 	tar zcf apache-tashi.tar.gz apache-tashi
 	rm -rf apache-tashi
 
@@ -117,7 +118,7 @@
 #  Zoni 
 bin/zoni-cli:
 	@echo Symlinking in zoni-cli...
-	(cd bin; ln -s ../src/zoni/client/zoni-cli.py .)
+	(cd bin; ln -s ../src/zoni/client/zoni-cli.py zoni-client)
 # why necessarily put this in /usr/local/bin like nothing else?
 usr/local/bin/zoni:
 	@echo Creating /usr/local/bin/zoni
@@ -127,11 +128,11 @@
 	if test -e /usr/local/bin/zoni; then echo Removing zoni...; rm /usr/local/bin/zoni; fi
 
 ## for now only print warnings having to do with bad indentation. pylint doesn't make it easy to enable only 1,2 checks
-disabled_warnings=$(shell pylint --list-msgs|grep :W0| awk -F: '{ORS=","; if ($$2 != "W0311" && $$2 != "W0312"){ print $$2}}')
+disabled_warnings=$(shell pylint --list-msgs|grep :W0| awk -F: '{ORS=","; if ($$2 != "W0311" && $$2 != "W0312"){ print $$2}}')",F0401"
 pysrc=$(shell find . \! -path '*gen-py*' \! -path '*services*' \! -path '*messagingthrift*' \! -name '__init__.py' -name "*.py")
 tidy: $(addprefix tidyfile/,$(pysrc))
-	@echo Insuring .py files are nice and tidy!
+	@echo Ensured .py files are nice and tidy!
 
 tidyfile/%: %
 	@echo Checking tidy for $*
-	pylint --report=no --disable-msg-cat=R,C,E --disable-msg=$(disabled_warnings) --indent-string="\t" $* 2> /dev/null; 
+	pylint --report=no --disable=R,C,E --disable=$(disabled_warnings) --indent-string="\t" $* 2> /dev/null; 
diff --git a/doc/DEVELOPMENT b/doc/DEVELOPMENT
index 32d1b20..ac6197c 100644
--- a/doc/DEVELOPMENT
+++ b/doc/DEVELOPMENT
@@ -8,3 +8,9 @@
 
 Other ideas:
    * Make available a console aggregator for user's VMs.
+
+Python caveats:
+   * We would like to use variables like bin, id, sum, input, etc., but
+     these are built-in, and will be flagged by pydev
+   * pydev does not like python modules with a dash in the name
+
diff --git a/doc/INSTALL2 b/doc/INSTALL2
index 66ad7dc..34c437f 100644
--- a/doc/INSTALL2
+++ b/doc/INSTALL2
@@ -48,6 +48,16 @@
 Note that the entire path of a network connection must be configured to 
 use jumbo frames, if the virtual machines are to use them.
 
+If you have large numbers of VLANs, and don't want to hardcode them into
+each VM host, you can find a sample qemu-ifup in the doc directory. This
+script will need to be adapted to your local standards by changing the
+basic parameters at the top. This script can then be linked to by the name
+Tashi expects it to have. For example, if you have a VLAN 1001, you will
+create a link from /etc/qemu-ifup.1001 to this script.
+
+The script will handle the creation of the VM interface, and creation of the
+bridge and VLANs if they haven't been created before.
+
 ---+ Accounting server
 
 An accounting server is available in the distribution. It will log 
diff --git a/doc/sample.qemu-ifup b/doc/sample.qemu-ifup
new file mode 100644
index 0000000..3323211
--- /dev/null
+++ b/doc/sample.qemu-ifup
@@ -0,0 +1,51 @@
+#!/bin/sh
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# by Richard Gass and Michael Stroucken
+
+# Adapt the following two parameters to your installation
+# Uplink interface
+UPLINKIF="eth0"  
+# Prefix for bridge naming
+BRIDGEPREFIX="br"
+
+vlanID=$(echo $0 | awk -F "ifup." '{print $2}')
+vmIf=$1
+
+#  see if tagged interface exists
+bridgeUplinkIf="${UPLINKIF}.${vlanID}"
+cat /proc/net/vlan/config | grep "${bridgeUplinkIf} "
+if [ $? -gt 0 ];then
+        echo "creating tagged interface"
+        vconfig add ${UPLINKIF} ${vlanID}
+        ip link set ${bridgeUplinkIf} up
+fi
+ 
+#  Check for the bridge
+bridgeName="${BRIDGEPREFIX}${vlanID}"
+brctl show | grep "^${bridgeName}"  
+if [ $? -gt 0 ];then
+        echo "creating bridge interface"
+        brctl addbr ${bridgeName}
+        brctl addif ${bridgeName} ${bridgeUplinkIf}
+        ip link set ${bridgeName} up
+fi
+ 
+/sbin/ifconfig ${vmIf} 0.0.0.0 up
+/usr/sbin/brctl addif ${bridgeName} ${vmIf}
+exit 0
diff --git a/etc/NodeManager.cfg b/etc/NodeManager.cfg
index a47bccf..48f4044 100644
--- a/etc/NodeManager.cfg
+++ b/etc/NodeManager.cfg
@@ -80,7 +80,6 @@
 statsInterval = 0.0
 ;accountingHost = clustermanager
 ;accountingPort = 2228
-;bind = 0.0.0.0 ; not supported (Thrift is missing support to specify what to bind to!)
 
 [Security]
 authAndEncrypt = False
diff --git a/etc/TashiDefaults.cfg b/etc/TashiDefaults.cfg
index fd034eb..1472174 100644
--- a/etc/TashiDefaults.cfg
+++ b/etc/TashiDefaults.cfg
@@ -54,10 +54,10 @@
 allowMismatchedVersions = False
 maxMemory = 8192
 maxCores = 8
+defaultNetwork = 0
 allowDuplicateNames = False
 ;accountingHost = clustermanager
 ;accountingPort = 2228
-;bind = 0.0.0.0 ; not supported (Thrift is missing support to specify what to bind to!)
 
 [GetentOverride]
 baseData = tashi.clustermanager.data.Pickled
@@ -110,11 +110,9 @@
 clusterManagerHost = localhost 
 clusterManagerPort = 9882
 statsInterval = 0.0
-;bind = 0.0.0.0 ; not supported (Thrift is missing support to specify what to bind to!)
 
 [Qemu]
 qemuBin = /usr/bin/kvm
-infoDir = /var/tmp/VmControlQemu/
 pollDelay = 1.0
 migrationRetries = 10
 monitorTimeout = 60.0
@@ -124,6 +122,9 @@
 statsInterval = 0.0
 scratchDir = /tmp
 scratchVg = vgscratch
+suspendHandler = gzip
+resumeHandler = zcat
+reservedMem = 512
 
 [XenPV]
 vmNamePrefix = tashi
diff --git a/src/tashi/accounting/accounting.py b/src/tashi/accounting/accounting.py
index 93d2999..698379a 100755
--- a/src/tashi/accounting/accounting.py
+++ b/src/tashi/accounting/accounting.py
@@ -17,8 +17,8 @@
 # specific language governing permissions and limitations
 # under the License.    
 
+import os
 import sys
-import signal
 import logging.config
 
 from tashi.rpycservices import rpycservices
@@ -26,13 +26,15 @@
 #from rpyc.utils.authenticators import TlsliteVdbAuthenticator
 
 #from tashi.rpycservices.rpyctypes import *
-from tashi.util import getConfig, createClient, instantiateImplementation, boolean, debugConsole, signalHandler
+from tashi.util import createClient, instantiateImplementation, debugConsole
+from tashi.utils.config import Config
+
 import tashi
 
 class Accounting(object):
-	def __init__(self, config, cmclient):
+	def __init__(self, config):
 		self.config = config
-		self.cm = cmclient
+		self.cm = createClient(config)
 		self.hooks = []
 		self.log = logging.getLogger(__file__)
 
@@ -43,17 +45,20 @@
 			name = name.lower()
 			if (name.startswith("hook")):
 				try:
-					self.hooks.append(instantiateImplementation(value, config, cmclient, False))
+					self.hooks.append(instantiateImplementation(value, self.config, self.cm, False))
 				except:
 					self.log.exception("Failed to load hook %s" % (value))
 					
 	def initAccountingServer(self):
 		service = instantiateImplementation(self.config.get("Accounting", "service"), self.config)
 
+		#XXXstroucki: disabled authAndEncrypt for now
 		#if boolean(self.config.get("Security", "authAndEncrypt")):
 		if False:
 			pass
 		else:
+			# XXXstroucki: ThreadedServer is liable to have
+			# exceptions within if an endpoint is lost.
 			t = ThreadedServer(service=rpycservices.ManagerService, hostname='0.0.0.0', port=int(self.config.get('AccountingService', 'port')), auto_register=False)
 
 		t.logger.setLevel(logging.ERROR)
@@ -62,25 +67,44 @@
 
 		debugConsole(globals())
 
-		try:
-			t.start()
-		except KeyboardInterrupt:
-			self.handleSIGTERM(signal.SIGTERM, None)
-
-	@signalHandler(signal.SIGTERM)
-	def handleSIGTERM(self, signalNumber, stackFrame):
-		self.log.info('Exiting cluster manager after receiving a SIGINT signal')
+		t.start()
+		# shouldn't exit by itself
 		sys.exit(0)
 
 def main():
-	(config, configFiles) = getConfig(["Accounting"])
+	config = Config(["Accounting"])
+	configFiles = config.getFiles()
 	publisher = instantiateImplementation(config.get("Accounting", "publisher"), config)
 	tashi.publisher = publisher
-	cmclient = createClient(config)
 	logging.config.fileConfig(configFiles)
-	accounting = Accounting(config, cmclient)
+	log = logging.getLogger(__name__)
+	log.info('Using configuration file(s) %s' % configFiles)
 
-	accounting.initAccountingServer()
+	accounting = Accounting(config)
+
+	# handle keyboard interrupts (http://code.activestate.com/recipes/496735-workaround-for-missed-sigint-in-multithreaded-prog/)
+	child = os.fork()
+
+	if child == 0:
+		accounting.initAccountingServer()
+		# shouldn't exit by itself
+		sys.exit(0)
+
+	else:
+		# main
+		try:
+			os.waitpid(child, 0)
+		except KeyboardInterrupt:
+			log.info("Exiting accounting service after receiving a SIGINT signal")
+			os._exit(0)
+		except Exception:
+			log.exception("Abnormal termination of accounting service")
+			os._exit(-1)
+
+		log.info("Exiting accounting service after service thread exited")
+		os._exit(-1)
+
+	return
 
 if __name__ == "__main__":
 	main()
diff --git a/src/tashi/accounting/accountingservice.py b/src/tashi/accounting/accountingservice.py
index b1c035a..56c1c90 100644
--- a/src/tashi/accounting/accountingservice.py
+++ b/src/tashi/accounting/accountingservice.py
@@ -5,15 +5,15 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 #   http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
-# under the License.    
+# under the License.
 
 import logging
 import threading
@@ -22,43 +22,43 @@
 from tashi import createClient
 
 class AccountingService(object):
-        """RPC service for the Accounting service"""
-        
-        def __init__(self, config):
-            self.log = logging.getLogger(__name__)
-            self.log.setLevel(logging.INFO)
+	"""RPC service for the Accounting service"""
 
-	    self.config = config
+	def __init__(self, config):
+		self.log = logging.getLogger(__name__)
+		self.log.setLevel(logging.INFO)
 
-	    self.pollSleep = None
+		self.config = config
 
-	    # XXXstroucki new python has fallback values
-	    try:
-		    self.pollSleep = self.config.getint("AccountingService", "pollSleep")
-	    except:
-		    pass
+		self.pollSleep = None
 
-	    if self.pollSleep is None:
-		    self.pollSleep = 600
+		# XXXstroucki new python has fallback values
+		try:
+			self.pollSleep = self.config.getint("AccountingService", "pollSleep")
+		except:
+			pass
 
-            self.cm = createClient(config)
-            threading.Thread(target=self.__start).start()
+		if self.pollSleep is None:
+			self.pollSleep = 600
+
+		self.cm = createClient(config)
+		threading.Thread(target=self.__start).start()
 
 	# remote
-        def record(self, strings):
-            for string in strings:
-                self.log.info("Remote: %s" % (string))
+	def record(self, strings):
+		for string in strings:
+			self.log.info("Remote: %s" % (string))
 
-        def __start(self):
-            while True:
-                try:
-                    instances = self.cm.getInstances()
-                    for instance in instances:
-                        # XXXstroucki this currently duplicates what the CM was doing.
-                        self.log.info('Accounting: id %d host %d vmId %d user %d cores %d memory %d' % (instance.id, instance.hostId, instance.vmId, instance.userId, instance.cores, instance.memory))
-                except:
-                    self.log.warning("Accounting iteration failed")
+	def __start(self):
+		while True:
+			try:
+				instances = self.cm.getInstances()
+				for instance in instances:
+					# XXXstroucki this currently duplicates what the CM was doing.
+					self.log.info('Accounting: id %s host %s vmId %s user %s cores %s memory %s' % (instance.id, instance.hostId, instance.vmId, instance.userId, instance.cores, instance.memory))
+			except:
+				self.log.warning("Accounting iteration failed")
 
-                        
-                # wait to do the next iteration
-                time.sleep(self.pollSleep)
+
+			# wait to do the next iteration
+			time.sleep(self.pollSleep)
diff --git a/src/tashi/agents/dhcpdns.py b/src/tashi/agents/dhcpdns.py
index a1741e1..9e95843 100644
--- a/src/tashi/agents/dhcpdns.py
+++ b/src/tashi/agents/dhcpdns.py
@@ -22,7 +22,7 @@
 import subprocess
 import time
 from instancehook import InstanceHook
-from tashi.rpycservices.rpyctypes import Instance, NetworkConfiguration
+from tashi.rpycservices.rpyctypes import Instance
 from tashi import boolean
 
 class DhcpDns(InstanceHook):
@@ -55,15 +55,21 @@
 		self.ipMax = {}
 		self.currentIP = {}
 		self.usedIPs = {}
-		for k in self.ipRange:
-			ipRange = self.ipRange[k]
-			(min, max) = ipRange.split("-")	
-			min = min.strip()
-			max = max.strip()
-			ipNum = self.strToIp(min)
-			self.ipMin[k] = self.strToIp(min)
-			self.ipMax[k] = self.strToIp(max)
-			self.currentIP[k] = self.ipMin[k]
+
+		self.initIPs()
+
+	def initIPs(self):
+		self.usedIPs = {}
+		for network in self.ipRange:
+			ipRange = self.ipRange[network]
+			(ipMin, ipMax) = ipRange.split("-")	
+			ipMin = ipMin.strip()
+			ipMax = ipMax.strip()
+			ipNum = self.strToIp(ipMin)
+			self.ipMin[network] = self.strToIp(ipMin)
+			self.ipMax[network] = self.strToIp(ipMax)
+			self.currentIP[network] = self.ipMin[network]
+
 		instances = self.client.getInstances()
 		for i in instances:
 			for nic in i.nics:
@@ -72,7 +78,7 @@
 					ipNum = self.strToIp(ip)
 					self.log.info('Added %s->%s during reinitialization' % (i.name, ip))
 					self.usedIPs[ipNum] = ip
-				except Exception, e:
+				except Exception:
 					pass
 		
 	def strToIp(self, s):
@@ -87,12 +93,17 @@
 		return "%d.%d.%d.%d" % ((ip>>24)&0xff, (ip>>16)&0xff, (ip>>8)&0xff, ip&0xff)
 	
 	def allocateIP(self, nic):
+		# XXXstroucki: if the network is not defined having an ip
+		# range, this will throw a KeyError. Should be logged.
 		network = nic.network
 		allocatedIP = None
 		requestedIP = self.strToIp(nic.ip)
 		wrapToMinAlready = False
 		if (requestedIP <= self.ipMax[network] and requestedIP >= self.ipMin[network] and (requestedIP not in self.usedIPs)):
 			allocatedIP = requestedIP
+
+		# nic.ip will be updated later in preCreate if chosen
+		# ip not available
 		while (allocatedIP == None):
 			if (self.currentIP[network] > self.ipMax[network] and wrapToMinAlready):
 				raise UserWarning("No available IP addresses for network %d" % (network))
@@ -127,7 +138,7 @@
 		stdin.write("set hardware-type = 00:00:00:01\n") # Ethernet
 		stdin.write("create\n")
 		stdin.close()
-		output = stdout.read()
+		__output = stdout.read()
 		stdout.close()
 
 	def removeDhcp(self, name, ipaddr=None):
@@ -146,7 +157,7 @@
 		stdin.write("open\n")
 		stdin.write("remove\n")
 		stdin.close()
-		output = stdout.read()
+		__output = stdout.read()
 		stdout.close()
 	
 	def addDns(self, name, ip):
@@ -169,15 +180,15 @@
 				stdin.write("update add %s %d IN PTR %s.%s.\n" % (reverseIpStr, self.dnsExpire, name, self.dnsDomain))
 				stdin.write("\n")
 			stdin.close()
-			output = stdout.read()
+			__output = stdout.read()
 			stdout.close()
 		finally:
 			os.kill(child.pid, signal.SIGTERM)
-			(pid, status) = os.waitpid(child.pid, os.WNOHANG)
+			(pid, __status) = os.waitpid(child.pid, os.WNOHANG)
 			while (pid == 0): 
 				time.sleep(0.5)
 				os.kill(child.pid, signal.SIGTERM)
-				(pid, status) = os.waitpid(child.pid, os.WNOHANG)
+				(pid, __status) = os.waitpid(child.pid, os.WNOHANG)
 	
 	def removeDns(self, name):
 		cmd = "nsupdate"
@@ -196,15 +207,15 @@
 			stdin.write("update delete %s.%s A\n" % (name, self.dnsDomain))
 			stdin.write("\n")
 			stdin.close()
-			output = stdout.read()
+			__output = stdout.read()
 			stdout.close()
 		finally:
 			os.kill(child.pid, signal.SIGTERM)
-			(pid, status) = os.waitpid(child.pid, os.WNOHANG)
+			(pid, __status) = os.waitpid(child.pid, os.WNOHANG)
 			while (pid == 0): 
 				time.sleep(0.5)
 				os.kill(child.pid, signal.SIGTERM)
-				(pid, status) = os.waitpid(child.pid, os.WNOHANG)
+				(pid, __status) = os.waitpid(child.pid, os.WNOHANG)
 	
 	def doUpdate(self, instance):
 		newInstance = Instance()
@@ -229,7 +240,7 @@
 					dhcpName = instance.name + "-nic%d" % (i)
 				self.log.info("Adding %s:{%s->%s} to DHCP" % (dhcpName, nic.mac, ip))
 				self.addDhcp(dhcpName, ip, nic.mac)
-			except Exception, e:
+			except Exception:
 				self.log.exception("Failed to add host %s to DHCP/DNS" % (instance.name))
 		self.doUpdate(instance)
 
@@ -242,8 +253,11 @@
 			ip = nic.ip
 			try:
 				ipNum = self.strToIp(ip)
+				# XXXstroucki: if this fails with KeyError,
+				# we must have double-assigned the same IP
+				# address. How does this happen?
 				del self.usedIPs[ipNum]
-			except Exception, e:
+			except Exception:
 				self.log.exception("Failed to remove host %s, ip %s from pool of usedIPs" % (instance.name, ip))
 			try:
 				if (i == 0):
@@ -251,9 +265,13 @@
 				else:
 					dhcpName = instance.name + "-nic%d" % (i)
 				self.removeDhcp(dhcpName)
-			except Exception, e:
+			except Exception:
 				self.log.exception("Failed to remove host %s from DHCP" % (instance.name))
 		try:
+			# XXXstroucki: this can fail if the resolver can't
+			# resolve the dns server name (line 190). Perhaps
+			# the hostname should be then pushed onto a list
+			# to try again next time.
 			self.removeDns(instance.name)
-		except Exception, e:
+		except Exception:
 			self.log.exception("Failed to remove host %s from DNS" % (instance.name))
diff --git a/src/tashi/agents/instancehook.py b/src/tashi/agents/instancehook.py
index 03b5684..cd62b40 100644
--- a/src/tashi/agents/instancehook.py
+++ b/src/tashi/agents/instancehook.py
@@ -1,5 +1,3 @@
-#! /usr/bin/env python
-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -17,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.    
 
+# superclass for instance hooks.
+
 class InstanceHook(object):
 	def __init__(self, config, client, post=False):
 		if (self.__class__ is InstanceHook):
diff --git a/src/tashi/agents/locality-server.py b/src/tashi/agents/locality-server.py
deleted file mode 100755
index ac835ed..0000000
--- a/src/tashi/agents/locality-server.py
+++ /dev/null
@@ -1,228 +0,0 @@
-#!/usr/bin/python
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License. 
-
-# this module provides a service to locate servers that are close
-# to a VM. Uses all-pairs shortest path algorithm. Need to provide
-# a topology for the underlying physical infrastructure.
-
-from socket import gethostname
-import os
-import threading
-import time
-import socket
-
-from tashi.services.ttypes import *
-
-from thrift import Thrift
-from thrift.transport import TSocket
-from thrift.transport import TTransport
-from thrift.protocol import TBinaryProtocol
-from thrift.server import TServer
-
-from tashi.services import clustermanagerservice
-from tashi.util import getConfig, createClient
-from tashi.parallel import *
-
-import tashi.services.layoutlocality.localityservice as localityservice
-
-from numpy import *
-from scipy import *
-
-cnames = {}
-def cannonicalName(hn):
-       try:
-               if cnames.has_key(hn):
-                       return cnames[hn]
-               r = socket.gethostbyname_ex(hn)[0]
-               cnames[hn] = r
-               return r
-       except:
-               return hn
-
-# define matrix multiplication that can be used to calculate a min-plus
-# distance product
-def genMul(A, B, add, mult):
-       '''generalized matrix multiplication'''
-       C = zeros((shape(A)[0], shape(B)[1]))
-       for i in range(shape(C)[0]):
-               for j in range(shape(C)[1]):
-                       C[i,j] = add(mult(A[i,:], B[:,j]))
-       return C
-
-def addHost(graph, hostVals, host):
-       if not graph.has_key(host):
-               graph[host] = []
-       if not hostVals.has_key(host):
-               hostVals[host] = len(hostVals)
-
-def graphConnect(graph, h1, h2):
-       if not h1 in graph[h2]:
-               graph[h2].append(h1)
-       if not h2 in graph[h1]:
-               graph[h1].append(h2)
-
-def graphFromFile(fn = 'serverLayout', graph = {}, hostVals = {}):
-       f = open(fn)
-       for line in f.readlines():
-               line = line.split()
-               if len(line) < 1:
-                       continue
-               server = cannonicalName(line[0].strip())
-
-               addHost(graph, hostVals, server)
-               for peer in line[1:]:
-                       peer = cannonicalName(peer.strip())
-                       addHost(graph, hostVals, peer)
-                       graphConnect(graph, server, peer)
-       return graph, hostVals
-
-def graphFromTashi(client, transport, graph={}, hostVals={}):
-       print 'getting graph'
-       if not transport.isOpen():
-               transport.open()
-       hosts = client.getHosts()
-       instances = client.getInstances()
-       for instance in instances:
-               host = [cannonicalName(h.name) for h in hosts if h.id == instance.hostId]
-               if len(host) <1 :
-                       print 'cant find vm host'
-                       continue
-               host = host[0]
-               print 'host is ', host
-               addHost(graph, hostVals, host)
-               print 'added host'
-               vmhost = cannonicalName(instance.name)
-               addHost(graph, hostVals, vmhost)
-               print 'added vm'
-               graphConnect(graph, host, vmhost)
-               print 'connected'
-       print 'returning from graphFromTashi'
-       return graph, hostVals
-
-
-
-def graphToArray(graph, hostVals):
-       a = zeros((len(hostVals), len(hostVals)))
-       for host in graph.keys():
-               if not hostVals.has_key(host):
-                       continue
-               a[hostVals[host], hostVals[host]] = 1
-               for peer in graph[host]:
-                       if not hostVals.has_key(peer):
-                               continue
-                       a[hostVals[host], hostVals[peer]] = 1
-       a[a==0] = inf
-       for i in range(shape(a)[0]):
-               a[i,i]=0
-       return a
-
-def shortestPaths(graphArray):
-       a = graphArray
-       for i in range(math.ceil(math.log(shape(a)[0],2))):
-               a = genMul(a,a,min,plus)
-       return a
-
-def plus(A, B):
-       return A + B
-
-
-def getHopCountMatrix(sourceHosts, destHosts, array, hostVals):
-       a = zeros((len(sourceHosts), len(destHosts)))
-       a[a==0] = inf
-       for i in range(len(sourceHosts)):
-               sh = cannonicalName(sourceHosts[i])
-               shv = None
-               if hostVals.has_key(sh):
-                       shv = hostVals[sh]
-               else:
-                       print 'host not found', sh
-                       continue
-               for j in range(len(destHosts)):
-                       dh = cannonicalName(destHosts[j])
-                       dhv = None
-                       if hostVals.has_key(dh):
-                               dhv = hostVals[dh]
-                       else:
-                               print 'dest not found', dh
-                               continue
-                       print sh, dh, i,j, shv, dhv, array[shv, dhv]
-                       a[i,j] = array[shv, dhv]
-       return a
-
-
-class LocalityService:
-       def __init__(self):
-               (config, configFiles) = getConfig(["Agent"])
-               self.port = int(config.get('LocalityService', 'port'))
-               print 'Locality service on port %i' % self.port
-               self.processor = localityservice.Processor(self)
-               self.transport = TSocket.TServerSocket(self.port)
-               self.tfactory = TTransport.TBufferedTransportFactory()
-               self.pfactory = TBinaryProtocol.TBinaryProtocolFactory()
-               self.server = TServer.TThreadedServer(self.processor,
-                                                     self.transport,
-                                                     self.tfactory,
-                                                     self.pfactory)
-
-               self.hostVals =[]
-               self.array = array([[]])
-               self.rtime = 0
-
-
-               self.fileName = os.path.expanduser(config.get("LocalityService", "staticLayout"))
-               (self.client, self.transport) = createClient(config)
-
-               self.server.serve()
-
-       @synchronizedmethod
-       def refresh(self):
-               if time.time() - self.rtime < 10:
-                       return
-               g, self.hostVals = graphFromFile(self.fileName)
-               try:
-                       g, self.hostVals = graphFromTashi(self.client, self.transport, g, self.hostVals)
-               except e:
-                       print e
-                       print 'could not get instance list from cluster manager'
-               print 'graph to array'
-               a = graphToArray(g, self.hostVals)
-               print 'calling shortest paths ', a.shape
-               self.array = shortestPaths(a)
-               print 'computed shortest paths'
-               print self.array
-               print self.hostVals
-       @synchronizedmethod
-       def getHopCountMatrix(self, sourceHosts, destHosts):
-               self.refresh()
-               print 'getting hop count matrix for', sourceHosts, destHosts
-               hcm =  getHopCountMatrix(sourceHosts, destHosts, self.array, self.hostVals)
-               print hcm
-               return hcm
-
-
-def main():
-
-       #XXXstroucki This code has not been updated for several years.
-       # It may still be useful as an example.
-       import sys
-       sys.exit(0);
-
-       ls = LocalityService()
-
-if __name__ == "__main__":
-       main()
diff --git a/src/tashi/agents/mauipacket.py b/src/tashi/agents/mauipacket.py
deleted file mode 100644
index 5423db5..0000000
--- a/src/tashi/agents/mauipacket.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import subprocess
-import time
-import pseudoDes
-
-class MauiPacket:
-	def __init__(self, key=0):
-		self.size = 0
-		self.char = '\n'
-		self.chksum = '0'*16
-		self.timestamp = int(time.time())
-		self.auth = ''
-		self.data = []
-		self.msg = ''
-		self.key=key
-	def readPacket(self, istream):
-		self.msg = ''
-
-		size = istream.read(8)
-		self.msg = self.msg+size
-		self.size = int(size)
-
-		self.char = istream.read(1)
-		self.msg = self.msg + self.char
-
-		packet = istream.read(self.size)
-		self.msg = self.msg + packet
-
-		packet = packet.split()
-		
-		for i in range(len(packet)):
-			item = packet[i].split('=')
-			if item[0] == 'CK':
-				self.chksum = item[1]
-			if item[0] == 'TS':
-				self.timestamp = int(item[1])
-			if item[0] == 'AUTH':
-				self.auth = item[1]
-			if item[0] == 'DT':
-				self.data = packet[i:]
-				self.data=self.data[0].split('=',1)[1:] + self.data[1:]
-
-	def checksumMessage(self, message, key=None):
-		if key == None:
-			key = self.key
-		if type(key) == type(''):
-			key = int(key)
-		chksum = pseudoDes.generateKey(message, key)
-		chksum = '%016x' % chksum
-		return chksum
-	def getChecksum(self):
-		cs = self.msg.partition('TS=')
-		cs = cs[1]+cs[2]
-		chksum = self.checksumMessage(cs)
-		return chksum
-	def verifyChecksum(self):
-		chksum = self.getChecksum()
-		if chksum != self.chksum:
-			print 'verifyChecksum: "%s"\t"%s"'%(chksum, self.chksum)
-			print 'verifyChecksum (types): %s\t%s' %(type(chksum), type(self.chksum))
-			return False
-		return True
-	def set(self, data, auth=None, key=None, timestamp=None):
-		if timestamp==None:
-			timestamp = int(time.time())
-		self.data = data
-		if auth !=None:
-			self.auth = auth
-		if key != None:
-			self.key = key
-		self.timstamp=timestamp
-		self.fixup()
-	def fixup(self):
-		datastring = "TS=%i AUTH=%s DT=%s"%(self.timestamp, self.auth, (' '.join(self.data)))
-		self.chksum = self.checksumMessage(datastring)
-
-		pktstring = 'CK=%s %s'%(self.chksum, datastring)
-		self.size = len(pktstring)
-	def __str__(self):
-		datastring = "TS=%i AUTH=%s DT=%s"%(self.timestamp, self.auth, (' '.join(self.data)))
-		self.chksum = self.checksumMessage(datastring)
-
-		pktstring = 'CK=%s %s'%(self.chksum, datastring)
-		self.msg = ''
-		self.msg = self.msg + '%08i'%len(pktstring)
-		self.msg = self.msg + self.char
-		self.msg = self.msg + pktstring
-
-		return self.msg
-	def prettyString(self):
-		s = '''Maui Packet
------------
-size:\t\t%i
-checksum:\t%s
-timestamp:\t%s
-auth:\t\t%s
-data:
-%s
------------'''
-		s = s%(self.size, self.chksum, self.timestamp, self.auth, self.data)
-		return s
diff --git a/src/tashi/agents/mauiwiki.py b/src/tashi/agents/mauiwiki.py
index 1af9b62..fdb3574 100755
--- a/src/tashi/agents/mauiwiki.py
+++ b/src/tashi/agents/mauiwiki.py
@@ -17,20 +17,122 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# XXXstroucki: wiki is a text based resource manager that maui can
+# use. It also seems to have disappeared from the face of the web.
+# This code is unmaintained.
+
+# XXXstroucki former file mauipacket.py
+#import subprocess
 import time
-import hashlib
-import sys
-import subprocess
-import socket, SocketServer
-from socket import gethostname
-import os
+import SocketServer
+from tashi.utils import pseudoDes
+from tashi.rpycservices.rpyctypes import HostState, InstanceState
+
+class MauiPacket:
+	def __init__(self, key=0):
+		self.size = 0
+		self.char = '\n'
+		self.chksum = '0'*16
+		self.timestamp = int(time.time())
+		self.auth = ''
+		self.data = []
+		self.msg = ''
+		self.key=key
+	def readPacket(self, istream):
+		self.msg = ''
+
+		size = istream.read(8)
+		self.msg = self.msg+size
+		self.size = int(size)
+
+		self.char = istream.read(1)
+		self.msg = self.msg + self.char
+
+		packet = istream.read(self.size)
+		self.msg = self.msg + packet
+
+		packet = packet.split()
+		
+		for i in range(len(packet)):
+			item = packet[i].split('=')
+			if item[0] == 'CK':
+				self.chksum = item[1]
+			if item[0] == 'TS':
+				self.timestamp = int(item[1])
+			if item[0] == 'AUTH':
+				self.auth = item[1]
+			if item[0] == 'DT':
+				self.data = packet[i:]
+				self.data=self.data[0].split('=',1)[1:] + self.data[1:]
+
+	def checksumMessage(self, message, key=None):
+		if key == None:
+			key = self.key
+		if type(key) == type(''):
+			key = int(key)
+		chksum = pseudoDes.generateKey(message, key)
+		chksum = '%016x' % chksum
+		return chksum
+	def getChecksum(self):
+		cs = self.msg.partition('TS=')
+		cs = cs[1]+cs[2]
+		chksum = self.checksumMessage(cs)
+		return chksum
+	def verifyChecksum(self):
+		chksum = self.getChecksum()
+		if chksum != self.chksum:
+			print 'verifyChecksum: "%s"\t"%s"'%(chksum, self.chksum)
+			print 'verifyChecksum (types): %s\t%s' %(type(chksum), type(self.chksum))
+			return False
+		return True
+	def set(self, data, auth=None, key=None, timestamp=None):
+		if timestamp==None:
+			timestamp = int(time.time())
+		self.data = data
+		if auth !=None:
+			self.auth = auth
+		if key != None:
+			self.key = key
+		self.timestamp=timestamp
+		self.fixup()
+	def fixup(self):
+		datastring = "TS=%i AUTH=%s DT=%s"%(self.timestamp, self.auth, (' '.join(self.data)))
+		self.chksum = self.checksumMessage(datastring)
+
+		pktstring = 'CK=%s %s'%(self.chksum, datastring)
+		self.size = len(pktstring)
+	def __str__(self):
+		datastring = "TS=%i AUTH=%s DT=%s"%(self.timestamp, self.auth, (' '.join(self.data)))
+		self.chksum = self.checksumMessage(datastring)
+
+		pktstring = 'CK=%s %s'%(self.chksum, datastring)
+		self.msg = ''
+		self.msg = self.msg + '%08i'%len(pktstring)
+		self.msg = self.msg + self.char
+		self.msg = self.msg + pktstring
+
+		return self.msg
+	def prettyString(self):
+		s = '''Maui Packet
+-----------
+size:\t\t%i
+checksum:\t%s
+timestamp:\t%s
+auth:\t\t%s
+data:
+%s
+-----------'''
+		s = s%(self.size, self.chksum, self.timestamp, self.auth, self.data)
+		return s
+
+# XXXstroucki original file mauiwiki.py
 import threading
 import logging.config
 
 from tashi.parallel import synchronizedmethod
 from tashi.services.ttypes import *
-from tashi.util import getConfig, createClient, instantiateImplementation, boolean
-from tashi.agents.mauipacket import MauiPacket
+from tashi.util import getConfig, createClient, instantiateImplementation
+#from tashi.agents.mauipacket import MauiPacket
 import tashi.util
 
 def jobnameToId(jobname):
@@ -57,24 +159,24 @@
 	def postDestroy(self, inst):
 		for hook in self.hooks:
 			hook.postDestroy(inst)
-	def idToInst(self, id):
+	def idToInst(self, _id):
 		instances = self.client.getInstances()
 		print 'instances ', instances
-		insts = [i for i in instances if str(i.id)==str(id)]
+		insts = [i for i in instances if str(i.id)==str(_id)]
 		if len(insts) == 0:
-			raise "No instance with ID %s"%id
+			raise "No instance with ID %s"%_id
 		if len(insts) > 1:
-			raise "Multiple instances with ID %s"%id
+			raise "Multiple instances with ID %s"%_id
 		inst = insts[0]
 		return inst
-	def destroyById(self, id):
-		inst = self.idToInst(id)
-		self.client.destroyVm(int(id))
+	def destroyById(self, _id):
+		inst = self.idToInst(_id)
+		self.client.destroyVm(int(_id))
 		self.postDestroy(inst)
-	def activateById(self, id, host):
-		inst = self.idToInst(id)
+	def activateById(self, _id, host):
+		inst = self.idToInst(_id)
 		self.preCreate(inst)
-		self.client.activateVm(int(id), host)
+		self.client.activateVm(int(_id), host)
 
 def cmplists(a, b):
 	for i in range(len(a)):
@@ -301,8 +403,8 @@
 					if j.updateTime >= updatetime and j.id in joblist]
 		jl = {}
 		for job in jobs:
-			id = "%s.%i"%(job.name, job.id)
-			jl[id] = {'STATE':self.wikiInstanceState(job),
+			_id = "%s.%i"%(job.name, job.id)
+			jl[_id] = {'STATE':self.wikiInstanceState(job),
 			          'UNAME':self.users[job.userId].name,
 			          'GNAME':self.users[job.userId].name,
 			          'UPDATETIME':int(job.updateTime),
@@ -313,14 +415,14 @@
 			          'RMEM':str(job.memory),
 			          'WCLIMIT':str(self.defaultJobTime)}
 			if job.hostId != None:
-				jl[id]['TASKLIST'] = self.hosts[job.hostId].name
+				jl[_id]['TASKLIST'] = self.hosts[job.hostId].name
 		return jl
 	@synchronizedmethod
-	def activateById(self, id, host):
-		if not self.instances.has_key(id):
+	def activateById(self, _id, host):
+		if not self.instances.has_key(_id):
 			raise "no such instance"
-		self.ihooks.activateById(id, host)
-		self.instances[id].state=InstanceState.Activating
+		self.ihooks.activateById(_id, host)
+		self.instances[_id].state=InstanceState.Activating
 
 class MauiListener(SocketServer.StreamRequestHandler):
 	def setup(self):
diff --git a/src/tashi/agents/primitive.py b/src/tashi/agents/primitive.py
index 99ef702..5014d47 100755
--- a/src/tashi/agents/primitive.py
+++ b/src/tashi/agents/primitive.py
@@ -19,16 +19,18 @@
 
 import time
 import logging.config
+import sys
 
 from tashi.rpycservices.rpyctypes import Errors, HostState, InstanceState, TashiException
 
-from tashi.util import getConfig, createClient, instantiateImplementation, boolean
+from tashi.util import createClient, instantiateImplementation, boolean
+from tashi.utils.config import Config
 import tashi
 
 class Primitive(object):
-	def __init__(self, config, cmclient):
+	def __init__(self, config):
 		self.config = config
-		self.cm = cmclient
+		self.cm = createClient(config)
 		self.hooks = []
 		self.log = logging.getLogger(__file__)
 		self.scheduleDelay = float(self.config.get("Primitive", "scheduleDelay"))
@@ -40,10 +42,10 @@
 			name = name.lower()
 			if (name.startswith("hook")):
 				try:
-					self.hooks.append(instantiateImplementation(value, config, cmclient, False))
+					self.hooks.append(instantiateImplementation(value, config, self.cm, False))
 				except:
 					self.log.exception("Failed to load hook %s" % (value))
-	        self.hosts = {}
+		self.hosts = {}
 		self.load = {}
 		self.instances = {}
 		self.muffle = {}
@@ -62,9 +64,9 @@
 		for h in self.cm.getHosts():
 			#XXXstroucki get all hosts here?
 			#if (h.up == True and h.state == HostState.Normal):
-				hosts[ctr] = h
-				ctr = ctr + 1
-				load[h.id] = []
+			hosts[ctr] = h
+			ctr = ctr + 1
+			load[h.id] = []
 			
 		load[None] = []
 		_instances = self.cm.getInstances()
@@ -75,8 +77,9 @@
 		# XXXstroucki put held machines behind pending ones
 		heldInstances = []
 		for i in instances.itervalues():
+			# Nonrunning VMs will have hostId of None, but
+			# so will Suspended VMs.
 			if (i.hostId or i.state == InstanceState.Pending):
-				# Nonrunning VMs will have hostId of None
 				load[i.hostId] = load[i.hostId] + [i.id]
 			elif (i.hostId is None and i.state == InstanceState.Held):
 				heldInstances = heldInstances + [i.id]
@@ -199,14 +202,17 @@
 							if myDisk == i.disks[0].uri and i.disks[0].persistent == True:
 								count += 1
 						if count > 1:
-								minMaxHost = None
+							minMaxHost = None
 
 			if (minMaxHost):
 				# found a host
 				if (not inst.hints.get("__resume_source", None)):
 					# only run preCreate hooks if newly starting
 					for hook in self.hooks:
-						hook.preCreate(inst)
+						try:
+							hook.preCreate(inst)
+						except:
+							self.log.warning("Failed to run preCreate hook")
 				self.log.info("Scheduling instance %s (%d mem, %d cores, %d uid) on host %s" % (inst.name, inst.memory, inst.cores, inst.userId, minMaxHost.name))	
 				rv = "fail"
 				try:
@@ -241,8 +247,21 @@
 	def start(self):
 		oldInstances = {}
 
+		# XXXstroucki: scheduling races have been observed, where
+		# a vm is scheduled on a host that had not updated its
+		# capacity with the clustermanager, leading to overloaded
+		# hosts. I think the place to insure against this happening
+		# is in the nodemanager. This scheduler will keep an
+		# internal state of cluster loading, but that is best
+		# effort and will be refreshed from CM once the buffer
+		# of vms to be scheduled is exhausted.
+
 		while True:
 			try:
+				# XXXstroucki: to get a list of vms to be
+				# scheduled, it asks the CM for a full
+				# cluster state, and will look at those
+				# without a host.
 				self.__getState()
 				
 				# Check for VMs that have exited and call
@@ -250,7 +269,7 @@
 				for i in oldInstances:
 					# XXXstroucki what about paused and saved VMs?
 					# XXXstroucki: do we need to look at Held VMs here?
-					if (i not in self.instances and (oldInstances[i].state == InstanceState.Running or oldInstances[i].state == InstanceState.Destroying)):
+					if (i not in self.instances and (oldInstances[i].state == InstanceState.Running or oldInstances[i].state == InstanceState.Destroying or oldInstances[i].state == InstanceState.ShuttingDown)):
 						self.log.info("VM exited: %s" % (oldInstances[i].name))
 						for hook in self.hooks:
 							hook.postDestroy(oldInstances[i])
@@ -280,13 +299,22 @@
 			time.sleep(self.scheduleDelay)
 
 def main():
-	(config, configFiles) = getConfig(["Agent"])
+	config = Config(["Agent"])
+	configFiles = config.getFiles()
+
 	publisher = instantiateImplementation(config.get("Agent", "publisher"), config)
 	tashi.publisher = publisher
-	cmclient = createClient(config)
 	logging.config.fileConfig(configFiles)
-	agent = Primitive(config, cmclient)
-	agent.start()
+	agent = Primitive(config)
+
+	try:
+		agent.start()
+	except KeyboardInterrupt:
+		pass
+
+	log = logging.getLogger(__file__)
+	log.info("Primitive exiting")
+	sys.exit(0)
 
 if __name__ == "__main__":
 	main()
diff --git a/src/tashi/agents/primitive_zoni.py b/src/tashi/agents/primitive_zoni.py
index c770e75..34fd0d7 100755
--- a/src/tashi/agents/primitive_zoni.py
+++ b/src/tashi/agents/primitive_zoni.py
@@ -17,6 +17,11 @@
 # specific language governing permissions and limitations
 # under the License.    
 
+# XXXstroucki: this apparently originated from a copy of the primitive
+# scheduler code sometime in 2010. It aims to keep a pool of tashi servers
+# available, and other servers shut down. Could this be better suited for
+# a hook function of the scheduler?
+
 from socket import gethostname
 import os
 import socket
diff --git a/src/tashi/client/client.py b/src/tashi/client/client.py
deleted file mode 100755
index 71b5b20..0000000
--- a/src/tashi/client/client.py
+++ /dev/null
@@ -1,213 +0,0 @@
-#! /usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-import inspect
-import os
-import sys
-import types
-from tashi.services.ttypes import *
-from thrift.protocol.TBinaryProtocol import TBinaryProtocol
-from thrift.transport.TTransport import TBufferedTransport
-from thrift.transport.TSocket import TSocket
-
-from tashi.services import clustermanagerservice
-from tashi import vmStates
-
-from tashi.util import getConfig
-
-def makeHTMLTable(list):
-	(stdin_r, stdin_w) = os.pipe()
-
-# XXXpipe: find number of columns in current window
-	pipe = os.popen("tput cols")
-	columns = pipe.read().strip()
-	keys = {}
-	for k in list:
-		for k2 in k.__dict__.keys():
-			if (not k2.endswith("Obj")):
-				keys[k2] = k2
-	if ('id' in keys):
-		del keys['id']
-		keylist = ['id'] + keys.keys()
-	else:
-		keylist = keys.keys()
-	output = "<html>"
-	output = output + "<table>"
-	output = output + "<tr>"
-	for k in keylist:
-		output = output + "<td>%s</td>" % (k)
-	output = output + "</tr>"
-	for k in list:
-		output = output + "<tr>"
-		for k2 in keylist:
-			if (k2 == "state"):
-				output = output + "<td>%s</td>" % (str(vmStates[k.__dict__.get(k2, None)]))
-			else:
-				output = output + "<td>%s</td>" % (str(k.__dict__.get(k2, None)))
-		output = output + "</tr>"
-	output = output + "</table>"
-	output = output + "</html>"
-	pid = os.fork()
-	if (pid == 0):
-		os.close(stdin_w)
-		os.dup2(stdin_r, 0)
-		os.close(stdin_r)
-		os.execl("/usr/bin/lynx", "/usr/bin/lynx", "-width=%s" % (columns), "-dump", "-stdin")
-		sys.exit(-1)
-	os.close(stdin_r)
-	os.write(stdin_w, output)
-	os.close(stdin_w)
-	os.waitpid(pid, 0)
-
-def getFunction(argv):
-	"""Tries to determine the name of the function requested by the user -- may be called multiple times if the binary name is 'client'"""
-	function = "None"
-	if (len(argv) > 0):
-		function = argv[0].strip()
-		if (function.rfind("/") != -1):
-			function = function[function.rfind("/")+1:]
-		if (function.rfind(".") != -1):
-			function = function[:function.rfind(".")]
-	return function
-
-def getFunctionInfo(m):
-	"""Gets a string that describes a function from the interface"""
-	f = getattr(clustermanagerservice.Iface, m)
-	argspec = inspect.getargspec(f)[0][1:]
-	return m + inspect.formatargspec(argspec)
-
-def usage():
-	"""Print program usage"""
-	print "Available methods:"
-	for m in methods:
-		print "\t" + getFunctionInfo(m)
-	print
-	print "Examples:"
-	print "\tgetInstances"
-	print "\taddUser 'User(d={\"username\":\"foobar\"})'"
-	print "\tremoveUser 2"
-	print "\tcreateVM 1 1"
-
-def simpleType(obj):
-	"""Determines whether an object is a simple type -- used as a helper function to pprint"""
-	if (type(obj) is not type([])):
-		if (not getattr(obj, "__dict__", None)):
-			return True
-	return False
-
-def pprint(obj, depth = 0, key = None):
-	"""My own version of pprint that prints out a dict in a readable, but slightly more compact format"""
-	valueManip = lambda x: x
-	if (key):
-		keyString = key + ": "
-		if (key == "state"):
-			valueManip = lambda x: vmStates[x]
-	else:
-		keyString = ""
-	if (type(obj) is type([])):
-		if (reduce(lambda x, y: x and simpleType(y), obj, True)):
-			print (" " * (depth * INDENT)) + keyString + str(obj)
-		else:
-			print (" " * (depth * INDENT)) + keyString + "["
-			for o in obj:
-				pprint(o, depth + 1)
-			print (" " * (depth * INDENT)) + "]"
-	elif (getattr(obj, "__dict__", None)):
-		if (reduce(lambda x, y: x and simpleType(y), obj.__dict__.itervalues(), True)):
-			print (" " * (depth * INDENT)) + keyString + str(obj)
-		else:
-			print (" " * (depth * INDENT)) + keyString + "{"
-			for (k, v) in obj.__dict__.iteritems():
-				pprint(v, depth + 1, k)
-			print (" " * (depth * INDENT)) + "}"
-	else:
-		print (" " * (depth * INDENT)) + keyString + str(valueManip(obj))
-
-def main():
-	"""Main function for the client program"""
-	global INDENT, methods, exitCode
-	exitCode = 0
-	INDENT = (os.getenv("INDENT", 4))
-	methods = filter(lambda x: not x.startswith("__"), clustermanagerservice.Iface.__dict__.keys())
-	function = getFunction(sys.argv)
-	if (function == "client"):
-		function = getFunction(sys.argv[1:])
-	if (function == "--makesyms"):
-		for m in methods:
-			os.symlink(sys.argv[0], m)
-		sys.exit(0)
-	if (function == "--rmsyms"):
-		for m in methods:
-			os.unlink(m)
-		sys.exit(0)
-
-	(config,configFiles) = getConfig(["Client"])
-	cfgHost = config.get('Client', 'clusterManagerHost')
-	cfgPort = config.get('Client', 'clusterManagerPort')
-	cfgTimeout = float(config.get('Client', 'clusterManagerTimeout'))
-	host = os.getenv('TASHI_CM_HOST', cfgHost)
-	port = os.getenv('TASHI_CM_PORT', cfgPort)
-	timeout = float(os.getenv('TASHI_CM_TIMEOUT', cfgTimeout)) * 1000.0
-
-	socket = TSocket(host, int(port))
-	socket.setTimeout(timeout)
-	transport = TBufferedTransport(socket)
-	protocol = TBinaryProtocol(transport)
-	client = clustermanagerservice.Client(protocol)
-	client._transport = transport
-	client._transport.open()
-	f = getattr(client, function, None)
-	if not f:
-		usage()
-		sys.exit(-1)
-	args = map(lambda x: eval(x), sys.argv[1:])
-	try:
-		res = f(*args)
-		def cmp(x, y):
-			try:
-				if (x.id < y.id):
-					return -1
-				elif (y.id < x.id):
-					return 1
-				else:
-					return 0
-			except Exception, e:
-				return 0
-		if (type(res) == types.ListType):
-			res.sort(cmp)
-		if (os.getenv("USE_HTML_TABLES")):
-			try:
-				makeHTMLTable(res)
-			except:
-				pprint(res)
-		else:
-			pprint(res)
-	except TashiException, e:
-		print e.msg
-		exitCode = e.errno
-	except TypeError, e:
-		print e
-		print "\t" + getFunctionInfo(function)
-		exitCode = -1
-	finally:
-		client._transport.close()
-	sys.exit(exitCode)
-
-if __name__ == "__main__":
-	main()
diff --git a/src/tashi/client/tashi-client.py b/src/tashi/client/tashi-client.py
index db24816..8afdb7d 100755
--- a/src/tashi/client/tashi-client.py
+++ b/src/tashi/client/tashi-client.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#!/usr/bin/python
 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -21,8 +21,10 @@
 import random
 import sys
 import types
-from tashi.rpycservices.rpyctypes import *
-from tashi import vmStates, hostStates, boolean, getConfig, stringPartition, createClient
+from tashi.rpycservices.rpyctypes import NetworkConfiguration,\
+	DiskConfiguration, HostState, Instance, Host, TashiException
+from tashi.utils.config import Config
+from tashi import vmStates, hostStates, boolean, stringPartition, createClient
 
 users = {}
 networks = {}
@@ -50,6 +52,22 @@
 			return users[user].id
 	raise ValueError("Unknown user %s" % (userStr))
 
+def checkHid(host):
+	userId = getUser()
+	hosts = client.getHosts()
+	hostId = None
+	try:
+		hostId = int(host)
+	except:
+		for h in hosts:
+			if (h.name == host):
+				hostId = h.id
+	if (hostId is None):
+		raise ValueError("Unknown host %s" % (str(host)))
+
+	# XXXstroucki permissions for host related stuff?
+	return hostId
+
 def checkIid(instance):
 	userId = getUser()
 	instances = client.getInstances()
@@ -78,10 +96,17 @@
 
 def getDefaultNetwork():
 	fetchNetworks()
-	networkId = 1
+	networkId = 0
 	for network in networks:
+		if (getattr(networks[network], "default", False) is True):
+			networkId = network
+			break
+
+		# Naming the network "default" is deprecated, and
+		# this functionality will be removed soon
 		if (networks[network].name == "default"):
 			networkId = network
+			break
 	return networkId
 
 def randomNetwork():
@@ -93,7 +118,7 @@
 		disks = []
 		for strDisk in strDisks:
 			strDisk = strDisk.strip()
-			(l, s, r) = stringPartition(strDisk, ":")
+			(l, __s, r) = stringPartition(strDisk, ":")
 			if (r == ""):
 				r = "False"
 			r = boolean(r)
@@ -109,12 +134,12 @@
 		nics = []
 		for strNic in strNics:
 			strNic = strNic.strip()
-			(l, s, r) = stringPartition(strNic, ":")
+			(l, __s, r) = stringPartition(strNic, ":")
 			n = l
 			if (n == ''):
 				n = getDefaultNetwork()
 			n = int(n)
-			(l, s, r) = stringPartition(r, ":")
+			(l, __s, r) = stringPartition(r, ":")
 			ip = l
 			if (ip == ''):
 				ip = None
@@ -133,7 +158,7 @@
 		hints = {}
 		for strHint in strHints:
 			strHint = strHint.strip()
-			(l, s, r) = stringPartition(strHint, "=")
+			(l, __s, r) = stringPartition(strHint, "=")
 			hints[l] = r
 		return hints
 	except:
@@ -186,12 +211,26 @@
 		instances.append(client.createVm(instance))
 	return instances
 
+def shutdownMany(basename):
+	return __shutdownOrDestroyMany("shutdown", basename)
+
 def destroyMany(basename):
+	return __shutdownOrDestroyMany("destroy", basename)
+
+def __shutdownOrDestroyMany(method, basename):
 	instances = client.getInstances()
 	count = 0
 	for i in instances:
 		if (i.name.startswith(basename + "-") and i.name[len(basename)+1].isdigit()):
-			client.destroyVm(i.id)
+			if method == "shutdown":
+				client.shutdownVm(i.id)
+
+			elif method == "destroy":
+				client.destroyVm(i.id)
+
+			else:
+				raise ValueError("Unknown method")
+
 			count = count + 1
 	if (count == 0):
 		raise ValueError("That is an unused basename")
@@ -213,6 +252,7 @@
 'copyImage': (None, None), 
 'createVm': (None, ['id', 'hostId', 'name', 'user', 'state', 'disk', 'memory', 'cores']),
 'createMany': (createMany, ['id', 'hostId', 'name', 'user', 'state', 'disk', 'memory', 'cores']),
+'shutdownMany': (shutdownMany, None),
 'destroyMany': (destroyMany, None),
 'getVmLayout': (getVmLayout, ['id', 'name', 'state', 'instances', 'usedMemory', 'memory', 'usedCores', 'cores']),
 'getInstances': (None, ['id', 'hostId', 'name', 'user', 'state', 'disk', 'memory', 'cores']),
@@ -225,6 +265,7 @@
 'createMany': [('userId', int, getUser, False), ('basename', str, lambda: requiredArg('basename'), True), ('cores', int, lambda: 1, False), ('memory', int, lambda: 128, False), ('disks', parseDisks, lambda: requiredArg('disks'), True), ('nics', parseNics, randomNetwork, False), ('hints', parseHints, lambda: {}, False), ('count', int, lambda: requiredArg('count'), True)],
 'shutdownVm': [('instance', checkIid, lambda: requiredArg('instance'), True)],
 'destroyVm': [('instance', checkIid, lambda: requiredArg('instance'), True)],
+'shutdownMany': [('basename', str, lambda: requiredArg('basename'), True)],
 'destroyMany': [('basename', str, lambda: requiredArg('basename'), True)],
 'suspendVm': [('instance', checkIid, lambda: requiredArg('instance'), True)],
 'resumeVm': [('instance', checkIid, lambda: requiredArg('instance'), True)],
@@ -235,6 +276,7 @@
 'getImages': [],
 'copyImage': [('src', str, lambda: requiredArg('src'),True), ('dst', str, lambda: requiredArg('dst'), True)],
 'getHosts': [],
+'setHostState': [('host', checkHid, lambda: requiredArg('host'), True), ('state', str, lambda: requiredArg('state'), True)],
 'getUsers': [],
 'getNetworks': [],
 'getInstances': [],
@@ -250,6 +292,7 @@
 'createMany': '[Instance(d={"userId":userId,"name":basename,"cores":cores,"memory":memory,"disks":disks,"nics":nics,"hints":hints}), count]',
 'shutdownVm': '[instance]',
 'destroyVm': '[instance]',
+'shutdownMany': '[basename]',
 'destroyMany': '[basename]',
 'suspendVm': '[instance]',
 'resumeVm': '[instance]',
@@ -260,6 +303,7 @@
 'unregisterHost' : '[hostId]',
 'getSlots' : '[cores, memory]',
 'copyImage' : '[src, dst]',
+'setHostState' : '[host, state]',
 }
 
 # Descriptions
@@ -268,6 +312,7 @@
 'createMany': 'Utility function that creates many VMs with the same set of parameters',
 'shutdownVm': 'Attempts to shutdown a VM nicely',
 'destroyVm': 'Immediately destroys a VM -- it is the same as unplugging a physical machine and should be used for non-persistent VMs or when all else fails',
+'shutdownMany': 'Attempts to gracefully shut down a group of VMs created with createMany',
 'destroyMany': 'Destroys a group of VMs created with createMany',
 'suspendVm': 'Suspends a running VM to disk',
 'resumeVm': 'Resumes a suspended VM from disk',
@@ -276,6 +321,7 @@
 'unpauseVm': 'Unpauses a paused VM',
 'getSlots': 'Get a count of how many VMs could be started in the cluster',
 'getHosts': 'Gets a list of hosts running Node Managers',
+'setHostState': 'Set the state of a host, eg. Normal or Drained',
 'getUsers': 'Gets a list of users',
 'getNetworks': 'Gets a list of available networks for VMs to be placed on',
 'getInstances': 'Gets a list of all VMs in the cluster',
@@ -293,6 +339,7 @@
 'createMany': ['--basename foobar --disks i386-hardy.qcow2 --count 4'],
 'shutdownVm': ['--instance 12345', '--instance foobar'],
 'destroyVm': ['--instance 12345', '--instance foobar'],
+'shutdownMany': ['--basename foobar'],
 'destroyMany': ['--basename foobar'],
 'suspendVm': ['--instance 12345', '--instance foobar'],
 'resumeVm': ['--instance 12345', '--instance foobar'],
@@ -301,6 +348,7 @@
 'unpauseVm': ['--instance 12345', '--instance foobar'],
 'getSlots': ['--cores 1 --memory 128'],
 'getHosts': [''],
+'setHostState': ['--host fnord --state Drained'],
 'getUsers': [''],
 'getNetworks': [''],
 'getInstances': [''],
@@ -321,7 +369,8 @@
 			print "Unknown function %s" % (func)
 			print
 		functions = argLists
-		print "%s is the client program for Tashi, a system for cloud-computing on BigData." % (os.path.basename(sys.argv[0]))
+		print "%s is the client program for Tashi" % (os.path.basename(sys.argv[0]))
+		print "Tashi, a system for cloud-computing on BigData"
 		print "Visit http://incubator.apache.org/tashi/ for more information."
 		print
 	else:
@@ -373,9 +422,9 @@
 		except:
 			obj.state = 'Unknown'
 
-def genKeys(list):
+def genKeys(_list):
 	keys = {}
-	for row in list:
+	for row in _list:
 		for item in row.__dict__.keys():
 			keys[item] = item
 	if ('id' in keys):
@@ -385,25 +434,25 @@
 		keys = keys.values()
 	return keys
 
-def makeTable(list, keys=None):
-	(consoleWidth, consoleHeight) = (9999, 9999)
+def makeTable(_list, keys=None):
+	(consoleWidth, __consoleHeight) = (9999, 9999)
 	try:
 # XXXpipe: get number of rows and column on current window
 		stdout = os.popen("stty size")
-		r = stdout.read()
+		__r = stdout.read()
 		stdout.close()
 	except:
 		pass
-	for obj in list:
+	for obj in _list:
 		transformState(obj)
 	if (keys == None):
-		keys = genKeys(list)
+		keys = genKeys(_list)
 	for (show, k) in show_hide:
 		if (show):
 			if (k != "all"):
 				keys.append(k)
 			else:
-				keys = genKeys(list)
+				keys = genKeys(_list)
 		else:
 			if (k in keys):
 				keys.remove(k)
@@ -412,7 +461,7 @@
 	maxWidth = {}
 	for k in keys:
 		maxWidth[k] = len(k)
-	for row in list:
+	for row in _list:
 		for k in keys:
 			if (k in row.__dict__):
 				maxWidth[k] = max(maxWidth[k], len(str(row.__dict__[k])))
@@ -445,8 +494,8 @@
 			return 1
 		else:
 			return 0
-	list.sort(cmp=sortFunction)
-	for row in list:
+	_list.sort(cmp=sortFunction)
+	for row in _list:
 		line = ""
 		for k in keys:
 			row.__dict__[k] = row.__dict__.get(k, "")
@@ -507,11 +556,12 @@
 	"""Main function for the client program"""
 	global INDENT, exitCode, client
 	exitCode = 0
+	exception = None
 	INDENT = (os.getenv("INDENT", 4))
 	if (len(sys.argv) < 2):
 		usage()
 	function = matchFunction(sys.argv[1])
-	(config, configFiles) = getConfig(["Client"])
+	config = Config(["Client"])
 
 	# build a structure of possible arguments
 	possibleArgs = {}
@@ -551,30 +601,54 @@
 			if (arg.startswith("--")):
 				if (arg[2:] in possibleArgs):
 					(parg, conv, default, required) = possibleArgs[arg[2:]]
-					val = conv(args.pop(0))
+					try:
+						val = None
+						lookahead = args[0]
+						if not lookahead.startswith("--"):
+							val = args.pop(0)
+					except:
+						pass
+
+					val = conv(val)
 					if (val == None):
 						val = default()
 
 					vals[parg] = val
 					continue
+			# somewhat lame, but i don't want to rewrite the fn at this time
+			exception = ValueError("Unknown argument %s" % (arg)) 
 
-			raise ValueError("Unknown argument %s" % (arg)) 
-
-		
-		f = getattr(client, function, None)
+		f = None
+		try:
+			f = extraViews[function][0]
+		except:
+			pass
 
 		if (f is None):
-			f = extraViews[function][0]
-		if (function in convertArgs):
-			fargs = eval(convertArgs[function], globals(), vals)
-		else:
-			fargs = []
-		res = f(*fargs)
+			f = getattr(client, function, None)
+
+		try:
+			if exception is not None:
+				raise exception
+
+			if (function in convertArgs):
+				fargs = eval(convertArgs[function], globals(), vals)
+			else:
+				fargs = []
+
+			res = f(*fargs)
+		except Exception, e:
+			print "Failed in calling %s: %s" % (function, e)
+			print "Please run tashi-client --examples for syntax information"
+			sys.exit(-1)
+
 		if (res != None):
 			keys = extraViews.get(function, (None, None))[1]
 			try:
 				if (type(res) == types.ListType):
 					makeTable(res, keys)
+				elif (type(res) == types.StringType):
+					print res
 				else:
 					makeTable([res], keys)
 					
diff --git a/src/tashi/client/test.py b/src/tashi/client/test.py
deleted file mode 100644
index a53eefa..0000000
--- a/src/tashi/client/test.py
+++ /dev/null
@@ -1,314 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-import unittest
-import logging
-import sys
-import signal
-import os.path
-import copy
-import time
-import random
-from ConfigParser import ConfigParser
-
-from tashi.services.ttypes import *
-from thrift.transport.TSocket import TSocket
-from thrift.protocol.TBinaryProtocol import TBinaryProtocol
-from thrift.transport.TTransport import TBufferedTransport
-
-from tashi.services import clustermanagerservice
-from tashi.messaging.threadpool import synchronized
-from tashi.messaging.tashimessaging import TestTashiSubscriber
-
-from tashi.util import getConfig
-
-import tashi.client.client
-
-class ClientConnection(object):
-	'''Creates an rpc proxy'''
-	def __init__(self, host, port):
-		self.host = host
-		self.port = port
-		self.transport = TBufferedTransport(TSocket(host, int(port)))
-		self.protocol = TBinaryProtocol(self.transport)
-		self.client = clustermanagerservice.Client(self.protocol)
-		self.client._transport = self.transport
-		self.client._transport.open()
-	def __del__(self):
-		self.client._transport.close()
-
-def incrementor(init=0):
-	while 1:
-		yield init
-		init = init + 1
-
-# FIXME: don't duplicate code from clustermanager
-# def getConfig(args):
-#	 config = ConfigParser()
-#	 configFiles = [
-#		'/usr/share/tashi/ClusterManagerDefaults.cfg',
-#		'/etc/tashi/ClusterManager.cfg',
-#		os.path.expanduser('~/.tashi/ClusterManager.cfg')
-#		] + ([args[0]] if len(args) > 0 else [])
-
-#	 configFiles = config.read(configFiles)
-#	 if len(configFiles) == 0:
-#		 print >>sys.stderr, 'Unable to find the configuration file\n'
-#		 sys.exit(3)
-
-#	 return config
-
-
-class TestClient(unittest.TestCase):
-	@synchronized()
-	def getPortNum(self):
-		return self.portnum.next()
-
-	"""macro test cases for single-host tests
-
-	Assumes cwd is 'src/tashi/client/'
-	"""
-	def setUp(self):
-		"""Create a CM and single NM on local host"""
-		logging.info('setting up test')
-		
-		(self.config, self.configfiles) = getConfig([])
-
-		self.port = 1717		# FIXME: take this (and other things) from config file
-		self.portnum = incrementor(self.port)
-
-		self.cwd = os.getcwd()
-		self.srcd = os.path.dirname(os.path.dirname(self.cwd))
-		
-		self.environ = copy.copy(os.environ)
-		self.environ['PYTHONPATH'] = self.srcd
-		logging.info('base path = %s' % self.srcd)
-
-		self.nm = os.spawnlpe(os.P_NOWAIT, 'python', 'python', 
-							  os.path.join(self.srcd, 'tashi', 'nodemanager', 'nodemanager.py'),
-							  self.environ)
-		self.cm = os.spawnlpe(os.P_WAIT, 'python', 'python',
-							 os.path.join(self.srcd,  'tashi', 'clustermanager', 'clustermanager.py'),
-							 '--drop', '--create',
-							 os.path.expanduser('~/.tashi/ClusterManager.cfg'),
-							 self.environ)
-		self.cm = os.spawnlpe(os.P_NOWAIT, 'python', 'python',
-							 os.path.join(self.srcd,  'tashi', 'clustermanager', 'clustermanager.py'),
-							 os.path.expanduser('~/.tashi/ClusterManager.cfg'),
-							 self.environ)
-		# since we are spawning with P_NOWAIT, we need to sleep to ensure that the CM is listening
-		time.sleep(1)
-		try:
-			self.connection = ClientConnection('localhost', self.config.get('ClusterManagerService', 'port'))
-		except Exception, e:
-			logging.warning('client connection failed')
-			ex = None
-			try:
-				logging.warning("setUp killing node manager " + str(self.nm))
-				os.kill(self.nm, signal.SIGKILL)
-			except Exception, e:
-				ex = e
-				logging.warning('could not kill node manager: '+ str(e))
-			try:
-				logging.warning('setUp killing cluster manager ' + str(self.cm))
-				os.kill(self.cm, signal.SIGKILL)
-			except Exception, e:
-				ex = e
-				logging.warning('could not kill cluster manager: ' + str(e))
-			if e != None:
-				raise e
-
-		logging.info('node manager PID: %i' % self.nm)
-	def tearDown(self):
-		'''Kill the CM and NM that were created by setUP'''
-		logging.info('tearing down test')
-		ex = None
-		try:
-			logging.debug("killing cluster manager " + str(self.cm))
-			os.kill(self.cm, signal.SIGKILL)
-		except Exception, e:
-			ex = e
-			logging.error('Could not kill cluster manager: ' + str(e))
-			
-		try:
-			logging.debug("killing node manager " + str(self.nm))
-			os.kill(self.nm, signal.SIGKILL)
-		except Exception, e:
-			ex = e
-			logging.error('Could not kill node manager: ' + str(e))
-		if ex != None:
-			raise ex
-	def testSetup(self):
-		'''empty test to ensure that setUp code works'''
-		logging.info('setting up')
-	def testHostManagement(self):
-		'''test adding/removing/listing hosts
-
-		Right now this just adds a single host: localhost.  Eventually
-		it should 1) take a list of hosts from a test configuration
-		file, 2) ensure that all were added, 3) remove a random
-		subset, 4) ensure that they were correctly removed, 5) remove
-		all, 6) ensure that they were correctly removed.'''
-
-		# get empty host list
-		hosts = self.connection.client.getHosts()
-		self.assertEqual(hosts, [], 'starting host list not empty: ' + str(hosts) )
-
-		# add a host
-		host = Host()
-		host.hostname = 'localhost'
-		host.enabled=True
-		self.connection.client.addHost(host)
-		hosts = self.connection.client.getHosts()
-		self.assertEqual(len(hosts), 1, 'wrong number of hosts %i, should be %i' % (len(hosts), 1) )
-		self.assertEqual(hosts[0].hostname, 'localhost', 'wrong hostname: ' + str(hosts[0].hostname) )
-
-		# remove first host
-		hid = hosts[0].id
-		self.connection.client.removeHost(hid)
-		hosts = self.connection.client.getHosts()
-		self.assertEqual(hosts, [], 'host list not empty after remove: ' + str(hosts) )
-
-	def testMessaging(self):
-		'''test messaging system started by CM
-
-		tests messages published directly, through events in the CM,
-		and the log system'''
-		# FIXME: add tests for generating events as a side-effect of
-		# rpc commands, as well as logging in the CM
-		portnum = self.getPortNum()
-		self.sub = TestTashiSubscriber(self.config, portnum)
-		self.assertEqual(self.sub.messageQueue.qsize(), 0)
-		self.pub = tashi.messaging.thriftmessaging.PublisherThrift(self.config.get('MessageBroker', 'host'),
-																   int(self.config.get('MessageBroker', 'port')))
-		self.pub.publish({'message-type':'text', 'message':'Hello World!'})
-		time.sleep(0.5)
-		print '*** QSIZE', self.sub.messageQueue.qsize()
-		self.assertEqual(self.sub.messageQueue.qsize(), 1)
-
-		self.log = logging.getLogger(__name__)
-		messageHandler = tashi.messaging.tashimessaging.TashiLogHandler(self.config)
-		self.log.addHandler(messageHandler)
-		# FIXME: why can't we log messages with severity below 'warning'?
-		self.log.warning('test log message')
-		time.sleep(0.5)
-		self.assertEqual(self.sub.messageQueue.qsize(), 2)
-
-		# This should generate at least one log message
-#		 hosts = self.connection.client.getHosts()
-#		 time.sleep(0.5)
-#		 if (self.sub.messageQueue.qsize() <= 2):
-#			 self.fail()
-
-	def testUserManagement(self):
-		'''test adding/removing/listing users
-
-		same as testHostManagement, but with users'''
-		usernames = ['sleepy', 'sneezy', 'dopey', 'doc',
-					 'grumpy', 'bashful', 'happy']
-		# add all users
-		for un in usernames:
-			user = User()
-			user.username = un
-			self.connection.client.addUser(user)
-		# ensure that all were added
-		users = self.connection.client.getUsers()
-		self.assertEqual(len(usernames), len(users))
-		for user in users:
-			usernames.remove(user.username)
-		self.assertEqual(0, len(usernames))
-		# remove a random subset
-		rm = random.sample(users, 4)
-		for user in rm:
-			self.connection.client.removeUser(user.id)
-			users.remove(user)
-		newUsers = self.connection.client.getUsers()
-		# This ensures that the remaining ones are what we expect:
-		for user in newUsers:
-			# if there is a user remaining that we asked to be removed,
-			# this will throw an exception
-			users.remove(user)
-		# if a user was removed that we did not intend, this will
-		# throw an exception
-		self.assertEqual(0, len(users))
-
-#	 def testInstanceConfigurationManagement(self):
-#		 '''test adding/removing/listing instance configurations
-
-#		 same as testHostManagement, but with instance configurations'''
-#		 self.fail('test not implemented')
-	def testHardDiskConfigurationManagement(self):
-		'''test adding/removing/listing hard disk configurations
-
-		same as testHostManagement, but with hard disk configurations'''
-
-		user = User(d={'username':'sleepy'})
-		self.connection.client.addUser(user)
-		users = self.connection.client.getUsers()
-
-		per = PersistentImage()
-		per.userId = users[0].id
-		per.name = 'sleepy-PersistentImage'
-		self.connection.client.addPersistentImage(per)
-		pers = self.connection.client.getPersistentImages()
-
-		inst = InstanceConfiguration()
-		inst.name = 'sleepy-inst'
-		inst.memory = 512
-		inst.cores = 1
-		self.connection.client.addInstanceConfiguration(inst)
-		insts = self.connection.client.getInstanceConfigurations()
-
-		hdc = HardDiskConfiguration()
-		hdc.index = 0
-		hdc.persistentImageId = pers[0].id
-		hdc.persistent = False
-		hdc.instanceConfigurationId = insts[0].id
-
-#	 def testCreateDestroyShutdown(self):
-#		 '''test creating/destroying/shutting down VMs
-
-#		 not implemented'''
-#		 self.fail('test not implemented')
-#	 def testSuspendResume(self):
-#		 '''test suspending/resuming VMs
-
-#		 not implemented'''
-#		 self.fail('test not implemented')
-#	 def testMigrate(self):
-#		 '''test migration
-
-#		 not implemented'''
-#		 self.fail('test not implemented')
-#	 def testPauseUnpause(self):
-#		 '''test pausing/unpausing VMs
-
-#		 not implemented'''
-#		 self.fail('test not implemented')
-
-
-##############################
-# Test Code
-##############################
-if __name__ == '__main__':
-	logging.basicConfig(level=logging.NOTSET,
-						format="%(asctime)s %(levelname)s:\t %(message)s",
-						stream=sys.stdout)
-
-	suite = unittest.TestLoader().loadTestsFromTestCase(TestClient)
-	unittest.TextTestRunner(verbosity=2).run(suite)
-
diff --git a/src/tashi/clustermanager/clustermanager.py b/src/tashi/clustermanager/clustermanager.py
index db61194..83131cf 100755
--- a/src/tashi/clustermanager/clustermanager.py
+++ b/src/tashi/clustermanager/clustermanager.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python
 
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -17,11 +17,12 @@
 # specific language governing permissions and limitations
 # under the License.    
 
+import os
 import sys
-import signal
 import logging.config
 
-from tashi.util import signalHandler, boolean, instantiateImplementation, getConfig, debugConsole
+from tashi.util import boolean, instantiateImplementation, debugConsole
+from tashi.utils.config import Config
 import tashi
 
 from tashi.rpycservices import rpycservices
@@ -46,6 +47,9 @@
 		users[config.get('AllowedUsers', 'nodeManagerUser')] = config.get('AllowedUsers', 'nodeManagerPassword')
 		users[config.get('AllowedUsers', 'agentUser')] = config.get('AllowedUsers', 'agentPassword')
 		authenticator = TlsliteVdbAuthenticator.from_dict(users)
+
+		# XXXstroucki ThreadedServer is liable to have exceptions
+		# occur within if an endpoint is lost.
 		t = ThreadedServer(service=rpycservices.ManagerService, hostname='0.0.0.0', port=int(config.get('ClusterManagerService', 'port')), auto_register=False, authenticator=authenticator)
 	else:
 		t = ThreadedServer(service=rpycservices.ManagerService, hostname='0.0.0.0', port=int(config.get('ClusterManagerService', 'port')), auto_register=False)
@@ -54,24 +58,17 @@
 	t.service._type = 'ClusterManagerService'
 
 	debugConsole(globals())
-	
-	try:
-		t.start()
-	except KeyboardInterrupt:
-		handleSIGTERM(signal.SIGTERM, None)
 
-@signalHandler(signal.SIGTERM)
-def handleSIGTERM(signalNumber, stackFrame):
-	global log
+	t.start()
+	# shouldn't exit by itself
+	return
 
-	log.info('Exiting cluster manager after receiving a SIGINT signal')
-	sys.exit(0)
-	
 def main():
 	global log
 	
 	# setup configuration and logging
-	(config, configFiles) = getConfig(["ClusterManager"])
+	config = Config(["ClusterManager"])
+	configFiles = config.getFiles()
 	publisher = instantiateImplementation(config.get("ClusterManager", "publisher"), config)
 	tashi.publisher = publisher
 	logging.config.fileConfig(configFiles)
@@ -80,7 +77,32 @@
 	
 	# bind the database
 	log.info('Starting cluster manager')
-	startClusterManager(config)
+
+	# handle keyboard interrupts (http://code.activestate.com/recipes/496735-workaround-for-missed-sigint-in-multithreaded-prog/)
+	child = os.fork()
+
+	if child == 0:
+		startClusterManager(config)
+		# shouldn't exit by itself
+		sys.exit(0)
+
+	else:
+		# main
+		try:
+			os.waitpid(child, 0)
+		except KeyboardInterrupt:
+			log.info("Exiting cluster manager after receiving a SIGINT signal")
+			os._exit(0)
+		except Exception:
+			log.exception("Abnormal termination of cluster manager")
+			os._exit(-1)
+
+		log.info("Exiting cluster manager after service thread exited")
+		os._exit(-1)
+
+	return
+
+
 
 if __name__ == "__main__":
 	main()
diff --git a/src/tashi/clustermanager/clustermanagerservice.py b/src/tashi/clustermanager/clustermanagerservice.py
index 284ffcb..fd56db3 100644
--- a/src/tashi/clustermanager/clustermanagerservice.py
+++ b/src/tashi/clustermanager/clustermanagerservice.py
@@ -19,9 +19,8 @@
 import threading
 import time
 
-from tashi.rpycservices import rpycservices             
-from tashi.rpycservices.rpyctypes import Errors, InstanceState, HostState, TashiException
-from tashi import boolean, ConnectionManager, vmStates, version, scrubString
+from tashi.rpycservices.rpyctypes import Errors, InstanceState, Instance, HostState, TashiException
+from tashi import boolean, ConnectionManager, vmStates, hostStates, version, scrubString
 
 class ClusterManagerService(object):
 	"""RPC service for the ClusterManager"""
@@ -36,7 +35,7 @@
 		else:
 			self.username = None
 			self.password = None
-		self.proxy = ConnectionManager(self.username, self.password, int(self.config.get('ClusterManager', 'nodeManagerPort')))
+		self.proxy = ConnectionManager(self.username, self.password, int(self.config.get('ClusterManager', 'nodeManagerPort')), authAndEncrypt=self.authAndEncrypt)
 		self.dfs = dfs
 		self.convertExceptions = boolean(config.get('ClusterManagerService', 'convertExceptions'))
 		self.log = logging.getLogger(__name__)
@@ -49,6 +48,9 @@
 		self.allowMismatchedVersions = boolean(self.config.get('ClusterManagerService', 'allowMismatchedVersions'))
 		self.maxMemory = int(self.config.get('ClusterManagerService', 'maxMemory'))
 		self.maxCores = int(self.config.get('ClusterManagerService', 'maxCores'))
+
+		self.defaultNetwork = self.config.getint('ClusterManagerService', 'defaultNetwork', 0)
+
 		self.allowDuplicateNames = boolean(self.config.get('ClusterManagerService', 'allowDuplicateNames'))
 
 		self.accountingHost = None
@@ -62,7 +64,7 @@
 		self.__initAccounting()
 		self.__initCluster()
 
-		threading.Thread(target=self.__monitorCluster).start()
+		threading.Thread(name="monitorCluster", target=self.__monitorCluster).start()
 
 	def __initAccounting(self):
 		self.accountBuffer = []
@@ -71,7 +73,7 @@
 		try:
 			if (self.accountingHost is not None) and \
 				    (self.accountingPort is not None):
-				self.accountingClient=rpycservices.client(self.accountingHost, self.accountingPort)
+				self.accountingClient = ConnectionManager(self.username, self.password, self.accountingPort)[self.accountingHost]
 		except:
 			self.log.exception("Could not init accounting")
 
@@ -126,7 +128,7 @@
 			except:
 				self.log.exception("Invalid host data")
 
-                secondary = ','.join(filter(None, (hostText, instanceText)))
+		secondary = ','.join(filter(None, (hostText, instanceText)))
 
 		line = "%s|%s|%s" % (now, text, secondary)
 
@@ -232,7 +234,7 @@
 				# get a list of VMs running on host
 				try:
 					hostProxy = self.proxy[host.name]
-					remoteInstances = [hostProxy.getVmInfo(vmId) for vmId in hostProxy.listVms()]
+					remoteInstances = [self.__getVmInfo(host.name, vmId) for vmId in hostProxy.listVms()]
 				except:
 					self.log.warning('Failure getting instances from host %s' % (host.name))
 					self.data.releaseHost(host)
@@ -241,6 +243,9 @@
 				# register instances I don't know about
 				for instance in remoteInstances:
 					if (instance.id not in myInstances):
+						if instance.state == InstanceState.Exited:
+							self.log.warning("%s telling me about exited instance %s, ignoring." % (host.name, instance.id))
+							continue
 						instance.hostId = host.id
 						instance = self.data.registerInstance(instance)
 						self.data.releaseInstance(instance)
@@ -269,18 +274,22 @@
 		
 		# iterate through all VMs I believe are active
 		for instanceId in self.instanceLastContactTime.keys():
-			# Don't query non-running VMs. eg. if a VM
-			# is suspended, and has no host, then there's
-			# no one to ask
-			if instance.state != InstanceState.Running and \
-			   instance.state != InstanceState.Activating and \
-			   instance.state != InstanceState.Orphaned:
-				continue
 
 			# XXXstroucki should lock instance here?
-			if (self.instanceLastContactTime[instanceId] < (self.__now() - self.allowDecayed)):
+			try:
+				lastContactTime = self.instanceLastContactTime[instanceId]
+			except KeyError:
+				continue
+
+			if (lastContactTime < (self.__now() - self.allowDecayed)):
 				try:
 					instance = self.data.acquireInstance(instanceId)
+					# Don't query non-running VMs. eg. if a VM
+					# is suspended, and has no host, then there's
+					# no one to ask
+					if instance.state not in [InstanceState.Running, InstanceState.Activating, InstanceState.Orphaned]:
+						self.data.releaseInstance(instance)
+						continue
 				except:
 					continue
 
@@ -293,22 +302,34 @@
 
 				# get updated state on VM
 				try:
-					hostProxy = self.proxy[host.name]
-					newInstance = hostProxy.getVmInfo(instance.vmId)
+					newInstance = self.__getVmInfo(host.name, instance.vmId)
 				except:
 					self.log.warning('Failure getting data for instance %s from host %s' % (instance.name, host.name))
 					self.data.releaseInstance(instance)
 					continue
 
-				# replace existing state with new state
-				# XXXstroucki more?
-				instance.state = newInstance.state
-				self.instanceLastContactTime[instanceId] = self.__now()
-				instance.decayed = False
-				self.data.releaseInstance(instance)
+				# update the information we have on the vm
+				#before = instance.state
+				rv = self.__vmUpdate(instance, newInstance, None)
+				if (rv == "release"):
+					self.data.releaseInstance(instance)
+
+				if (rv == "remove"):
+					self.data.removeInstance(instance)
 
 
-	def normalize(self, instance):
+	def __getVmInfo(self, host, vmid):
+		hostProxy = self.proxy[host]
+		rv = hostProxy.getVmInfo(vmid)
+		if isinstance(rv, Exception):
+			raise rv
+
+		if not isinstance(rv, Instance):
+			raise ValueError
+
+		return rv
+
+	def __normalize(self, instance):
 		instance.id = None
 		instance.vmId = None
 		instance.hostId = None
@@ -336,18 +357,20 @@
 				del instance.hints[hint]
 		return instance
 	
+	# extern
 	def createVm(self, instance):
 		"""Function to add a VM to the list of pending VMs"""
 		# XXXstroucki: check for exception here
-		instance = self.normalize(instance)
+		instance = self.__normalize(instance)
 		instance = self.data.registerInstance(instance)
 		self.data.releaseInstance(instance)
 		self.__ACCOUNT("CM VM REQUEST", instance=instance)
 		return instance
-	
+
+	# extern
 	def shutdownVm(self, instanceId):
 		instance = self.data.acquireInstance(instanceId)
-		self.__stateTransition(instance, InstanceState.Running, InstanceState.ShuttingDown)
+		self.__stateTransition(instance, None, InstanceState.ShuttingDown)
 		self.data.releaseInstance(instance)
 		self.__ACCOUNT("CM VM SHUTDOWN", instance=instance)
 		hostname = self.data.getHost(instance.hostId).name
@@ -357,7 +380,8 @@
 			self.log.exception('shutdownVm failed for host %s vmId %d' % (instance.name, instance.vmId))
 			raise
 		return
-	
+
+	# extern
 	def destroyVm(self, instanceId):
 		instance = self.data.acquireInstance(instanceId)
 		if (instance.state is InstanceState.Pending or instance.state is InstanceState.Held):
@@ -365,7 +389,7 @@
 			self.data.removeInstance(instance)
 		elif (instance.state is InstanceState.Activating):
 			self.__ACCOUNT("CM VM DESTROY STARTING", instance=instance)
-			self.__stateTransition(instance, InstanceState.Activating, InstanceState.Destroying)
+			self.__stateTransition(instance, None, InstanceState.Destroying)
 			self.data.releaseInstance(instance)
 		else:
 			# XXXstroucki: This is a problem with keeping
@@ -381,15 +405,21 @@
 						self.proxy[hostname].destroyVm(instance.vmId)
 						self.data.releaseInstance(instance)
 				except:
-					self.log.exception('destroyVm failed on host %s vmId %s' % (hostname, str(instance.vmId)))
+					self.log.warning('destroyVm failed on host %s vmId %s' % (hostname, str(instance.vmId)))
 					self.data.removeInstance(instance)
 
 
 		return
 	
+	# extern
 	def suspendVm(self, instanceId):
 		instance = self.data.acquireInstance(instanceId)
-		self.__stateTransition(instance, InstanceState.Running, InstanceState.Suspending)
+		try:
+			self.__stateTransition(instance, InstanceState.Running, InstanceState.Suspending)
+		except TashiException:
+			self.data.releaseInstance(instance)
+			raise
+
 		self.data.releaseInstance(instance)
 		self.__ACCOUNT("CM VM SUSPEND", instance=instance)
 		hostname = self.data.getHost(instance.hostId).name
@@ -401,15 +431,22 @@
 			raise TashiException(d={'errno':Errors.UnableToSuspend, 'msg':'Failed to suspend %s' % (instance.name)})
 		return
 	
+	# extern
 	def resumeVm(self, instanceId):
 		instance = self.data.acquireInstance(instanceId)
-		self.__stateTransition(instance, InstanceState.Suspended, InstanceState.Pending)
+		try:
+			self.__stateTransition(instance, InstanceState.Suspended, InstanceState.Pending)
+		except TashiException:
+			self.data.releaseInstance(instance)
+			raise
+
 		source = "suspend/%d_%s" % (instance.id, instance.name)
 		instance.hints['__resume_source'] = source
 		self.data.releaseInstance(instance)
 		self.__ACCOUNT("CM VM RESUME", instance=instance)
 		return instance
 	
+	# extern
 	def migrateVm(self, instanceId, targetHostId):
 		instance = self.data.acquireInstance(instanceId)
 		self.__ACCOUNT("CM VM MIGRATE", instance=instance)
@@ -421,7 +458,13 @@
 		except:
 			self.data.releaseInstance(instance)
 			raise
-		self.__stateTransition(instance, InstanceState.Running, InstanceState.MigratePrep)
+
+		try:
+			self.__stateTransition(instance, InstanceState.Running, InstanceState.MigratePrep)
+		except TashiException:
+			self.data.releaseInstance(instance)
+			raise
+
 		self.data.releaseInstance(instance)
 		try:
 			# Prepare the target
@@ -433,7 +476,12 @@
 			self.log.exception('prepReceiveVm failed')
 			raise
 		instance = self.data.acquireInstance(instance.id)
-		self.__stateTransition(instance, InstanceState.MigratePrep, InstanceState.MigrateTrans)
+		try:
+			self.__stateTransition(instance, InstanceState.MigratePrep, InstanceState.MigrateTrans)
+		except TashiException:
+			self.data.releaseInstance(instance)
+			raise
+
 		self.data.releaseInstance(instance)
 		try:
 			# Send the VM
@@ -449,15 +497,23 @@
 
 		try:
 			# Notify the target
-			vmId = self.proxy[targetHost.name].receiveVm(instance, cookie)
+			__vmid = self.proxy[targetHost.name].receiveVm(instance, cookie)
 		except Exception:
 			self.log.exception('receiveVm failed')
 			raise
+
+		self.log.info("migrateVM finished")
 		return
-	
+
+	# extern
 	def pauseVm(self, instanceId):
 		instance = self.data.acquireInstance(instanceId)
-		self.__stateTransition(instance, InstanceState.Running, InstanceState.Pausing)
+		try:
+			self.__stateTransition(instance, InstanceState.Running, InstanceState.Pausing)
+		except TashiException:
+			self.data.releaseInstance(instance)
+			raise
+
 		self.data.releaseInstance(instance)
 		self.__ACCOUNT("CM VM PAUSE", instance=instance)
 		hostname = self.data.getHost(instance.hostId).name
@@ -467,13 +523,24 @@
 			self.log.exception('pauseVm failed on host %s with vmId %d' % (hostname, instance.vmId))
 			raise
 		instance = self.data.acquireInstance(instanceId)
-		self.__stateTransition(instance, InstanceState.Pausing, InstanceState.Paused)
+		try:
+			self.__stateTransition(instance, InstanceState.Pausing, InstanceState.Paused)
+		except TashiException:
+			self.data.releaseInstance(instance)
+			raise
+
 		self.data.releaseInstance(instance)
 		return
 
+	# extern
 	def unpauseVm(self, instanceId):
 		instance = self.data.acquireInstance(instanceId)
-		self.__stateTransition(instance, InstanceState.Paused, InstanceState.Unpausing)
+		try:
+			self.__stateTransition(instance, InstanceState.Paused, InstanceState.Unpausing)
+		except TashiException:
+			self.data.releaseInstance(instance)
+			raise
+
 		self.data.releaseInstance(instance)
 		self.__ACCOUNT("CM VM UNPAUSE", instance=instance)
 		hostname = self.data.getHost(instance.hostId).name
@@ -483,25 +550,61 @@
 			self.log.exception('unpauseVm failed on host %s with vmId %d' % (hostname, instance.vmId))
 			raise
 		instance = self.data.acquireInstance(instanceId)
-		self.__stateTransition(instance, InstanceState.Unpausing, InstanceState.Running)
+		try:
+			self.__stateTransition(instance, InstanceState.Unpausing, InstanceState.Running)
+		except TashiException:
+			self.data.releaseInstance(instance)
+			raise
+
 		self.data.releaseInstance(instance)
 		return
-	
+
+	# extern
 	def getHosts(self):
 		return self.data.getHosts().values()
 	
+	# extern
+	def setHostState(self, hostId, state):
+		state = state.lower()
+		hostState = None
+		if state == "normal":
+			hostState = HostState.Normal
+		if state == "drained":
+			hostState = HostState.Drained
+
+		if hostState is None:
+			return "%s is not a valid host state" % state
+
+		host = self.data.acquireHost(hostId)
+		try:
+			host.state = hostState
+		finally:
+			self.data.releaseHost(host)
+
+		return "Host state set to %s." % hostStates[hostState]
+
+	# extern
 	def getNetworks(self):
-		return self.data.getNetworks().values()
-	
+		networks = self.data.getNetworks()
+		for network in networks:
+			if self.defaultNetwork == networks[network].id:
+				setattr(networks[network], "default", True)
+
+		return networks.values()
+
+	# extern
 	def getUsers(self):
 		return self.data.getUsers().values()
-	
+
+	# extern
 	def getInstances(self):
 		return self.data.getInstances().values()
 
+	# extern
 	def getImages(self):
 		return self.data.getImages()
 	
+	# extern
 	def copyImage(self, src, dst):
 		imageSrc = self.dfs.getLocalHandle("images/" + src)
 		imageDst = self.dfs.getLocalHandle("images/" + dst)
@@ -515,6 +618,7 @@
 		except Exception, e:
 			self.log.exception('DFS image copy failed: %s (%s->%s)' % (e, imageSrc, imageDst))
 
+	# extern
 	def vmmSpecificCall(self, instanceId, arg):
 		instance = self.data.getInstance(instanceId)
 		hostname = self.data.getHost(instance.hostId).name
@@ -526,7 +630,7 @@
 			raise
 		return res
 	
-#	@timed
+	# extern
 	def registerNodeManager(self, host, instances):
 		"""Called by the NM every so often as a keep-alive/state polling -- state changes here are NOT AUTHORITATIVE"""
 
@@ -559,45 +663,47 @@
 		# let the host communicate what it is running
 		# and note that the information is not stale
 		for instance in instances:
+			if instance.state == InstanceState.Exited:
+				self.log.warning("%s reporting exited instance %s, ignoring." % (host.name, instance.id))
+				continue
 			self.instanceLastContactTime.setdefault(instance.id, 0)
 
 		self.data.releaseHost(oldHost)
 		return host.id
 	
-	def vmUpdate(self, instanceId, instance, oldState):
-		try:
-			oldInstance = self.data.acquireInstance(instanceId)
-		except TashiException, e:
-			# shouldn't have a lock to clean up after here
-			if (e.errno == Errors.NoSuchInstanceId):
-				self.log.warning('Got vmUpdate for unknown instanceId %d' % (instanceId))
-				return
-		except:
-			self.log.exception("Could not acquire instance")
-			raise
+	def __vmUpdate(self, oldInstance, instance, oldState):
+		# this function assumes a lock is held on the instance
+		# already, and will be released elsewhere
 
-		self.instanceLastContactTime[instanceId] = self.__now()
+		self.instanceLastContactTime[oldInstance.id] = self.__now()
 		oldInstance.decayed = False
-		self.__ACCOUNT("CM VM UPDATE", instance=oldInstance)
 
 		if (instance.state == InstanceState.Exited):
 			# determine why a VM has exited
 			hostname = self.data.getHost(oldInstance.hostId).name
+
 			if (oldInstance.state not in [InstanceState.ShuttingDown, InstanceState.Destroying, InstanceState.Suspending]):
 				self.log.warning('Unexpected exit on %s of instance %s (vmId %d)' % (hostname, oldInstance.name, oldInstance.vmId))
+
 			if (oldInstance.state == InstanceState.Suspending):
 				self.__stateTransition(oldInstance, InstanceState.Suspending, InstanceState.Suspended)
 				oldInstance.hostId = None
 				oldInstance.vmId = None
-				self.data.releaseInstance(oldInstance)
+				return "release"
+
+			if (oldInstance.state == InstanceState.MigrateTrans):
+				# Just await update from target host
+				return "release"
+
 			else:
 				del self.instanceLastContactTime[oldInstance.id]
-				self.data.removeInstance(oldInstance)
+				return "remove"
+
 		else:
 			if (instance.state):
 				# XXXstroucki does this matter?
 				if (oldState and oldInstance.state != oldState):
-					self.log.warning('Got vmUpdate of state from %s to %s, but the instance was previously %s' % (vmStates[oldState], vmStates[instance.state], vmStates[oldInstance.state]))
+					self.log.warning('Doing vmUpdate of state from %s to %s, but the instance was previously %s' % (vmStates[oldState], vmStates[instance.state], vmStates[oldInstance.state]))
 				oldInstance.state = instance.state
 			if (instance.vmId):
 				oldInstance.vmId = instance.vmId
@@ -610,11 +716,44 @@
 							if (oldNic.mac == nic.mac):
 								oldNic.ip = nic.ip
 
-			self.data.releaseInstance(oldInstance)
+			return "release"
+
 
 		return "success"
-	
+
+	# extern
+	def vmUpdate(self, instanceId, instance, oldState):
+		try:
+			oldInstance = self.data.acquireInstance(instanceId)
+		except TashiException, e:
+			# shouldn't have a lock to clean up after here
+			if (e.errno == Errors.NoSuchInstanceId):
+				self.log.warning('Got vmUpdate for unknown instanceId %d' % (instanceId))
+				return
+		except:
+			self.log.exception("Could not acquire instance")
+			raise
+
+		import copy
+		displayInstance = copy.copy(oldInstance)
+		displayInstance.state = instance.state
+		self.__ACCOUNT("CM VM UPDATE", instance=displayInstance)
+
+		rv = self.__vmUpdate(oldInstance, instance, oldState)
+
+		if (rv == "release"):
+			self.data.releaseInstance(oldInstance)
+
+		if (rv == "remove"):
+			self.data.removeInstance(oldInstance)
+
+		return "success"
+
+	# extern
 	def activateVm(self, instanceId, host):
+		# XXXstroucki: check my idea of the host's capacity before
+		# trying.
+
 		dataHost = self.data.acquireHost(host.id)
 
 		if (dataHost.name != host.name):
@@ -632,7 +771,7 @@
 		self.__ACCOUNT("CM VM ACTIVATE", instance=instance)
 
 		if ('__resume_source' in instance.hints):
-			self.__stateTransition(instance, InstanceState.Pending, InstanceState.Resuming)
+			self.__stateTransition(instance, None, InstanceState.Resuming)
 		else:
 			# XXXstroucki should held VMs be continually tried? Or be explicitly set back to pending?
 			#self.__stateTransition(instance, InstanceState.Pending, InstanceState.Activating)
@@ -678,12 +817,13 @@
 		self.data.releaseInstance(instance)
 		return "success"
 
-        def registerHost(self, hostname, memory, cores, version):
-                hostId, alreadyRegistered = self.data.registerHost(hostname, memory, cores, version)
-                if alreadyRegistered:
-                        self.log.info("Host %s is already registered, it was updated now" % hostname)
-                else:
-                        self.log.info("A host was registered - hostname: %s, version: %s, memory: %s, cores: %s" % (hostname, version, memory, cores))
+	# extern
+	def registerHost(self, hostname, memory, cores, version):
+		hostId, alreadyRegistered = self.data.registerHost(hostname, memory, cores, version)
+		if alreadyRegistered:
+			self.log.info("Host %s is already registered, it was updated now" % hostname)
+		else:
+			self.log.info("A host was registered - hostname: %s, version: %s, memory: %s, cores: %s" % (hostname, version, memory, cores))
 
 		try:
 			host = self.data.getHost(hostId)
@@ -691,9 +831,10 @@
 		except:
 			self.log.warning("Failed to lookup host %s" % hostId)
 
-                return hostId
+		return hostId
 
-        def unregisterHost(self, hostId):
+	# extern
+	def unregisterHost(self, hostId):
 		try:
 			host = self.data.getHost(hostId)
 			self.__ACCOUNT("CM HOST UNREGISTER", host=host)
@@ -701,9 +842,9 @@
 			self.log.warning("Failed to lookup host %s" % hostId)
 			return
 
-                self.data.unregisterHost(hostId)
-                self.log.info("Host %s was unregistered" % hostId)
-                return
+		self.data.unregisterHost(hostId)
+		self.log.info("Host %s was unregistered" % hostId)
+		return
 
 	# service thread
 	def __monitorCluster(self):
diff --git a/src/tashi/clustermanager/data/datainterface.py b/src/tashi/clustermanager/data/datainterface.py
index e58fb6d..a22297e 100644
--- a/src/tashi/clustermanager/data/datainterface.py
+++ b/src/tashi/clustermanager/data/datainterface.py
@@ -43,25 +43,28 @@
 	def getHosts(self):
 		raise NotImplementedError
 	
-	def getHost(self, id):
+	def getHost(self, _id):
+		raise NotImplementedError
+
+	def getImages(self):
 		raise NotImplementedError
 	
 	def getInstances(self):
 		raise NotImplementedError
 	
-	def getInstance(self, id):
+	def getInstance(self, _id):
 		raise NotImplementedError
 	
 	def getNetworks(self):
 		raise NotImplementedError
 	
-	def getNetwork(self, id):
+	def getNetwork(self, _id):
 		raise NotImplementedError
 	
 	def getUsers(self):
 		raise NotImplementedError
 	
-	def getUser(self, id):
+	def getUser(self, _id):
 		raise NotImplementedError
 
 	def registerHost(self, hostname, memory, cores, version):
diff --git a/src/tashi/clustermanager/data/fromconfig.py b/src/tashi/clustermanager/data/fromconfig.py
index 8511a07..68465fa 100644
--- a/src/tashi/clustermanager/data/fromconfig.py
+++ b/src/tashi/clustermanager/data/fromconfig.py
@@ -15,17 +15,21 @@
 # specific language governing permissions and limitations
 # under the License.    
 
+#XXXstroucki: for compatibility with python 2.5
 from __future__ import with_statement
+
+import logging
 import threading
 import os
 import ConfigParser
 
-from tashi.rpycservices.rpyctypes import Host, Network, User, TashiException, Errors, HostState
+from tashi.rpycservices.rpyctypes import Host, Network, User, TashiException, Errors, HostState, Instance
 from tashi.clustermanager.data import DataInterface
 
 class FromConfig(DataInterface):
 	def __init__(self, config):
 		DataInterface.__init__(self, config)
+		self.log = logging.getLogger(__name__)
 		self.hosts = {}
 		self.instances = {}
 		self.networks = {}
@@ -78,6 +82,10 @@
 		return instanceId
 	
 	def registerInstance(self, instance):
+		if type(instance) is not Instance:
+			self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+			raise TypeError
+
 		self.acquireLock(self.instanceLock)
 		try:
 			if (instance.id is not None and instance.id not in self.instances):
@@ -107,6 +115,10 @@
 		return instance
 	
 	def releaseInstance(self, instance):
+		if type(instance) is not Instance:
+			self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+			raise TypeError
+
 		try:
 			if (instance.id not in self.instances): # MPR: should never be true, but good to check
 				raise TashiException(d={'errno':Errors.NoSuchInstanceId,'msg':"No such instanceId - %d" % (instance.id)})
@@ -114,6 +126,10 @@
 			self.releaseLock(instance._lock)
 	
 	def removeInstance(self, instance):
+		if type(instance) is not Instance:
+			self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+			raise TypeError
+
 		self.acquireLock(self.instanceLock)
 		try:
 			del self.instances[instance.id]
@@ -122,6 +138,10 @@
 			self.releaseLock(self.instanceLock)
 	
 	def acquireHost(self, hostId):
+		if type(hostId) is not int:
+			self.log.exception("Argument is not of type int, but of type %s" % (type(hostId)))
+			raise TypeError
+
 		self.hostLock.acquire()
 		host = self.hosts.get(hostId, None)
 		if (host is None):
@@ -134,6 +154,10 @@
 
 	
 	def releaseHost(self, host):
+		if type(host) is not Host:
+			self.log.exception("Argument is not of type Host, but of type %s" % (type(host)))
+			raise TypeError
+
 		try:
 			if (host.id not in self.hosts): # MPR: should never be true, but good to check
 				raise TashiException(d={'errno':Errors.NoSuchHostId,'msg':"No such hostId - %s" % (host.id)})
@@ -145,47 +169,47 @@
 	def getHosts(self):
 		return self.hosts
 	
-	def getHost(self, id):
-		host = self.hosts.get(id, None)
+	def getHost(self, _id):
+		host = self.hosts.get(_id, None)
 		if (not host):
-			raise TashiException(d={'errno':Errors.NoSuchHostId,'msg':"No such hostId - %s" % (id)})
+			raise TashiException(d={'errno':Errors.NoSuchHostId,'msg':"No such hostId - %s" % (_id)})
 		return host
 
 	def getInstances(self):
 		return self.instances
 	
-	def getInstance(self, id):
-		instance = self.instances.get(id, None)
+	def getInstance(self, _id):
+		instance = self.instances.get(_id, None)
 		if (not instance):
-			raise TashiException(d={'errno':Errors.NoSuchInstanceId,'msg':"No such instanceId - %d" % (id)})
+			raise TashiException(d={'errno':Errors.NoSuchInstanceId,'msg':"No such instanceId - %d" % (_id)})
 		return instance
 	
 	def getNetworks(self):
 		return self.networks
 	
-	def getNetwork(self, id):
-		return self.networks[id]
+	def getNetwork(self, _id):
+		return self.networks[_id]
 	
 	def getUsers(self):
 		return self.users
 	
-	def getUser(self, id):
-		return self.users[id]
+	def getUser(self, _id):
+		return self.users[_id]
 		
 	def registerHost(self, hostname, memory, cores, version):
 		self.hostLock.acquire()
-		for id in self.hosts.keys():
-			if self.hosts[id].name == hostname:
-				host = Host(d={'id':id,'name':hostname,'state':HostState.Normal,'memory':memory,'cores':cores,'version':version})
-				self.hosts[id] = host
+		for _id in self.hosts.keys():
+			if self.hosts[_id].name == hostname:
+				host = Host(d={'id':_id,'name':hostname,'state':HostState.Normal,'memory':memory,'cores':cores,'version':version})
+				self.hosts[_id] = host
 				self.save()
 				self.hostLock.release()
-				return id, True
-		id = self.getNewId("hosts")
-		self.hosts[id] = Host(d={'id':id,'name':hostname,'state':HostState.Normal,'memory':memory,'cores':cores,'version':version})
+				return _id, True
+		_id = self.getNewId("hosts")
+		self.hosts[_id] = Host(d={'id':_id,'name':hostname,'state':HostState.Normal,'memory':memory,'cores':cores,'version':version})
 		self.save()
 		self.hostLock.release()
-		return id, False
+		return _id, False
 		
 	def unregisterHost(self, hostId):
 		self.hostLock.acquire()
@@ -200,10 +224,10 @@
 		maxId = 0
 		l = []
 		if(table == "hosts"):
-			for id in self.hosts.keys():
-				l.append(id)
-				if id >= maxId:
-					maxId = id
+			for _id in self.hosts.keys():
+				l.append(_id)
+				if _id >= maxId:
+					maxId = _id
 		l.sort() # sort to enable comparing with range output
 		# check if some id is released:
 		t = range(maxId + 1)
@@ -221,9 +245,9 @@
 		# and in what order does it get loaded
 		fileName = "./etc/Tashi.cfg"
 		if not os.path.exists(fileName):
-			file = open(fileName, "w")
-			file.write("[FromConfig]")
-			file.close()	
+			filehandle = open(fileName, "w")
+			filehandle.write("[FromConfig]")
+			filehandle.close()	
 		parser = ConfigParser.ConfigParser()
 		parser.read(fileName)
 		
@@ -231,7 +255,7 @@
 			parser.add_section("FromConfig")
 		
 		hostsInFile = []
-		for (name, value) in parser.items("FromConfig"):
+		for (name, __value) in parser.items("FromConfig"):
 			name = name.lower()
 			if (name.startswith("host")):
 				hostsInFile.append(name)
diff --git a/src/tashi/clustermanager/data/getentoverride.py b/src/tashi/clustermanager/data/getentoverride.py
index 21b2f8f..2cd5e69 100644
--- a/src/tashi/clustermanager/data/getentoverride.py
+++ b/src/tashi/clustermanager/data/getentoverride.py
@@ -15,16 +15,18 @@
 # specific language governing permissions and limitations
 # under the License.    
 
+import logging
 import subprocess
 import time
 import os
-from tashi.rpycservices.rpyctypes import User, LocalImages
+from tashi.rpycservices.rpyctypes import User, LocalImages, Instance, Host
 from tashi.clustermanager.data import DataInterface
 from tashi.util import instantiateImplementation, humanReadable
 
 class GetentOverride(DataInterface):
 	def __init__(self, config):
 		DataInterface.__init__(self, config)
+		self.log = logging.getLogger(__name__)
 		self.baseDataObject = instantiateImplementation(config.get("GetentOverride", "baseData"), config)
 		self.dfs = instantiateImplementation(config.get("ClusterManager", "dfs"), config)
 
@@ -33,40 +35,60 @@
 		self.fetchThreshold = float(config.get("GetentOverride", "fetchThreshold"))
 	
 	def registerInstance(self, instance):
+		if type(instance) is not Instance:
+			self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+			raise TypeError
+
 		return self.baseDataObject.registerInstance(instance)
 	
 	def acquireInstance(self, instanceId):
 		return self.baseDataObject.acquireInstance(instanceId)
 	
 	def releaseInstance(self, instance):
+		if type(instance) is not Instance:
+			self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+			raise TypeError
+
 		return self.baseDataObject.releaseInstance(instance)
 	
 	def removeInstance(self, instance):
+		if type(instance) is not Instance:
+			self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+			raise TypeError
+
 		return self.baseDataObject.removeInstance(instance)
 	
 	def acquireHost(self, hostId):
+		if type(hostId) is not int:
+			self.log.exception("Argument is not of type int, but of type %s" % (type(hostId)))
+			raise TypeError
+
 		return self.baseDataObject.acquireHost(hostId)
 	
 	def releaseHost(self, host):
+		if type(host) is not Host:
+			self.log.exception("Argument is not of type Host, but of type %s" % (type(host)))
+			raise TypeError
+
 		return self.baseDataObject.releaseHost(host)
 	
 	def getHosts(self):
 		return self.baseDataObject.getHosts()
 	
-	def getHost(self, id):
-		return self.baseDataObject.getHost(id)
+	def getHost(self, _id):
+		return self.baseDataObject.getHost(_id)
 	
 	def getInstances(self):
 		return self.baseDataObject.getInstances()
 	
-	def getInstance(self, id):
-		return self.baseDataObject.getInstance(id)
+	def getInstance(self, _id):
+		return self.baseDataObject.getInstance(_id)
 	
 	def getNetworks(self):
 		return self.baseDataObject.getNetworks()
 	
-	def getNetwork(self, id):
-		return self.baseDataObject.getNetwork(id)
+	def getNetwork(self, _id):
+		return self.baseDataObject.getNetwork(_id)
 
 	def getImages(self):
 		count = 0
@@ -87,12 +109,12 @@
 			try:
 				for l in p.stdout.xreadlines():
 					ws = l.strip().split(":")
-					id = int(ws[2])
+					_id = int(ws[2])
 					name = ws[0]
 					user = User()
-					user.id = id
+					user.id = _id
 					user.name = name
-					myUsers[id] = user
+					myUsers[_id] = user
 				self.users = myUsers
 				self.lastUserUpdate = now
 			finally:	
@@ -102,9 +124,9 @@
 		self.fetchFromGetent()
 		return self.users
 	
-	def getUser(self, id):
+	def getUser(self, _id):
 		self.fetchFromGetent()
-		return self.users[id]
+		return self.users[_id]
 		
 	def registerHost(self, hostname, memory, cores, version):
 		return self.baseDataObject.registerHost(hostname, memory, cores, version)
diff --git a/src/tashi/clustermanager/data/ldapoverride.py b/src/tashi/clustermanager/data/ldapoverride.py
index 0236635..56b154c 100644
--- a/src/tashi/clustermanager/data/ldapoverride.py
+++ b/src/tashi/clustermanager/data/ldapoverride.py
@@ -17,9 +17,11 @@
 
 import subprocess
 import time
-from tashi.rpycservices.rpyctypes import User
+#XXXstroucki getImages requires os?
+import os
+from tashi.rpycservices.rpyctypes import User, LocalImages
+from tashi.util import instantiateImplementation, humanReadable
 from tashi.clustermanager.data import DataInterface
-from tashi.util import instantiateImplementation
 
 class LdapOverride(DataInterface):
 	def __init__(self, config):
@@ -31,6 +33,7 @@
 		self.nameKey = config.get("LdapOverride", "nameKey")
 		self.idKey = config.get("LdapOverride", "idKey")
 		self.ldapCommand = config.get("LdapOverride", "ldapCommand")
+		self.dfs = instantiateImplementation(config.get("ClusterManager", "dfs"), config)
 	
 	def registerInstance(self, instance):
 		return self.baseDataObject.registerInstance(instance)
@@ -53,20 +56,31 @@
 	def getHosts(self):
 		return self.baseDataObject.getHosts()
 	
-	def getHost(self, id):
-		return self.baseDataObject.getHost(id)
+	def getHost(self, _id):
+		return self.baseDataObject.getHost(_id)
 	
 	def getInstances(self):
 		return self.baseDataObject.getInstances()
 	
-	def getInstance(self, id):
-		return self.baseDataObject.getInstance(id)
+	def getInstance(self, _id):
+		return self.baseDataObject.getInstance(_id)
 	
 	def getNetworks(self):
 		return self.baseDataObject.getNetworks()
 	
-	def getNetwork(self, id):
-		return self.baseDataObject.getNetwork(id)
+	def getNetwork(self, _id):
+		return self.baseDataObject.getNetwork(_id)
+
+	def getImages(self):
+		count = 0
+		myList = []
+		for i in self.dfs.list("images"):
+			myFile = self.dfs.getLocalHandle("images/" + i)
+			if os.path.isfile(myFile):
+				image = LocalImages(d={'id':count, 'imageName':i, 'imageSize':humanReadable(self.dfs.stat(myFile)[6])})
+				myList.append(image)
+				count += 1
+		return myList
 
 	def fetchFromLdap(self):
 		now = time.time()
@@ -86,7 +100,7 @@
 								myUsers[user.id] = user
 							thisUser = {}
 						else:
-							(key, sep, val) = l.partition(":")
+							(key, __sep, val) = l.partition(":")
 							key = key.strip()
 							val = val.strip()
 							thisUser[key] = val
@@ -101,9 +115,9 @@
 		self.fetchFromLdap()
 		return self.users
 	
-	def getUser(self, id):
+	def getUser(self, _id):
 		self.fetchFromLdap()
-		return self.users[id]
+		return self.users[_id]
 		
 	def registerHost(self, hostname, memory, cores, version):
 		return self.baseDataObject.registerHost(hostname, memory, cores, version)
diff --git a/src/tashi/clustermanager/data/pickled.py b/src/tashi/clustermanager/data/pickled.py
index b3a6e03..043d756 100644
--- a/src/tashi/clustermanager/data/pickled.py
+++ b/src/tashi/clustermanager/data/pickled.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.    
 
+import logging
 import cPickle
 import os
 import threading
@@ -24,6 +25,7 @@
 class Pickled(FromConfig):
 	def __init__(self, config):
 		DataInterface.__init__(self, config)
+		self.log = logging.getLogger(__name__)
 		self.file = self.config.get("Pickled", "file")
 		self.locks = {}
 		self.lockNames = {}
@@ -39,39 +41,51 @@
 	
 	def cleanInstances(self):
 		ci = {}
-		for i in self.instances.itervalues():
+		for __ignore, i in self.instances.items():
 			i2 = Instance(d=i.__dict__)
 			ci[i2.id] = i2
 		return ci
 	
 	def cleanHosts(self):
 		ch = {}
-		for h in self.hosts.itervalues():
+		for __ignore, h in self.hosts.items():
 			h2 = Host(d=h.__dict__)
 			ch[h2.id] = h2
 		return ch
 	
 	def save(self):
-		file = open(self.file, "w")
-		cPickle.dump((self.cleanHosts(), self.cleanInstances(), self.networks, self.users), file)
-		file.close()
+		# XXXstroucki lock here to serialize saves
+		filename = self.file
+		# XXXstroucki could be better
+		tempfile = "%s.new" % filename
+
+		filehandle = open(tempfile, "w")
+		cPickle.dump((self.cleanHosts(), self.cleanInstances(), self.networks, self.users), filehandle)
+		filehandle.close()
+		try:
+			os.rename(tempfile, filename)
+		except OSError:
+			# XXXstroucki: regular save will take place
+			# soon enough, ignore this until locking is
+			# in place.
+			pass
 
 	def load(self):
 		if (os.access(self.file, os.F_OK)):
-			file = open(self.file, "r")
-			(hosts, instances, networks, users) = cPickle.load(file)
-			file.close()
+			filehandle = open(self.file, "r")
+			(hosts, instances, networks, users) = cPickle.load(filehandle)
+			filehandle.close()
 		else:
 			(hosts, instances, networks, users) = ({}, {}, {}, {})
 		self.hosts = hosts
 		self.instances = instances
 		self.networks = networks
 		self.users = users
-		for i in self.instances.itervalues():
+		for __ignore, i in self.instances.items():
 			if (i.id >= self.maxInstanceId):
 				self.maxInstanceId = i.id + 1
 			i._lock = threading.Lock()
 			self.lockNames[i._lock] = "i%d" % (i.id)
-		for h in self.hosts.itervalues():
+		for __ignore, h in self.hosts.items():
 			h._lock = threading.Lock()
 			self.lockNames[h._lock] = "h%d" % (h.id)
diff --git a/src/tashi/clustermanager/data/sql.py b/src/tashi/clustermanager/data/sql.py
index 64e5681..bfa0273 100644
--- a/src/tashi/clustermanager/data/sql.py
+++ b/src/tashi/clustermanager/data/sql.py
@@ -130,6 +130,10 @@
 		return h
 	
 	def registerInstance(self, instance):
+		if type(instance) is not Instance:
+			self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+			raise TypeError
+
 		self.instanceLock.acquire()
 		try:
 			if (instance.id is not None and instance.id not in self.getInstances()):
@@ -173,6 +177,10 @@
 		return instance
 	
 	def releaseInstance(self, instance):
+		if type(instance) is not Instance:
+			self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+			raise TypeError
+
 		self.instanceLock.acquire()
 		try:
 			l = self.makeInstanceList(instance)
@@ -191,6 +199,10 @@
 			self.instanceLock.release()
 	
 	def removeInstance(self, instance):
+		if type(instance) is not Instance:
+			self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+			raise TypeError
+
 		self.instanceLock.acquire()
 		try:
 			self.executeStatement("DELETE FROM instances WHERE id = %d" % (instance.id))
@@ -205,6 +217,10 @@
 			self.instanceLock.release()
 	
 	def acquireHost(self, hostId):
+		if type(hostId) is not int:
+			self.log.exception("Argument is not of type int, but of type %s" % (type(hostId)))
+			raise TypeError
+
 		host = self.getHost(hostId)
 		self.hostLock.acquire()
 		self.hostLocks[host.id] = self.hostLocks.get(host.id, threading.Lock())
@@ -214,6 +230,10 @@
 		return host
 	
 	def releaseHost(self, host):
+		if type(host) is not Host:
+			self.log.exception("Argument is not of type Host, but of type %s" % (type(host)))
+			raise TypeError
+
 		l = self.makeHostList(host)
 		s = ""
 		for e in range(0, len(self.hostOrder)):
@@ -234,14 +254,14 @@
 	
 	def getHost(self, in_id):
 		try:
-			id = int(in_id)
+			_id = int(in_id)
 		except:
 			self.log.exception("Argument to getHost was not integer: %s" % in_id)
 
-		cur = self.executeStatement("SELECT * FROM hosts WHERE id = %d" % id)
+		cur = self.executeStatement("SELECT * FROM hosts WHERE id = %d" % _id)
 		r = cur.fetchone()
 		if (r == None):
-			raise TashiException(d={'errno':Errors.NoSuchHostId,'msg':"No such hostId - %s" % (id)})
+			raise TashiException(d={'errno':Errors.NoSuchHostId,'msg':"No such hostId - %s" % (_id)})
 		host = self.makeListHost(r)
 		return host
 	
@@ -256,16 +276,16 @@
 	
 	def getInstance(self, in_id):
 		try:
-			id = int(in_id)
+			_id = int(in_id)
 		except:
 			self.log.exception("Argument to getInstance was not integer: %s" % in_id)
 
-		cur = self.executeStatement("SELECT * FROM instances WHERE id = %d" % (id))
+		cur = self.executeStatement("SELECT * FROM instances WHERE id = %d" % (_id))
 		# XXXstroucki should only return one row.
 		# what about migration? should it be enforced?
 		r = cur.fetchone()
 		if (not r):
-			raise TashiException(d={'errno':Errors.NoSuchInstanceId, 'msg':"No such instanceId - %d" % (id)})
+			raise TashiException(d={'errno':Errors.NoSuchInstanceId, 'msg':"No such instanceId - %d" % (_id)})
 		instance = self.makeListInstance(r)
 		return instance
 	
@@ -278,22 +298,23 @@
 			networks[network.id] = network
 		return networks
 	
-	def getNetwork(self, id):
-		cur = self.executeStatement("SELECT * FROM networks WHERE id = %d" % (id))
+	def getNetwork(self, _id):
+		cur = self.executeStatement("SELECT * FROM networks WHERE id = %d" % (_id))
 		r = cur.fetchone()
 		network = Network(d={'id':r[0], 'name':r[1]})
 		return network
 
-        def getImages(self):
-                count = 0
-                myList = []
-                for i in self.dfs.list("images"):
-                        myFile = self.dfs.getLocalHandle("images/" + i)
-                        if os.path.isfile(myFile):
-                                image = LocalImages(d={'id':count, 'imageName':i, 'imageSize':humanReadable(self.dfs.stat(myFile)[6])})
-                                myList.append(image)
-                                count += 1
-                return myList
+	def getImages(self):
+		count = 0
+		myList = []
+		for i in self.dfs.list("images"):
+			myFile = self.dfs.getLocalHandle("images/" + i)
+			if os.path.isfile(myFile):
+				image = LocalImages(d={'id':count, 'imageName':i, 'imageSize':humanReadable(self.dfs.stat(myFile)[6])})
+				myList.append(image)
+				count += 1
+
+		return myList
 	
 	def getUsers(self):
 		cur = self.executeStatement("SELECT * from users")
@@ -304,8 +325,8 @@
 			users[user.id] = user
 		return users
 	
-	def getUser(self, id):
-		cur = self.executeStatement("SELECT * FROM users WHERE id = %d" % (id))
+	def getUser(self, _id):
+		cur = self.executeStatement("SELECT * FROM users WHERE id = %d" % (_id))
 		r = cur.fetchone()
 		user = User(d={'id':r[0], 'name':r[1], 'passwd':r[2]})
 		return user
@@ -316,20 +337,20 @@
 		res = cur.fetchall()
 		for r in res:
 			if r[1] == hostname:
-				id = r[0]
-				self.log.warning("Host %s already registered, update will be done" % id)
+				_id = r[0]
+				self.log.warning("Host %s already registered, update will be done" % _id)
 				s = ""
-				host = Host(d={'id': id, 'up': 0, 'decayed': 0, 'state': 1, 'name': hostname, 'memory':memory, 'cores': cores, 'version':version})
+				host = Host(d={'id': _id, 'up': 0, 'decayed': 0, 'state': 1, 'name': hostname, 'memory':memory, 'cores': cores, 'version':version})
 				l = self.makeHostList(host)
 				for e in range(0, len(self.hostOrder)):
 					s = s + self.hostOrder[e] + "=" + l[e]
 					if (e < len(self.hostOrder)-1):
 						s = s + ", "
-				self.executeStatement("UPDATE hosts SET %s WHERE id = %d" % (s, id))
+				self.executeStatement("UPDATE hosts SET %s WHERE id = %d" % (s, _id))
 				self.hostLock.release()
 				return r[0], True
-		id = self.getNewId("hosts")
-		host = Host(d={'id': id, 'up': 0, 'decayed': 0, 'state': 1, 'name': hostname, 'memory':memory, 'cores': cores, 'version':version})
+		_id = self.getNewId("hosts")
+		host = Host(d={'id': _id, 'up': 0, 'decayed': 0, 'state': 1, 'name': hostname, 'memory':memory, 'cores': cores, 'version':version})
 		l = self.makeHostList(host)
 		self.executeStatement("INSERT INTO hosts VALUES (%s, %s, %s, %s, %s, %s, %s, %s)" % tuple(l))
 		self.hostLock.release()
@@ -353,10 +374,10 @@
 		maxId = 0 # the first id would be 1
 		l = []
 		for r in res:
-			id = r[0]
-			l.append(id)
-			if id >= maxId:
-				maxId = id
+			_id = r[0]
+			l.append(_id)
+			if _id >= maxId:
+				maxId = _id
 		l.sort() # sort to enable comparing with range output
 		# check if some id is released:
 		t = range(maxId + 1)
diff --git a/src/tashi/connectionmanager.py b/src/tashi/connectionmanager.py
index 5eeae6c..c9026dc 100644
--- a/src/tashi/connectionmanager.py
+++ b/src/tashi/connectionmanager.py
@@ -15,15 +15,16 @@
 # specific language governing permissions and limitations
 # under the License.    
 
-from tashi.rpycservices import rpycservices
+from tashi import Connection
 #from tashi.rpycservices.rpyctypes import *
 
 class ConnectionManager(object):
-	def __init__(self, username, password, port, timeout=10000.0):
+	def __init__(self, username, password, port, timeout=10000.0, authAndEncrypt=False):
 		self.username = username
 		self.password = password
 		self.timeout = timeout
 		self.port = port
+		self.authAndEncrypt = authAndEncrypt
 	
 	def __getitem__(self, hostname):
 		port = self.port
@@ -31,4 +32,4 @@
 			port = hostname[1]
 			hostname = hostname[0]
 
-		return rpycservices.client(hostname, port, username=self.username, password=self.password)
+		return Connection(hostname, port, credentials=(self.username, self.password), authAndEncrypt=self.authAndEncrypt)
diff --git a/src/tashi/dfs/vfs.py b/src/tashi/dfs/vfs.py
index d039335..650a805 100644
--- a/src/tashi/dfs/vfs.py
+++ b/src/tashi/dfs/vfs.py
@@ -18,7 +18,6 @@
 # implementation of dfs interface functions
 
 import shutil
-import os
 import os.path
 from dfsinterface import DfsInterface
 
diff --git a/src/tashi/messaging/gangliapublisher.py b/src/tashi/messaging/gangliapublisher.py
index e589162..2d27947 100644
--- a/src/tashi/messaging/gangliapublisher.py
+++ b/src/tashi/messaging/gangliapublisher.py
@@ -17,7 +17,6 @@
 
 import os
 import time
-import types
 
 from tashi import scrubString
 
diff --git a/src/tashi/messaging/messageBroker.py b/src/tashi/messaging/messageBroker.py
deleted file mode 100644
index c21b57a..0000000
--- a/src/tashi/messaging/messageBroker.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-import ConfigParser
-import getopt
-
-import os
-import sys
-import time
-
-import thriftmessaging
-
-options = []
-long_options = ['port=']
-
-# FIXME: should initialize from config file
-params = {"port":1717}
-
-try:
-	optlist, args = getopt.getopt(sys.argv[1:], options, long_options)
-except getopt.GetoptError, err:
-	print str(err)
-	sys.exit(2)
-
-for opt in optlist:
-	if opt[0] == "--port":
-		try:
-			params["port"] = int(opt[1])
-		except:
-			print "--port expects an integer, got %s" % opt[1]
-			sys.exit(0)
-
-print "Starting message broker on port %i" % params["port"]
-broker = thriftmessaging.MessageBrokerThrift(params["port"], daemon=False)
-
diff --git a/src/tashi/messaging/messaging.py b/src/tashi/messaging/messaging.py
deleted file mode 100644
index c421d5c..0000000
--- a/src/tashi/messaging/messaging.py
+++ /dev/null
@@ -1,337 +0,0 @@
-#!/usr/bin/python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-import threading
-import thread
-import sys
-import os
-import socket
-import Queue
-import copy
-import random
-import traceback
-
-from threadpool import ThreadPoolClass, threadpool, ThreadPool
-from threadpool import threadpoolmethod, threaded, synchronized, synchronizedmethod
-
-class RWLock(object):
-	"""RWLock: Simple reader/writer lock implementation
-	FIXME: this implementation will starve writers!
-	Methods:
-		acquire() : take lock for read access
-		release() : release lock from read access
-		acquireWrite() : take lock for write access
-		releaseWrite() : release lock from write access"""
-	def __init__(self):
-		self.lock = threading.Condition()
-		self.readers = 0
-	def acquire(self):
-		self.lock.acquire()
-		self.readers = self.readers + 1
-		self.lock.release()
-	def release(self):
-		self.lock.acquire()
-		self.readers = self.readers - 1
-		self.lock.notify()
-		self.lock.release()
-	def acquireWrite(self):
-		self.lock.acquire()
-		while self.readers > 0:
-			self.lock.wait()
-	def releaseWrite(self):
-		self.lock.notify()
-		self.lock.release()
-
-
-
-class MessageBroker(object):
-	def __init__(self):
-		self.sublock = RWLock()
-		self.subscribers = []
-		self.random = random.Random()
-	def log(self, msg):
-		print "MessageBroker: Got log: '%s'" % str(msg)
-		return msg
-	def addSubscriber(self, subscriber):
-		self.sublock.acquireWrite()
-		self.subscribers.append(subscriber)
-		l = len(self.subscribers)
-		self.sublock.releaseWrite()
-		return l
-	def publish(self, message):
-		removesubs = []
-		i = self.random.randint(0,100)
-
-#		 subscribers = self.getSubscribers()
-#		 random.shuffle(subscribers)
-
-		self.sublock.acquire()
-
-		sys.stdout.flush()
-
-		for subscriber in self.subscribers:
-			try:
-				sys.stdout.flush()
-				assert(subscriber != self)
-				subscriber.publish(message)
-				sys.stdout.flush()
-			except Exception, e:
-				print e
-				removesubs.append(subscriber)
-
-		self.sublock.release()
-
-		if len(removesubs) > 0:
-			print "detected %i failed subscribers" % len(removesubs)
-			sys.stdout.flush()
-			self.sublock.acquireWrite()
-			for subscriber in removesubs:
-				try:
-					self.subscribers.remove(subscriber)
-				except:
-					pass
-			self.sublock.releaseWrite()
-	def getSubscribers(self):
-		self.sublock.acquire()
-		subs = copy.copy(self.subscribers)
-		self.sublock.release()
-		return subs
-	def removeSubscriber(self, subscriber):
-		self.sublock.acquireWrite()
-		try:
-			self.subscribers.remove(subscriber)
-		except:
-			pass
-		self.sublock.releaseWrite()
-	def publishList(self, messages):
-		for message in messages:
-			self.publish(message)
-
-class Subscriber(object):
-	def __init__(self, broker, pmatch={}, nmatch={}, synchronized=False):
-		self.broker = broker
-		self.lock = threading.Lock()
-		self.synchronized = synchronized
-		self.pmatch={}
-		self.nmatch={}
-		broker.addSubscriber(self)
-	def publish(self, message):
-		sys.stdout.flush()
-		msg = message
-		try:
-			if self.synchronized:
-				self.lock.acquire()
-			msg = self.filter(msg)
-			if (msg != None):
-				self.handle(msg)
-			if self.synchronized:
-				self.lock.release()
-		except Exception, x:
-			if self.synchronized:
-				self.lock.release()
-			print '%s, %s, %s' % (type(x), x, traceback.format_exc())
-	def publishList(self, messages):
-		for message in messages:
-			self.publish(message)
-	def handle(self, message):
-		print "Subscriber Default Handler: '%s'" % message
-	def setMatch(self, pmatch={}, nmatch={}):
-		self.lock.acquire()
-		self.pmatch=pmatch
-		self.nmatch=nmatch
-		self.lock.release()
-	def filter(self, message):
-		"""filter(self, message) : the filter function returns
-		the message, modified to be passed to the handler.
-		Returning (None) indicates that this is not a message
-		we are interested in, and it will not be passed to the
-		handler."""
-		send = True
-		for key in self.pmatch.keys():
-			if (not message.has_key(key)):
-				send = False
-				break
-			if self.pmatch[key] != None:
-				if message[key] != self.pmatch[key]:
-					send = False
-					break
-		if send == False:
-			return None
-		for key in message.keys():
-			if self.nmatch.has_key(key):
-				if self.nmatch[key] == None:
-					send = False
-					break
-				if self.nmatch[key] == message[key]:
-					send = False
-					break
-		if send == False:
-			return None
-		return message
-
-
-	
-class Publisher(object):
-	'''Superclass for pub/sub publishers
-
-	FIXME: use finer-grained locking'''
-	def __init__(self, broker, aggregate=100):
-		self.pending = []
-		self.pendingLock = threading.Lock()
-		self.aggregateSize = aggregate
-		self.broker = broker
-	@synchronizedmethod
-	def publish(self, message):
-		if message.has_key('aggregate') and message['aggregate'] == 'True':
-			self.aggregate(message)
-			return
-		else:
-			self.broker.publish(message)
-	@synchronizedmethod
-	def publishList(self, messages):
-		self.broker.publishList(messages)
-	@synchronizedmethod
-	def aggregate(self, message):
-		# we can make this lock-less by using a queue for pending
-		# messages
-		self.pendingLock.acquire()
-		self.pending.append(message)
-		if len(self.pending) >= self.aggregateSize:
-			self.broker.publishList(self.pending)
-			self.pending = []
-		self.pendingLock.release()
-	@synchronizedmethod
-	def setBroker(self, broker):
-		self.broker = broker
-
-##############################
-# Testing Code
-##############################
-import time
-import unittest
-import sys
-import logging
-
-		
-class TestSubscriber(Subscriber):
-	def __init__(self, *args, **kwargs):
-		self.queue = Queue.Queue()
-		Subscriber.__init__(self, *args, **kwargs)
-	def handle(self, message):
-		self.queue.put(message)
-
-class TestMessaging(unittest.TestCase):
-	def setUp(self):
-		self.broker = MessageBroker()
-		self.publisher = Publisher(self.broker)
-		self.subscriber = TestSubscriber(self.broker)
-	def testPublish(self):
-		self.publisher.publish( {'message':'hello world'} )
-		self.assertEqual(self.subscriber.queue.qsize(), 1)
-	def testPublishList(self):
-		nrmsgs = 10
-		msgs = []
-		for i in range(nrmsgs):
-			msgs.append( {'msgnum':str(i)} )
-		self.publisher.publishList( msgs )
-		self.assertEqual(self.subscriber.queue.qsize(), nrmsgs)
-	def testAggregate(self):
-		nrmsgs = self.publisher.aggregateSize
-		for i in range(nrmsgs):
-			self.assertEqual(self.subscriber.queue.qsize(), 0)
-			self.publisher.aggregate( {'msgnum':str(i)} )
-		self.assertEqual(self.subscriber.queue.qsize(), nrmsgs)
-	def testAggregateKeyword(self):
-		nrmsgs = self.publisher.aggregateSize
-		for i in range(nrmsgs):
-			self.assertEqual(self.subscriber.queue.qsize(), 0)
-			self.publisher.publish( {'msgnum':str(i), 'aggregate':'True'} )
-		self.assertEqual(self.subscriber.queue.qsize(), nrmsgs)
-
-if __name__ == '__main__':
-
-	logging.basicConfig(level=logging.INFO,
-						format="%(asctime)s %(levelname)s:\t %(message)s",
-						stream=sys.stdout)
-
-	suite = unittest.TestLoader().loadTestsFromTestCase(TestMessaging)
-	unittest.TextTestRunner(verbosity=2).run(suite) 
-
-	sys.exit(0)
-
-
-##############################
-# Old/Unused testing code
-##############################
-
-
-
-	print 'testing removeSubscriber'
-	broker.removeSubscriber(subscriber)
-	publisher.publish( {'message':"you shouldn't see this"} )
-
-	nsub = NullSubscriber(broker)
-	print 'timing publish'
-	nrmsg = 100000
-	tt = time.time()
-	for i in range(nrmsg):
-#		publisher.publish( {"message":"hello world!"} )
-		publisher.publish( {} )
-	tt = time.time() - tt
-	print "Published %i messages in %f seconds, %f msg/s"%(nrmsg,
-														   tt,
-														   nrmsg/tt)
-	broker.removeSubscriber(nsub)
-
-	class SlowSubscriber(Subscriber):
-		def handle(self, message):
-			print 'called slow subscriber with message', message
-			time.sleep(1)
-			print 'returning from slow subscriber with message', message
-	class ThreadedSubscriber(Subscriber):
-		@threaded
-		def handle(self, message):
-			print 'called threaded subscriber with message', message
-			time.sleep(1)
-			print 'returning from threaded subscriber with message', message
-	class ThreadPoolSubscriber(Subscriber, ThreadPoolClass):
-		@threadpoolmethod
-		def handle(self, message):
-			print 'called threadpool subscriber with message', message
-			time.sleep(1)
-			print 'returning from threadpool subscriber with message', message
-
-
-
-	tsub = ThreadedSubscriber(broker)
-	for i in range(8):
-		publisher.publish( {"msg":str(i)} )
-	broker.removeSubscriber(tsub)
-	time.sleep(3)
-
-	tpsub = ThreadPoolSubscriber(broker)
-	for i in range(8):
-		publisher.publish( {"msg":str(i)} )
-	broker.removeSubscriber(tpsub)
-	time.sleep(3)
-
-	ssub = SlowSubscriber(broker)
-	for i in range(4):
-		publisher.publish( {"msg":str(i)} )
-	broker.removeSubscriber(ssub)
diff --git a/src/tashi/messaging/messagingloghandler.py b/src/tashi/messaging/messagingloghandler.py
index fd503a1..b757894 100644
--- a/src/tashi/messaging/messagingloghandler.py
+++ b/src/tashi/messaging/messagingloghandler.py
@@ -34,7 +34,9 @@
 		try:
 			key = "log_%s_%d_%d" % (self.name, self.msgIndex, int(time.time()*1000))
 			val = self.format(record)
-			tashi.publisher.publish({key:val})
+			#XXXstroucki publisher does not exist
+			(_,_) = (key,val)
+			#tashi.publisher.publish({key:val})
 			self.msgIndex = self.msgIndex + 1
 		except Exception, e:
 			print e
diff --git a/src/tashi/messaging/soapmessaging.py b/src/tashi/messaging/soapmessaging.py
deleted file mode 100755
index be35fc9..0000000
--- a/src/tashi/messaging/soapmessaging.py
+++ /dev/null
@@ -1,229 +0,0 @@
-#! /usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-from messaging import *
-
-import cPickle
-import soaplib.wsgi_soap
-import cherrypy.wsgiserver
-from soaplib.service import soapmethod
-from soaplib.serializers.primitive import *
-import SOAPpy.WSDL
-import time
-
-class MessageBrokerSoap(soaplib.wsgi_soap.SimpleWSGISoapApp, MessageBroker):
-	def __init__(self, port):
-		soaplib.wsgi_soap.SimpleWSGISoapApp.__init__(self)
-		MessageBroker.__init__(self)
-		self.port = port
-		def trdfn():
-			service = self
-			server = cherrypy.wsgiserver.CherryPyWSGIServer(("0.0.0.0",port), service)
-			server.start()
-		threading.Thread(target=trdfn).start()
-
-
-	@soapmethod(Array(String), Array(String), _returns=Null)
-	def log(self, keys, values):
-		message = {}
-		if len(keys) != len(values):
-			raise Exception, "Different lengths for keys and values"
-		for i in range(len(keys)):
-			message[keys[i]] = values[i]
-		MessageBroker.log(self, message)
-
-	@soapmethod(String, Integer, _returns=Null)
-	def addSubscriber(self, host, port):
-		subscriber = SubscriberSoapProxy(host, port)
-		MessageBroker.addSubscriber(self, subscriber)
-	
-	@soapmethod(String, Integer, _returns=Null)
-	def removeSubscriber(self, host, port):
-		# should this method really be able to peek into subscriber.host/port 
-		subscriber = None
-		subscribers = self.getSubscribers()
-		for subscriber in subscribers:
-			if subscriber.host == host and subscriber.port == port:
-				subscriber = subscriber
-		if subscriber != None:
-			MessageBroker.removeSubscriber(self, subscriber)
-		
-
-	@soapmethod(Array(String), Array(String), _returns=Null)
-	def publish(self, keys, values):
-		message = {}
-		if len(keys) != len(values):
-			raise Exception, "Different lengths for keys and values"
-		for i in range(len(keys)):
-			message[keys[i]] = values[i]
-		MessageBroker.publish(self, message)
-
-
-
-class MessageBrokerSoapProxy(object):
-	def __init__(self, host, port):
-		self.host = host
-		self.port = port
-		self.connection = SOAPpy.WSDL.Proxy("http://%s:%i/.wsdl"%(host,port))
-	def log(self, message):
-		keys = []
-		values = []
-		for k,v in message.items():
-			keys.append(k)
-			values.append(v)
-		self.connection.log(keys=keys, values=values)
-	def addSubscriber(self, subscriber):
-		self.connection.addSubscriber(host=subscriber.host, port=subscriber.port)
-	def publish(self, message):
-		keys = []
-		values = []
-		for k,v in message.items():
-			keys.append(k)
-			values.append(v)
-		self.connection.publish(keys=keys, values=values)
-	def removeSubscriber(self, subscriber):
-		self.connection.removeSubscriber(host=subscriber.host, port=subscriber.port)
-
-
-
-
-class SubscriberSoap(soaplib.wsgi_soap.SimpleWSGISoapApp, Subscriber):
-	def __init__(self, broker, port, synchronized=False):
-		soaplib.wsgi_soap.SimpleWSGISoapApp.__init__(self)
-		Subscriber.__init__(self, synchronized=synchronized)
-		self.host = socket.gethostname()
-		self.port = port
-		self.broker = broker
-		self.server = None
-		def trdfn():
-			service = self
-			self.server = cherrypy.wsgiserver.CherryPyWSGIServer(("0.0.0.0",port), service)
-			self.server.start()
-		threading.Thread(target=trdfn).start()
-#		broker.log("Subscriber started")
-		broker.addSubscriber(self)
-	@soapmethod(Array(String), Array(String), _returns=Integer)
-	def publish(self, keys, values):
-		message = {}
-		if len(keys) != len(values):
-			raise Exception, "Different lengths for keys and values"
-		for i in range(len(keys)):
-			message[keys[i]] = values[i]
-		Subscriber.publish(self, message)
-		return 0
-	def stop(self):
-		self.server.stop()
-
-class SubscriberSoapProxy(object):
-	def __init__(self, host, port):
-		self.host = host
-		self.port = port
-		self.connection = SOAPpy.WSDL.Proxy("http://%s:%i/.wsdl"%(host,port))
-	def publish(self, message):
-		keys = []
-		values = []
-		for k,v in message.items():
-			keys.append(k)
-			values.append(v)
-		self.connection.publish(keys=keys, values=values)
-
-
-####################
-# Testing Code 
-####################
-
-class CustomSubscriber(SubscriberSoap):
-	def handle(self, message):
-		print "Custom Subscriber: '%s'" % str(message)
-
-class NullSubscriber(SubscriberSoap):
-	def handle(self, message):
-		pass
-
-
-if __name__ == '__main__':
-	try:
-		portnum = 1717
-
-		print "\ntesting message broker"
-		broker = MessageBrokerSoap(portnum)
-		proxy = MessageBrokerSoapProxy("localhost", portnum)
-		portnum = portnum + 1 
-
-		print "\ntesting log function"
-		proxy.log( {"message":"Hello World!"} )
-#		proxy.log("It looks like log works")
-
-		print "\ntesting subscriber proxy"
-		subscriber = SubscriberSoap(proxy, portnum)
-		portnum = portnum + 1
-
-		print "\ntesting custom subscriber"
-		csub = CustomSubscriber(proxy, portnum)
-		portnum = portnum + 1
-
-		print "\ntesting publish"
-		proxy.publish( {"message":"Hello World!"} )
-
-		print "\ntesting stop"
-		subscriber.stop()
-		proxy.publish( {"message":"Everybody here?"} )
-
-		print "\ntesting removeSubscriber"
-		proxy.removeSubscriber(csub)
-		proxy.publish( {"message":"Nobody home"} )
-		proxy.addSubscriber(csub)
-		proxy.publish( {"message":"You're back!"} )
-
-		print "\ntesting filter"
-		csub.setMatch( {"print":"yes"} )
-		proxy.publish( {"print":"yes", "message":"this should be printed"} )
-		proxy.publish( {"print":"no", "message":"this should NOT be printed"} )
-		csub.setMatch()
-
-		print "\ntesting publish performance"
-		proxy.removeSubscriber(csub)
-		nrmsg = 10000
-		tt = time.time()
-		for i in range(nrmsg):
-			proxy.publish( {"message":"msg %i"%i} )
-		tt = time.time() - tt
-		print "Published %i messages in %f seconds, %f msg/s"%(nrmsg,
-															   tt,
-															   nrmsg/tt)
-
-		print "\ntesting publish/subscribe performance"
-		nsub = NullSubscriber(proxy, portnum)
-		portnum = portnum + 1
-		nrmsg = 10000
-		tt = time.time()
-		for i in range(nrmsg):
-			proxy.publish( {"message":"msg %i"%i} )
-		tt = time.time() - tt
-		print "Published %i messages in %f seconds, %f msg/s"%(nrmsg,
-															   tt,
-															   nrmsg/tt)
-
-																   
-
-	except Exception, e:
-#		raise e
-		print  e
-		sys.exit(0)
-	sys.exit(0)
diff --git a/src/tashi/messaging/tashimessaging.py b/src/tashi/messaging/tashimessaging.py
deleted file mode 100644
index 006400f..0000000
--- a/src/tashi/messaging/tashimessaging.py
+++ /dev/null
@@ -1,148 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-from thriftmessaging import *
-import logging
-import Queue
-from ConfigParser import ConfigParser
-import time
-import socket
-import signal
-
-class TashiLogHandler(logging.Handler, PublisherThrift):
-	def __init__(self, config, *args, **kwargs):
-		self.messages = Queue.Queue()
-		self.config = config
-		logging.Handler.__init__(self, *args, **kwargs)
-		PublisherThrift.__init__(self, 
-								 config.get('MessageBroker', 'host'),
-								 int(config.get('MessageBroker', 'port')))
-	def emit(self, record):
-		# 'args', 'created', 'exc_info', 'exc_text', 'filename',
-		# 'funcName', 'getMessage', 'levelname', 'levelno', 'lineno',
-		# 'module', 'msecs', 'msg', 'name', 'pathname', 'process',
-		# 'relativeCreated', 'thread', 'threadName']
-		msg = {}
-		# args
-		# created
-		# exc_info
-		# exc_text
-		msg['log-filename'] = str(record.filename)
-		msg['log-funcname'] = str(record.funcName)
-		msg['log-levelname'] = str(record.levelname)
-		msg['log-level'] = str(record.levelno)
-		msg['log-lineno'] = str(record.lineno)
-		msg['log-module'] = str(record.module)
-		msg['log-msecs'] = str(record.msecs)
-		msg['log-message'] = str(record.msg)
-		msg['log-name'] = str(record.name)
-		msg['log-pathname'] = str(record.pathname)
-		msg['log-process'] = str(record.process)
-		# relativeCreated
-		msg['log-thread'] = str(record.thread)
-		msg['log-threadname'] = str(record.threadName)
-
-		# standard message fields
-		msg['timestamp'] = str(time.time())
-		msg['hostname'] = socket.gethostname()
-		msg['message-type'] = 'log'
-
-		self.messages.put(msg)
-		self.publish(msg)
-
-class TashiSubscriber(SubscriberThrift):
-	def __init__(self, config, port, **kwargs):
-		sys.stdout.flush()
-		brokerPort = int(config.get('MessageBroker', 'port'))
-		self.broker = MessageBrokerThriftProxy(config.get('MessageBroker', 'host'), brokerPort)
-		SubscriberThrift.__init__(self, self.broker, port, **kwargs)
-
-		
-
-##############################
-# Test Code
-##############################
-import unittest
-import sys
-
-class TestTashiSubscriber(TashiSubscriber):
-	def __init__(self, *args, **kwargs):
-		self.messageQueue = Queue.Queue()
-		TashiSubscriber.__init__(self, *args, **kwargs)
-	def handle(self, message):
-		self.messageQueue.put(message)
-
-
-def incrementor(start = 0):
-	while True:
-		a = start
-		start = start + 1
-		yield a
-increment = incrementor()
-
-class TestTashiMessaging(unittest.TestCase):
-	def setUp(self):
-		self.configFiles = [ '../../../etc/TestConfig.cfg']
-		self.config = ConfigParser()
-		self.configFiles = self.config.read(self.configFiles)
-		self.port = int(self.config.get('MessageBroker', 'port'))
-
-		try:
-			self.brokerPid = os.spawnlpe(os.P_NOWAIT, 'python', 'python', 
-										 './messageBroker.py', 
-										 '--port', str(self.port),
-										 os.environ)
-			self.port = self.port + 1
-			# FIXME: what's the best way to wait for the broker to be ready?
-			time.sleep(1)
-		except Exception, e:
-			sys.exit(0)
-		self.initialized = True
-		self.log = logging.getLogger('TestTashiMessaging')
-		self.handler = TashiLogHandler(self.config)
-		self.log.addHandler(self.handler)
-		self.sub = TestTashiSubscriber(self.config, int(self.port) + increment.next())
-	def tearDown(self):
-		os.kill(self.brokerPid, signal.SIGKILL)
-		# FIXME: wait for the port to be ready again
-		time.sleep(2)
-		self.log.removeHandler(self.handler)
-#		 self.sub.broker.removeSubscriber(self.sub)
-		pass
-	def testLog(self):
-		self.log.log(50, "Hello World!")
-		self.handler.messages.get(timeout=5)
-		self.sub.messageQueue.get(timeout=5)
-		self.assertEqual(self.handler.messages.qsize(), 0)
-		self.assertEqual(self.sub.messageQueue.qsize(), 0)
-	def testPublish(self):
-		sys.stdout.flush()
-		self.port = self.port + 1
-		self.handler.publish({'message':'hello world'})
-		self.sub.messageQueue.get(timeout=5)
-		self.assertEqual(self.sub.messageQueue.qsize(), 0)
-		
-
-if __name__=='__main__':
-
-
-#	 logging.basicConfig(level=logging.INFO,
-#						 format="%(asctime)s %(levelname)s:\t %(message)s",
-#						 stream=sys.stdout)
-
-	suite = unittest.TestLoader().loadTestsFromTestCase(TestTashiMessaging)
-	unittest.TextTestRunner(verbosity=2).run(suite)
diff --git a/src/tashi/messaging/threadpool.py b/src/tashi/messaging/threadpool.py
deleted file mode 100644
index 5684ef2..0000000
--- a/src/tashi/messaging/threadpool.py
+++ /dev/null
@@ -1,305 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-import threading
-import time
-import Queue
-import logging
-
-_log = logging.getLogger('tashi.messaging.threadpool')
-
-def threaded(func):
-	def fn(*args, **kwargs):
-		thread = threading.Thread(target=func, args=args, kwargs=kwargs)
-		thread.start()
-		return thread
-	return fn
-
-
-class ThreadPool(Queue.Queue):
-	def __init__(self, size=8, maxsize=0):
-		Queue.Queue.__init__(self, maxsize)
-		for i in range(size):
-			thread = threading.Thread(target=self._worker)
-			thread.setDaemon(True)
-			thread.start()
-	def _worker(self):
-		while True:
-			try:
-				func, args, kwargs = self.get()
-				func(*args, **kwargs)
-			except Exception, e:
-				_log.error(e)
-				# FIXME: do something smarter here, backtrace, log,
-				# allow user-defined error handling...
-				
-	def submit(self, func, *args, **kwargs):
-		self.put((func, args, kwargs))
-	def submitlist(self, func, args, kwargs):
-		self.put((func, args, kwargs))
-
-class ThreadPoolClass:
-	def __init__(self, size=8, maxsize=0):
-		self._threadpool_pool = ThreadPool(size=size, maxsize=maxsize)
-
-
-def threadpool(pool):
-	def dec(func):
-		def fn(*args, **kwargs):
-			pool.submit(func, *args, **kwargs)
-		return fn
-	return dec
-
-def threadpoolmethod(meth):
-	def fn(*args, **kwargs):
-		try:
-			pool = args[0]._threadpool_pool
-		except AttributeError:
-			pool = args[0].__dict__.setdefault('_threadpool_pool', ThreadPool())
-		# FIXME: how do we check parent class?
-#		assert args[0].__class__ == ThreadPoolClass, "Thread pool method must be in a ThreadPoolClass"
-		pool.submit(meth, *args, **kwargs)
-	return fn
-
-def synchronized(lock=None):
-	_log.debug('synchronized decorator factory called')
-	if lock==None:
-		lock = threading.RLock()
-	def dec(func):
-		_log.debug('synchronized decorator called')
-		def fn(*args, **kwargs):
-			_log.debug('getting sync lock')
-			lock.acquire()
-			_log.debug('got sync lock')
-			ex = None
-			try:
-				r = func(*args, **kwargs)
-			except Exception, e:
-				ex = e
-			_log.debug('releasing sync lock')
-			lock.release()
-			_log.debug('released sync lock')
-			if ex != None:
-				raise e
-			return r
-		return fn
-	return dec
-			
-def synchronizedmethod(func):
-	def fn(*args, **kwargs):
-		try:
-			lock = args[0]._synchronized_lock
-		except AttributeError:
-			lock = args[0].__dict__.setdefault('_synchronized_lock', threading.RLock())
-		lock.acquire()
-		ex = None
-		try:
-			func(*args, **kwargs)
-		except Exception, e:
-			ex = e
-		lock.release()
-		if ex != None:
-			raise e
-	return fn
-		
-
-##############################
-# Test Code
-##############################
-import unittest
-import sys
-import time
-
-class TestThreadPool(unittest.TestCase):
-	def setUp(self):
-		self.errmargin = 0.5
-
-	def testUnthreaded(self):
-		queue = Queue.Queue()
-		def slowfunc(sleep=1):
-			time.sleep(sleep)
-			queue.put(None)
-		tt = time.time()
-		for i in range(4):
-			slowfunc()
-		for i in range(4):
-			queue.get()
-		tt = time.time() - tt
-		self.assertAlmostEqual(tt, 4, 1) 
-
-	def testThreaded(self):
-		queue = Queue.Queue()
-		@threaded
-		def slowthreadfunc(sleep=1):
-			time.sleep(sleep)
-			queue.put(None)
-		tt = time.time()
-		for i in range(8):
-			slowthreadfunc()
-		for i in range(8):
-			queue.get()
-		tt = time.time() - tt
-		self.assertAlmostEqual(tt, 1, 1) 
-
-	def testThreadPool(self):
-		pool = ThreadPool(size=4)
-		queue = Queue.Queue()
-		@threadpool(pool)
-		def slowpoolfunc(sleep=1):
-			time.sleep(sleep)
-			queue.put(None)
-		tt = time.time()
-		for i in range(8):
-			slowpoolfunc()
-		for i in range(8):
-			queue.get()
-		tt = time.time() - tt
-		self.assertAlmostEqual(tt, 2, 1) 
-
-	def testUnthreadedMethod(self):
-		queue = Queue.Queue()
-		class slowclass:
-			def __init__(self, sleep=1):
-				self.sleep=sleep
-			def beslow(self):
-				time.sleep(self.sleep)
-				queue.put(None)
-		sc = slowclass()
-		tt = time.time()
-		for i in range(4):
-			sc.beslow()
-		for i in range(4):
-			queue.get()
-		tt = time.time() - tt
-		self.assertAlmostEqual(tt, 4, 1)
-	
-	def testThreadedMethod(self):
-		queue = Queue.Queue()
-		class slowclass:
-			def __init__(self, sleep=1):
-				self.sleep=sleep
-			@threaded
-			def beslow(self):
-				time.sleep(self.sleep)
-				queue.put(None)
-		sc = slowclass()
-		tt = time.time()
-		for i in range(4):
-			sc.beslow()
-		for i in range(4):
-			queue.get()
-		tt = time.time() - tt
-		self.assertAlmostEqual(tt, 1, 1)
-	
-	def testThreadPoolMethod(self):
-		queue = Queue.Queue()
-		class slowclass:
-			def __init__(self, sleep=1):
-				self.sleep=sleep
-			@threadpoolmethod
-			def beslow(self):
-				time.sleep(self.sleep)
-				queue.put(None)
-		sc = slowclass()
-		tt = time.time()
-		for i in range(16):
-			sc.beslow()
-		for i in range(16):
-			queue.get()
-		tt = time.time() - tt
-		self.assertAlmostEqual(tt, 2, 1)
-	
-	def testSynchronized(self):
-		queue = Queue.Queue()
-		@synchronized()
-		def addtoqueue():
-			time.sleep(1)
-			queue.put(None)
-		@threaded
-		def slowthreadfunc():
-			addtoqueue()
-		tt = time.time()
-		for i in range(4):
-			slowthreadfunc()
-		for i in range(4):
-			queue.get()
-		tt = time.time() - tt
-		self.assertAlmostEqual(tt, 4, 1) 
-
-	def testSynchronizedMethod(self):
-		queue = Queue.Queue()
-		class addtoqueue:
-			@synchronizedmethod
-			def addtoqueue1(self):
-				time.sleep(1)
-				queue.put(None)
-			@synchronizedmethod
-			def addtoqueue2(self):
-				time.sleep(1)
-				queue.put(None)
-		atc = addtoqueue()
-		@threaded
-		def slowthreadfunc1():
-			atc.addtoqueue1()
-		@threaded
-		def slowthreadfunc2():
-			atc.addtoqueue2()
-		tt = time.time()
-		for i in range(4):
-			slowthreadfunc1()
-			slowthreadfunc2()
-		for i in range(8):
-			queue.get()
-		tt = time.time() - tt
-		self.assertAlmostEqual(tt, 8, 1) 
-
-	def testUnsynchronizedMethod(self):
-		queue = Queue.Queue()
-		class addtoqueue:
-			def addtoqueue1(self):
-				time.sleep(1)
-				queue.put(None)
-			def addtoqueue2(self):
-				time.sleep(1)
-				queue.put(None)
-		atc = addtoqueue()
-		@threaded
-		def slowthreadfunc1():
-			atc.addtoqueue1()
-		@threaded
-		def slowthreadfunc2():
-			atc.addtoqueue2()
-		tt = time.time()
-		for i in range(4):
-			slowthreadfunc1()
-			slowthreadfunc2()
-		for i in range(8):
-			queue.get()
-		tt = time.time() - tt
-		self.assertAlmostEqual(tt, 1, 1) 
-
-
-
-if __name__=='__main__':
-	import sys
-
-	logging.basicConfig(level=logging.INFO,
-						format="%(asctime)s %(levelname)s:\t %(message)s",
-						stream=sys.stdout)
-
-	suite = unittest.TestLoader().loadTestsFromTestCase(TestThreadPool)
-	unittest.TextTestRunner(verbosity=2).run(suite)
diff --git a/src/tashi/messaging/thriftmessaging.py b/src/tashi/messaging/thriftmessaging.py
deleted file mode 100755
index 0c73ff0..0000000
--- a/src/tashi/messaging/thriftmessaging.py
+++ /dev/null
@@ -1,278 +0,0 @@
-#!/usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-import sys
-import time
-import socket
-import traceback
-import threading
-
-sys.path.append('./gen-py')
-import tashi.messaging.messagingthrift
-import tashi.messaging.messagingthrift.MessageBrokerThrift
-import tashi.messaging.messagingthrift.SubscriberThrift
-from tashi.messaging.messagingthrift.ttypes import *
-
-from thrift import Thrift
-from thrift.transport import TSocket
-from thrift.transport import TTransport
-from thrift.protocol import TBinaryProtocol
-from thrift.server import TServer
-
-from tashi import ConnectionManager
-
-from tashi.messaging.messaging import *
-from tashi.messaging.threadpool import ThreadPoolClass, threadpool, ThreadPool, threadpoolmethod, threaded
-
-class MessageBrokerThrift(MessageBroker):
-	def __init__(self, port, daemon=True):
-		MessageBroker.__init__(self)
-		self.processor = tashi.messaging.messagingthrift.MessageBrokerThrift.Processor(self)
-		self.transport = TSocket.TServerSocket(port)
-		self.tfactory = TTransport.TBufferedTransportFactory()
-		self.pfactory = TBinaryProtocol.TBinaryProtocolFactory()
-		self.proxy = ConnectionManager(tashi.messaging.messagingthrift.SubscriberThrift.Client, 0)
-		self.ready = threading.Event()
-#		 self.server = TServer.TSimpleServer(self.processor,
-#											 self.transport,
-#											 self.tfactory,
-#											 self.pfactory)
-#		 self.server = TServer.TThreadPoolServer(self.processor,
-#												 self.transport,
-#												 self.tfactory,
-#												 self.pfactory)
-		self.server = TServer.TThreadedServer(self.processor,
-												self.transport,
-												self.tfactory,
-												self.pfactory)
-		self.publishCalls = 0
-
-		def ssvrthrd():
-			try:
-				# FIXME: Race condition, the ready event should be set after
-				# starting the server.  However, server.serve()
-				# doesn't return under normal circumstances.  This
-				# seems to work in practice, even though it's clearly
-				# wrong.
-				self.ready.set()
-				self.server.serve()
-			except Exception, e:
-				print e
-				sys.stdout.flush()
-				pass
-		svt = threading.Thread(target=ssvrthrd)
-		svt.setDaemon(daemon)
-		svt.start()
-		self.ready.wait()
-	def log(self, message):
-		MessageBroker.log(self, message)
-	@synchronizedmethod
-	def addSubscriber(self, host, port):
-		subscribers = self.getSubscribers()
-		for sub in subscribers:
-			if sub.host == host and sub.port == port:
-				return
-		subscriber = SubscriberThriftProxy(host, port, self.proxy)
-		MessageBroker.addSubscriber(self, subscriber)
-	def removeSubscriber(self, host, port):
-		subscriber = None
-		subscribers = self.getSubscribers()
-		for sub in subscribers:
-			if sub.host == host and sub.port == port:
-				subscriber = sub
-		if subscriber != None:
-			MessageBroker.removeSubscriber(self, subscriber)
-	@synchronizedmethod
-	def publish(self, message):
-		self.publishCalls  = self.publishCalls + 1
-		sys.stdout.flush()
-		MessageBroker.publish(self, message)
-
-class MessageBrokerThriftProxy:
-	def __init__(self, host, port):
-		self.host = host
-		self.port = port
-		self.proxy = ConnectionManager(tashi.messaging.messagingthrift.MessageBrokerThrift.Client,port)
-	@synchronizedmethod
-	def log(self, message):
-		self.proxy[self.host, self.port].log(message)
-	@synchronizedmethod
-	def publish(self, message):
-		self.proxy[self.host, self.port].publish(message)
-	@synchronizedmethod
-	def publishList(self, messages):
-		self.proxy[self.host, self.port].publishList(messages)
-	@synchronizedmethod
-	def addSubscriber(self, subscriber):
-		self.proxy[self.host, self.port].addSubscriber(host=subscriber.host, port=subscriber.port)
-	@synchronizedmethod
-	def removeSubscriber(self, subscriber):
-		self.proxy[self.host, self.port].removeSubscriber(host=subscriber.host, port=subscriber.port)
-
-
-
-class SubscriberThrift(Subscriber, threading.Thread):
-	def __init__(self, broker, port, synchronized=False):
-		self.host = socket.gethostname()
-		self.port = port
-		self.processor = tashi.messaging.messagingthrift.SubscriberThrift.Processor(self)
-		self.transport = TSocket.TServerSocket(port)
-		self.tfactory = TTransport.TBufferedTransportFactory()
-		self.pfactory = TBinaryProtocol.TBinaryProtocolFactory()
-		self.server = TServer.TThreadedServer(self.processor,
-											  self.transport,
-											  self.tfactory,
-											  self.pfactory)
-		def ssvrthrd():
-			try:
-				self.server.serve()
-			except Exception, e:
-				pass
-
-
-		self.thread = threading.Thread(target=ssvrthrd)
-		self.thread.setDaemon(True)
-		self.thread.start()
-
-		# We have to call this AFTER initializing our server, so that
-		# the broker can contact us
-		# Wrap this in a try/catch because the broker may not be online yet
-		try:
-			Subscriber.__init__(self, broker,  synchronized=synchronized)		
-		except:
-			pass
-		threading.Thread.__init__(self)
-		self.setDaemon(True)
-		self.start()
-
-	def stop(self):
-#		 # FIXME: this is broken, there is no clear way to stop a
-#		 # Thrift server
-		self.broker.removeSubscriber(self)
-		self.transport.close()
-	def run(self):
-		while(True):
-			# renew subscription every 5 min
-			try:
-				self.broker.addSubscriber(self)
-			except:
-				pass
-			time.sleep(5*60)
-
-class SubscriberThriftProxy:
-	def __init__(self, host, port, proxy, aggregate = 100):
-		self.host = host
-		self.port = port
-		self.proxy = proxy
-		# for some reason, thrift clients are not thread-safe, lock during send
-		self.lock = threading.Lock()
-		self.pending = []
-		self.aggregateSize = aggregate
-	def publish(self, message):
-		self.lock.acquire()
-		sys.stdout.flush()
-		if message.has_key('aggregate') and message['aggregate'] == 'True':
-			self.pending.append(message)
-			if len(self.pending) >= self.aggregateSize:
-				try:
-					self.proxy[self.host, self.port].publishList(self.pending)
-				except Exception, e:
-					print e
-					self.lock.release()
-					raise e
-				self.pending = []
-		else:
-			try:
-				self.proxy[self.host, self.port].publish(message)
-			except Exception, e:
-				sys.stdout.flush()
-				print e
-				self.lock.release()
-				raise e
-		self.lock.release()
-
-class PublisherThrift(Publisher):
-	def __init__(self, host, port):
-		self.host = host
-		self.port = port
-		self.broker = MessageBrokerThriftProxy(host, port)
-		Publisher.__init__(self, self.broker)
-		
-####################
-# Testing Code 
-####################
-
-class TestSubscriberThrift(SubscriberThrift):
-	def __init__(self, *args, **kwargs):
-		self.queue = Queue.Queue()
-		SubscriberThrift.__init__(self, *args, **kwargs)
-	def handle(self, message):
-		self.queue.put(message)
-
-portnum = 1718
-class TestThriftMessaging(unittest.TestCase):
-	def setUp(self):
-		global portnum
-		self.broker = MessageBrokerThrift(portnum)
-		self.brokerPort = portnum
-		portnum = portnum + 1 
-		self.proxy = MessageBrokerThriftProxy('localhost', self.brokerPort)
-		self.publisher = PublisherThrift('localhost', self.brokerPort)
-		self.subscriber = TestSubscriberThrift(self.proxy, portnum)
-		portnum = portnum + 1
-	def tearDown(self):
-		pass
-	def testSetUp(self):
-		pass
-	def testPublish(self):
-		self.publisher.publish( {'message':'hello world'} )
-		self.subscriber.queue.get(True, timeout=5)
-		self.assertEqual(self.subscriber.queue.qsize(), 0)
-	def testPublishList(self):
-		nrmsgs = 10
-		msgs = []
-		for i in range(nrmsgs):
-			msgs.append( {'msgnum':str(i)} )
-		self.publisher.publishList( msgs )
-		for i in range(nrmsgs):
-			self.subscriber.queue.get(True, timeout=5)
-		self.assertEqual(self.subscriber.queue.qsize(), 0)
-	def testAggregate(self):
-		nrmsgs = self.publisher.aggregateSize
-		for i in range(nrmsgs):
-			self.assertEqual(self.subscriber.queue.qsize(), 0)
-			self.publisher.aggregate( {'msgnum':str(i)} )
-		for i in range(nrmsgs):
-			self.subscriber.queue.get(True, timeout=5)
-		self.assertEqual(self.subscriber.queue.qsize(), 0)
-	def testAggregateKeyword(self):
-		nrmsgs = self.publisher.aggregateSize
-		for i in range(nrmsgs):
-			self.assertEqual(self.subscriber.queue.qsize(), 0)
-			self.publisher.publish( {'msgnum':str(i), 'aggregate':'True'} )
-		for i in range(nrmsgs):
-			self.subscriber.queue.get(True, timeout=5)
-		self.assertEqual(self.subscriber.queue.qsize(), 0)
-
-
-if __name__=='__main__':
-	suite = unittest.TestLoader().loadTestsFromTestCase(TestThriftMessaging)
-	unittest.TextTestRunner(verbosity=2).run(suite)
-
-
diff --git a/src/tashi/nodemanager/nodemanager.py b/src/tashi/nodemanager/nodemanager.py
index 66d2d5b..b725b0a 100755
--- a/src/tashi/nodemanager/nodemanager.py
+++ b/src/tashi/nodemanager/nodemanager.py
@@ -18,30 +18,57 @@
 # under the License.    
 
 import logging.config
-import signal
 import sys
+import os
 
-from tashi.util import instantiateImplementation, getConfig, debugConsole, signalHandler
+from tashi.util import instantiateImplementation, debugConsole
 import tashi
 from tashi import boolean
 
 from tashi.rpycservices import rpycservices
+from tashi.utils.config import Config
+
 from rpyc.utils.server import ThreadedServer
 from rpyc.utils.authenticators import TlsliteVdbAuthenticator
 
-@signalHandler(signal.SIGTERM)
-def handleSIGTERM(signalNumber, stackFrame):
-	sys.exit(0)
-
 def main():
-	global config, dfs, vmm, service, server, log, notifier
+	global config, log
 	
-	(config, configFiles) = getConfig(["NodeManager"])
-	publisher = instantiateImplementation(config.get("NodeManager", "publisher"), config)
-	tashi.publisher = publisher
+	config = Config(["NodeManager"])
+	configFiles = config.getFiles()
+
 	logging.config.fileConfig(configFiles)
 	log = logging.getLogger(__name__)
 	log.info('Using configuration file(s) %s' % configFiles)
+
+	# handle keyboard interrupts (http://code.activestate.com/recipes/496735-workaround-for-missed-sigint-in-multithreaded-prog/)
+	child = os.fork()
+	
+	if child == 0:
+		startNodeManager()
+		# shouldn't exit by itself
+		sys.exit(0)
+
+	else:
+		# main
+		try:
+			os.waitpid(child, 0)
+		except KeyboardInterrupt:
+			log.info("Exiting node manager after receiving a SIGINT signal")
+			os._exit(0)
+		except Exception:
+			log.exception("Abnormal termination of node manager")
+			os._exit(-1)
+
+		log.info("Exiting node manager after service thread exited")
+		os._exit(-1)
+
+	return
+
+def startNodeManager():
+	global config, dfs, vmm, service, server, log, notifier
+	publisher = instantiateImplementation(config.get("NodeManager", "publisher"), config)
+	tashi.publisher = publisher
 	dfs = instantiateImplementation(config.get("NodeManager", "dfs"), config)
 	vmm = instantiateImplementation(config.get("NodeManager", "vmm"), config, dfs, None)
 	service = instantiateImplementation(config.get("NodeManager", "service"), config, vmm)
@@ -51,6 +78,9 @@
 		users = {}
 		users[config.get('AllowedUsers', 'clusterManagerUser')] = config.get('AllowedUsers', 'clusterManagerPassword')
 		authenticator = TlsliteVdbAuthenticator.from_dict(users)
+
+		# XXXstroucki: ThreadedServer is liable to have exceptions
+		# occur within if an endpoint is lost.
 		t = ThreadedServer(service=rpycservices.ManagerService, hostname='0.0.0.0', port=int(config.get('NodeManagerService', 'port')), auto_register=False, authenticator=authenticator)
 	else:
 		t = ThreadedServer(service=rpycservices.ManagerService, hostname='0.0.0.0', port=int(config.get('NodeManagerService', 'port')), auto_register=False)
@@ -59,14 +89,11 @@
 	t.service._type = 'NodeManagerService'
 
 	debugConsole(globals())
-	
-	try:
-		t.start()
-	except KeyboardInterrupt:
-		handleSIGTERM(signal.SIGTERM, None)
-	except Exception, e:
-		sys.stderr.write(str(e) + "\n")
-		sys.exit(-1)
+
+	t.start()
+	# shouldn't exit by itself
+	sys.exit(0)
+
 
 if __name__ == "__main__":
 	main()
diff --git a/src/tashi/nodemanager/nodemanagerservice.py b/src/tashi/nodemanager/nodemanagerservice.py
index c493ac9..1955ecf 100755
--- a/src/tashi/nodemanager/nodemanagerservice.py
+++ b/src/tashi/nodemanager/nodemanagerservice.py
@@ -5,63 +5,57 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 #   http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
-# under the License.    
+# under the License.
 
 import logging
 import socket
 import threading
 import time
 
-from tashi.rpycservices import rpycservices
 from tashi.rpycservices.rpyctypes import InstanceState, TashiException, Errors, Instance
 from tashi import boolean, vmStates, ConnectionManager
-import tashi
-
 
 class NodeManagerService(object):
 	"""RPC handler for the NodeManager
-	   
-	   Perhaps in the future I can hide the dfs from the 
+
+	   Perhaps in the future I can hide the dfs from the
 	   VmControlInterface and do all dfs operations here?"""
-	
+
 	def __init__(self, config, vmm):
+		# XXXstroucki: vmm will wait for this constructor to complete
 		self.config = config
 		self.vmm = vmm
-		self.cmHost = config.get("NodeManagerService", "clusterManagerHost")
-		self.cmPort = int(config.get("NodeManagerService", "clusterManagerPort"))
-		self.authAndEncrypt = boolean(config.get('Security', 'authAndEncrypt'))
+		self.cmHost = self.config.get("NodeManagerService", "clusterManagerHost")
+		self.cmPort = int(self.config.get("NodeManagerService", "clusterManagerPort"))
+		self.authAndEncrypt = boolean(self.config.get('Security', 'authAndEncrypt'))
 		if self.authAndEncrypt:
-			self.username = config.get('AccessClusterManager', 'username')
-			self.password = config.get('AccessClusterManager', 'password')
+			self.username = self.config.get('AccessClusterManager', 'username')
+			self.password = self.config.get('AccessClusterManager', 'password')
 		else:
 			self.username = None
 			self.password = None
 		self.log = logging.getLogger(__file__)
-		self.convertExceptions = boolean(config.get('NodeManagerService', 'convertExceptions'))
-		self.registerFrequency = float(config.get('NodeManagerService', 'registerFrequency'))
-		self.statsInterval = float(self.config.get('NodeManagerService', 'statsInterval'))
-		self.registerHost = boolean(config.get('NodeManagerService', 'registerHost'))
+		self.convertExceptions = boolean(self.config.get('NodeManagerService', 'convertExceptions'))
+		self.registerFrequency = float(self.config.get('NodeManagerService', 'registerFrequency'))
+		self.statsInterval = float(self.config.get('NodeManagerService', 'statsInterval', default = 0))
+		self.registerHost = boolean(self.config.get('NodeManagerService', 'registerHost'))
 		try:
 			self.cm = ConnectionManager(self.username, self.password, self.cmPort)[self.cmHost]
 		except:
 			self.log.exception("Could not connect to CM")
+			# XXXstroucki: raise?
 			return
 
-		self.accountingHost = None
-		self.accountingPort = None
-		try:
-			self.accountingHost = self.config.get('NodeManagerService', 'accountingHost')
-			self.accountingPort = self.config.getint('NodeManagerService', 'accountingPort')
-		except:
-			pass
+		self.accountingHost = self.config.get('NodeManagerService', 'accountingHost')
+		self.accountingPort = self.config.getint('NodeManagerService', 'accountingPort')
 
 		self.notifyCM = []
 
@@ -76,25 +70,25 @@
 
 		self.__registerHost()
 
+		# XXXstroucki: should make an effort to retry
+		# This can time out now with an exception
 		self.id = self.cm.registerNodeManager(self.host, self.instances.values())
 
-		# XXXstroucki cut cross check for NM/VMM state
-
 		# start service threads
-		threading.Thread(target=self.__registerWithClusterManager).start()
-		threading.Thread(target=self.__statsThread).start()
-	
+		threading.Thread(name="registerWithClusterManager", target=self.__registerWithClusterManager).start()
+		threading.Thread(name="statsThread", target=self.__statsThread).start()
+
 	def __initAccounting(self):
-                self.accountBuffer = []
-                self.accountLines = 0
-                self.accountingClient = None
-                try:
-                        if (self.accountingHost is not None) and \
-                                    (self.accountingPort is not None):
-                                self.accountingClient=rpycservices.client(self.accountingHost, self.accountingPort)
-                except:
-                        self.log.exception("Could not init accounting")
-			
+		self.accountBuffer = []
+		self.accountLines = 0
+		self.accountingClient = None
+		try:
+			if (self.accountingHost is not None) and \
+						(self.accountingPort is not None):
+				self.accountingClient = ConnectionManager(self.username, self.password, self.accountingPort)[self.accountingHost]
+		except:
+			self.log.exception("Could not init accounting")
+
 	def __loadVmInfo(self):
 		try:
 			self.instances = self.vmm.getInstances()
@@ -105,15 +99,19 @@
 	# send data to CM
 	# XXXstroucki adapt this for accounting?
 	def __flushNotifyCM(self):
-		start = time.time()
 		# send data to CM, adding message to buffer if
 		# it fails
 		try:
 			notifyCM = []
 			try:
 				while (len(self.notifyCM) > 0):
+					# XXXstroucki ValueError: need more than 1 value to unpack
+					# observed here. How?
 					value = self.notifyCM.pop(0)
-					(instanceId, newInst, old, success) = value
+					try:
+						(instanceId, newInst, old, success) = value
+					except:
+						self.log.exception("problem with value: %s" % value)
 					try:
 						self.cm.vmUpdate(instanceId, newInst, old)
 					except TashiException, e:
@@ -135,7 +133,7 @@
 		#if (toSleep > 0):
 			#time.sleep(toSleep)
 
-        def __ACCOUNTFLUSH(self):
+	def __ACCOUNTFLUSH(self):
 		try:
 			if (self.accountingClient is not None):
 				self.accountingClient.record(self.accountBuffer)
@@ -145,45 +143,51 @@
 			self.log.exception("Failed to flush accounting data")
 
 
-        def __ACCOUNT(self, text, instance=None, host=None):
-                now = time.time()
-                instanceText = None
-                hostText = None
+	def __ACCOUNT(self, text, instance=None, host=None):
+		now = time.time()
+		instanceText = None
+		hostText = None
 
-                if instance is not None:
+		if instance is not None:
 			try:
-                        	instanceText = 'Instance(%s)' % (instance)
+				instanceText = 'Instance(%s)' % (instance)
 			except:
 				self.log.exception("Invalid instance data")
 
-                if host is not None:
+		if host is not None:
 			try:
-                        	hostText = "Host(%s)" % (host)
+				hostText = "Host(%s)" % (host)
 			except:
 				self.log.exception("Invalid host data")
 
-                secondary = ','.join(filter(None, (hostText, instanceText)))
+		secondary = ','.join(filter(None, (hostText, instanceText)))
 
-                line = "%s|%s|%s" % (now, text, secondary)
+		line = "%s|%s|%s" % (now, text, secondary)
 
-                self.accountBuffer.append(line)
-                self.accountLines += 1
+		self.accountBuffer.append(line)
+		self.accountLines += 1
 
 		# XXXstroucki think about force flush every so often
-                if (self.accountLines > 0):
-                        self.__ACCOUNTFLUSH()
+		if (self.accountLines > 0):
+			self.__ACCOUNTFLUSH()
 
 
 	# service thread function
 	def __registerWithClusterManager(self):
+		happy = False
 		while True:
 			#self.__ACCOUNT("TESTING")
 			start = time.time()
 			try:
 				instances = self.instances.values()
 				self.id = self.cm.registerNodeManager(self.host, instances)
+				if not happy:
+					happy = True
+					self.log.info("Registered with the CM")
+
 			except Exception:
 				self.log.exception('Failed to register with the CM')
+				happy = False
 
 			toSleep = start - time.time() + self.registerFrequency
 			if (toSleep > 0):
@@ -201,26 +205,29 @@
 						instance = self.instances.get(vmId, None)
 						if (not instance):
 							continue
-						id = instance.id
+						_id = instance.id
 						stats = self.vmm.getStats(vmId)
 						for stat in stats:
-							publishList.append({"vm_%d_%s" % (id, stat):stats[stat]})
+							publishList.append({"vm_%d_%s" % (_id, stat):stats[stat]})
 					except:
 						self.log.exception('statsThread threw an exception')
 				if (len(publishList) > 0):
-					tashi.publisher.publishList(publishList)
+					# XXXstroucki: no publisher currently
+					pass
+					#tashi.publisher.publishList(publishList)
 			except:
 				self.log.exception('statsThread threw an exception')
 			time.sleep(self.statsInterval)
 
-        def __registerHost(self):
-                hostname = socket.gethostname()
+	def __registerHost(self):
+		hostname = socket.gethostname()
 		# populate some defaults
-		# XXXstroucki: I think it's better if the nodemanager fills these in properly when registering with the clustermanager
+		# XXXstroucki: I think it's better if the nodemanager fills these in
+		# properly when registering with the clustermanager
 		memory = 0
 		cores = 0
 		version = "empty"
-                #self.cm.registerHost(hostname, memory, cores, version)
+		#self.cm.registerHost(hostname, memory, cores, version)
 
 	def __getInstance(self, vmId):
 		instance = self.instances.get(vmId, None)
@@ -235,15 +242,23 @@
 
 
 		raise TashiException(d={'errno':Errors.NoSuchVmId,'msg':"There is no vmId %d on this host" % (vmId)})
-	
+
 	# remote
 	# Called from VMM to update self.instances
 	# but only changes are Exited, MigrateTrans and Running
 	# qemu.py calls this in the matchSystemPids thread
 	# xenpv.py: i have no real idea why it is called there
 	def vmStateChange(self, vmId, old, cur):
-		instance = self.__getInstance(vmId)
+		try:
+			instance = self.__getInstance(vmId)
+		except TashiException, e:
+			if e.errno == Errors.NoSuchVmId:
+				self.log.warning("Asked to change state for unknown VM. Has it not completed starting yet?")
+				return False
+			else:
+				raise
 
+		before = instance.state
 		if (instance.state == cur):
 			# Don't do anything if state is what it should be
 			return True
@@ -252,16 +267,25 @@
 			# make a note of mismatch, but go on.
 			# the VMM should know best
 			self.log.warning('VM state was %s, call indicated %s' % (vmStates[instance.state], vmStates[old]))
-                        
+
 		instance.state = cur
 
 		self.__ACCOUNT("NM VM STATE CHANGE", instance=instance)
-			      
+
 		newInst = Instance(d={'state':cur})
 		success = lambda: None
-		# send the state change up to the CM
-		self.notifyCM.append((instance.id, newInst, old, success))
-		self.__flushNotifyCM()
+
+		# if this instance was in MigrateTrans, and has exited
+		# then don't tell the CM; it is the source instance
+		# exiting, and the CM should have updated its information
+		# to the target instance's info.
+		# Otherwise, send the state change up to the CM
+
+		if before == InstanceState.MigrateTrans and cur == InstanceState.Exited:
+			pass
+		else:
+			self.notifyCM.append((instance.id, newInst, old, success))
+			self.__flushNotifyCM()
 
 		# cache change locally
 		self.instances[vmId] = instance
@@ -270,7 +294,6 @@
 			# At this point, the VMM will clean up,
 			# so forget about this instance
 			del self.instances[vmId]
-			return True
 
 		return True
 
@@ -278,10 +301,12 @@
 	def createInstance(self, instance):
 		vmId = instance.vmId
 		self.instances[vmId] = instance
-		
-	
+
+
 	# remote
 	def instantiateVm(self, instance):
+		# XXXstroucki: check my capacity before instantiating
+
 		self.__ACCOUNT("NM VM INSTANTIATE", instance=instance)
 		try:
 			vmId = self.vmm.instantiateVm(instance)
@@ -291,7 +316,7 @@
 			return vmId
 		except:
 			self.log.exception("Failed to start instance")
-	
+
 	# remote
 	def suspendVm(self, vmId, destination):
 		instance = self.__getInstance(vmId)
@@ -300,10 +325,12 @@
 		instance.state = InstanceState.Suspending
 		self.instances[vmId] = instance
 		threading.Thread(target=self.vmm.suspendVm, args=(vmId, destination)).start()
-	
+
 	# called by resumeVm as thread
 	def __resumeVmHelper(self, instance, name):
 		self.vmm.resumeVmHelper(instance, name)
+		# XXXstroucki should the VMM be responsible for setting
+		# state? It should know better.
 		instance.state = InstanceState.Running
 		newInstance = Instance(d={'id':instance.id,'state':instance.state})
 		success = lambda: None
@@ -323,7 +350,7 @@
 			self.log.exception('resumeVm failed')
 			raise TashiException(d={'errno':Errors.UnableToResume,'msg':"resumeVm failed on the node manager"})
 		return instance.vmId
-	
+
 	# remote
 	def prepReceiveVm(self, instance, source):
 		self.__ACCOUNT("NM VM MIGRATE RECEIVE PREP")
@@ -342,7 +369,9 @@
 	# XXXstroucki migrate out?
 	def __migrateVmHelper(self, instance, target, transportCookie):
 		self.vmm.migrateVm(instance.vmId, target.name, transportCookie)
-		del self.instances[instance.vmId]
+		# removal from self.instances done by communication from
+		# VMM as part of above migrateVm function
+		return
 
 	# remote
 	# XXXstroucki migrate out?
@@ -351,9 +380,9 @@
 		self.__ACCOUNT("NM VM MIGRATE", instance=instance)
 		instance.state = InstanceState.MigrateTrans
 		self.instances[vmId] = instance
-		threading.Thread(target=self.__migrateVmHelper, args=(instance, target, transportCookie)).start()
+		threading.Thread(name="migrateVmHelper", target=self.__migrateVmHelper, args=(instance, target, transportCookie)).start()
 		return
-	
+
 	# called by receiveVm as thread
 	# XXXstroucki migrate in?
 	def __receiveVmHelper(self, instance, transportCookie):
@@ -364,15 +393,16 @@
 		self.instances[vmId] = instance
 		newInstance = Instance(d={'id':instance.id,'state':instance.state,'vmId':instance.vmId,'hostId':instance.hostId})
 		success = lambda: None
-		self.notifyCM.append((newInstance.id, newInstance, InstanceState.Running, success))
+		self.notifyCM.append((newInstance.id, newInstance, InstanceState.MigrateTrans, success))
 		self.__flushNotifyCM()
 
 	# remote
 	# XXXstroucki migrate in?
 	def receiveVm(self, instance, transportCookie):
 		instance.state = InstanceState.MigrateTrans
-		vmId = instance.vmId
-		self.instances[vmId] = instance
+		# XXXstroucki new vmId is not known yet until VM is received
+		#vmId = instance.vmId
+		#self.instances[vmId] = instance
 		self.__ACCOUNT("NM VM MIGRATE RECEIVE", instance=instance)
 		threading.Thread(target=self.__receiveVmHelper, args=(instance, transportCookie)).start()
 		return
@@ -429,4 +459,3 @@
 	# remote
 	def liveCheck(self):
 		return "alive"
-	
diff --git a/src/tashi/nodemanager/vmcontrol/qemu.py b/src/tashi/nodemanager/vmcontrol/qemu.py
index 7806f4b..d00d07d 100644
--- a/src/tashi/nodemanager/vmcontrol/qemu.py
+++ b/src/tashi/nodemanager/vmcontrol/qemu.py
@@ -50,12 +50,12 @@
 			output = child.monitorFd
 			#print "listen"
 			select.select([ls], [], [])
-			(s, clientAddr) = listenSocket.accept()
+			(s, __clientAddr) = listenSocket.accept()
 			while s:
 				if (output != -1):
-					(rl, wl, el) = select.select([s, output], [], [])
+					(rl, __wl, __el) = select.select([s, output], [], [])
 				else:
-					(rl, wl, el) = select.select([s], [], [])
+					(rl, __wl, __el) = select.select([s], [], [])
 				if (len(rl) > 0):
 					if (rl[0] == s):
 						#print "from s"
@@ -87,17 +87,19 @@
 	
 	def __init__(self, config, dfs, nm):
 		VmControlInterface.__init__(self, config, dfs, nm)
-		self.QEMU_BIN = self.config.get("Qemu", "qemuBin")
-		self.INFO_DIR = self.config.get("Qemu", "infoDir")
-		self.POLL_DELAY = float(self.config.get("Qemu", "pollDelay"))
-		self.migrationRetries = int(self.config.get("Qemu", "migrationRetries"))
-		self.monitorTimeout = float(self.config.get("Qemu", "monitorTimeout"))
-		self.migrateTimeout = float(self.config.get("Qemu", "migrateTimeout"))
-		self.useMigrateArgument = boolean(self.config.get("Qemu", "useMigrateArgument"))
-		self.statsInterval = float(self.config.get("Qemu", "statsInterval"))
-		# XXXstroucki amount of reserved memory could be configurable
-		self.reservedMem = 512
-		# XXXstroucki perhaps make this configurable
+		self.QEMU_BIN = self.config.get("Qemu", "qemuBin", default = "/usr/bin/kvm")
+		self.INFO_DIR = self.config.get("Qemu", "infoDir", default = "/var/tmp/VmControlQemu/")
+		self.POLL_DELAY = float(self.config.get("Qemu", "pollDelay", default = 1))
+		self.migrationRetries = int(self.config.get("Qemu", "migrationRetries", default = 10))
+		self.monitorTimeout = float(self.config.get("Qemu", "monitorTimeout", default = 60))
+		self.migrateTimeout = float(self.config.get("Qemu", "migrateTimeout", default = 300))
+		self.useMigrateArgument = boolean(self.config.get("Qemu", "useMigrateArgument", default = False))
+		self.statsInterval = float(self.config.get("Qemu", "statsInterval", default = 0))
+		reservedMem = self.config.get("Qemu", "reservedMem", default = 512)
+		reservedMem = int(reservedMem)
+
+		self.reservedMem = reservedMem
+
 		self.ifPrefix = "tashi"
 		self.controlledVMs = {}
 		self.usedPorts = []
@@ -106,13 +108,20 @@
 		self.vncPortLock = threading.Lock()
 		self.consolePort = 10000
 		self.consolePortLock = threading.Lock()
-		self.migrationSemaphore = threading.Semaphore(int(self.config.get("Qemu", "maxParallelMigrations")))
+		maxParallelMigrations = self.config.get("Qemu", "maxParallelMigrations")
+		maxParallelMigrations = int(maxParallelMigrations)
+		if maxParallelMigrations < 1:
+			maxParallelMigrations = 1
+
+		self.migrationSemaphore = threading.Semaphore(maxParallelMigrations)
 		self.stats = {}
+
+		self.suspendHandler = self.config.get("Qemu", "suspendHandler", default = "gzip")
+		self.resumeHandler = self.config.get("Qemu", "resumeHandler", default = "zcat")
+
 		self.scratchVg = self.config.get("Qemu", "scratchVg")
-		# XXXstroucki revise
-		self.scratchDir = self.config.get("Qemu", "scratchDir")
-		if len(self.scratchDir) == 0:
-			self.scratchDir = "/tmp"
+
+		self.scratchDir = self.config.get("Qemu", "scratchDir", default = "/tmp")
 
 		try:
 			os.mkdir(self.INFO_DIR)
@@ -129,13 +138,20 @@
 		def __init__(self, **attrs):
 			self.__dict__.update(attrs)
 
+	def __dereferenceLink(self, spec):
+		newspec = os.path.realpath(spec)
+		return newspec
+
+
 	def __getHostPids(self):
 		"""Utility function to get a list of system PIDs that match the QEMU_BIN specified (/proc/nnn/exe)"""
 		pids = []
+		real_bin = self.__dereferenceLink(self.QEMU_BIN)
+
 		for f in os.listdir("/proc"):
 			try:
-				bin = os.readlink("/proc/%s/exe" % (f))
-				if (bin.find(self.QEMU_BIN) != -1):
+				binary = os.readlink("/proc/%s/exe" % (f))
+				if (binary.find(real_bin) != -1):
 					pids.append(int(f))
 			except Exception:
 				pass
@@ -146,14 +162,14 @@
 		"""Will return a dict of instances by vmId to the caller"""
 		return dict((x, self.controlledVMs[x].instance) for x in self.controlledVMs.keys())
 
-	def __matchHostPids(self, controlledVMs):
+	def __matchHostPids(self):
 		"""This is run in a separate polling thread and it must do things that are thread safe"""
 
-		vmIds = controlledVMs.keys()
+		vmIds = self.controlledVMs.keys()
 		pids = self.__getHostPids()
 
 		for vmId in vmIds:
-			child = controlledVMs[vmId]
+			child = self.controlledVMs[vmId]
 			instance = child.instance
 			name = instance.name
 
@@ -164,9 +180,9 @@
 				# remove info file
 				os.unlink(self.INFO_DIR + "/%d"%(vmId))
 
-				# XXXstroucki why not use self.controlledVMs
-				# argument, so why modify this fn's formal?
-				del controlledVMs[vmId]
+				# XXXstroucki python should handle
+				# locking here (?)
+				del self.controlledVMs[vmId]
 
 				# remove any stats (appropriate?)
 				try:
@@ -187,7 +203,7 @@
 					try:
 						os.waitpid(vmId, 0)
 					except:
-						log.exception("waitpid failed for vmId" % (vmId))
+						log.exception("waitpid failed for vmId %s" % (vmId))
 				# recover the child's stderr and monitor
 				# output if possible
 				if (child.errorBit):
@@ -204,17 +220,21 @@
 				# remove scratch storage
 				try:
 					if self.scratchVg is not None:
+						scratchName = "lv%s" % name
 						log.info("Removing any scratch for %s" % (name))
-						cmd = "/sbin/lvremove --quiet -f %s" % self.scratchVg
-    						result = subprocess.Popen(cmd.split(), executable=cmd.split()[0], stdout=subprocess.PIPE, stderr=open(os.devnull, "w"), close_fds=True).wait()
+						cmd = "/sbin/lvremove --quiet -f %s/%s" % (self.scratchVg, scratchName)
+						__result = subprocess.Popen(cmd.split(), executable=cmd.split()[0], stdout=subprocess.PIPE, stderr=open(os.devnull, "w"), close_fds=True).wait()
 				except:
 					log.warning("Problem cleaning scratch volumes")
 					pass
 
 				# let the NM know
 				try:
-					if (not child.migratingOut):
-						self.nm.vmStateChange(vmId, None, InstanceState.Exited)
+					# XXXstroucki: we don't want to treat
+					# the source VM of a migration exiting
+					# as an actual exit, but the NM
+					# should probably know.
+					self.nm.vmStateChange(vmId, None, InstanceState.Exited)
 				except Exception:
 					log.exception("vmStateChange failed for VM %s" % (name))
 			else:
@@ -273,7 +293,7 @@
 		while True:
 			try:
 				time.sleep(self.POLL_DELAY)
-				self.__matchHostPids(self.controlledVMs)
+				self.__matchHostPids()
 			except:
 				log.exception("Exception in poolVMsLoop")
 	
@@ -294,7 +314,7 @@
 		monitorFd = child.monitorFd
 		buf = ""
 		try:
-			(rlist, wlist, xlist) = select.select([monitorFd], [], [], 0.0)
+			(rlist, __wlist, __xlist) = select.select([monitorFd], [], [], 0.0)
 			while (len(rlist) > 0):
 				c = os.read(monitorFd, 1)
 				if (c == ""):
@@ -302,7 +322,7 @@
 					child.errorBit = True
 					raise RuntimeError
 				buf = buf + c
-				(rlist, wlist, xlist) = select.select([monitorFd], [], [], 0.0)
+				(rlist, __wlist, __xlist) = select.select([monitorFd], [], [], 0.0)
 		finally:
 			child.monitorHistory.append(buf)
 		return buf
@@ -317,14 +337,14 @@
 			while (buf[-(len(needle)):] != needle):
 				#print "[BUF]: %s" % (buf)
 				#print "[NEE]: %s" % (needle)
-				(rlist, wlist, xlist) = select.select([monitorFd], [], [], timeout)
+				(rlist, __wlist, __xlist) = select.select([monitorFd], [], [], timeout)
 				if (len(rlist) == 0):
-					log.error("Timeout getting results from monitor for vmId %d" % (child.pid))
+					log.error("Timeout getting results from monitor on FD %s for vmId %d" % (monitorFd, child.pid))
 					child.errorBit = True
 					raise RuntimeError
 				c = os.read(monitorFd, 1)
 				if (c == ""):
-					log.error("Early termination on monitor for vmId %d" % (child.pid))
+					log.error("Early termination on monitor FD %s for vmId %d" % (monitorFd, child.pid))
 					child.errorBit = True
 					raise RuntimeError
 				buf = buf + c
@@ -426,6 +446,7 @@
 			disk = instance.disks[index]
 			uri = scrubString(disk.uri)
 			imageLocal = self.dfs.getLocalHandle("images/" + uri)
+			imageLocal = self.__dereferenceLink(imageLocal)
 			thisDiskList = [ "file=%s" % imageLocal ]
 			thisDiskList.append("if=%s" % diskInterface)
 			thisDiskList.append("index=%d" % index)
@@ -466,7 +487,7 @@
 				# XXXstroucki check for capacity
 				cmd = "/sbin/lvcreate --quiet -n%s -L %dG %s" % (scratchName, scratchSize, self.scratchVg)
 				# XXXstroucki check result
-				result = subprocess.Popen(cmd.split(), executable=cmd.split()[0], stdout=subprocess.PIPE).wait()
+				__result = subprocess.Popen(cmd.split(), executable=cmd.split()[0], stdout=subprocess.PIPE).wait()
 				index += 1
 
 				thisDiskList = [ "file=/dev/%s/%s" % (self.scratchVg, scratchName) ]
@@ -500,8 +521,14 @@
 		nicModel = self.__stripSpace(nicModel)
 
 		nicString = ""
+		nicNetworks = {}
 		for i in range(0, len(instance.nics)):
+			# Don't allow more than one interface per vlan
 			nic = instance.nics[i]
+			if nicNetworks.has_key(nic.network):
+				continue
+			nicNetworks[nic.network] = True
+
 			nicString = nicString + "-net nic,macaddr=%s,model=%s,vlan=%d -net tap,ifname=%s%d.%d,vlan=%d,script=/etc/qemu-ifup.%d " % (nic.mac, nicModel, nic.network, self.ifPrefix, instance.id, i, nic.network, nic.network)
 
 		#  ACPI
@@ -593,10 +620,15 @@
 				# trying to restart the migration by running
 				# the command again (when qemu is ready to
 				# listen again) is probably not helpful
+				# XXXstroucki: failures observed:
+				# "migration failed"
+				# "Block format 'qcow' used by device '' does not support feature 'live migration'"
 				success = False
+				# see if migration can be sped up
+				res = self.__enterCommand(child, "migrate_set_speed 1g", timeout=self.migrateTimeout)
 				res = self.__enterCommand(child, "migrate -i %s" % (target), timeout=self.migrateTimeout)
 				retry = retry - 1
-				if (res.find("migration failed") == -1):
+				if (res.find("Block migration completed") != -1):
 					success = True
 					retry = 0
 					break
@@ -613,6 +645,8 @@
 
 	# extern	
 	def instantiateVm(self, instance):
+		# XXXstroucki: check capacity before instantiating
+
 		try:
 			(vmId, cmd) = self.__startVm(instance, None)
 			child = self.__getChildFromPid(vmId)
@@ -632,16 +666,23 @@
 	
 	# extern
 	def suspendVm(self, vmId, target):
-		tmpTarget = "/%s/tashi_qemu_suspend_%d_%d" % (self.scratchDir, os.getpid(), vmId)
 		# XXX: Use fifo to improve performance
-		vmId = self.__stopVm(vmId, "\"exec:gzip -c > %s\"" % (tmpTarget), True)
-		self.dfs.copyTo(tmpTarget, target)
-		os.unlink(tmpTarget)
+		# XXXstroucki: we could create a fifo on the local fs,
+		# then start a thread to copy it to dfs. But if we're
+		# reading from dfs directly on resume, why not write
+		# directly here?
+
+		#tmpTarget = "/%s/tashi_qemu_suspend_%d_%d" % (self.scratchDir, os.getpid(), vmId)
+		fn = self.dfs.getLocalHandle("%s" % target)
+		vmId = self.__stopVm(vmId, "\"exec:%s > %s\"" % (self.suspendHandler, fn), True)
+		#self.dfs.copyTo(tmpTarget, target)
+		#os.unlink(tmpTarget)
 		return vmId
 	
 	# extern
 	def resumeVmHelper(self, instance, source):
-		child = self.__getChildFromPid(instance.vmId)
+		vmId = instance.vmId
+		child = self.__getChildFromPid(vmId)
 		try:
 			self.__getPtyInfo(child, True)
 		except RuntimeError:
@@ -650,21 +691,25 @@
 			raise
 		status = "paused"
 		while ("running" not in status):
-			status = self.__enterCommand(child, "info status")
-			time.sleep(1)
+			try:
+				status = self.__enterCommand(child, "info status")
+			except RuntimeError:
+				pass
+			time.sleep(60)
+
+		self.nm.vmStateChange(vmId, None, InstanceState.Running)
 		child.instance.state = InstanceState.Running
 		self.__saveChildInfo(child)
 	
 	# extern
 	def resumeVm(self, instance, source):
 		fn = self.dfs.getLocalHandle("%s" % (source))
-		(vmId, cmd) = self.__startVm(instance, "exec:zcat %s" % (fn))
+		(vmId, cmd) = self.__startVm(instance, "exec:%s < %s" % (self.resumeHandler, fn))
 		child = self.__getChildFromPid(vmId)
 		child.cmd = cmd
 		return vmId
 
 	def __checkPortListening(self, port):
-		lc = 0
 		# XXXpipe: find whether something is listening yet on the port
 		(stdin, stdout) = os.popen2("netstat -ln | grep 0.0.0.0:%d | wc -l" % (port))
 		stdin.close()
@@ -720,7 +765,9 @@
 		try:
 			child = self.__getChildFromPid(vmId)
 		except:
-			log.error("Failed to get child info; transportCookie = %s; hostname = %s" % (str(cPickle.loads(transportCookie)), socket.hostname()))
+			# XXXstroucki: Does hostname contain the peer hostname?
+			log.error("Failed to get child info; transportCookie = %s; hostname = %s" %
+					(str(cPickle.loads(transportCookie)), _hostname))
 			raise
 		try:
 			self.__getPtyInfo(child, True)
@@ -808,6 +855,11 @@
 		threading.Thread(target=controlConsole, args=(child,consolePort)).start()
 		return "Control console listening on %s:%d" % (hostname, consolePort)
 
+	def __specificReset(self, vmId):
+		child = self.__getChildFromPid(vmId)
+		self.__enterCommand(child, "system_reset")
+		return "Sent reset signal to instance"
+
 	# extern
 	def vmmSpecificCall(self, vmId, arg):
 		arg = arg.lower()
@@ -826,12 +878,16 @@
 		elif (arg == "startconsole"):
 			return self.__specificStartConsole(vmId)
 
+		elif (arg == "reset"):
+			return self.__specificReset(vmId)
+
 		elif (arg == "list"):
 			commands = [
 				"startVnc",
 				"stopVnc",
 				"changeCdrom:<image.iso>",
 				"startConsole",
+				"reset",
 				]
 			return "\n".join(commands)
 				
@@ -842,11 +898,63 @@
 	def listVms(self):
 		return self.controlledVMs.keys()
 
+	def __processVmStats(self, vmId):
+		try:
+			f = open("/proc/%d/stat" % (vmId))
+			procData = f.read()
+			f.close()
+		except:
+			log.warning("Unable to get data for instance %d" % vmId)
+			return
+
+		ws = procData.strip().split()
+		userTicks = float(ws[13])
+		sysTicks = float(ws[14])
+		myTicks = userTicks + sysTicks
+		vsize = (int(ws[22]))/1024.0/1024.0
+		rss = (int(ws[23])*4096)/1024.0/1024.0
+		cpuSeconds = myTicks/self.ticksPerSecond
+		# XXXstroucki be more exact here?
+		last = time.time() - self.statsInterval
+		lastCpuSeconds = self.cpuStats.get(vmId, cpuSeconds)
+		if lastCpuSeconds is None:
+			lastCpuSeconds = cpuSeconds
+		cpuLoad = (cpuSeconds - lastCpuSeconds)/(time.time() - last)
+		self.cpuStats[vmId] = cpuSeconds
+		try:
+			child = self.controlledVMs[vmId]
+		except:
+			log.warning("Unable to obtain information on instance %d" % vmId)
+			return
+
+		(recvMBs, sendMBs, recvBytes, sendBytes) = (0.0, 0.0, 0.0, 0.0)
+		for i in range(0, len(child.instance.nics)):
+			netDev = "%s%d.%d" % (self.ifPrefix, child.instance.id, i)
+			(tmpRecvMBs, tmpSendMBs, tmpRecvBytes, tmpSendBytes) = self.netStats.get(netDev, (0.0, 0.0, 0.0, 0.0))
+			(recvMBs, sendMBs, recvBytes, sendBytes) = (recvMBs + tmpRecvMBs, sendMBs + tmpSendMBs, recvBytes + tmpRecvBytes, sendBytes + tmpSendBytes)
+		self.stats[vmId] = self.stats.get(vmId, {})
+		child = self.controlledVMs.get(vmId, None)
+		if (child):
+			res = self.__enterCommand(child, "info blockstats")
+			for l in res.split("\n"):
+				(device, __sep, data) = stringPartition(l, ": ")
+				if (data != ""):
+					for field in data.split(" "):
+						(label, __sep, val) = stringPartition(field, "=")
+						if (val != ""):
+							self.stats[vmId]['%s_%s_per_s' % (device, label)] = (float(val) - float(self.stats[vmId].get('%s_%s' % (device, label), 0)))/self.statsInterval
+							self.stats[vmId]['%s_%s' % (device, label)] = int(val)
+		self.stats[vmId]['cpuLoad'] = cpuLoad
+		self.stats[vmId]['rss'] = rss
+		self.stats[vmId]['vsize'] = vsize
+		self.stats[vmId]['recvMBs'] = sendMBs
+		self.stats[vmId]['sendMBs'] = recvMBs
+
 	# thread
 	def statsThread(self):
-		ticksPerSecond = float(os.sysconf('SC_CLK_TCK'))
-		netStats = {}
-		cpuStats = {}
+		self.ticksPerSecond = float(os.sysconf('SC_CLK_TCK'))
+		self.netStats = {}
+		self.cpuStats = {}
 		# XXXstroucki be more exact here?
 		last = time.time() - self.statsInterval
 		while True:
@@ -857,12 +965,12 @@
 				f.close()
 				for l in netData:
 					if (l.find(self.ifPrefix) != -1):
-						(dev, sep, ld) = stringPartition(l, ":")
+						(dev, __sep, ld) = stringPartition(l, ":")
 						dev = dev.strip()
 						ws = ld.split()
 						recvBytes = float(ws[0])
 						sendBytes = float(ws[8])
-						(recvMBs, sendMBs, lastRecvBytes, lastSendBytes) = netStats.get(dev, (0.0, 0.0, recvBytes, sendBytes))
+						(recvMBs, sendMBs, lastRecvBytes, lastSendBytes) = self.netStats.get(dev, (0.0, 0.0, recvBytes, sendBytes))
 						if (recvBytes < lastRecvBytes):
 							# We seem to have overflowed
 							# XXXstroucki How likely is this to happen?
@@ -878,44 +986,12 @@
 								lastSendBytes = lastSendBytes - 2**32
 						recvMBs = (recvBytes-lastRecvBytes)/(now-last)/1024.0/1024.0
 						sendMBs = (sendBytes-lastSendBytes)/(now-last)/1024.0/1024.0
-						netStats[dev] = (recvMBs, sendMBs, recvBytes, sendBytes)
+						self.netStats[dev] = (recvMBs, sendMBs, recvBytes, sendBytes)
+
+
 				for vmId in self.controlledVMs:
-					f = open("/proc/%d/stat" % (vmId))
-					procData = f.read()
-					f.close()
-					ws = procData.strip().split()
-					userTicks = float(ws[13])
-					sysTicks = float(ws[14])
-					myTicks = userTicks + sysTicks
-					vsize = (int(ws[22]))/1024.0/1024.0
-					rss = (int(ws[23])*4096)/1024.0/1024.0
-					cpuSeconds = myTicks/ticksPerSecond
-					lastCpuSeconds = cpuStats.get(vmId, cpuSeconds)
-					cpuLoad = (cpuSeconds - lastCpuSeconds)/(now - last)
-					cpuStats[vmId] = cpuSeconds
-					child = self.controlledVMs[vmId]
-					(recvMBs, sendMBs, recvBytes, sendBytes) = (0.0, 0.0, 0.0, 0.0)
-					for i in range(0, len(child.instance.nics)):
-						netDev = "%s%d.%d" % (self.ifPrefix, child.instance.id, i)
-						(tmpRecvMBs, tmpSendMBs, tmpRecvBytes, tmpSendBytes) = netStats.get(netDev, (0.0, 0.0, 0.0, 0.0))
-						(recvMBs, sendMBs, recvBytes, sendBytes) = (recvMBs + tmpRecvMBs, sendMBs + tmpSendMBs, recvBytes + tmpRecvBytes, sendBytes + tmpSendBytes)
-					self.stats[vmId] = self.stats.get(vmId, {})
-					child = self.controlledVMs.get(vmId, None)
-					if (child):
-						res = self.__enterCommand(child, "info blockstats")
-						for l in res.split("\n"):
-							(device, sep, data) = stringPartition(l, ": ")
-							if (data != ""):
-								for field in data.split(" "):
-									(label, sep, val) = stringPartition(field, "=")
-									if (val != ""):
-										self.stats[vmId]['%s_%s_per_s' % (device, label)] = (float(val) - float(self.stats[vmId].get('%s_%s' % (device, label), 0)))/self.statsInterval
-										self.stats[vmId]['%s_%s' % (device, label)] = int(val)
-					self.stats[vmId]['cpuLoad'] = cpuLoad
-					self.stats[vmId]['rss'] = rss
-					self.stats[vmId]['vsize'] = vsize
-					self.stats[vmId]['recvMBs'] = sendMBs
-					self.stats[vmId]['sendMBs'] = recvMBs
+					self.__processVmStats(vmId)
+
 			except:
 				log.exception("statsThread threw an exception")
 			last = now
diff --git a/src/tashi/nodemanager/vmcontrol/vmcontrolinterface.py b/src/tashi/nodemanager/vmcontrol/vmcontrolinterface.py
index cd4fde8..19447f4 100644
--- a/src/tashi/nodemanager/vmcontrol/vmcontrolinterface.py
+++ b/src/tashi/nodemanager/vmcontrol/vmcontrolinterface.py
@@ -28,8 +28,8 @@
 		self.dfs = dfs
 		self.nm = nm
 
-        def getInstances(self):
-                """Will return a dict of instances by vmId to the caller"""
+	def getInstances(self):
+		"""Will return a dict of instances by vmId to the caller"""
 		raise NotImplementedError
 	
 	def instantiateVm(self, instance):
diff --git a/src/tashi/nodemanager/vmcontrol/xenpv.py b/src/tashi/nodemanager/vmcontrol/xenpv.py
index 8bf4a29..f0e9c06 100644
--- a/src/tashi/nodemanager/vmcontrol/xenpv.py
+++ b/src/tashi/nodemanager/vmcontrol/xenpv.py
@@ -15,37 +15,35 @@
 # specific language governing permissions and limitations
 # under the License.    
 
-import os
 import os.path
 import cPickle
-import subprocess		# FIXME: should switch os.system to this
+import subprocess
 import time 
 import threading
 import logging
 import socket
 
 from vmcontrolinterface import VmControlInterface
-from tashi.rpycservices.rpyctypes import Errors, InstanceState, TashiException
+from tashi.rpycservices.rpyctypes import InstanceState
 from tashi.rpycservices.rpyctypes import Instance, Host
-from tashi import boolean, convertExceptions, ConnectionManager, version
-from tashi.util import isolatedRPC, broken
+from tashi import version
+from tashi.util import broken
 
-import tashi.parallel
-from tashi.parallel import synchronized, synchronizedmethod
+from tashi.parallel import synchronizedmethod
 
 log = logging.getLogger(__file__)
 
 # FIXME: these should throw errors on failure
 def domIdToName(domid):
 # XXXpipe: get domain name from id
-	f = os.popen("/usr/sbin/xm domname %i"%domid)
+	f = os.popen("/usr/sbin/xm domname %i"% domid)
 	name = f.readline().strip()
 	f.close()
 	return name
 
 def domNameToId(domname):
 # XXXpipe: get domain id from name
-	f = os.popen("/usr/sbin/xm domid %s"%domname)
+	f = os.popen("/usr/sbin/xm domid %s"% domname)
 	name = f.readline().strip()
 	f.close()
 	return int(name)
@@ -55,10 +53,10 @@
 	if domname[0:(len(prefix))] != prefix:
 		return None
 	try:
-		id = int(domname[len(prefix):])
+		_id = int(domname[len(prefix):])
 	except:
 		return None
-	return id
+	return _id
 
 
 # Try to do a listVms call using info from xend
@@ -80,12 +78,12 @@
 			vminfo[fields[i]] = line[i]
 		# if the name begins with our prefix, get the id,
 		# otherwise skip this record
-		id = nameToId(vminfo['name'], prefix)
-		if id == None:
+		_id = nameToId(vminfo['name'], prefix)
+		if _id == None:
 			continue
 
 		# fill in the instance object
-		instance.id = int(id)
+		instance.id = int(_id)
 		instance.vmId = int(vminfo['vmId'])
 		instance.state = InstanceState.Running
 		if(vminfo['state'][2] !='-'):
@@ -142,8 +140,8 @@
 				self.nm.vmStateChange(a.vmId, a.state, InstanceState.Exited)
 		for vmId in vmlist.keys():
 			if not self.newvms.has_key(vmId):
+				# FIXME: log this
 				print 'WARNING: found vm that should be managed, but is not'
-				# FIXME: log that
 			
 
 	def run(self):
@@ -155,7 +153,7 @@
 # a lot easier
 ########################################
 	def createXenConfig(self, vmName, 
-	                    image, macAddr, netID, memory, cores, hints, id):
+	                    image, macAddr, netID, memory, cores, hints, _id):
 		bootstr = None
 		rootconfig = None
 		diskconfig = None
@@ -168,9 +166,10 @@
 		vmType = hints.get('vmtype', self.defaultVmType)
 		print 'starting vm with type: ', vmType
 
-                disk0 = 'tap:%s' % self.disktype
+		disk0 = 'tap:%s' % self.disktype
 		diskU = 'xvda1'
 
+		# XXXstroucki: use soft config
 		try:
 			bridgeformat = self.config.get('XenPV', 'defaultBridgeFormat')
 		except:
@@ -313,10 +312,10 @@
 	@synchronizedmethod
 	def instantiateVm(self, instance):
 
-                try:
-                   disktype = self.config.get('XenPV', 'defaultDiskType')
-                except:
-                   disktype = 'vhd'
+		try:
+			disktype = self.config.get('XenPV', 'defaultDiskType')
+		except:
+			disktype = 'vhd'
 
 		# FIXME: this is NOT the right way to get out hostId
 		self.hostId = instance.hostId
@@ -346,6 +345,8 @@
 				instance.disks[i].local = newdisk
 
 
+		# XXXstroucki if ever supporting multiple nics,
+		# ensure more than one isn't put on the same network.
 		fn = self.createXenConfig(name, 
 					  instance.disks[0].local, 
 					  instance.nics[0].mac, 
@@ -358,9 +359,9 @@
 		r = os.system(cmd)
 #		self.deleteXenConfig(name)
 		if r != 0:
+			# FIXME: log/handle error
 			print 'WARNING: "%s" returned %i' % ( cmd, r)
 			raise Exception, 'WARNING: "%s" returned %i' % ( cmd, r)
-			# FIXME: log/handle error
 		vmId = domNameToId(name)
 		self.newvms[vmId] = instance
 		instance.vmId = vmId
@@ -385,7 +386,7 @@
 		instance = self.newvms[vmId]
 		instance.suspendCookie = suspendCookie
 		infof = self.dfs.open(infofile, "w")
-		name = domIdToName(vmId)
+		#name = domIdToName(vmId)
 		cPickle.dump(instance, infof)
 		infof.close()
 		
@@ -414,7 +415,7 @@
 		self.dfs.unlink(infofile)
 
 		self.dfs.copyFrom(source, tmpfile)
-		r = os.system("/usr/sbin/xm restore %s"%(tmpfile))
+		__r = os.system("/usr/sbin/xm restore %s"%(tmpfile))
 		os.unlink(tmpfile)
 		
 		# FIXME: if the vmName function changes, suspended vms will become invalid
diff --git a/src/tashi/parallel.py b/src/tashi/parallel.py
index 09fe57e..03832bd 100644
--- a/src/tashi/parallel.py
+++ b/src/tashi/parallel.py
@@ -34,7 +34,8 @@
 	def __init__(self, size=8, maxsize=0):
 		Queue.Queue.__init__(self, maxsize)
 		for i in range(size):
-			thread = threading.Thread(target=self._worker)
+			name = "parallel.ThreadPool#%s" % (i)
+			thread = threading.Thread(name=name, target=self._worker)
 			thread.setDaemon(True)
 			thread.start()
 	def _worker(self):
@@ -129,9 +130,9 @@
 			time.sleep(sleep)
 			queue.put(None)
 		tt = time.time()
-		for i in range(4):
+		for _ in range(4):
 			slowfunc()
-		for i in range(4):
+		for _ in range(4):
 			queue.get()
 		tt = time.time() - tt
 		self.assertAlmostEqual(tt, 4, 1) 
@@ -143,9 +144,9 @@
 			time.sleep(sleep)
 			queue.put(None)
 		tt = time.time()
-		for i in range(8):
+		for _ in range(8):
 			slowthreadfunc()
-		for i in range(8):
+		for _ in range(8):
 			queue.get()
 		tt = time.time() - tt
 		self.assertAlmostEqual(tt, 1, 1) 
@@ -158,9 +159,9 @@
 			time.sleep(sleep)
 			queue.put(None)
 		tt = time.time()
-		for i in range(8):
+		for _ in range(8):
 			slowpoolfunc()
-		for i in range(8):
+		for _ in range(8):
 			queue.get()
 		tt = time.time() - tt
 		self.assertAlmostEqual(tt, 2, 1) 
@@ -175,9 +176,9 @@
 				queue.put(None)
 		sc = slowclass()
 		tt = time.time()
-		for i in range(4):
+		for _ in range(4):
 			sc.beslow()
-		for i in range(4):
+		for _ in range(4):
 			queue.get()
 		tt = time.time() - tt
 		self.assertAlmostEqual(tt, 4, 1)
@@ -193,9 +194,9 @@
 				queue.put(None)
 		sc = slowclass()
 		tt = time.time()
-		for i in range(4):
+		for _ in range(4):
 			sc.beslow()
-		for i in range(4):
+		for _ in range(4):
 			queue.get()
 		tt = time.time() - tt
 		self.assertAlmostEqual(tt, 1, 1)
@@ -211,9 +212,9 @@
 				queue.put(None)
 		sc = slowclass()
 		tt = time.time()
-		for i in range(16):
+		for _ in range(16):
 			sc.beslow()
-		for i in range(16):
+		for _ in range(16):
 			queue.get()
 		tt = time.time() - tt
 		self.assertAlmostEqual(tt, 2, 1)
@@ -228,9 +229,9 @@
 		def slowthreadfunc():
 			addtoqueue()
 		tt = time.time()
-		for i in range(4):
+		for _ in range(4):
 			slowthreadfunc()
-		for i in range(4):
+		for _ in range(4):
 			queue.get()
 		tt = time.time() - tt
 		self.assertAlmostEqual(tt, 4, 1) 
@@ -254,10 +255,10 @@
 		def slowthreadfunc2():
 			atc.addtoqueue2()
 		tt = time.time()
-		for i in range(4):
+		for _ in range(4):
 			slowthreadfunc1()
 			slowthreadfunc2()
-		for i in range(8):
+		for _ in range(8):
 			queue.get()
 		tt = time.time() - tt
 		self.assertAlmostEqual(tt, 8, 1) 
@@ -279,10 +280,10 @@
 		def slowthreadfunc2():
 			atc.addtoqueue2()
 		tt = time.time()
-		for i in range(4):
+		for _ in range(4):
 			slowthreadfunc1()
 			slowthreadfunc2()
-		for i in range(8):
+		for _ in range(8):
 			queue.get()
 		tt = time.time() - tt
 		self.assertAlmostEqual(tt, 1, 1) 
diff --git a/src/tashi/rpycservices/rpycservices.py b/src/tashi/rpycservices/rpycservices.py
index c66a40e..65460e3 100644
--- a/src/tashi/rpycservices/rpycservices.py
+++ b/src/tashi/rpycservices/rpycservices.py
@@ -19,7 +19,7 @@
 from tashi.rpycservices.rpyctypes import Instance, Host, User
 import cPickle
 
-clusterManagerRPCs = ['createVm', 'shutdownVm', 'destroyVm', 'suspendVm', 'resumeVm', 'migrateVm', 'pauseVm', 'unpauseVm', 'getHosts', 'getNetworks', 'getUsers', 'getInstances', 'vmmSpecificCall', 'registerNodeManager', 'vmUpdate', 'activateVm', 'registerHost', 'getImages', 'copyImage']
+clusterManagerRPCs = ['createVm', 'shutdownVm', 'destroyVm', 'suspendVm', 'resumeVm', 'migrateVm', 'pauseVm', 'unpauseVm', 'getHosts', 'getNetworks', 'getUsers', 'getInstances', 'vmmSpecificCall', 'registerNodeManager', 'vmUpdate', 'activateVm', 'registerHost', 'getImages', 'copyImage', 'setHostState']
 nodeManagerRPCs = ['instantiateVm', 'shutdownVm', 'destroyVm', 'suspendVm', 'resumeVm', 'prepReceiveVm', 'prepSourceVm', 'migrateVm', 'receiveVm', 'pauseVm', 'unpauseVm', 'getVmInfo', 'listVms', 'vmmSpecificCall', 'getHostInfo', 'liveCheck']
 accountingRPCs = ['record']
 
@@ -65,6 +65,9 @@
 		if name not in clusterManagerRPCs and name not in nodeManagerRPCs and name not in accountingRPCs:
 			return None
 		def connectWrap(*args):
+			# XXXstroucki: why not talk directly, instead
+			# of using rpyc? We're already using pickle to move
+			# args.
 			args = cPickle.dumps(clean(args))
 			try:
 				res = getattr(self.conn.root, name)(args)
diff --git a/src/tashi/rpycservices/rpyctypes.py b/src/tashi/rpycservices/rpyctypes.py
index e4d613f..2d65928 100644
--- a/src/tashi/rpycservices/rpyctypes.py
+++ b/src/tashi/rpycservices/rpyctypes.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# XXXstroucki: shouldn't this be tashitypes.py instead?
+
 class Errors(object):
 	ConvertedException = 1
 	NoSuchInstanceId = 2
diff --git a/src/tashi/thrift/build.py b/src/tashi/thrift/build.py
deleted file mode 100755
index 42b22fa..0000000
--- a/src/tashi/thrift/build.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-import shutil
-import os
-from os import path
-import re
-
-if __name__ == '__main__':
-	if (path.exists('gen-py')):
-		print 'Removing \'gen-py\' directory...'
-		shutil.rmtree('gen-py')
-		
-	if (path.exists('../services')):
-		print 'Removing \'../services\' directory...'
-		shutil.rmtree('../services')
-	
-	if (path.exists('../messaging/messagingthrift')):
-		print 'Removing \'../messaging/messagingthrift\' directory...'
-		shutil.rmtree('../messaging/messagingthrift')
-	
-	print 'Generating Python code for \'services.thrift\'...'
-	os.system('thrift --gen py:new_style services.thrift')
-	
-	print 'Copying generated code to \'tashi.services\' package...'
-	shutil.copytree('gen-py/services', '../services')
-	
-	print 'Generatign Python code for \'messagingthrift\'...'
-	os.system('rm -rf gen-py')
-	os.system('thrift --gen py messagingthrift.thrift')
-	
-	print 'Copying generated code to \'tashi.messaging.messagingthrift\' package...'
-	shutil.copytree(os.path.join('gen-py', 'messagingthrift'),
-			os.path.join('..', 'messaging', 'messagingthrift'))
-
-	print 'Generating Python code for \'layoutlocality.thrift\'...'
-	os.system('thrift --gen py:new_style layoutlocality.thrift')
- 
-	print 'Copying generated code to \'tashi.services\' package...'
-	shutil.copytree('gen-py/layoutlocality', '../services/layoutlocality')
diff --git a/src/tashi/thrift/layoutlocality.thrift b/src/tashi/thrift/layoutlocality.thrift
deleted file mode 100644
index e14910c..0000000
--- a/src/tashi/thrift/layoutlocality.thrift
+++ /dev/null
@@ -1,42 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-struct BlockLocation {
-       list<string> hosts,           // hostnames of data nodes
-       list<i32> ports,              // ports for data nodes
-       list<string> names,           // hostname:port of data nodes
-       i64 blocknum,
-       i64 offset,
-       i64 length
-}
-
-struct Pathname {
-       string pathname
-}
-
-exception FileNotFoundException {
-       string message
-}
-
-service layoutservice {
-       list <BlockLocation> getFileBlockLocations(1:Pathname path, 2:i64 offset, 3:i64 length)
-                            throws (1:FileNotFoundException ouch),
-}
-
-service localityservice {
-       list <list<double>> getHopCountMatrix(1:list<string> sourceHosts, 2:list<string> destHosts),
-}
diff --git a/src/tashi/thrift/messagingthrift.thrift b/src/tashi/thrift/messagingthrift.thrift
deleted file mode 100644
index 401e9a1..0000000
--- a/src/tashi/thrift/messagingthrift.thrift
+++ /dev/null
@@ -1,36 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-typedef map<string, string> strstrmap
-
-service SubscriberThrift{
-  # the async keyword seems to slow things down in the simple
-  # tests.  However, with non-trivial subscribers it will be 
-  # necessary to use async here.
-  async void publish(strstrmap message)
-  async void publishList(list<strstrmap> messages)
-}
-
-service MessageBrokerThrift{
-  void log(strstrmap message),
-  void addSubscriber(string host, i16 port)
-  void removeSubscriber(string host, i16 port)
-  async void publish(strstrmap message)
-  async void publishList(list<strstrmap> messages)
-
-}
-
diff --git a/src/tashi/thrift/services.thrift b/src/tashi/thrift/services.thrift
deleted file mode 100644
index fa29c30..0000000
--- a/src/tashi/thrift/services.thrift
+++ /dev/null
@@ -1,166 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.    
-
-enum Errors {
-	ConvertedException = 1,
-	NoSuchInstanceId = 2,
-	NoSuchVmId = 3,
-	IncorrectVmState = 4,
-	NoSuchHost = 5,
-	NoSuchHostId = 6,
-	InstanceIdAlreadyExists = 7,
-	HostNameMismatch = 8,
-	HostNotUp = 9,
-	HostStateError = 10,
-	InvalidInstance = 11,
-	UnableToResume = 12,
-	UnableToSuspend = 13,
-}
-
-enum InstanceState {
-	Pending = 1,		// Job submitted
-	Activating = 2,		// activateVm has been called, but instantiateVm hasn't finished yet
-	Running = 3,		// Normal state
-	Pausing = 4,		// Beginning pause sequence
-	Paused = 5		// Paused
-	Unpausing = 6,		// Beginning unpause sequence
-	Suspending = 7,		// Beginning suspend sequence
-	Resuming = 8,		// Beginning resume sequence
-	MigratePrep = 9,	// Migrate state #1
-	MigrateTrans = 10,	// Migrate state #2
-	ShuttingDown = 11,	// Beginning exit sequence
-	Destroying = 12,	// Beginning exit sequence
-	Orphaned = 13,		// Host is missing
-	Held = 14,		// Activation failed
-	Exited = 15,		// VM has exited
-	Suspended = 16,		// VM is suspended
-}
-
-enum HostState {
-	Normal = 1,
-	Drained = 2,
-	VersionMismatch = 3
-}
-
-exception TashiException {
-	1: Errors errno
-	2: string msg
-}
-
-struct Host {
-	1:i32 id,
-	2:string name,
-	3:bool up,
-	4:bool decayed,
-	5:HostState state,
-	6:i32 memory,
-	7:i32 cores,
-	8:string version
-	// Other properties (disk?)
-}
-
-struct Network {
-	1:i32 id
-	2:string name
-}
-
-struct User {
-	1:i32 id,
-	2:string name
-}
-
-struct DiskConfiguration {
-	1:string uri,
-	2:bool persistent
-}
-
-struct NetworkConfiguration {
-	1:i32 network,
-	2:string mac,
-	3:string ip
-}
-
-struct Instance {
-	1:i32 id,
-	2:i32 vmId,
-	3:i32 hostId,
-	4:bool decayed,
-	5:InstanceState state,
-	6:i32 userId,
-	7:string name, // User specified
-	8:i32 cores, // User specified
-	9:i32 memory, // User specified
-	10:list<DiskConfiguration> disks, // User specified
-	11:list<NetworkConfiguration> nics // User specified
-	12:map<string, string> hints // User specified
-}
-
-service clustermanagerservice {
-	// Client-facing RPCs
-	Instance createVm(1:Instance instance) throws (1:TashiException e)
-	
-	void shutdownVm(1:i32 instanceId) throws (1:TashiException e)
-	void destroyVm(1:i32 instanceId) throws (1:TashiException e)
-	
-	void suspendVm(1:i32 instanceId) throws (1:TashiException e)
-	Instance resumeVm(1:i32 instanceId) throws (1:TashiException e)
-	
-	void migrateVm(1:i32 instanceId, 2:i32 targetHostId) throws (1:TashiException e)
-	
-	void pauseVm(1:i32 instanceId) throws (1:TashiException e)
-	void unpauseVm(1:i32 instanceId) throws (1:TashiException e)
-	
-	list<Host> getHosts() throws (1:TashiException e)
-	list<Network> getNetworks() throws (1:TashiException e)
-	list<User> getUsers() throws (1:TashiException e)
-
-	list<Instance> getInstances() throws (1:TashiException e)
-	
-	string vmmSpecificCall(1:i32 instanceId, 2:string arg) throws (1:TashiException e)
-	
-	// NodeManager-facing RPCs
-	i32 registerNodeManager(1:Host host, 2:list<Instance> instances) throws (1:TashiException e)
-	void vmUpdate(1:i32 instanceId, 2:Instance instance, 3:InstanceState old) throws (1:TashiException e)
-
-	// Agent-facing RPCs
-	void activateVm(1:i32 instanceId, 2:Host host) throws (1:TashiException e)
-}
-
-service nodemanagerservice {
-	// ClusterManager-facing RPCs
-	i32 instantiateVm(1:Instance instance) throws (1:TashiException e)
-	
-	void shutdownVm(1:i32 vmId) throws (1:TashiException e)
-	void destroyVm(1:i32 vmId) throws (1:TashiException e)
-	
-	void suspendVm(1:i32 vmId, 2:string destination) throws (1:TashiException e)
-	i32 resumeVm(1:Instance instance, 2:string source) throws (1:TashiException e)
-	
-	string prepReceiveVm(1:Instance instance, 2:Host source) throws (1:TashiException e)
-	void migrateVm(1:i32 vmId, 2:Host target, 3:string transportCookie) throws (1:TashiException e)
-	void receiveVm(1:Instance instance, 2:string transportCookie) throws (1:TashiException e)
-	
-	void pauseVm(1:i32 vmId) throws (1:TashiException e)
-	void unpauseVm(1:i32 vmId) throws (1:TashiException e)
-
-	Instance getVmInfo(1:i32 vmId) throws (1:TashiException e)
-	list<i32> listVms() throws (1:TashiException e)
-	
-	string vmmSpecificCall(1:i32 vmId, 2:string arg) throws (1:TashiException e)
-	
-	// Host getHostInfo() throws (1:TashiException e)
-}
diff --git a/src/tashi/util.py b/src/tashi/util.py
index 4eb0981..c2bd6b8 100644
--- a/src/tashi/util.py
+++ b/src/tashi/util.py
@@ -15,6 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.    
 
+#XXXstroucki: for compatibility with python 2.5
+from __future__ import with_statement
+
 import ConfigParser
 #import cPickle
 import os
@@ -22,14 +25,15 @@
 import signal
 #import struct
 import sys
-import threading
+#import threading
 import time
 import traceback
 import types
-import getpass
+import functools
 
 from tashi.rpycservices import rpycservices
 from tashi.rpycservices.rpyctypes import TashiException, Errors, InstanceState, HostState
+from tashi.utils.timeout import *
 
 def broken(oldFunc):
 	"""Decorator that is used to mark a function as temporarily broken"""
@@ -85,14 +89,14 @@
 		return res
 	return newFunc
 
-def editAndContinue(file, mod, name):
+def editAndContinue(filespec, mod, name):
 	def wrapper(oldFunc):
 		persist = {}
 		persist['lastMod'] = time.time()
 		persist['oldFunc'] = oldFunc
 		persist['func'] = oldFunc
 		def newFunc(*args, **kw):
-			modTime = os.stat(file)[8]
+			modTime = os.stat(filespec)[8]
 			if (modTime > persist['lastMod']):
 				persist['lastMod'] = modTime
 				space = {}
@@ -148,14 +152,6 @@
 	def __delattr__(self, name):
 		return delattr(self.__dict__['__real_obj__'], name)
 
-def isolatedRPC(client, method, *args, **kw):
-	"""Opens and closes a thrift transport for a single RPC call"""
-	if (not client._iprot.trans.isOpen()):
-		client._iprot.trans.open()
-	res = getattr(client, method)(*args, **kw)
-	client._iprot.trans.close()
-	return res
-
 def signalHandler(signalNumber):
 	"""Used to denote a particular function as the signal handler for a 
 	   specific signal"""
@@ -170,6 +166,13 @@
 		return value
 	if (type(value) == types.IntType):
 		return (value != 0)
+
+	# See if it can be expressed as a string
+	try:
+		value = str(value)
+	except:
+		raise ValueError
+
 	lowercaseValue = value.lower()
 	if lowercaseValue in ['yes', 'true', '1']:
 		return True
@@ -186,13 +189,14 @@
 		cmd = "import %s\n" % (package)
 	else:
 		cmd = ""
-	cmd += "obj = %s(*args)\n" % (className)
+	cmd += "_obj = %s(*args)\n" % (className)
 	exec cmd in locals()
-	return obj
+	# XXXstroucki: this is correct, even though pydev complains
+	return _obj
 
 def convertExceptions(oldFunc):
 	"""This converts any exception type into a TashiException so that 
-	   it can be passed over a Thrift RPC"""
+	   it can be passed over an RPC"""
 	def newFunc(*args, **kw):
 		try:
 			return oldFunc(*args, **kw)
@@ -218,31 +222,46 @@
 		raise Exception("No config file could be found: %s" % (str(allLocations)))
 	return (config, configFiles)
 
+def __getShellFn():
+	try:
+		from IPython.Shell import IPShellEmbed
+		return (1, IPShellEmbed)
+	except ImportError:
+		import IPython
+		return (2, IPython.embed)
+
 def debugConsole(globalDict):
 	"""A debugging console that optionally uses pysh"""
 	def realDebugConsole(globalDict):
 		try :
 			import atexit
-			from IPython.Shell import IPShellEmbed
+			(calltype, shellfn) = __getShellFn()
 			def resetConsole():
 # XXXpipe: make input window sane
-				(stdin, stdout) = os.popen2("reset")
+				(__stdin, stdout) = os.popen2("reset")
 				stdout.read()
-			dbgshell = IPShellEmbed()
 			atexit.register(resetConsole)
-			dbgshell(local_ns=globalDict, global_ns=globalDict)
-		except Exception:
+			if calltype == 1:
+				dbgshell=shellfn(user_ns=globalDict)
+				dbgshell()
+			elif calltype == 2:
+				dbgshell=shellfn
+				dbgshell(user_ns=globalDict)
+		except Exception, e:
 			CONSOLE_TEXT=">>> "
-			input = " " 
-			while (input != ""):
+			inputline = " " 
+			while (inputline != ""):
 				sys.stdout.write(CONSOLE_TEXT)
-				input = sys.stdin.readline()
+				inputline = sys.stdin.readline()
 				try:
-					exec(input) in globalDict
+					exec(inputline) in globalDict
 				except Exception, e:
 					sys.stdout.write(str(e) + "\n")
+
+		os._exit(0)
+
 	if (os.getenv("DEBUG", "0") == "1"):
-		threading.Thread(target=lambda: realDebugConsole(globalDict)).start()
+		threading.Thread(name="debugConsole", target=lambda: realDebugConsole(globalDict)).start()
 
 def stringPartition(s, field):
 	index = s.find(field)
@@ -260,6 +279,82 @@
 			ns = ns + c
 	return ns
 
+class Connection:
+
+	def __init__(self, host, port, authAndEncrypt=False, credentials=None):
+		self.host = host
+		self.port = port
+		self.credentials = credentials
+		self.authAndEncrypt = authAndEncrypt
+		self.connection = None
+		# XXXstroucki: something may still depend on this (client)
+		self.username = None
+		if credentials is not None:
+			self.username = credentials[0]
+
+	def __connect(self):
+		# create new connection
+
+		username = None
+		password = None
+
+		if self.credentials is not None:
+			username = self.credentials[0]
+			password = self.credentials[1]
+
+		if self.authAndEncrypt:
+			if username is None:
+				username = raw_input("Enter Username:")
+
+			if password is None:
+				password = raw_input("Enter Password:")
+
+			if self.credentials != (username, password):
+				self.credentials = (username, password)
+
+			client = rpycservices.client(self.host, self.port, username=username, password=password)
+		else:
+			client = rpycservices.client(self.host, self.port)
+
+		self.connection = client
+
+
+	def __do(self, name, *args, **kwargs):
+		if self.connection is None:
+			self.__connect()
+
+		threadname = "%s:%s" % (self.host, self.port)
+		# XXXstroucki: Use 10 second timeout, ok?
+		# XXXstroucki: does this fn touch the network?
+		t = TimeoutThread(getattr, (self.connection, name, None))
+		threading.Thread(name=threadname, target=t.run).start()
+
+		try:
+			remotefn = t.wait(timeout=10)
+		except TimeoutException:
+			self.connection = None
+			raise
+
+		try:
+			if callable(remotefn):
+				# XXXstroucki: Use 10 second timeout, ok?
+				t = TimeoutThread(remotefn, args, kwargs)
+				threading.Thread(name=threadname, target=t.run).start()
+				returns = t.wait(timeout=10.0)
+
+			else:
+				raise TashiException({'msg':'%s not callable' % name})
+
+		except:
+			self.connection = None
+			raise
+
+		return returns
+
+	def __getattr__(self, name):
+		return functools.partial(self.__do, name)
+
+
 def createClient(config):
 	cfgHost = config.get('Client', 'clusterManagerHost')
 	cfgPort = config.get('Client', 'clusterManagerPort')
@@ -273,14 +368,12 @@
 	authAndEncrypt = boolean(config.get('Security', 'authAndEncrypt'))
 	if authAndEncrypt:
 		username = config.get('AccessClusterManager', 'username')
-		if username == '':
-			username = raw_input('Enter Username:')
 		password = config.get('AccessClusterManager', 'password')
-		if password == '':
-			password = getpass.getpass('Enter Password:')
-		client = rpycservices.client(host, port, username=username, password=password)
+		client = Connection(host, port, authAndEncrypt, (username, password))
+
 	else:
-		client = rpycservices.client(host, port)
+		client = Connection(host, port)
+
 	return client
 
 def enumToStringDict(cls):
diff --git a/src/utils/Makefile b/src/tashi/utils/__init__.py
similarity index 92%
rename from src/utils/Makefile
rename to src/tashi/utils/__init__.py
index aea56ee..c33c252 100644
--- a/src/utils/Makefile
+++ b/src/tashi/utils/__init__.py
@@ -15,10 +15,3 @@
 # specific language governing permissions and limitations
 # under the License.    
 
-all: nmd
-
-clean:
-	rm -f ./nmd
-
-nmd: nmd.c
-	${CC} $< -o $@
diff --git a/src/tashi/utils/config.py b/src/tashi/utils/config.py
new file mode 100644
index 0000000..0843b1a
--- /dev/null
+++ b/src/tashi/utils/config.py
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Wrapper class for python configuration
+
+class Config:
+	def __init__(self, additionalNames=[], additionalFiles=[]):
+		from tashi.util import getConfig
+		(config, files) = getConfig(additionalNames = additionalNames, additionalFiles = additionalFiles)
+		self.config = config
+		self.files = files
+
+	def getFiles(self):
+		return self.files
+
+	def get(self, section, option, default = None):
+		# soft version of self.config.get. Returns configured
+		# value or default value (if specified) or None.
+		import ConfigParser
+
+		value = default
+		try:
+			value = self.config.get(section, option)
+		except ConfigParser.NoOptionError:
+			pass
+
+		return value
+
+	def getint(self, section, option, default = None):
+		# soft version of self.config.getint. Returns configured
+		# value coerced to int, or default value (if specified)
+		# or None.
+		import ConfigParser
+
+		value = default
+		try:
+			value = self.config.get(section, option)
+			value = int(value)
+		except ConfigParser.NoOptionError:
+			pass
+
+		return value
+
+	def items(self, *args, **kwargs):
+		return self.config.items(*args, **kwargs)
diff --git a/src/tashi/agents/pseudoDes.py b/src/tashi/utils/pseudoDes.py
similarity index 96%
rename from src/tashi/agents/pseudoDes.py
rename to src/tashi/utils/pseudoDes.py
index 3d3bf0b..6d7a800 100755
--- a/src/tashi/agents/pseudoDes.py
+++ b/src/tashi/utils/pseudoDes.py
@@ -1,5 +1,4 @@
-#! /usr/bin/env python
-
+#!/usr/bin/python
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -17,6 +16,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# XXXstroucki: why pseudo?
+
 values = {1:(0xcba4e531, 0x12be4590),
           2:(0x537158eb, 0xab54ce58),
           3:(0x145cdc3c, 0x6954c7a6),
@@ -26,7 +27,7 @@
 	short = short & 0xffff
 	char = char & 0xff
 	value = short ^ (char << 8)
-	for i in range(0, 8):
+	for __i in range(0, 8):
 		if value & 0x8000:
 			value = (value << 1) ^ 4129
 		else:
diff --git a/src/tashi/utils/timeout.py b/src/tashi/utils/timeout.py
new file mode 100644
index 0000000..cc9d850
--- /dev/null
+++ b/src/tashi/utils/timeout.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# module to provide a thread timeout monitor
+# by Alexey Tumanov and Michael Stroucken
+
+import threading
+
+class TimeoutException(Exception):
+	def __init__(self, string):
+		Exception.__init__(self,'Timeout: %s' % string)
+
+class TimeoutThread:
+	def __init__(self, function, args = (), kwargs = {}):
+		self.cv	   = threading.Condition()
+		self.function = function
+		self.args = args
+		self.kwargs = kwargs
+		self.finished = False
+		self.rval	 = None
+
+	def wait(self, timeout=None):
+		self.cv.acquire()
+		if not self.finished:
+			if timeout:
+				self.cv.wait(timeout)
+			else:
+				self.cv.wait()
+		finished = self.finished
+		rval	 = self.rval
+		self.cv.release()
+
+		#
+		# Raise an exception if a timeout occurred.
+		#
+		if finished:
+			return rval
+		else: # NOTE: not finished implies the wait timed out; requires timeout to be set.
+			raise TimeoutException("function %s timed out after %f seconds" % (str(self.function), timeout))
+
+	def run(self):
+		try:
+			rval = self.function(*self.args, **self.kwargs)
+		except Exception, e:
+			rval = e
+
+		self.cv.acquire()
+		self.finished = True
+		self.rval	 = rval
+		self.cv.notify()
+		self.cv.release()
+
diff --git a/src/utils/getLocality.py b/src/utils/getLocality.py
deleted file mode 100755
index 49ecb11..0000000
--- a/src/utils/getLocality.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/python
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import sys
-import os
-from os import system
-
-import tashi.services.layoutlocality.localityservice as localityservice
-
-from thrift import Thrift
-from thrift.transport import TSocket
-from thrift.transport import TTransport
-from thrift.protocol import TBinaryProtocol
-
-from tashi.util import getConfig
-
-(config, configFiles) = getConfig(["Client"])
-host = config.get('LocalityService', 'host')
-port = int(config.get('LocalityService', 'port'))
-
-socket = TSocket.TSocket(host, port)
-transport = TTransport.TBufferedTransport(socket)
-protocol = TBinaryProtocol.TBinaryProtocol(transport)
-client = localityservice.Client(protocol)
-transport.open()
-
-while True:
-	line1 = "\n"
-	line2 = "\n"
-	while line1 != "":
-		line1 = sys.stdin.readline()
-		if line1 == "":
-			sys.exit(0)
-		if line1 != "\n":
-			break
-	line1 = line1.strip()
-	while line2 != "":
-		line2 = sys.stdin.readline()
-		if line2 == "":
-			sys.exit(0)
-		if line2 != "\n":
-			break
-	line2 = line2.strip()
-
-	sources = line1.split(" ")
-	destinations = line2.split(" ")
-
-	mat = client.getHopCountMatrix(sources, destinations)
-	for r in mat:
-		for c in r:
-			print '%f\t'%c,
-		print '\n',
-	print '\n',
diff --git a/src/utils/nmd.c b/src/utils/nmd.c
deleted file mode 100644
index effa1d2..0000000
--- a/src/utils/nmd.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.    
- */
-
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <dirent.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <assert.h>
-
-#define SLEEP_INTERVAL 10
-#define TASHI_PATH "/usr/local/tashi/"
-#define LOG_FILE "/var/log/nodemanager.log"
-
-/* This function changes (on Linux!) its oom scoring, to make it
- * unattractive to kill
- */
-
-void make_invincible()
-{
-	int oom_adj_fd;
-	int r;
-
-	oom_adj_fd = open("/proc/self/oom_adj", O_WRONLY);
-	assert(oom_adj_fd != -1);
-	r = write(oom_adj_fd, "-17\n", 4);
-	assert(r == 4);
-	close(oom_adj_fd);
-
-}
-
-/* This function resets (on Linux!) its oom scoring to default
- */
-void make_vulnerable()
-{
-	int oom_adj_fd;
-	int r;
-
-	oom_adj_fd = open("/proc/self/oom_adj", O_WRONLY);
-	assert(oom_adj_fd != -1);
-	r = write(oom_adj_fd, "0\n", 2);
-	assert(r == 2);
-	close(oom_adj_fd);
-}
-
-int main(int argc, char **argv)
-{
-	char* env[2];
-	int status;
-	DIR* d;
-	int pid;
-	int lfd;
-	int foreground=0;
-
-/* If first argument is "-f", run in foreground */
-	if ((argc > 1) && (strncmp(argv[1], "-f", 3)==0)) {
-		foreground=1;
-	}
-/* If not running in foreground, fork off and exit the parent.
- * The child closes its default file descriptors.
- */
-	if (!foreground) {
-		pid = fork();
-		if (pid != 0) {
-			exit(0);
-		}
-		close(0);
-		close(1);
-		close(2);
-	}
-/* Adjust OOM preference */
-	make_invincible();
-/* Configure environment of children */
-	env[0] = "PYTHONPATH="TASHI_PATH"/src/";
-	env[1] = NULL;
-	while (1) {
-		pid = fork();
-		if (pid == 0) {
-			/* child */
-			/* nodemanagers are vulnerable. Not the supervisor. */
-			make_vulnerable();
-			if (!foreground) {
-				/* If not running fg, open log file */
-				lfd = open(LOG_FILE, O_WRONLY|O_APPEND|O_CREAT);
-				if (lfd < 0) {
-					/* If this failed, open something? */
-					lfd = open("/dev/null", O_WRONLY);
-				}
-				/* Make this fd stdout and stderr */
-				dup2(lfd, 2);
-				dup2(lfd, 1);
-				/* close stdin */
-				close(0);
-			}
-			chdir(TASHI_PATH);
-			/* start node manager with python environment */
-			execle("./bin/nodemanager.py", "./bin/nodemanager.py", NULL, env);
-			exit(-1);
-		}
-		/* sleep before checking for child's status */
-		sleep(SLEEP_INTERVAL);
-		/* catch child exiting and go through loop again */
-		waitpid(pid, &status, 0);
-	}	 /* while (1) */
-}
diff --git a/src/utils/nmd.py b/src/utils/nmd.py
index e74a82f..118aee8 100755
--- a/src/utils/nmd.py
+++ b/src/utils/nmd.py
@@ -16,9 +16,10 @@
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
- * under the License.    
+ * under the License.	 
  */
 """
+# XXXstroucki: why not use something like supervise instead?
 
 import os
 import sys
@@ -36,81 +37,81 @@
  */
 """
 def make_invincible():
-   # dependent on linux
-   try:
-      oom_adj_fd = os.open("/proc/self/oom_adj", os.O_WRONLY)
-   except IOError:
-      pass
-   else:
-      os.write(oom_adj_fd, "-17\n")
-      os.close(oom_adj_fd)
+	# dependent on linux
+	try:
+		oom_adj_fd = os.open("/proc/self/oom_adj", os.O_WRONLY)
+	except IOError:
+		pass
+	else:
+		os.write(oom_adj_fd, "-17\n")
+		os.close(oom_adj_fd)
 
 """
 /* This function resets (on Linux!) its oom scoring to default
  */
 """
 def make_vulnerable():
-   # dependent on linux
-   try:
-      oom_adj_fd = os.open("/proc/self/oom_adj", os.O_WRONLY)
-   except IOError:
-      pass
-   else:
-      os.write(oom_adj_fd, "0\n")
-      os.close(oom_adj_fd)
+	# dependent on linux
+	try:
+		oom_adj_fd = os.open("/proc/self/oom_adj", os.O_WRONLY)
+	except IOError:
+		pass
+	else:
+		os.write(oom_adj_fd, "0\n")
+		os.close(oom_adj_fd)
 
 def main(argv=None):
-   if argv is None:
-      argv = sys.argv
-   try:
-      opts, args = getopt.getopt(argv[1:], "f", ["foreground"])
-   except getopt.GetoptError, err:
-      # print help information and exit:
-      print str(err) # will print something like "option -a not recognized"
-      # usage()
-      return 2
-   foreground = False
-   for o, a in opts:
-      if o in ("-f", "--foreground"):
-         foreground = True
-      else:
-         assert False, "unhandled option"
-   if foreground == False:
-      pid = os.fork();
-      if pid != 0:
-         os._exit(0)
-      os.close(0)
-      os.close(1)
-      os.close(2)
+	if argv is None:
+		argv = sys.argv
+	try:
+		opts, args = getopt.getopt(argv[1:], "f", ["foreground"])
+	except getopt.GetoptError, err:
+		# print help information and exit:
+		print str(err) # will print something like "option -a not recognized"
+		# usage()
+		return 2
+	foreground = False
+	for o, a in opts:
+		if o in ("-f", "--foreground"):
+			foreground = True
+		else:
+			assert False, "unhandled option"
+	if foreground == False:
+		pid = os.fork();
+		if pid != 0:
+			os._exit(0)
+		os.close(0)
+		os.close(1)
+		os.close(2)
 
-   # adjust oom preference
-   make_invincible()
+	# adjust oom preference
+	make_invincible()
 
-   # configure environment of children
-   env = {"PYTHONPATH":TASHI_PATH+"/src"}
-   while True:
-      pid = os.fork();
-      if pid == 0:
-         # child
-         # nodemanagers are vulnerable, not the supervisor
-         make_vulnerable()
-         if foreground == False:
-            try:
-               lfd = os.open(LOG_FILE, os.O_APPEND|os.O_CREAT|os.O_WRONLY)
-            except IOError:
-               lfd = os.open("/dev/null", os.O_WRONLY)
-            # make this fd stdout and stderr
-            os.dup2(lfd, 1)
-            os.dup2(lfd, 2)
-            # close stdin
-            os.close(0)
-         os.chdir(TASHI_PATH)
-         os.execle("./bin/nodemanager.py", "./bin/nodemanager.py", env)
-         os._exit(-1)
-      # sleep before checking child status
-      time.sleep(SLEEP_INTERVAL)
-      os.waitpid(pid, 0)
-   return 0
+	# configure environment of children
+	env = {"PYTHONPATH":TASHI_PATH+"/src"}
+	while True:
+		pid = os.fork();
+		if pid == 0:
+			# child
+			# nodemanagers are vulnerable, not the supervisor
+			make_vulnerable()
+			if foreground == False:
+				try:
+					lfd = os.open(LOG_FILE, os.O_APPEND|os.O_CREAT|os.O_WRONLY)
+				except IOError:
+					lfd = os.open("/dev/null", os.O_WRONLY)
+				# make this fd stdout and stderr
+				os.dup2(lfd, 1)
+				os.dup2(lfd, 2)
+				# close stdin
+				os.close(0)
+			os.chdir(TASHI_PATH)
+			os.execle("./bin/nodemanager.py", "./bin/nodemanager.py", env)
+			os._exit(-1)
+		# sleep before checking child status
+		time.sleep(SLEEP_INTERVAL)
+		os.waitpid(pid, 0)
+	return 0
 
 if __name__ == "__main__":
-   sys.exit(main())
+	sys.exit(main())
diff --git a/src/zoni/agents/dhcpdns.py b/src/zoni/agents/dhcpdns.py
index ece9e29..48cc2b7 100644
--- a/src/zoni/agents/dhcpdns.py
+++ b/src/zoni/agents/dhcpdns.py
@@ -27,6 +27,7 @@
 #from instancehook import InstanceHook
 #from tashi.services.ttypes import Instance, NetworkConfiguration
 #from tashi import boolean
+from tashi.rpycservices.rpyctypes import Instance
 
 
 class DhcpDns():
diff --git a/src/zoni/bootstrap/bootstrapinterface.py b/src/zoni/bootstrap/bootstrapinterface.py
index 54b2ef8..35de879 100644
--- a/src/zoni/bootstrap/bootstrapinterface.py
+++ b/src/zoni/bootstrap/bootstrapinterface.py
@@ -18,10 +18,6 @@
 #  $Id$
 #
 
-import sys
-import os 
-import optparse
-
 
 class BootStrapInterface(object):
 	"""  Interface description for booting
diff --git a/src/zoni/bootstrap/pxe.py b/src/zoni/bootstrap/pxe.py
index 34c13f4..37d8c25 100644
--- a/src/zoni/bootstrap/pxe.py
+++ b/src/zoni/bootstrap/pxe.py
@@ -22,7 +22,6 @@
 import sys
 import string
 import datetime
-import subprocess
 import MySQLdb
 import traceback
 import logging
diff --git a/src/zoni/client/zoni-cli.py b/src/zoni/client/zoni-cli.py
index 060ec16..9ddf52f 100755
--- a/src/zoni/client/zoni-cli.py
+++ b/src/zoni/client/zoni-cli.py
@@ -20,43 +20,36 @@
 #
 #  $Id$
 #
-import os
-import sys
+
 import optparse
 import socket
 import logging.config
 import getpass
+import os
+import sys
 import re
+import string
 import subprocess
 
-
-
 #from zoni import *
 #from zoni.data.resourcequerysql import ResourceQuerySql
-import zoni
-from zoni.data.resourcequerysql import *
+#import zoni
+#from zoni.data.resourcequerysql import *
 
-from zoni.data.usermanagementinterface import UserManagementInterface
-from zoni.data.usermanagementinterface import UserManagementInterface
-
-from zoni.bootstrap.bootstrapinterface import BootStrapInterface
 from zoni.bootstrap.pxe import Pxe
 
-from zoni.hardware.systemmanagementinterface import SystemManagementInterface
 from zoni.hardware.ipmi import Ipmi
-from zoni.hardware.hwswitchinterface import HwSwitchInterface
 from zoni.hardware.dellswitch import HwDellSwitch
 from zoni.hardware.raritanpdu import raritanDominionPx
 from zoni.hardware.delldrac import dellDrac
+import zoni.hardware.systemmanagement
+from zoni.data import usermanagement
 from zoni.agents.dhcpdns import DhcpDns
 
-from zoni.hardware.systemmanagement import SystemManagement
+from zoni.extra.util import validIp, validMac 
+from zoni.version import version, revision
 
-
-from zoni.extra.util import * 
-from zoni.version import *
-
-from tashi.util import instantiateImplementation, signalHandler
+from tashi.util import instantiateImplementation, getConfig
 #import zoni.data.usermanagement 
 #from usermanagement import UserManagement
 
@@ -74,7 +67,7 @@
 	(configs, configFiles) = getConfig()
 
 	logging.config.fileConfig(configFiles)
-	log = logging.getLogger(os.path.basename(__file__))
+	#log = logging.getLogger(os.path.basename(__file__))
 	#logit(configs['logFile'], "Starting Zoni client")
 	#logit(configs['logFile'], "Loading config file")
 
@@ -359,11 +352,11 @@
 	if (options.nodeName):
 		cmdargs["sys_id"] = options.nodeName
 
-	if 	(options.numCores or options.clockSpeed or options.numMemory or options.numProcs or options.cpuFlags) and not options.showResources:
-			usage = "MISSING OPTION: When specifying hardware parameters, you need the -s or --showResources switch"
-			print usage
-			parser.print_help()	
-			exit()
+	if (options.numCores or options.clockSpeed or options.numMemory or options.numProcs or options.cpuFlags) and not options.showResources:
+		usage = "MISSING OPTION: When specifying hardware parameters, you need the -s or --showResources switch"
+		print usage
+		parser.print_help()	
+		exit()
 
 	if options.getResources:
 		print "ALL resources"
@@ -464,7 +457,7 @@
 			userId = usermgt.getUserId(options.userName)
 
 		if userId:
-			reservationId = reservation.createReservation(userId, options.reservationDuration, options.myNotes + " " + str(string.join(args[0:len(args)])))
+			__reservationId = reservation.createReservation(userId, options.reservationDuration, options.myNotes + " " + str(string.join(args[0:len(args)])))
 
 		else:
 			print "user doesn't exist"
@@ -771,7 +764,7 @@
 				try:
 					socket.gethostbyname(hostName)
 					sys.stdout.write("[Success]\n")
-				except Exception, e:
+				except Exception:
 					sys.stdout.write("[Fail]\n")
 			else:
 				mesg = "ERROR:  Malformed IP Address\n"
@@ -794,7 +787,7 @@
 				try:
 					socket.gethostbyname(hostName)
 					sys.stdout.write("[Fail]\n")
-				except Exception, e:
+				except Exception:
 					sys.stdout.write("[Success]\n")
 			if options.removeDhcp:	
 				dhcpdns.removeDhcp(hostName)
diff --git a/src/zoni/data/infostore.py b/src/zoni/data/infostore.py
index bd389c2..7651b43 100644
--- a/src/zoni/data/infostore.py
+++ b/src/zoni/data/infostore.py
@@ -18,10 +18,6 @@
 #  $Id$
 #
 
-import sys
-import os 
-import optparse
-
 
 class InfoStore (object):
 	"""  Interface description for query system resources
diff --git a/src/zoni/data/reservation.py b/src/zoni/data/reservation.py
index 3c53ec1..8a43fee 100644
--- a/src/zoni/data/reservation.py
+++ b/src/zoni/data/reservation.py
@@ -18,7 +18,6 @@
 #  $Id:$ 
 #
 
-import sys
 import os 
 import string
 import logging
@@ -88,9 +87,9 @@
 	def delReservation (self, userId):
 		raise NotImplementedError
 	
-	def defineReservation():
+	def defineReservation(self):
 		raise NotImplementedError
 
-	def showReservation():
+	def showReservation(self):
 		raise NotImplementedError
 
diff --git a/src/zoni/data/reservationmanagementinterface.py b/src/zoni/data/reservationmanagementinterface.py
index 1f90bc7..620d15e 100644
--- a/src/zoni/data/reservationmanagementinterface.py
+++ b/src/zoni/data/reservationmanagementinterface.py
@@ -18,9 +18,6 @@
 #  $Id:$
 #
 
-import sys
-import os 
-
 
 class ReservationManagementInterface(object):
 	"""  Interface description for reservation management
@@ -50,9 +47,9 @@
 	def delReservation (self, userId):
 		raise NotImplementedError
 	
-	def defineReservation():
+	def defineReservation(self):
 		raise NotImplementedError
 
-	def showReservation():
+	def showReservation(self):
 		raise NotImplementedError
 
diff --git a/src/zoni/data/resourcequerysql.py b/src/zoni/data/resourcequerysql.py
index 2beafd4..4ea3ed3 100644
--- a/src/zoni/data/resourcequerysql.py
+++ b/src/zoni/data/resourcequerysql.py
@@ -22,16 +22,13 @@
 import sys
 import string
 import MySQLdb
-import subprocess
 import traceback
 import logging
-import threading
 import time
 
 import usermanagement
 from zoni.data.infostore import InfoStore
-from zoni.extra.util import checkSuper, createKey
-from zoni.agents.dhcpdns import DhcpDns
+from zoni.extra.util import createKey
 
 class ResourceQuerySql(InfoStore):
 	def __init__(self, config, verbose=None):
@@ -80,7 +77,7 @@
 		vlans = []
 		for val in vlanInfo.split(","):
 			try:
-				ret = self.getVlanId(val.split(":")[0])
+				__ret = self.getVlanId(val.split(":")[0])
 				vlans.append(val)
 			except Exception, e:
 				print e
@@ -96,7 +93,7 @@
 		domainKey = createKey(name)
 		query = "insert into domaininfo (domain_name, domain_desc, domain_key) values ('%s','%s', '%s')" % (name, desc, domainKey)
 		try:
-			result = self.insertDb(query)
+			__result = self.insertDb(query)
 			mesg = "Adding domain %s(%s)" % (name, desc)
 			self.log.info(mesg)
 		except Exception, e:
@@ -112,7 +109,7 @@
 			vlanType = i.split(":")[1]
 			query = "insert into domainmembermap values (%d, %d, '%s')" % (domainId, vlanId, vlanType)
 			try:
-				result = self.insertDb(query)
+				__result = self.insertDb(query)
 			except Exception, e:
 				print e
 		
@@ -133,16 +130,16 @@
 		mesg = "Removing domain %s" % (name)
 		self.log.info(mesg)
 		query = "delete from domaininfo where domain_name = '%s'" % (name)
-		result = self.__deleteDb(query)
+		__result = self.__deleteDb(query)
 		#  Need to remove any vlans attached to this domain
 		query = "delete from domainmembermap where domain_id = '%s'" % (domainId)
-		result = self.__deleteDb(query)
+		__result = self.__deleteDb(query)
 
 	def showDomains(self):
 		usermgt = eval("usermanagement.%s" % (self.config['userManagement']) + "()")
 		query = "select r.reservation_id, r.user_id, d.domain_name, d.domain_desc from domaininfo d, allocationinfo a, reservationinfo r where d.domain_id = a.domain_id and a.reservation_id = r.reservation_id"
 		result = self.selectDb(query)
-		desc = result.description
+		#desc = result.description
 		if result.rowcount > 0:
 			print "%s\t%s\t%s\t%s" % (result.description[0][0], result.description[1][0], result.description[2][0], result.description[3][0])
 			print "------------------------------------------------------------"
@@ -173,7 +170,7 @@
 			return -1
 		query = "insert into vlaninfo (vlan_num, vlan_desc) values ('%s','%s')" % (vnumber, desc)
 		try:
-			result = self.insertDb(query)
+			__result = self.insertDb(query)
 			mesg = "Adding vlan %s(%s)" % (vnumber, desc)
 			self.log.info(mesg)
 		except Exception, e:
@@ -290,8 +287,6 @@
 		
 
 	def showResources(self, cmdargs):
-
-		queryopt = ""
 		defaultFields = "mac_addr, location, num_procs, num_cores, clock_speed, mem_total "
 		#defaultFields = "*"
 		
@@ -315,20 +310,21 @@
 		#  header
 		print line
 
-		sum = {}
+		_sum = {}
 		for row in result.fetchall():
 			line = ""
-			sum['totProc'] = sum.get('totProc', 0)
-			sum['totProc'] += int(row[2])
-			sum['totCores'] = sum.get('totCores', 0)
-			sum['totCores'] += int(row[3])
-			sum['totMemory'] = sum.get('totMemory', 0)
-			sum['totMemory'] += int(row[5])
+			_sum['totProc'] = _sum.get('totProc', 0)
+			_sum['totProc'] += int(row[2])
+			_sum['totCores'] = _sum.get('totCores', 0)
+			_sum['totCores'] += int(row[3])
+			_sum['totMemory'] = _sum.get('totMemory', 0)
+			_sum['totMemory'] += int(row[5])
 			for val in row:
 				line += str(val).center(20)
 			
 			print line
-		print "\n%s systems registered -  %d procs | %d cores | %d bytes RAM" % (str(result.rowcount), sum['totProc'], sum['totCores'], sum['totMemory'],)
+		print "\n%s systems registered -  %d procs | %d cores | %d bytes RAM" % \
+			(str(result.rowcount), _sum['totProc'], _sum['totCores'], _sum['totMemory'],)
 
 	def getAvailableResources(self):
 		#  Maybe should add a status flag?
@@ -513,7 +509,7 @@
 		result = self.selectDb(query)
 		
 		print "NODE ALLOCATION\n"
-		sum = {}
+		_sum = {}
 		if self.verbose:
 			print "%-5s%-10s%-10s%-10s%-13s%-12s%-10s%-34s%-20s%s" % ("Res", "User", "Host", "Domain", "Cores/Mem","Expiration", "Hostname", "Boot Image Name", "Vlan Member", "Notes")
 		else:
@@ -538,10 +534,10 @@
 			allocation_id = i[11]
 			userName = usermgt.getUserName(uid)
 			combined_notes = str(rnotes) + "|" + str(anotes)
-			sum['totCores'] = sum.get('totCores', 0)
-			sum['totCores'] += cores
-			sum['totMemory'] = sum.get('totMemory', 0)
-			sum['totMemory'] += memory
+			_sum['totCores'] = _sum.get('totCores', 0)
+			_sum['totCores'] += cores
+			_sum['totMemory'] = _sum.get('totMemory', 0)
+			_sum['totMemory'] += memory
 			if self.verbose:
 				query = "select v.vlan_num, m.vlan_type from vlaninfo v, vlanmembermap m where v.vlan_id = m.vlan_id and allocation_id = '%d' order by vlan_num asc" % allocation_id
 				vlanRes = self.selectDb(query)
@@ -555,7 +551,7 @@
 				print "%-5s%-10s%-10s%-10s%-2s/%-10s%-12s%-10s%-34s%-20s%s" % (resId, userName, host, domain, cores, memory,expire, hostname, image_name, vlanMember,combined_notes)
 			else:
 				print "%-10s%-10s%-10s%-2s/%-10s%-12s%s" % (userName, host, domain, cores, memory,expire, combined_notes)
-		print "\n%s systems allocated - %d cores| %d bytes RAM" % (str(result.rowcount), sum['totCores'], sum['totMemory'])
+		print "\n%s systems allocated - %d cores| %d bytes RAM" % (str(result.rowcount), _sum['totCores'], _sum['totMemory'])
 
 	def showReservation(self, userId=None):
 		#from IPython.Shell import IPShellEmbed
@@ -617,7 +613,7 @@
 		query = "select image_name from imageinfo"
 		result = self.selectDb(query)
 		row = result.fetchall()
-		desc = result.description
+		#desc = result.description
 
 		imagelist = []
 		for i in row:
@@ -630,7 +626,7 @@
 		query = "select image_name, dist, dist_ver  from imageinfo"
 		result = self.selectDb(query)
 		row = result.fetchall()
-		desc = result.description
+		#desc = result.description
 
 		for i in row:
 			print i
@@ -805,7 +801,7 @@
 			cursor.execute (query)
 			self.conn.commit()
 			row = cursor.fetchall()
-			desc = cursor.description
+			#desc = cursor.description
 		except MySQLdb.OperationalError, e:
 			msg = "%s : %s" % (e[1], query)
 			self.log.error(msg)
@@ -886,12 +882,12 @@
 		return cursor
 
 
-	def updateReservation (self, reservationId, userId=None, reservationDuration=None, vlanIsolate=None, allocationNotes=None):
+	def updateReservation (self, reservationId, userId=None, resDuration=None, vlanIsolate=None, allocationNotes=None):
 
 		mesg = "Updating reservation %s" % (str(reservationId))
 		self.log.info(mesg)
 
-		if reservationDuration:
+		if resDuration:
 			if len(resDuration) == 8:
 				expireDate = resDuration
 			elif len(resDuration) < 4:
@@ -906,7 +902,7 @@
 
 			mesg = "Updating reservationDuration :" + resDuration
 			self.log.info(mesg)
-			query = "update reservationinfo set reservation_exiration = \"" + expireDate_ + "\" where reservation_id = \"" + str(reservationId) + "\""
+			query = "update reservationinfo set reservation_expiration = \"" + expireDate + "\" where reservation_id = \"" + str(reservationId) + "\""
 			self.__updateDb(query)
 
 		if allocationNotes:
@@ -1062,7 +1058,7 @@
 		vId = self.getVlanId(v)
 		query = "delete from vlanmembermap where allocation_id = '%s' and vlan_id = '%s'" % (allocationId, vId)
 
-		result = self.insertDb(query)
+		__result = self.insertDb(query)
 		mesg = "Removing vlan %s from node %s" % (v, nodeName)
 		self.log.info(mesg)
 
@@ -1153,7 +1149,7 @@
 		#  imagemap db should be sys_id instead of mac_addr
 		#  change later
 
-		cur_image = host['pxe_image_name']
+		#cur_image = host['pxe_image_name']
 		#  Get the id of the new image
 		query = "select image_id from imageinfo where image_name = " + "\"" + image + "\""
 		row = self.__queryDb(query)
@@ -1234,7 +1230,7 @@
 		return cap
 
 	#  print out data in a consistent format
-	def __showIt(data):
+	def __showIt(self, data):
 		pass
 
 	
diff --git a/src/zoni/data/usermanagement.py b/src/zoni/data/usermanagement.py
index 4ccbd17..fad0c4e 100644
--- a/src/zoni/data/usermanagement.py
+++ b/src/zoni/data/usermanagement.py
@@ -18,7 +18,6 @@
 #  $Id$
 #
 
-import sys
 import os 
 
 from usermanagementinterface import UserManagementInterface
diff --git a/src/zoni/data/usermanagementinterface.py b/src/zoni/data/usermanagementinterface.py
index ec34331..c7386f1 100644
--- a/src/zoni/data/usermanagementinterface.py
+++ b/src/zoni/data/usermanagementinterface.py
@@ -18,9 +18,6 @@
 #  $Id$
 #
 
-import sys
-import os 
-
 
 class UserManagementInterface(object):
 	"""  Interface description for user management
diff --git a/src/zoni/extra/util.py b/src/zoni/extra/util.py
index 54fc6d0..0381823 100644
--- a/src/zoni/extra/util.py
+++ b/src/zoni/extra/util.py
@@ -19,6 +19,7 @@
 #
 
 import os
+import sys
 import string
 import ConfigParser
 import time
@@ -26,7 +27,6 @@
 import re
 import threading
 import subprocess
-import logging
 
 def loadConfigFile(parser):
 	#parser = ConfigParser.ConfigParser()
@@ -222,32 +222,43 @@
 	f.close()
 	return val
 	
-
-
+def __getShellFn():
+	try:
+		from IPython.Shell import IPShellEmbed
+		return (1, IPShellEmbed)
+	except ImportError:
+		import IPython
+		return (2, IPython.embed)
 
 def debugConsole(globalDict):
 	"""A debugging console that optionally uses pysh"""
 	def realDebugConsole(globalDict):
 		try :
 			import atexit
-			from IPython.Shell import IPShellEmbed
+			(calltype, shellfn) = __getShellFn()
 			def resetConsole():
 # XXXpipe: make input window sane
-				(stdin, stdout) = os.popen2("reset")
+				(__stdin, stdout) = os.popen2("reset")
 				stdout.read()
-			dbgshell = IPShellEmbed()
 			atexit.register(resetConsole)
-			dbgshell(local_ns=globalDict, global_ns=globalDict)
-		except Exception:
+			if calltype == 1:
+				dbgshell=shellfn(user_ns=globalDict)
+				dbgshell()
+			elif calltype == 2:
+				dbgshell=shellfn
+				dbgshell(user_ns=globalDict)
+		except Exception:
 			CONSOLE_TEXT=">>> "
-			input = " "
-			while (input != ""):
+			inputline = " " 
+			while (inputline != ""):
 				sys.stdout.write(CONSOLE_TEXT)
-				input = sys.stdin.readline()
+				inputline = sys.stdin.readline()
 				try:
-					exec(input) in globalDict
+					exec(inputline) in globalDict
 				except Exception, e:
 					sys.stdout.write(str(e) + "\n")
+
+		os._exit(0)
+
 	if (os.getenv("DEBUG", "0") == "1"):
 		threading.Thread(target=lambda: realDebugConsole(globalDict)).start()
-
diff --git a/src/zoni/hardware/apcswitchedrackpdu.py b/src/zoni/hardware/apcswitchedrackpdu.py
index 7c8ba9d..cf06725 100644
--- a/src/zoni/hardware/apcswitchedrackpdu.py
+++ b/src/zoni/hardware/apcswitchedrackpdu.py
@@ -18,8 +18,6 @@
 #  $Id$
 #
 
-import sys
-import os 
 import warnings
 warnings.filterwarnings("ignore")
 
@@ -46,7 +44,7 @@
 
 	def getPowerStatus(self):
 		thisoid = eval(str(self.oid_status) + "," + str(self.port))
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', self.user, 0), \
 		cmdgen.UdpTransportTarget((self.pdu_name, 161)), thisoid)
 		output = varBinds[0][1]
@@ -81,7 +79,7 @@
 
 	def powerOn(self):
 		thisoid = eval(str(self.oid_status) + "," + str(self.port)) 
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().setCmd( \
+		__errorIndication, __errorStatus, __errorIndex, __varBinds = cmdgen.CommandGenerator().setCmd( \
 		cmdgen.CommunityData('my-agent', self.user, 1), \
 		cmdgen.UdpTransportTarget((self.pdu_name, 161)), \
 		(thisoid, rfc1902.Integer('1')))
@@ -89,7 +87,7 @@
 
 	def powerOff(self):
 		thisoid = eval(str(self.oid_status) + "," + str(self.port)) 
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().setCmd( \
+		__errorIndication, __errorStatus, __errorIndex, __varBinds = cmdgen.CommandGenerator().setCmd( \
 		cmdgen.CommunityData('my-agent', self.user, 1), \
 		cmdgen.UdpTransportTarget((self.pdu_name, 161)), \
 		(thisoid, rfc1902.Integer('2')))
diff --git a/src/zoni/hardware/delldrac.py b/src/zoni/hardware/delldrac.py
index 7cb189f..7560666 100644
--- a/src/zoni/hardware/delldrac.py
+++ b/src/zoni/hardware/delldrac.py
@@ -19,14 +19,13 @@
 #
 
 import sys
-import os 
 import pexpect
 import time
 import logging
 import tempfile
 
 from systemmanagementinterface import SystemManagementInterface
-from zoni.extra.util import timeF, log
+from zoni.extra.util import timeF
 
 
 class dellDrac(SystemManagementInterface):
@@ -116,19 +115,19 @@
 			
 		child = self.__login()
 		child.logfile = fout
-		cmd = "racadm serveraction -m " +  self.server + " powerup"
+		cmd = "racadm serveraction -m %s powerup" % (self.server)
 		child.sendline(cmd)
-		i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+		__i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
 		fout.seek(0)
-		self.log.info("Hardware power on : %s", self.hostname)
+		self.log.info("Hardware power on : %s", self.hostname)
 		for val in fout.readlines():
 			if "OK" in val:
 				code = 1 
 			if "ALREADY POWER-ON" in val:
 				code = 1 
-				self.log.info("Hardware already powered on : %s", self.hostname)
+				self.log.info("Hardware already powered on : %s", self.hostname)
 		if code < 1:
-			self.log.info("Hardware power on failed : %s", self.hostname)
+			self.log.info("Hardware power on failed : %s", self.hostname)
 		fout.close()
 		child.terminate()
 		return code
@@ -139,15 +138,15 @@
 		fout = tempfile.TemporaryFile()
 		child = self.__login()
 		child.logfile = fout
-		cmd = "racadm serveraction -m " + self.server + " powerdown"
+		cmd = "racadm serveraction -m %s powerdown" % (self.server)
 		child.sendline(cmd)
-		i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+		__i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
 		fout.seek(0)
+		self.log.info("Hardware power off : %s", self.hostname)
+		self.log.info("Hardware power off : %s" % self.hostname)
 		for val in fout.readlines():
 			if "OK" in val:
 				code = 1
- 			if "CURRENTLY POWER-OFF" in val:
+			if "CURRENTLY POWER-OFF" in val:
 				self.log.info("Hardware already power off : %s", self.hostname)
 				code = 1
 		if code < 1:
@@ -164,14 +163,14 @@
 		child.logfile = fout
 		cmd = "racadm serveraction -m " + self.server + " graceshutdown"
 		child.sendline(cmd)
-		i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+		__i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
 		fout.seek(0)
 		self.log.info("Hardware power off (soft): %s", self.hostname)
 
 		for val in fout.readlines():
 			if "OK" in val:
 				code = 1
- 			if "CURRENTLY POWER-OFF" in val:
+			if "CURRENTLY POWER-OFF" in val:
 				self.log.info("Hardware already power off : %s", self.hostname)
 				code = 1
 		if code < 1:
@@ -188,7 +187,7 @@
 		child.logfile = fout
 		cmd = "racadm serveraction -m " + self.server + " powercycle"
 		child.sendline(cmd)
-		i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+		__i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
 		fout.seek(0)
 		self.log.info("Hardware power cycle : %s", self.hostname)
 		for val in fout.readlines():
@@ -208,7 +207,7 @@
 		child.logfile = fout
 		cmd = "racadm serveraction -m " + self.server + " hardreset"
 		child.sendline(cmd)
-		i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+		__i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
 		fout.seek(0)
 		for val in fout.readlines():
 			if "OK" in val:
@@ -225,5 +224,5 @@
 		child = self.__login()
 		cmd = "connect -F " + self.server
 		child.sendline(cmd)
-		i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+		__i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
 		child.terminate()
diff --git a/src/zoni/hardware/dellswitch.py b/src/zoni/hardware/dellswitch.py
index 0ddf8aa..63f8b30 100644
--- a/src/zoni/hardware/dellswitch.py
+++ b/src/zoni/hardware/dellswitch.py
@@ -18,24 +18,22 @@
 #  $Id$
 #
 
-import os
-import sys
+
 import pexpect
 import datetime
-import time
 import thread
-import string
-import getpass
 import socket
 import tempfile
+import os
 import logging
+import sys
+import time
+import string
 
-#import zoni
-from zoni.data.resourcequerysql import *
 from zoni.hardware.hwswitchinterface import HwSwitchInterface
 from zoni.data.resourcequerysql import ResourceQuerySql
 from zoni.agents.dhcpdns import DhcpDns
-from zoni.extra.util import *
+from zoni.extra.util import normalizeMac
 
 
 '''  Using pexpect to control switches because couldn't get snmp to work 
@@ -54,7 +52,7 @@
 			pass
 
 
- 	def setVerbose(self, verbose):
+	def setVerbose(self, verbose):
 		self.verbose = verbose
 
 	def __login(self):
@@ -261,10 +259,10 @@
 				i=child.expect(['console','#', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
 				i=child.expect(['console','#', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
 				
-			except EOF:
+			except pexpect.EOF:
 				print "EOF", i
 				#child.sendline()
-			except TIMEOUT:
+			except pexpect.TIMEOUT:
 				print "TIMEOUT", i
 		#child.interact(escape_character='\x1d', input_filter=None, output_filter=None)
 
@@ -463,16 +461,16 @@
 		child = self.__login()
 		cmd = "copy running-config startup-config"
 		child.sendline(cmd)
-		i = child.expect(['y/n', pexpect.EOF, pexpect.TIMEOUT])
+		__i = child.expect(['y/n', pexpect.EOF, pexpect.TIMEOUT])
 		child.sendline("y")
 		child.terminate()
 
-	def __saveConfig(self):
-		cmd = "copy running-config startup-config"
-		child.sendline(cmd)
-		i = child.expect(['y/n', pexpect.EOF, pexpect.TIMEOUT])
-		child.sendline("y")
-		child.terminate()
+#	def __saveConfig(self):
+#		cmd = "copy running-config startup-config"
+#		child.sendline(cmd)
+#		__i = child.expect(['y/n', pexpect.EOF, pexpect.TIMEOUT])
+#		child.sendline("y")
+#		child.terminate()
 
 	
 	def registerToZoni(self, user, password, host):
@@ -511,7 +509,7 @@
 		child.sendline(cmd)
 		val = host + "#"
 		tval = host + ">"
-		i = child.expect([val, tval, '\n\r\n\r', "--More--",  pexpect.EOF, pexpect.TIMEOUT])
+		__i = child.expect([val, tval, '\n\r\n\r', "--More--",  pexpect.EOF, pexpect.TIMEOUT])
 		cmd = "show version"
 		child.sendline(cmd)
 		i = child.expect([val, tval, '\n\r\n\r', pexpect.EOF, pexpect.TIMEOUT])
@@ -547,19 +545,19 @@
 
 		user = "public"
 		oid = eval("1,3,6,1,4,1,674,10895,3000,1,2,100,1,0")
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', user, 0), \
 		cmdgen.UdpTransportTarget((host, 161)), oid)
 		a['hw_model'] = str(varBinds[0][1])
 
 		oid = eval("1,3,6,1,4,1,674,10895,3000,1,2,100,3,0")
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', user, 0), \
 		cmdgen.UdpTransportTarget((host, 161)), oid)
 		a['hw_make'] = str(varBinds[0][1])
 
 		oid = eval("1,3,6,1,4,1,674,10895,3000,1,2,100,4,0")
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', user, 0), \
 		cmdgen.UdpTransportTarget((host, 161)), oid)
 		a['hw_version_sw'] = str(varBinds[0][1])
diff --git a/src/zoni/hardware/f10s50switch.py b/src/zoni/hardware/f10s50switch.py
index 81c08ff..2641274 100644
--- a/src/zoni/hardware/f10s50switch.py
+++ b/src/zoni/hardware/f10s50switch.py
@@ -26,15 +26,13 @@
 import time
 import thread
 import string
-import getpass
 import socket
 import tempfile
 import logging
 
 #import zoni
-from zoni.data.resourcequerysql import *
-from zoni.hardware.hwswitchinterface import HwSwitchInterface
 from zoni.data.resourcequerysql import ResourceQuerySql
+from zoni.hardware.hwswitchinterface import HwSwitchInterface
 from zoni.agents.dhcpdns import DhcpDns
 
 
@@ -49,7 +47,7 @@
 		self.log = logging.getLogger(os.path.basename(__file__))
 
 
- 	def setVerbose(self, verbose):
+	def setVerbose(self, verbose):
 		self.verbose = verbose
 
 	def __login(self):
@@ -138,7 +136,7 @@
 		child.expect(["conf-if", pexpect.EOF])
 		child.sendline("switchport")
 		child.sendline("exit")
-		child.sendline("interface vlan " + vlan")
+		child.sendline("interface vlan %s" % vlan)
 		child.expect(["conf-if", pexpect.EOF])
 		cmd = "tagged port-channel 1"
 		child.sendline(cmd)
@@ -214,10 +212,10 @@
 				i=child.expect(['console','#', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
 				i=child.expect(['console','#', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
 				
-			except EOF:
+			except pexpect.EOF:
 				print "EOF", i
 				#child.sendline()
-			except TIMEOUT:
+			except pexpect.TIMEOUT:
 				print "TIMEOUT", i
 		#child.interact(escape_character='\x1d', input_filter=None, output_filter=None)
 
@@ -237,7 +235,7 @@
 		child = self.__login()
 		child.logfile = sys.stdout
 		child.sendline('config')
-		cmd = "interface vlan " + vlan)
+		cmd = "interface vlan %s" % (vlan)
 		child.sendline(cmd)
 		i=child.expect(['conf-if', pexpect.EOF, pexpect.TIMEOUT])
 		if i > 0:
@@ -270,7 +268,7 @@
 		child.logfile = sys.stdout
 		cmd = "show interfaces g 0/" + str(self.host['hw_port'])
 		child.sendline(cmd)
-		i = child.expect(['#', pexpect.EOF, pexpect.TIMEOUT])
+		__i = child.expect(['#', pexpect.EOF, pexpect.TIMEOUT])
 		child.terminate()
 
 	def interactiveSwitchConfig(self):
@@ -374,12 +372,12 @@
 
 		user = "public"
 		oid = eval("1,3,6,1,4,1,674,10895,3000,1,2,100,1,0")
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', user, 0), \
 		cmdgen.UdpTransportTarget((host, 161)), oid)
 		a['hw_model'] = str(varBinds[0][1])
 		oid = eval("1,3,6,1,4,1,674,10895,3000,1,2,100,3,0")
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', user, 0), \
 		cmdgen.UdpTransportTarget((host, 161)), oid)
 		a['hw_make'] = str(varBinds[0][1])
diff --git a/src/zoni/hardware/hpilo.py b/src/zoni/hardware/hpilo.py
index 9523b4b..b02a1ec 100644
--- a/src/zoni/hardware/hpilo.py
+++ b/src/zoni/hardware/hpilo.py
@@ -19,7 +19,6 @@
 #
 
 import sys
-import os 
 import pexpect
 import time
 
@@ -29,6 +28,26 @@
 #XXX  Need to add more error checking!
 #XXX  Need to consider difference in responses between a rackmount server and a blade server - MIMOS
 
+def log(f):
+	def myF(*args, **kw):
+		print "calling %s%s" % (f.__name__, str(args))
+		res = f(*args, **kw)
+		print "returning from %s -> %s" % (f.__name__, str(res))
+		return res
+	myF.__name__ = f.__name__
+	return myF
+
+def timeF(f):
+	def myF(*args, **kw):
+		start = time.time()
+		res = f(*args, **kw)
+		end = time.time()
+		print "%s took %f" % (f.__name__, end-start)
+		return res
+	myF.__name__ = f.__name__
+	return myF
+
+
 class hpILo(SystemManagementInterface):
 	def __init__(self, config, nodeName, hostInfo):
 		self.config = config
diff --git a/src/zoni/hardware/hpswitch.py b/src/zoni/hardware/hpswitch.py
index ada83b9..cfb7195 100644
--- a/src/zoni/hardware/hpswitch.py
+++ b/src/zoni/hardware/hpswitch.py
@@ -25,13 +25,11 @@
 import pexpect
 import datetime
 import thread
-import time
 import threading
 import logging
 
 
 from hwswitchinterface import HwSwitchInterface
-from resourcequerysql import ResourceQuerySql
 
 
 class HwHPSwitch(HwSwitchInterface):
@@ -74,10 +72,10 @@
 		child.sendline(cmd)
 		opt = child.expect(["Confirm(.*)", "No save(.*)", pexpect.EOF, pexpect.TIMEOUT])
 		if opt == 0:
-				print "saving to flash"
-				child.sendline("y\n")
+			print "saving to flash"
+			child.sendline("y\n")
 		if opt == 1:
-				print "no save needed"
+			print "no save needed"
 		child.sendline('exit')
 		child.terminate()
 
@@ -169,10 +167,10 @@
 				i=child.expect(['console','sw', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
 				i=child.expect(['console','sw', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
 
-			except EOF:
+			except pexpect.EOF:
 				print "EOF", i
 				#child.sendline()
-			except TIMEOUT:
+			except pexpect.TIMEOUT:
 				print "TIMEOUT", i
 		#child.interact(escape_character='\x1d', input_filter=None, output_filter=None)
 
@@ -245,7 +243,7 @@
 		cmd = "/info/port " + str(self.host['hw_port'])
 		child.sendline(cmd)
 		child.logfile = sys.stdout
-		opt = child.expect(['Info(.*)', pexpect.EOF, pexpect.TIMEOUT])
+		__opt = child.expect(['Info(.*)', pexpect.EOF, pexpect.TIMEOUT])
 
 	#  this needs to be removed or rewritten
 	def interactiveSwitchConfig(self):
diff --git a/src/zoni/hardware/hwswitchinterface.py b/src/zoni/hardware/hwswitchinterface.py
index d0ed99a..bdcdb51 100644
--- a/src/zoni/hardware/hwswitchinterface.py
+++ b/src/zoni/hardware/hwswitchinterface.py
@@ -18,8 +18,6 @@
 #  $Id$
 #
 
-import sys
-import os 
 
 class HwSwitchInterface(object):
 	"""  Interface description for hardware switches
diff --git a/src/zoni/hardware/ipmi.py b/src/zoni/hardware/ipmi.py
index 649b8a2..abc4bf0 100644
--- a/src/zoni/hardware/ipmi.py
+++ b/src/zoni/hardware/ipmi.py
@@ -18,11 +18,8 @@
 #  $Id$
 #
 
-import sys
-import os 
 import subprocess
 import logging
-import string
 
 from systemmanagementinterface import SystemManagementInterface
 
diff --git a/src/zoni/hardware/raritanpdu.py b/src/zoni/hardware/raritanpdu.py
index 3d534b7..7cb3961 100644
--- a/src/zoni/hardware/raritanpdu.py
+++ b/src/zoni/hardware/raritanpdu.py
@@ -18,19 +18,19 @@
 #  $Id$
 #
 
-import sys
-import os 
-import string
 import warnings
 import logging
+import string
+import sys
 import time
+
 warnings.filterwarnings("ignore")
 
 from pysnmp.entity.rfc3413.oneliner import cmdgen
 from pysnmp.proto import rfc1902
-from zoni.data.resourcequerysql import *
+from zoni.data.resourcequerysql import ResourceQuerySql
 from zoni.hardware.systemmanagementinterface import SystemManagementInterface
-
+from zoni.agents.dhcpdns import DhcpDns
 
 #class systemmagement():
 	#def __init__(self, proto):
@@ -90,7 +90,7 @@
 	'''
 	def getOffset(self):
 		thisoid = eval(str(self.oid) + str(self.oid_status) + "," + str(0))
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', self.user, 0), \
 		cmdgen.UdpTransportTarget((self.pdu_name, 161)), thisoid)
 		output = varBinds[0][1]
@@ -102,7 +102,7 @@
 
 	def __setPowerStatus(self):
 		thisoid = eval(str(self.oid) + str(self.oid_status) + "," + str(self.port))
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', self.user, 0), \
 		cmdgen.UdpTransportTarget((self.pdu_name, 161)), thisoid)
 		output = varBinds[0][1]
@@ -134,7 +134,7 @@
 
 	def powerOn(self):
 		thisoid = eval(str(self.oid) + str(self.oid_status) + "," + str(self.port)) 
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().setCmd( \
+		__errorIndication, __errorStatus, __errorIndex, __varBinds = cmdgen.CommandGenerator().setCmd( \
 		cmdgen.CommunityData('my-agent', self.user, 1), \
 		cmdgen.UdpTransportTarget((self.pdu_name, 161)), \
 		(thisoid, rfc1902.Integer('1')))
@@ -142,7 +142,7 @@
 
 	def powerOff(self):
 		thisoid = eval(str(self.oid) + str(self.oid_status) + "," + str(self.port)) 
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().setCmd( \
+		__errorIndication, __errorStatus, __errorIndex, __varBinds = cmdgen.CommandGenerator().setCmd( \
 		cmdgen.CommunityData('my-agent', self.user, 1), \
 		cmdgen.UdpTransportTarget((self.pdu_name, 161)), \
 		(thisoid, rfc1902.Integer('0')))
@@ -181,7 +181,7 @@
 
 		a={}
 		oid = eval(str("1,3,6,1,2,1,1,1,0"))
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', user, 0), \
 		cmdgen.UdpTransportTarget((host, 161)), oid)
 
@@ -193,7 +193,7 @@
 		a['hw_make'] = str(varBinds[0][1])
 
 		oid = eval("1,3,6,1,4,1,13742,4,1,1,6,0")
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', user, 0), \
 		cmdgen.UdpTransportTarget((host, 161)), oid)
 		x = []
@@ -204,7 +204,7 @@
 		a['hw_mac'] = ":".join(['%s' % d for d in x])
 
 		oid = eval("1,3,6,1,4,1,13742,4,1,1,2,0")
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', user, 0), \
 		cmdgen.UdpTransportTarget((host, 161)), oid)
 		serial = str(varBinds[0][1])
@@ -214,13 +214,13 @@
 		a['hw_notes'] = val + "; Serial " + serial
 
 		oid = eval("1,3,6,1,4,1,13742,4,1,1,1,0")
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', user, 0), \
 		cmdgen.UdpTransportTarget((host, 161)), oid)
 		a['hw_version_fw'] = str(varBinds[0][1])
 
 		oid = eval("1,3,6,1,4,1,13742,4,1,1,12,0")
-		errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+		__errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
 		cmdgen.CommunityData('my-agent', user, 0), \
 		cmdgen.UdpTransportTarget((host, 161)), oid)
 		a['hw_model'] = str(varBinds[0][1])
diff --git a/src/zoni/hardware/systemmanagement.py b/src/zoni/hardware/systemmanagement.py
index 3333a3f..873e1bf 100644
--- a/src/zoni/hardware/systemmanagement.py
+++ b/src/zoni/hardware/systemmanagement.py
@@ -19,13 +19,11 @@
 #  $Id$  
 #
 
-import sys
-import os 
 import logging
 import threading
+import time
 
 from systemmanagementinterface import SystemManagementInterface
-from zoni.data.resourcequerysql import *
 
 from tashi.util import instantiateImplementation
 
@@ -41,7 +39,7 @@
 
 
 	def getInfo(self, nodeName):
-		self.host = self.data.getHostInfo(node)
+		self.host = self.data.getHostInfo(nodeName)
 		
 		
 	def setVerbose(self, verbose):
@@ -65,7 +63,7 @@
 		#  [2] = hw method password
 		success = 0
 		for i in hw:
-			inst = instantiateImplementation(self.config['hardwareControl'][i[0]]['class'], self.config, nodeName, self.host)
+			__inst = instantiateImplementation(self.config['hardwareControl'][i[0]]['class'], self.config, nodeName, self.host)
 			a = "inst.%s" % mycmd
 			for count in range(retries):
 				doit = eval(a)
@@ -89,7 +87,7 @@
 	def softPowerConfirm(self, method, nodeName):
 		#  using a sleep for now...
 		time.sleep(30)
-		inst = instantiateImplementation(self.config['hardwareControl'][method]['class'], self.config, nodeName, self.host)
+		__inst = instantiateImplementation(self.config['hardwareControl'][method]['class'], self.config, nodeName, self.host)
 		mycmd = "%s()" % ("powerOff")
 		a = "inst.%s" % mycmd
 		doit = eval(a)
diff --git a/src/zoni/hardware/systemmanagementinterface.py b/src/zoni/hardware/systemmanagementinterface.py
index 6d50a09..0d7901b 100644
--- a/src/zoni/hardware/systemmanagementinterface.py
+++ b/src/zoni/hardware/systemmanagementinterface.py
@@ -18,9 +18,6 @@
 #  $Id$
 #
 
-import sys
-import os 
-
 
 class SystemManagementInterface(object):
 	"""  Interface description for hardware management controllers 
diff --git a/src/zoni/install/db/zoniDbSetup.py b/src/zoni/install/db/zoniDbSetup.py
index 88998b4..5049046 100644
--- a/src/zoni/install/db/zoniDbSetup.py
+++ b/src/zoni/install/db/zoniDbSetup.py
@@ -20,15 +20,13 @@
 
 import os 
 import sys
-import string
 try:
 	import MySQLdb
-	import traceback
 	import optparse
 	import getpass
 except ImportError, e:
-        print "Module not installed : %s" % e
-        exit()
+	print "Module not installed : %s" % e
+	exit()
 
 
 a = os.path.join("../")
@@ -38,8 +36,8 @@
 a = os.path.join("../../..")
 sys.path.append(a)
 
-from zoni.version import *
-from zoni.extra.util import *
+from zoni.version import version, revision
+from zoni.extra.util import getConfig
 
 
 def main():
@@ -53,7 +51,7 @@
 	parser.add_option("-u", "--userName", "--username", dest="userName", help="Mysql username")
 	parser.add_option("-p", "--password", dest="password", help="Admin mysql password")
 	#parser.add_option("-v", "--verbose", dest="verbosity", help="Be verbose", action="store_true", default=False)
-	(options, args) = parser.parse_args()
+	(options, __args) = parser.parse_args()
 
 	if not options.userName:
 		parser.print_help()
@@ -63,7 +61,7 @@
 	if not options.password:
 		password = getpass.getpass()
 
-	(configs, configFiles) = getConfig()
+	(configs, __configFiles) = getConfig()
 
 	CreateZoniDb(configs, options.userName, password)
 
@@ -174,7 +172,7 @@
 	sys.stdout.write("    Creating sysdomainmembermap...")
 	execQuery(conn, "CREATE TABLE IF NOT EXISTS `sysdomainmembermap` (`sys_id` int(11) unsigned NOT NULL, `domain_id` int(11) NOT NULL)")
 	sys.stdout.write("Success\n")
- 	#  Create allocationinfo
+	#  Create allocationinfo
 	sys.stdout.write("    Creating allocationinfo...")
 	execQuery(conn, "CREATE TABLE IF NOT EXISTS `allocationinfo` ( `allocation_id` int(11) unsigned NOT NULL auto_increment, `sys_id` int(11) unsigned NOT NULL, `reservation_id` int(11) unsigned NOT NULL, `pool_id` int(11) unsigned NULL, `hostname` varchar(64) default NULL, `domain_id` int(11) unsigned NOT NULL, `notes` tinytext, `expire_time` timestamp default 0 NOT NULL, PRIMARY KEY  (`allocation_id`)) ENGINE=INNODB")
 	sys.stdout.write("Success\n")
@@ -224,10 +222,10 @@
 	if checkVal:
 		sys.stdout.write("    Kernel already exists in DB...\n")
 		#  Get the kernel_id
-		kernelId = str(checkVal[1][0][0])
+		#kernelId = str(checkVal[1][0][0])
 	else:
-		r = execQuery(conn, "INSERT into `kernelinfo` (kernel_name, kernel_release, kernel_arch) values ('linux-2.6.24-19-server', '2.6.24-19-server', 'x86_64' )")
-		kernelId = str(r.lastrowid)
+		__r = execQuery(conn, "INSERT into `kernelinfo` (kernel_name, kernel_release, kernel_arch) values ('linux-2.6.24-19-server', '2.6.24-19-server', 'x86_64' )")
+		#kernelId = str(r.lastrowid)
 		sys.stdout.write("    Success\n")
 
 	#  Initrd
@@ -325,7 +323,7 @@
 	if checkVal:
 		sys.stdout.write("Default Domain (ZoniHome) already linked to vlan " + config['zoniHomeDomain'] + "...\n")
 		#  Get the domainId 
-		valId = str(checkVal[1][0][0])
+		#valId = str(checkVal[1][0][0])
 	else:
 		r = execQuery(conn, "INSERT into `domainmembermap` (domain_id, vlan_id) values (" + domainId + ", " + vlanId + ")")
 		domainId = str(r.lastrowid)
@@ -358,7 +356,7 @@
 	if checkVal:
 		sys.stdout.write("Default pool (ZoniHome) already exists...\n")
 		#  Get the domainId 
-		poolId = str(checkVal[1][0][0])
+		#poolId = str(checkVal[1][0][0])
 	else:
 		r = execQuery(conn, "INSERT into `poolmap` (pool_id, vlan_id) values (" + zoniPoolId + ", " + vlanId + ")")
 		domainId = str(r.lastrowid)
@@ -370,7 +368,7 @@
 		sys.stdout.write("Default pool (ZoniHome) already exists...\n")
 		#  XXX probably should delete first then add, do it later
 		#  Get the domainId 
-		poolId = str(checkVal[1][0][0])
+		#poolId = str(checkVal[1][0][0])
 	else:
 		r = execQuery(conn, "INSERT into `poolmap` (pool_id, vlan_id) values (" + zoniIpmiId + ", " + vlanId + ")")
 		domainId = str(r.lastrowid)
@@ -406,7 +404,7 @@
 
 def entryExists(conn, table, col, checkVal):
 	query = "select * from " + table + " where " + col + " = '" + checkVal + "'"
- 	r = execQuery(conn, query)
+	r = execQuery(conn, query)
 	res = r.fetchall()
 	if len(res) > 0:
 		return (1, res)
@@ -415,5 +413,5 @@
 
 
 if __name__ == "__main__":
-    main()
+	main()
 
diff --git a/src/zoni/install/dnsdhcp/zoniDnsDhcpSetup.py b/src/zoni/install/dnsdhcp/zoniDnsDhcpSetup.py
index a68eb83..f4ba456 100755
--- a/src/zoni/install/dnsdhcp/zoniDnsDhcpSetup.py
+++ b/src/zoni/install/dnsdhcp/zoniDnsDhcpSetup.py
@@ -21,10 +21,7 @@
 
 import os 
 import sys
-import string
-import traceback
 import optparse
-import getpass
 
 a = os.path.join("../")
 sys.path.append(a)
@@ -33,8 +30,8 @@
 a = os.path.join("../../..")
 sys.path.append(a)
 
-from zoni.version import *
-from zoni.extra.util import *
+from zoni.version import version, revision
+from zoni.extra.util import createKey
 
 
 def main():
@@ -47,13 +44,13 @@
 	parser = optparse.OptionParser(usage="%prog -k keyname", version="%prog " + ver + " " + rev)
 	parser.add_option("-k", "--keyName", "--keyname", dest="keyName", help="Key name")
 	#parser.add_option("-v", "--verbose", dest="verbosity", help="Be verbose", action="store_true", default=False)
-	(options, args) = parser.parse_args()
+	(options, __args) = parser.parse_args()
 
 	if not options.keyName:
 		parser.print_help()
 		exit(1)
 
-	(configs, configFiles) = getConfig()
+	#(configs, configFiles) = getConfig()
 
 
 	key = createKey(options.keyName)
@@ -79,5 +76,5 @@
 
 
 if __name__ == "__main__":
-    main()
+	main()
 
diff --git a/src/zoni/install/pxe/zoniPxeSetup.py b/src/zoni/install/pxe/zoniPxeSetup.py
index dd46984..a9e06c8 100644
--- a/src/zoni/install/pxe/zoniPxeSetup.py
+++ b/src/zoni/install/pxe/zoniPxeSetup.py
@@ -21,9 +21,6 @@
 
 import os 
 import sys
-import string
-import traceback
-import optparse
 import shutil
 import urllib
 import tarfile
@@ -38,21 +35,21 @@
 a = os.path.join("../../..")
 sys.path.append(a)
 
-from zoni.extra.util import *
-from zoni.version import *
+from zoni.extra.util import getConfig, checkSuper, createDir
+#from zoni.version import version, revision
 from zoni.bootstrap.pxe import Pxe
 
 
 def main():
 	''' This file sets up PXE for Zoni '''
 
-	ver = version.split(" ")[0]
-	rev = revision
+	#ver = version.split(" ")[0]
+	#rev = revision
 
-	parser = optparse.OptionParser(usage="%prog ", version="%prog " + ver + " " + rev)
-	(options, args) = parser.parse_args()
+	#parser = optparse.OptionParser(usage="%prog ", version="%prog " + ver + " " + rev)
+	#(options, args) = parser.parse_args()
 
-	(configs, configFile) = getConfig()
+	(configs, __configFile) = getConfig()
 
 	ZoniPxeSetup(configs)
 	ZoniGetSyslinux(configs)
@@ -62,11 +59,11 @@
 	tftpRootDir = config['tftpRootDir']
 	tftpImageDir = config['tftpImageDir']
 	tftpBootOptionsDir = config['tftpBootOptionsDir']
-	tftpUpdateFile =  config['tftpUpdateFile'] 
+	#tftpUpdateFile =  config['tftpUpdateFile'] 
 	tftpBaseFile = config['tftpBaseFile'] 
 	tftpBaseMenuFile = config['tftpBaseMenuFile'] 
 	installBaseDir = config['installBaseDir']
-	registrationBaseDir = config['registrationBaseDir']
+	#registrationBaseDir = config['registrationBaseDir']
 	
 
 	#  Create the directory structure
diff --git a/src/zoni/install/www/zoniWebSetup.py b/src/zoni/install/www/zoniWebSetup.py
index 3b2fa1a..1eebf98 100644
--- a/src/zoni/install/www/zoniWebSetup.py
+++ b/src/zoni/install/www/zoniWebSetup.py
@@ -22,12 +22,8 @@
 import os 
 import sys
 import time
-import string
-import traceback
-import optparse
+#import optparse
 import shutil
-import urllib
-import tarfile
 
 a = os.path.join("../")
 sys.path.append(a)
@@ -36,21 +32,20 @@
 a = os.path.join("../../..")
 sys.path.append(a)
 
-from zoni.extra.util import *
-from zoni.version import *
-from zoni.bootstrap.pxe import Pxe
+from zoni.extra.util import getConfig, checkSuper, createDir
+#from zoni.version import version, revision
 
 
 def main():
 	''' This file sets up the web files for Zoni '''
 
-	ver = version.split(" ")[0]
-	rev = revision
+	#ver = version.split(" ")[0]
+	#rev = revision
 
-	parser = optparse.OptionParser(usage="%prog ", version="%prog " + ver + " " + rev)
-	(options, args) = parser.parse_args()
+	#parser = optparse.OptionParser(usage="%prog ", version="%prog " + ver + " " + rev)
+	#(options, args) = parser.parse_args()
 
-	(configs, configFiles) = getConfig()
+	(configs, __configFiles) = getConfig()
 
 	ZoniWebSetup(configs)
 	ZoniCreateWebConfigFile(configs)
diff --git a/src/zoni/services/pcvciservice.py b/src/zoni/services/pcvciservice.py
index 73ed819..9e9083f 100755
--- a/src/zoni/services/pcvciservice.py
+++ b/src/zoni/services/pcvciservice.py
@@ -19,7 +19,6 @@
 #  $Id$
 #
 
-import threading
 import logging
 
 from tashi.util import instantiateImplementation
@@ -49,10 +48,11 @@
 			
 	def requestResources(self, key, specs, quantity):
 		vcm = self.__key2vcm(key)
-		node = specs
+		#node = specs
 		'''  Check for keys later  '''
 		self.log.info("VCM_REQUEST_RESOURCE: VCM %s RESOURCE %s(%s)" % (vcm, specs, quantity))
-		# go to scheduler val = self.agent.requestResource(specs)
+		# go to scheduler
+		val = self.agent.requestResource(specs)
 		if val:
 			return 1
 		return 0
diff --git a/src/zoni/services/zonimanager.py b/src/zoni/services/zonimanager.py
index c43d05d..0eff740 100755
--- a/src/zoni/services/zonimanager.py
+++ b/src/zoni/services/zonimanager.py
@@ -20,18 +20,11 @@
 #
 
 import os
-import sys
-import threading
-import signal
 import logging.config
-import signal
 
-from tashi.util import instantiateImplementation, signalHandler
+from tashi.util import instantiateImplementation
 
-from zoni.extra.util import loadConfigFile, getConfig, debugConsole
-from zoni.version import *
-from zoni.services.hardwareservice import HardwareService
-from zoni.services.pcvciservice import pcmService
+from zoni.extra.util import getConfig, debugConsole
 from zoni.services.rpycservices import ManagerService
 
 from rpyc.utils.server import ThreadedServer
diff --git a/src/zoni/version.py b/src/zoni/version.py
index ea515d5..b34fd14 100644
--- a/src/zoni/version.py
+++ b/src/zoni/version.py
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.    
 
-id = "$Id: version.py 964467 2010-07-15 15:31:02Z rgass $"
+_id = "$Id: version.py 964467 2010-07-15 15:31:02Z rgass $"
 lastChangeDate = "$LastChangedDate$"
 lastChangeRevision = "$Rev: 964467 $"
 revision = lastChangeRevision